sidorenkoda
@sidorenkoda
Программист, верстальщик, руководитель проектов

Как заставить sphinx искать короткие слова (меньше 4-х символов)?

Настроил, работает
Если для поиска передаю /_sphinx/index.php?q=печка, то находит много результатов с учётом морфологии — всё супер.
Но если передаю /_sphinx/index.php?q=печ, то ничего не находит =)
Куда копать?

Кстати, запрос /_sphinx/index.php?q=п он находит, т. к. есть фраза "высотой 9 п.м."

Я пробовал параметры
#min_word_len = 1
#min_infix_len = 1
#enable_star = 0
#min_word_len = 1
#html_strip = 1
#min_infix_len = 1
#index_exact_words = 1
В различных комбинациях, не помогло =)

Вот конфиг сфинкса
# MySQL data source "src1"; index test1 refers to it through its "source" directive.
source src1
{
	type			= mysql

	# connection settings for the products database
	sql_host		= localhost
	sql_user		= root
	sql_pass		=
	sql_db			= yii2advanced2
	sql_port		= 3306	# optional, default is 3306

	# executed before the main query: switch the connection to utf8
	sql_query_pre = SET NAMES utf8
    sql_query_pre = SET CHARACTER SET utf8

	# main fetch query: one row per product; create_time is exported as a UNIX timestamp named date_added
	sql_query	= SELECT id_product, id_old, model, UNIX_TIMESTAMP(create_time) AS date_added, seo_name, full_description FROM products

	# columns stored as attributes instead of being full-text indexed
	# NOTE(review): id_product is also the first column of sql_query, which Sphinx presumably
	# uses as the document id - confirm that declaring it as an attribute is intended.
	# NOTE(review): id_manufacturer is declared below but is not selected by sql_query - confirm.
	sql_attr_uint		= id_product
	sql_attr_uint		= id_old
	sql_attr_uint		= id_manufacturer
	sql_attr_timestamp	= date_added
}


# Index "test1", built from the rows of source src1 and stored under the given path.
index test1
{
	source			= src1
	path			= c:\OpenServer\modules\sphinx\configdir\data\test1
	# Every tuning option below is commented out, so none of them is in effect.
	# NOTE(review): with these lines disabled no prefix/infix setting is configured here;
	# a partial word such as "печ" is presumably expected to match "печка" only as a
	# wildcard query ("печ*") - confirm against the Sphinx documentation for the
	# installed version which of these options must be enabled, then rebuild the index.
	#min_word_len = 1
	#min_infix_len = 1
	#enable_star = 0
	#min_word_len      = 1
	#html_strip        = 1
	#min_infix_len     = 1
	#index_exact_words = 1
}


# Index "testrt" of type rt: no source is attached; it declares its own fields and attribute.
index testrt
{
	type			= rt
	rt_mem_limit		= 128M
	path			= c:\OpenServer\modules\sphinx\configdir\data\testrt
	# full-text fields (same names as the text columns selected in source src1)
	rt_field		= seo_name
	rt_field		= full_description
	# unsigned integer attribute
	rt_attr_uint		= gid
}


# Settings for the indexer tool.
indexer
{
	# memory limit for indexing
	mem_limit		= 128M
}


# Settings for the searchd daemon.
searchd
{
	# 9312 is the port the PHP script connects to; 9306 is a second listener using the mysql41 protocol
	listen			= 9312
	listen			= 9306:mysql41
	# daemon log, per-query log and pid file locations
	log			= c:\OpenServer\modules\sphinx\configdir\log\searchd.log
	query_log		= c:\OpenServer\modules\sphinx\configdir\log\query.log
	read_timeout		= 5
	max_children		= 30
	pid_file		= c:\OpenServer\modules\sphinx\configdir\log\searchd.pid
	seamless_rotate		= 1
	preopen_indexes		= 1
	unlink_old		= 1
	workers			= threads # for RT to work
	# directory for binary log files
	binlog_path		= c:\OpenServer\modules\sphinx\configdir\data
}


А вот сам php скрипт, может кому полезен будет
На качество кода не смотреть, это наспех переделанный пример из phpapi =)
<?php

header('Content-type: text/html; charset=utf-8');

// Silence all PHP diagnostics.
error_reporting( 0 );

require( "sphinxapi.php" );

//////////////////////
// parse command line
//////////////////////

// This script is a web adaptation of a command-line example: the search phrase
// arrives as ?q=... and is stored where the option parser below expects the
// first command-line argument (index 1; index 0 is never populated).
//
// Only a string value is accepted. A missing q is treated as an empty phrase,
// and an array value such as ?q[]=x is also reduced to an empty phrase instead
// of being converted to the literal text "Array" and searched for.
$searchPhrase = ( isset( $_GET['q'] ) && is_string( $_GET['q'] ) ) ? $_GET['q'] : '';

$_SERVER["argv"] = [];
$_SERVER["argv"][1] = $searchPhrase;

// build query
// When no arguments are available, print the usage summary of the command-line
// example and stop the script.
$haveArguments = is_array( $_SERVER["argv"] ) && !empty( $_SERVER["argv"] );
if ( !$haveArguments ) {
	$usageLines = [
		"Usage: php -f test.php [OPTIONS] query words\n\n",
		"Options are:\n",
		"-h, --host <HOST>\tconnect to searchd at host HOST\n",
		"-p, --port\t\tconnect to searchd at port PORT\n",
		"-i, --index <IDX>\tsearch through index(es) specified by IDX\n",
		"-s, --sortby <CLAUSE>\tsort matches by 'CLAUSE' in sort_extended mode\n",
		"-S, --sortexpr <EXPR>\tsort matches by 'EXPR' DESC in sort_expr mode\n",
		"-a, --any\t\tuse 'match any word' matching mode\n",
		"-b, --boolean\t\tuse 'boolean query' matching mode\n",
		"-e, --extended\t\tuse 'extended query' matching mode\n",
		"-ph,--phrase\t\tuse 'exact phrase' matching mode\n",
		"-f, --filter <ATTR>\tfilter by attribute 'ATTR' (default is 'group_id')\n",
		"-fr,--filterrange <ATTR> <MIN> <MAX>\n\t\t\tadd specified range filter\n",
		"-v, --value <VAL>\tadd VAL to allowed 'group_id' values list\n",
		"-g, --groupby <EXPR>\tgroup matches by 'EXPR'\n",
		"-gs,--groupsort <EXPR>\tsort groups by 'EXPR'\n",
		"-d, --distinct <ATTR>\tcount distinct values of 'ATTR''\n",
		"-l, --limit <COUNT>\tretrieve COUNT matches (default: 20)\n",
		"--select <EXPRLIST>\tuse 'EXPRLIST' as select-list (default: *)\n",
	];
	print ( implode( "", $usageLines ) );
	exit;
}

// Copy the arguments into a zero-based list so they can be walked by position.
$args = array();
foreach ( $_SERVER["argv"] as $arg ) {
	$args[] = $arg;
}

$cl = new SphinxClient ();

// Defaults; each can be overridden by the matching option below.
$q = "";
$sql = "";
$mode = SPH_MATCH_ALL;
$host = "localhost";
$port = 9312;
$index = "*";
$groupby = "";
$groupsort = "@group desc";
$filter = "group_id";
$filtervals = array();
$distinct = "";
$sortby = "";
$sortexpr = "";
$limit = 210;
$ranker = SPH_RANK_PROXIMITY_BM25;
$select = "";

// Walk the arguments. An option that takes a value consumes the following
// argument(s) via ++$i; anything that is not a known option is appended to the
// query text, followed by a space.
for ( $i = 0; $i < count( $args ); $i++ ) {
	$arg = $args[$i];

	switch ( $arg ) {
		case "-h":
		case "--host":
			$host = $args[++$i];
			break;

		case "-p":
		case "--port":
			$port = (int)$args[++$i];
			break;

		case "-i":
		case "--index":
			$index = $args[++$i];
			break;

		case "-s":
		case "--sortby":
			// sort clause and sort expression are mutually exclusive
			$sortby = $args[++$i];
			$sortexpr = "";
			break;

		case "-S":
		case "--sortexpr":
			$sortexpr = $args[++$i];
			$sortby = "";
			break;

		case "-a":
		case "--any":
			$mode = SPH_MATCH_ANY;
			break;

		case "-b":
		case "--boolean":
			$mode = SPH_MATCH_BOOLEAN;
			break;

		case "-e":
		case "--extended":
			$mode = SPH_MATCH_EXTENDED;
			break;

		case "-e2":
			$mode = SPH_MATCH_EXTENDED2;
			break;

		case "-ph":
		case "--phrase":
			$mode = SPH_MATCH_PHRASE;
			break;

		case "-f":
		case "--filter":
			$filter = $args[++$i];
			break;

		case "-v":
		case "--value":
			$filtervals[] = $args[++$i];
			break;

		case "-g":
		case "--groupby":
			$groupby = $args[++$i];
			break;

		case "-gs":
		case "--groupsort":
			$groupsort = $args[++$i];
			break;

		case "-d":
		case "--distinct":
			$distinct = $args[++$i];
			break;

		case "-l":
		case "--limit":
			$limit = (int)$args[++$i];
			break;

		case "--select":
			$select = $args[++$i];
			break;

		case "-fr":
		case "--filterrange":
			// consumes three arguments: attribute, min, max
			$cl->SetFilterRange( $args[++$i], $args[++$i], $args[++$i] );
			break;

		case "-r":
			// ranker name is matched case-insensitively; unknown names keep the current ranker
			$arg = strtolower( $args[++$i] );
			switch ( $arg ) {
				case "bm25":
					$ranker = SPH_RANK_BM25;
					break;
				case "none":
					$ranker = SPH_RANK_NONE;
					break;
				case "wordcount":
					$ranker = SPH_RANK_WORDCOUNT;
					break;
				case "fieldmask":
					$ranker = SPH_RANK_FIELDMASK;
					break;
				case "sph04":
					$ranker = SPH_RANK_SPH04;
					break;
			}
			break;

		default:
			$q .= $args[$i] . " ";
	}
}


////////////
// do query
////////////

// connection, result format and matching mode
$cl->SetServer( $host, $port );
$cl->SetConnectTimeout( 1 );
$cl->SetArrayResult( true );
$cl->SetMatchMode( $mode );

// optional settings are applied only when the corresponding option was given
if ( count( $filtervals ) ) {
	$cl->SetFilter( $filter, $filtervals );
}
if ( $groupby ) {
	$cl->SetGroupBy( $groupby, SPH_GROUPBY_ATTR, $groupsort );
}
if ( $sortby ) {
	$cl->SetSortMode( SPH_SORT_EXTENDED, $sortby );
}
if ( $sortexpr ) {
	$cl->SetSortMode( SPH_SORT_EXPR, $sortexpr );
}
if ( $distinct ) {
	$cl->SetGroupDistinct( $distinct );
}
if ( $select ) {
	$cl->SetSelect( $select );
}
if ( $limit ) {
	// third argument is never below 1000 and grows with the limit
	$cl->SetLimits( 0, $limit, max( $limit, 1000 ) );
}

$cl->SetRankingMode( $ranker );

// run the search; $res is false on failure
$res = $cl->Query( $q, $index );

////////////////
// print me out
////////////////

// A failed query or a warning is reported as plain text ahead of the JSON
// payload; the text is unchanged.
if ( $res === false ) {
	print "Query failed: " . $cl->GetLastError() . ".\n";
} else if ( $cl->GetLastWarning() ) {
	print "WARNING: " . $cl->GetLastWarning() . "\n\n";
}

// Collect the ids of the matched documents.
//
// Two problems are addressed here:
//  - a brace-less foreach over $res["words"] used to wrap the loop over the
//    matches, so that loop ran once per query word; it only incremented a
//    counter and produced no output, so it is dropped;
//  - $res['matches'] was iterated even when the query had failed ($res is
//    false) or the result carried no matches, which only went unnoticed
//    because diagnostics are switched off. The list is now read only when it
//    is actually an array; otherwise the answer stays an empty list.
$answer = [];
if ( is_array( $res ) && isset( $res['matches'] ) && is_array( $res['matches'] ) ) {
	foreach ( $res['matches'] as $docinfo ) {
		$answer[] = $docinfo['id'];
	}
}

// Emit the ids as a JSON array.
$answer = json_encode($answer);
echo ($answer);
/**
 * Debug helper: dumps a value in print_r() format wrapped in a <pre> element.
 *
 * @param mixed $a value to dump
 */
function p($a) {
	$dump = print_r( $a, true );
	echo '<pre>' . $dump . '</pre>';
}

?>
  • Вопрос задан
  • 2208 просмотров
Решения вопроса 1
MaxDukov
@MaxDukov
впишусь в проект как SRE/DevOps.
если верить разработчикам, то
sphinxsearch.com/docs/current/conf-min-word-len.html
min_word_len
Minimum indexed word length. Optional, default is 1 (index everything).
Проблема в том, что "печ" не равно "печка", а вот "печ*" соответствует слову "печка".

почитайте тут
Ответ написан
Пригласить эксперта
Ответы на вопрос 1
R0dger
@R0dger
Laravel/Yii/2 AngularJs PHP RESTful API
почитайте вот тут, думаю поможет
sphinxsearch.com/forum/view.html?id=13352
Ответ написан
Комментировать
Ваш ответ на вопрос

Войдите, чтобы написать ответ

Похожие вопросы