Enable ligature / alternate-spelling matching
This commit is contained in:
parent
a6304d2f5d
commit
0f69a2d2c8
|
@ -423,7 +423,7 @@ if (!$_ODATA['s_result_template']) {
|
|||
{{#errors}}
|
||||
<ul>
|
||||
{{#error_list}}
|
||||
<li>{{text}}</li>
|
||||
<li>{{.}}</li>
|
||||
{{/error_list}}
|
||||
</ul>
|
||||
{{/errors}}
|
||||
|
@ -721,7 +721,14 @@ $_RDATA['sp_smart'] = array(
|
|||
'‖' => '|'
|
||||
);
|
||||
$_RDATA['s_latin'] = array(
|
||||
'center' => array('centre'),
|
||||
'color' => array('colour'),
|
||||
'fiber' => array('fibre'),
|
||||
|
||||
'ae' => array('æ', 'Æ'),
|
||||
'oe' => array('œ', 'Œ'),
|
||||
'sz' => array('ß'),
|
||||
'th' => array('þ', 'Þ'),
|
||||
'a' => array('á', 'Á', 'à', 'À', 'â', 'Â', 'ä', 'Ä', 'ã', 'Ã', 'å', 'Å', 'ą', 'Ą', 'ă', 'Ă'),
|
||||
'c' => array('ç', 'Ç', 'ć', 'Ć', 'č', 'Č'),
|
||||
'd' => array('ð', 'Ð', 'ď', 'Ď', 'đ', 'Đ'),
|
||||
|
@ -730,12 +737,9 @@ $_RDATA['s_latin'] = array(
|
|||
'i' => array('í', 'Í', 'ì', 'Ì', 'î', 'Î', 'ï', 'Ï', 'ı', 'İ'),
|
||||
'l' => array('ł', 'Ł', 'ľ', 'Ľ', 'ĺ', 'Ĺ'),
|
||||
'n' => array('ñ', 'Ñ', 'ń', 'Ń', 'ň', 'Ň'),
|
||||
'oe' => array('œ', 'Œ'),
|
||||
'o' => array('ó', 'Ó', 'ò', 'Ò', 'ô', 'Ô', 'ö', 'Ö', 'õ', 'Õ', 'ø', 'Ø', 'ő', 'Ő'),
|
||||
'r' => array('ŕ', 'Ŕ', 'ř', 'Ř'),
|
||||
'sz' => array('ß'),
|
||||
's' => array('ş', 'Ş', 'ś', 'Ś', 'š', 'Š'),
|
||||
'th' => array('þ', 'Þ'),
|
||||
't' => array('ť', 'Ť', 'ţ', 'Ţ'),
|
||||
'u' => array('ú', 'Ú', 'ù', 'Ù', 'û', 'Û', 'ü', 'Ü', 'ů', 'Ů', 'ű', 'Ű'),
|
||||
'x' => array('×'),
|
||||
|
|
|
@ -23,6 +23,23 @@ $_SDATA = array(
|
|||
// Normalise every configured search weight to a float, in place
foreach ($_RDATA['s_weights'] as $key => $weight) {
  $_RDATA['s_weights'][$key] = floatval($weight);
}
|
||||
|
||||
// Build regexp translation tables for accented / ligature characters.
//
// Each entry of $_RDATA['s_latin'] maps a base spelling to a list of
// alternate spellings. For every base spelling AND every alternate, the
// tables below store one regexp fragment that matches any of them:
//   - s_latin_pcre       : all entries (single- and multi-character keys)
//   - s_latin_pcre_multi : only entries whose key is longer than one byte
$_RDATA['s_latin_pcre'] = array();
$_RDATA['s_latin_pcre_multi'] = array();

foreach ($_RDATA['s_latin'] as $char => $latin) {

  if (strlen($char) > 1) {

    // Multi-character keys cannot live in a character class, so use
    // a group with alternation: (ae|æ|Æ)
    $pcre = '('.$char.'|'.implode('|', $latin).')';

    $_RDATA['s_latin_pcre_multi'][$char] = $pcre;
    foreach ($latin as $lchar) {
      $_RDATA['s_latin_pcre_multi'][$lchar] = $pcre;
    }

  } else {

    // Single-character keys collapse into a character class: [cçÇćĆčČ]
    $pcre = '['.$char.implode('', $latin).']';
  }

  // The full table receives every entry regardless of key length
  $_RDATA['s_latin_pcre'][$char] = $pcre;
  foreach ($latin as $lchar) {
    $_RDATA['s_latin_pcre'][$lchar] = $pcre;
  }
}
|
||||
|
||||
|
||||
// {{{{{ Initialize the Mustache templating engine
|
||||
class OS_Mustache {
|
||||
|
@ -164,13 +181,9 @@ if ($_RDATA['s_searchable_pages']) {
|
|||
if ($type == 'term')
|
||||
$_SDATA['formatted'][] = $term;
|
||||
|
||||
// Regexp for later use pattern matching results
|
||||
$_SDATA['terms'][$key][2] = preg_quote(strtolower($term), '/');
|
||||
foreach ($_RDATA['s_latin'] as $char => $latin) {
|
||||
$_SDATA['terms'][$key][2] = str_replace($latin, $char, $_SDATA['terms'][$key][2]);
|
||||
if (strlen($char) > 1) {
|
||||
$_SDATA['terms'][$key][2] = str_replace($char, '('.$char.'|'.implode('|', $latin).')', $_SDATA['terms'][$key][2]);
|
||||
} else $_SDATA['terms'][$key][2] = str_replace($char, '['.$char.implode('', $latin).']', $_SDATA['terms'][$key][2]);
|
||||
}
|
||||
$_SDATA['terms'][$key][2] = strtr($_SDATA['terms'][$key][2], $_RDATA['s_latin_pcre']);
|
||||
$_SDATA['terms'][$key][2] = '/('.$_SDATA['terms'][$key][2].')/iu';
|
||||
|
||||
}
|
||||
|
@ -240,37 +253,44 @@ if ($_RDATA['s_searchable_pages']) {
|
|||
// Build the SQL WHERE fragments for every parsed search term.
//
//   $negs : clauses that must NOT match ('exclude' terms)
//   $ands : grouped clauses that MUST match ('phrase' terms); this array
//           is appended to here but is not initialised in this section
//           -- presumably it is set up just before; verify against caller
//   $ors  : clauses of which any may match (plain 'term' entries)
$ors = array();
$negs = array();

// Columns that every term is matched against
$columns = array('content', 'url', 'title', 'description', 'keywords', 'weighted');

foreach ($_SDATA['terms'] as list($type, $term, $pcre)) {

  // Regexp only for SQL use: neutralise regexp metacharacters typed by
  // the user so they are matched literally
  $term = preg_quote(strtolower($term));

  // Regexp alternation for multi-character ligatures
  $term = strtr($term, $_RDATA['s_latin_pcre_multi']);

  // Escape for embedding in a single-quoted SQL string literal. This must
  // happen AFTER the regexp is assembled: a MySQL-style string literal
  // (assumed from the backtick identifiers / REGEXP operator) consumes one
  // level of backslashes, so the backslashes added by preg_quote() have to
  // be doubled or the quoting is silently undone (e.g. 'a.b' would match
  // any character, 'c++' would become an invalid/unintended pattern).
  $term = addslashes($term);

  switch ($type) {
    case 'filetype': // Nothing for filetype yet
      break;

    case 'exclude':
      foreach ($columns as $column) {
        $negs[] = '`'.$column.'` NOT REGEXP \''.$term.'\'';
      }
      break;

    case 'phrase':
      // A phrase must appear in at least one of the columns
      $clauses = array();
      foreach ($columns as $column) {
        $clauses[] = '`'.$column.'` REGEXP \''.$term.'\'';
      }
      $ands[] = '('.implode(' OR ', $clauses).')';
      break;

    case 'term':
      foreach ($columns as $column) {
        $ors[] = '`'.$column.'` REGEXP \''.$term.'\'';
      }

  }
}
|
||||
|
|
Loading…
Reference in a new issue