upgrade search engine
Some checks failed
ci/woodpecker/push/build Pipeline failed

This commit is contained in:
Simon Vieille 2023-09-24 19:28:27 +02:00
parent 138b4f24ee
commit b11da225fb
Signed by: deblan
GPG key ID: 579388D585F70417
2 changed files with 39 additions and 32 deletions

View file

@ -72,7 +72,7 @@ pipeline:
commands: commands:
- apt-get update && apt-get -y install rsync - apt-get update && apt-get -y install rsync
- cd /data/deblan/deblan.io-murph/ - cd /data/deblan/deblan.io-murph/
- rsync -avz "$CI_WORKSPACE/" "$CI_COMMIT_SHA" - rsync -az "$CI_WORKSPACE/" "$CI_COMMIT_SHA"
services: services:
db: db:

View file

@ -65,7 +65,8 @@ class PostRepositoryQuery extends RepositoryQuery
{ {
$keywords = explode(' ', $keywords); $keywords = explode(' ', $keywords);
$filter = fn($keyword) => trim($keyword) !== '' && preg_match('/[a-zA-Z]+/', $keyword) && mb_strlen($keyword) > 3; $filterWords = fn($keyword) => trim($keyword) !== '' && preg_match('/[a-zA-Z]+/', $keyword) && mb_strlen($keyword) > 2;
$filter = fn($keyword) => trim($keyword) !== '' && preg_match('/[a-zA-Z]+/', $keyword) && mb_strlen($keyword) > 2;
$keywords = array_filter($keywords, $filter); $keywords = array_filter($keywords, $filter);
@ -93,54 +94,60 @@ class PostRepositoryQuery extends RepositoryQuery
$matches = []; $matches = [];
foreach ($results as $k => $v) { foreach ($results as $k => $v) {
$added = false;
$initWords = explode(' ', $v['title']); $initWords = explode(' ', $v['title']);
$words = []; $words = [];
foreach ($initWords as $initWord) { foreach ($initWords as $initWord) {
$words = array_merge($words, preg_split('/[:_-]+/', $initWord)); $words = array_merge($words, preg_split('/[:_\'-]+/', $initWord));
} }
$words = array_filter($words, $filter); $words = array_filter($words, $filterWords);
foreach ($keywords as $keyword) { foreach ($keywords as $keyword) {
if ($added) { if(str_contains(mb_strtolower($v['content']), mb_strtolower($keyword))) {
continue; $similarity = 99;
if (isset($matches[$v['id']])) {
$matches[$v['id']]['similarity'] += $similarity;
} else {
$matches[$v['id']] = [
'id' => $v['id'],
'title' => $v['title'],
'published_at' => $v['published_at'],
'similarity' => $similarity,
];
}
} }
foreach ($words as $word) { foreach ($words as $word) {
if ($added) {
continue;
}
if (str_contains(mb_strtolower($word), mb_strtolower($keyword))) { if (str_contains(mb_strtolower($word), mb_strtolower($keyword))) {
$matches[] = [ $similarity = 150;
'id' => $v['id'],
'published_at' => $v['published_at'],
'similarity' => 100,
];
$added = true; if (isset($matches[$v['id']])) {
} elseif(str_contains($v['content'], $keyword)) { $matches[$v['id']]['similarity'] += $similarity;
$matches[] = [ } else {
'id' => $v['id'], $matches[$v['id']] = [
'published_at' => $v['published_at'], 'id' => $v['id'],
'similarity' => 99, 'title' => $v['title'],
]; 'published_at' => $v['published_at'],
'similarity' => $similarity,
$added = true; ];
}
} else { } else {
$lev = levenshtein($word, $keyword); $lev = levenshtein($word, $keyword);
$similarity = 100 - ($lev * 100 / mb_strlen($word)); $similarity = 100 - ($lev * 100 / mb_strlen($word));
if ($similarity > 70) { if ($similarity > 70) {
$matches[] = [ if (isset($matches[$v['id']])) {
'id' => $v['id'], $matches[$v['id']]['similarity'] += $similarity;
'published_at' => $v['published_at'], } else {
'similarity' => $similarity, $matches[$v['id']] = [
]; 'id' => $v['id'],
'title' => $v['title'],
$added = true; 'published_at' => $v['published_at'],
'similarity' => $similarity,
];
}
} }
} }
} }