From 1a4bcf875570ddd824bdfe12c9684eb51c575340 Mon Sep 17 00:00:00 2001 From: Simon Vieille Date: Sun, 24 Sep 2023 19:35:27 +0200 Subject: [PATCH] upgrade search engine --- src/Repository/Blog/PostRepositoryQuery.php | 38 +++++++++++++-------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/src/Repository/Blog/PostRepositoryQuery.php b/src/Repository/Blog/PostRepositoryQuery.php index 5f3d3b3..6b3e04e 100644 --- a/src/Repository/Blog/PostRepositoryQuery.php +++ b/src/Repository/Blog/PostRepositoryQuery.php @@ -54,19 +54,12 @@ class PostRepositoryQuery extends RepositoryQuery ; } - protected function filterHandler(string $name, $value) - { - if ('category' === $name) { - $this->inCategory($value); - } - } - public function search(?string $keywords, ?string $tag) { $keywords = explode(' ', $keywords); - $filterWords = fn($keyword) => trim($keyword) !== '' && preg_match('/[a-zA-Z]+/', $keyword) && mb_strlen($keyword) > 2; - $filter = fn($keyword) => trim($keyword) !== '' && preg_match('/[a-zA-Z]+/', $keyword) && mb_strlen($keyword) > 2; + $filterWords = fn ($keyword) => '' !== trim($keyword) && preg_match('/[a-zA-Z]+/', $keyword); + $filter = fn ($keyword) => '' !== trim($keyword) && preg_match('/[a-zA-Z]+/', $keyword); $keywords = array_filter($keywords, $filter); @@ -83,7 +76,8 @@ class PostRepositoryQuery extends RepositoryQuery WHERE post.status = 1 AND post.published_at < :date - '); + ' + ); $statement = $query->execute([ ':date' => (new \DateTime())->format('Y-m-d H:i:s'), @@ -104,17 +98,19 @@ class PostRepositoryQuery extends RepositoryQuery $words = array_filter($words, $filterWords); foreach ($keywords as $keyword) { - if(str_contains(mb_strtolower($v['content']), mb_strtolower($keyword))) { + if (str_contains(mb_strtolower($v['content']), mb_strtolower($keyword))) { $similarity = 99; if (isset($matches[$v['id']])) { $matches[$v['id']]['similarity'] += $similarity; + ++$matches[$v['id']]['count']; } else { $matches[$v['id']] = [ 'id' => $v['id'], 'title' => $v['title'], 'published_at' => $v['published_at'], 'similarity' => $similarity, + 'count' => 1, ]; } } @@ -125,16 +121,18 @@ class PostRepositoryQuery extends RepositoryQuery if (isset($matches[$v['id']])) { $matches[$v['id']]['similarity'] += $similarity; + ++$matches[$v['id']]['count']; } else { $matches[$v['id']] = [ 'id' => $v['id'], 'title' => $v['title'], 'published_at' => $v['published_at'], 'similarity' => $similarity, + 'count' => 1, ]; } } else { - $lev = levenshtein($word, $keyword); + $lev = levenshtein($word, $keyword); $similarity = 100 - ($lev * 100 / mb_strlen($word)); if ($similarity > 70) { @@ -146,6 +144,7 @@ class PostRepositoryQuery extends RepositoryQuery 'title' => $v['title'], 'published_at' => $v['published_at'], 'similarity' => $similarity, + 'count' => 1, ]; } } @@ -154,7 +153,11 @@ class PostRepositoryQuery extends RepositoryQuery } } - usort($matches, function($a, $b) { + $matches = array_filter($matches, function($match) use ($keywords) { + return (100 * $match['count'] / count($keywords)) > 80; + }); + + usort($matches, function ($a, $b) { if ($a['similarity'] > $b['similarity']) { return -1; } @@ -163,7 +166,7 @@ class PostRepositoryQuery extends RepositoryQuery return 1; } - return ($a['published_at'] <> $b['published_at']) * -1; + return ($a['published_at'] != $b['published_at']) * -1; }); $ids = array_column($matches, 'id'); @@ -188,4 +191,11 @@ class PostRepositoryQuery extends RepositoryQuery return $this; } + + protected function filterHandler(string $name, $value) + { + if ('category' === $name) { + $this->inCategory($value); + } + } }