Skip to content

Commit 4b25c8b

Browse files
committed
add ranking + priority to search
1 parent 6244834 commit 4b25c8b

File tree

1 file changed

+41
-32
lines changed

1 file changed

+41
-32
lines changed

pcweb/components/docpage/navbar/typesense.py

Lines changed: 41 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import textdistance
55
import unicodedata
66

7+
# Score cutoff: keep only strong-to-medium matches and drop the weaker ones; it also acts as a natural cap on the number of results.
8+
CUTTOFF = 0.6
79

810
class FuzzySearch(rx.State):
911
query: str
@@ -27,13 +29,30 @@ def _normalize_and_split_query(self) -> list[str]:
2729
if w
2830
]
2931

30-
def _check_fuzzy_match(self, query_word: str, target_words: list[str]) -> bool:
31-
"""Helper method to check if a query word matches any target words with fuzzy logic."""
32-
max_dist = 1 if len(query_word) <= 4 else 2
33-
return any(
34-
query_word in word or textdistance.levenshtein(query_word, word) <= max_dist
35-
for word in target_words
36-
)
32+
def _similarity_score(self, query_word: str, target_word: str) -> float:
    """Return a similarity score between 0 and 1 (higher = better).

    Tiers, checked in order:
      * empty ``query_word``                   -> 0.0
      * ``query_word`` equals ``target_word``  -> 1.0
      * ``target_word`` starts with the query  -> 0.9
      * anything else -> normalized Levenshtein similarity from ``textdistance``

    Args:
        query_word: A single word from the user's query.
        target_word: A single word from the text being searched.

    Returns:
        The similarity score as a float.
    """
    # The empty string is a prefix of every word, so without this guard an
    # empty query word would score 0.9 against every target.
    if not query_word:
        return 0.0

    if query_word == target_word:
        return 1.0

    # A prefix hit ranks just below an exact match.
    if target_word.startswith(query_word):
        return 0.9

    return textdistance.levenshtein.normalized_similarity(query_word, target_word)
42+
43+
def _score_match(self, query_words: list[str], target_fields: list[str]) -> float:
    """Score how well the query words match the words in the target fields.

    Each query word contributes its best ``_similarity_score`` against any
    whitespace-separated word found in ``target_fields`` (0.0 if there are
    no words at all). The result is the mean of those per-word bests.

    Args:
        query_words: Words of the search query.
        target_fields: Strings to search; each is split on whitespace.

    Returns:
        The mean best score, or 0.0 when ``query_words`` is empty.
    """
    if not query_words:
        return 0.0

    # Flatten every field into one list of candidate words, preserving order.
    candidates = [word for field in target_fields for word in field.split()]

    running_total = 0.0
    for query_word in query_words:
        word_scores = [
            self._similarity_score(query_word, candidate)
            for candidate in candidates
        ]
        # Seed with 0.0 so a query word with no candidates contributes nothing.
        running_total += max([0.0, *word_scores])

    return running_total / len(query_words)
3756

3857
@rx.event(background=True)
3958
async def serve_fuzzy_blogs(self):
@@ -43,26 +62,19 @@ async def serve_fuzzy_blogs(self):
4362
if not query_words:
4463
return
4564

65+
scored_results = []
4666
for blog in self.idxed_blogs:
4767
blog_fields = [
4868
unicodedata.normalize("NFKD", blog.get(key, "").lower()).strip()
4969
for key in ["title", "description", "author"]
5070
]
5171

52-
all_matched = True
53-
for query_word in query_words:
54-
matched = False
55-
for field in blog_fields:
56-
field_words = field.split()
57-
if self._check_fuzzy_match(query_word, field_words):
58-
matched = True
59-
break
60-
if not matched:
61-
all_matched = False
62-
break
63-
64-
if all_matched:
65-
self.idxed_blogs_results.append(blog)
72+
score = self._score_match(query_words, blog_fields)
73+
if score > CUTTOFF:
74+
scored_results.append((score, blog))
75+
76+
scored_results.sort(key=lambda x: x[0], reverse=True)
77+
self.idxed_blogs_results = [b for _, b in scored_results]
6678

6779
@rx.event(background=True)
6880
async def serve_fuzzy_query(self):
@@ -72,24 +84,21 @@ async def serve_fuzzy_query(self):
7284
if not query_words:
7385
return
7486

87+
scored_results = []
7588
for doc in self.idxed_docs:
7689
term_words_list = [
7790
unicodedata.normalize("NFKD", term.lower()).strip().split()
7891
for term in doc["parts"]
7992
]
8093

81-
all_matched = True
82-
for query_word in query_words:
83-
matched = any(
84-
self._check_fuzzy_match(query_word, term_words)
85-
for term_words in term_words_list
86-
)
87-
if not matched:
88-
all_matched = False
89-
break
94+
flat_terms = [w for words in term_words_list for w in words]
95+
96+
score = self._score_match(query_words, flat_terms)
97+
if score > CUTTOFF:
98+
scored_results.append((score, doc))
9099

91-
if all_matched:
92-
self.idxed_docs_results.append(doc)
100+
scored_results.sort(key=lambda x: x[0], reverse=True)
101+
self.idxed_docs_results = [d for _, d in scored_results]
93102

94103

95104
suggestion_items = [

0 commit comments

Comments
 (0)