44import textdistance
55import unicodedata
66
7+ # Score cutoff: only results scoring strictly above this value are kept. This retains strong-to-medium matches, drops weak ones, and naturally limits how many results are returned.
8+ CUTTOFF = 0.6
79
810class FuzzySearch (rx .State ):
911 query : str
@@ -27,13 +29,30 @@ def _normalize_and_split_query(self) -> list[str]:
2729 if w
2830 ]
2931
30- def _check_fuzzy_match (self , query_word : str , target_words : list [str ]) -> bool :
31- """Helper method to check if a query word matches any target words with fuzzy logic."""
32- max_dist = 1 if len (query_word ) <= 4 else 2
33- return any (
34- query_word in word or textdistance .levenshtein (query_word , word ) <= max_dist
35- for word in target_words
36- )
32+ def _similarity_score (self , query_word : str , target_word : str ) -> float :
33+ """Return similarity score between 0 and 1 (higher = better)."""
34+
35+ if query_word == target_word :
36+ return 1.0
37+
38+ if target_word .startswith (query_word ):
39+ return 0.9
40+
41+ return textdistance .levenshtein .normalized_similarity (query_word , target_word )
42+
43+ def _score_match (self , query_words : list [str ], target_fields : list [str ]) -> float :
44+ """Compute total match score for query vs. target fields."""
45+ total_score = 0.0
46+ for query_word in query_words :
47+ best_word_score = 0.0
48+ for field in target_fields :
49+ for word in field .split ():
50+ best_word_score = max (
51+ best_word_score ,
52+ self ._similarity_score (query_word , word )
53+ )
54+ total_score += best_word_score
55+ return total_score / len (query_words ) if query_words else 0.0
3756
3857 @rx .event (background = True )
3958 async def serve_fuzzy_blogs (self ):
@@ -43,26 +62,19 @@ async def serve_fuzzy_blogs(self):
4362 if not query_words :
4463 return
4564
65+ scored_results = []
4666 for blog in self .idxed_blogs :
4767 blog_fields = [
4868 unicodedata .normalize ("NFKD" , blog .get (key , "" ).lower ()).strip ()
4969 for key in ["title" , "description" , "author" ]
5070 ]
5171
52- all_matched = True
53- for query_word in query_words :
54- matched = False
55- for field in blog_fields :
56- field_words = field .split ()
57- if self ._check_fuzzy_match (query_word , field_words ):
58- matched = True
59- break
60- if not matched :
61- all_matched = False
62- break
63-
64- if all_matched :
65- self .idxed_blogs_results .append (blog )
72+ score = self ._score_match (query_words , blog_fields )
73+ if score > CUTTOFF :
74+ scored_results .append ((score , blog ))
75+
76+ scored_results .sort (key = lambda x : x [0 ], reverse = True )
77+ self .idxed_blogs_results = [b for _ , b in scored_results ]
6678
6779 @rx .event (background = True )
6880 async def serve_fuzzy_query (self ):
@@ -72,24 +84,21 @@ async def serve_fuzzy_query(self):
7284 if not query_words :
7385 return
7486
87+ scored_results = []
7588 for doc in self .idxed_docs :
7689 term_words_list = [
7790 unicodedata .normalize ("NFKD" , term .lower ()).strip ().split ()
7891 for term in doc ["parts" ]
7992 ]
8093
81- all_matched = True
82- for query_word in query_words :
83- matched = any (
84- self ._check_fuzzy_match (query_word , term_words )
85- for term_words in term_words_list
86- )
87- if not matched :
88- all_matched = False
89- break
94+ flat_terms = [w for words in term_words_list for w in words ]
95+
96+ score = self ._score_match (query_words , flat_terms )
97+ if score > CUTTOFF :
98+ scored_results .append ((score , doc ))
9099
91- if all_matched :
92- self . idxed_docs_results . append ( doc )
100+ scored_results . sort ( key = lambda x : x [ 0 ], reverse = True )
101+ self . idxed_docs_results = [ d for _ , d in scored_results ]
93102
94103
95104suggestion_items = [
0 commit comments