@@ -159,12 +159,13 @@ async def _create_tables(self) -> None:
159159 path UNINDEXED,
160160 source UNINDEXED,
161161 start_line UNINDEXED,
162- end_line UNINDEXED
162+ end_line UNINDEXED,
163+ tokenize='trigram'
163164 )
164165 """ ,
165166 )
166167 self .fts_available = True
167- logger .info ("Created FTS5 table" )
168+ logger .info ("Created FTS5 table with trigram tokenizer " )
168169
169170 self .conn .commit ()
170171 cursor .close ()
@@ -538,7 +539,10 @@ async def vector_search(
538539
539540 results = []
540541 for _ , path , start , end , src , text , dist in cursor .fetchall ():
541- score = max (0.0 , 1.0 - dist )
542+ # Convert L2 distance to similarity score
543+ # For normalized vectors, L2 distance range is [0, 2]
544+ # Map to [1, 0] score range (higher score = more similar)
545+ score = max (0.0 , 1.0 - dist / 2.0 )
542546 snippet = text
543547 results .append (
544548 MemorySearchResult (
@@ -548,7 +552,7 @@ async def vector_search(
548552 score = score ,
549553 snippet = snippet ,
550554 source = MemorySource (src ),
551- distance = dist ,
555+ raw_metric = dist ,
552556 ),
553557 )
554558
@@ -568,7 +572,19 @@ def _sanitize_fts_query(self, query: str) -> str:
568572 - " (phrase search, needs escaping)
569573 - : (column filter)
570574 - ^ (start of line anchor, not standard FTS5)
571- - Other special chars that may interfere
575+ - ' (single quote, causes syntax errors)
576+ - ` (backtick, can cause issues)
577+ - | (pipe, OR operator)
578+ - + (plus, can be used for required terms)
579+ - - (minus, NOT operator)
580+ - = (equals, can cause issues)
581+ - < > (angle brackets, comparison operators)
582+ - ! (exclamation, NOT operator variant)
583+ - @ # $ % & (other special chars)
584+ - \\ (backslash)
585+ - / (slash, can interfere)
586+ - ; (semicolon, statement separator)
587+ - , (comma, can interfere with phrase parsing)
572588
573589 Args:
574590 query: Raw query string
@@ -580,8 +596,38 @@ def _sanitize_fts_query(self, query: str) -> str:
580596 return ""
581597
582598 # Remove FTS5 special characters that we don't want users to use
583- # Keep only alphanumeric, spaces, and some safe punctuation
584- special_chars = ["*" , "?" , ":" , "^" , "(" , ")" , "[" , "]" , "{" , "}" ]
599+ # Keep only alphanumeric, spaces, periods, and underscores
600+ special_chars = [
601+ "*" ,
602+ "?" ,
603+ ":" ,
604+ "^" ,
605+ "(" ,
606+ ")" ,
607+ "[" ,
608+ "]" ,
609+ "{" ,
610+ "}" ,
611+ "'" ,
612+ '"' ,
613+ "`" ,
614+ "|" ,
615+ "+" ,
616+ "-" ,
617+ "=" ,
618+ "<" ,
619+ ">" ,
620+ "!" ,
621+ "@" ,
622+ "#" ,
623+ "$" ,
624+ "%" ,
625+ "&" ,
626+ "\\ " ,
627+ "/" ,
628+ ";" ,
629+ "," ,
630+ ]
585631 cleaned = query
586632 for char in special_chars :
587633 cleaned = cleaned .replace (char , " " )
@@ -650,6 +696,7 @@ async def keyword_search(
650696 score = score ,
651697 snippet = snippet ,
652698 source = MemorySource (src ),
699+ raw_metric = rank ,
653700 ),
654701 )
655702
0 commit comments