Commit 3fa93ff

rbs333 authored and tylerhutcherson committed
tokenizer helper function
1 parent 9500853 commit 3fa93ff

File tree

1 file changed: +7 -1 lines changed


redisvl/query/query.py

Lines changed: 7 additions & 1 deletion
@@ -704,7 +704,6 @@ def __init__(
             nltk.download('stopwords')
         self._stopwords = set(stopwords.words('english'))
 
-
         super().__init__(query_string)
 
         # Handle query modifiers
@@ -727,6 +726,7 @@ def __init__(
 
     def _tokenize_query(self, user_query: str) -> str:
         """Convert a raw user query to a redis full text query joined by ORs"""
+
         words = word_tokenize(user_query)
 
         tokens = [token.strip().strip(",").lower() for token in user_query.split()]
@@ -747,6 +747,12 @@ def _build_query_string(self) -> str:
         #TODO is this method even needed? use
         return text_and_filter
 
+# from redisvl.utils.token_escaper import TokenEscaper
+# escaper = TokenEscaper()
+# def tokenize_and_escape_query(user_query: str) -> str:
+#     """Convert a raw user query to a redis full text query joined by ORs"""
+#     tokens = [escaper.escape(token.strip().strip(",").replace("“", "").replace("”", "").lower()) for token in user_query.split()]
+#     return " | ".join([token for token in tokens if token and token not in stopwords_en])
 
 class HybridQuery(VectorQuery, TextQuery):
     def __init__():
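
For context, a minimal standalone sketch of what the commented-out tokenize-and-escape helper above could look like if enabled. This is an illustration under assumptions, not part of the commit: it presumes the NLTK stopwords corpus can be downloaded at import time, relies on TokenEscaper from redisvl.utils.token_escaper (as the commented import does), and supplies the stopwords_en set that the commented-out snippet references but never defines.

# Illustrative sketch only; names and layout are assumptions, not the committed code.
import nltk
from nltk.corpus import stopwords

from redisvl.utils.token_escaper import TokenEscaper

# Assumes the NLTK stopwords corpus is available (downloaded here if missing).
nltk.download("stopwords", quiet=True)
stopwords_en = set(stopwords.words("english"))

escaper = TokenEscaper()


def tokenize_and_escape_query(user_query: str) -> str:
    """Convert a raw user query to a Redis full-text query joined by ORs."""
    tokens = [
        escaper.escape(
            token.strip().strip(",").replace("“", "").replace("”", "").lower()
        )
        for token in user_query.split()
    ]
    # Drop empty tokens and English stopwords, then OR the remaining terms together.
    return " | ".join(token for token in tokens if token and token not in stopwords_en)


# Example (assuming escape() leaves plain alphanumeric tokens unchanged):
# tokenize_and_escape_query("the fastest vector database") -> "fastest | vector | database"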
