Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 16 additions & 12 deletions python-recipes/vector-search/02_hybrid_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand All @@ -382,6 +382,10 @@
}
],
"source": [
"from redisvl.utils.token_escaper import TokenEscaper\n",
"\n",
"escaper = TokenEscaper()\n",
"\n",
"# list of stopwords to filter out noise from query string\n",
"stopwords = set([\n",
" \"a\", \"is\", \"the\", \"an\", \"and\", \"are\", \"as\", \"at\", \"be\", \"but\", \"by\", \"for\",\n",
Expand All @@ -391,8 +395,8 @@
"\n",
"def tokenize_query(user_query: str) -> str:\n",
" \"\"\"Convert a raw user query to a redis full text query joined by ORs\"\"\"\n",
" tokens = [token.strip().strip(\",\").lower() for token in user_query.split()]\n",
" return \" | \".join([token for token in tokens if token not in stopwords])\n",
" tokens = [escaper.escape(token.strip().strip(\",\").lower()) for token in user_query.split()]\n",
" return \" | \".join([token for token in tokens if token and token not in stopwords])\n",
"\n",
"# Example\n",
"tokenize_query(user_query)"
Expand All @@ -407,7 +411,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -438,8 +442,8 @@
" filter_expression=f\"~({Text(text_field) % tokenize_query(user_query)})\",\n",
" num_results=num_results,\n",
" return_fields=[\"title\", \"description\"],\n",
" dialect=4,\n",
" ).scorer(\"BM25\").with_scores()"
" dialect=2,\n",
" ).scorer(\"BM25STD\").with_scores()"
]
},
{
Expand Down Expand Up @@ -540,7 +544,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -581,13 +585,13 @@
"# Build the aggregation request\n",
"req = (\n",
" AggregateRequest(query.query_string())\n",
" .scorer(\"BM25\")\n",
" .scorer(\"BM25STD\")\n",
" .add_scores()\n",
" .apply(cosine_similarity=\"(2 - @vector_distance)/2\", bm25_score=\"@__score\")\n",
" .apply(hybrid_score=f\"0.3*@bm25_score + 0.7*@cosine_similarity\")\n",
" .load(\"title\", \"description\", \"cosine_similarity\", \"bm25_score\", \"hybrid_score\")\n",
" .sort_by(Desc(\"@hybrid_score\"), max=3)\n",
" .dialect(4)\n",
" .dialect(2)\n",
")\n",
"\n",
"# Run the query\n",
Expand Down Expand Up @@ -620,7 +624,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -634,13 +638,13 @@
" # Build aggregation\n",
" req = (\n",
" AggregateRequest(query.query_string())\n",
" .scorer(\"BM25\")\n",
" .scorer(\"BM25STD\")\n",
" .add_scores()\n",
" .apply(cosine_similarity=\"(2 - @vector_distance)/2\", bm25_score=\"@__score\")\n",
" .apply(hybrid_score=f\"{1-alpha}*@bm25_score + {alpha}*@cosine_similarity\")\n",
" .sort_by(Desc(\"@hybrid_score\"), max=num_results)\n",
" .load(\"title\", \"description\", \"cosine_similarity\", \"bm25_score\", \"hybrid_score\")\n",
" .dialect(4)\n",
" .dialect(2)\n",
" )\n",
"\n",
" # Run the query\n",
Expand Down
Loading