
Commit a0504ea

Merge branch 'master' into feat/iss-3408/add-scorer-aggregate
2 parents 2172c58 + 4c4d4af commit a0504ea

File tree

dev_requirements.txt
doctests/query_agg.py
doctests/query_combined.py

3 files changed: 228 additions & 1 deletion

dev_requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ packaging>=20.4
 pytest
 pytest-asyncio>=0.23.0,<0.24.0
 pytest-cov
-pytest-profiling
+pytest-profiling==1.7.0
 pytest-timeout
 ujson>=4.2.0
 uvloop

doctests/query_agg.py

Lines changed: 103 additions & 0 deletions
@@ -0,0 +1,103 @@
# EXAMPLE: query_agg
# HIDE_START
import json
import redis
from redis.commands.json.path import Path
from redis.commands.search import Search
from redis.commands.search.aggregation import AggregateRequest
from redis.commands.search.field import NumericField, TagField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
import redis.commands.search.reducers as reducers

r = redis.Redis(decode_responses=True)

# create index
schema = (
    TagField("$.condition", as_name="condition"),
    NumericField("$.price", as_name="price"),
)

index = r.ft("idx:bicycle")
index.create_index(
    schema,
    definition=IndexDefinition(prefix=["bicycle:"], index_type=IndexType.JSON),
)

# load data
with open("data/query_em.json") as f:
    bicycles = json.load(f)

pipeline = r.pipeline(transaction=False)
for bid, bicycle in enumerate(bicycles):
    pipeline.json().set(f'bicycle:{bid}', Path.root_path(), bicycle)
pipeline.execute()
# HIDE_END

# STEP_START agg1
search = Search(r, index_name="idx:bicycle")
aggregate_request = AggregateRequest(query='@condition:{new}') \
    .load('__key', 'price') \
    .apply(discounted='@price - (@price * 0.1)')
res = search.aggregate(aggregate_request)
print(len(res.rows)) # >>> 5
print(res.rows) # >>> [['__key', 'bicycle:0', ...
#[['__key', 'bicycle:0', 'price', '270', 'discounted', '243'],
# ['__key', 'bicycle:5', 'price', '810', 'discounted', '729'],
# ['__key', 'bicycle:6', 'price', '2300', 'discounted', '2070'],
# ['__key', 'bicycle:7', 'price', '430', 'discounted', '387'],
# ['__key', 'bicycle:8', 'price', '1200', 'discounted', '1080']]
# REMOVE_START
assert len(res.rows) == 5
# REMOVE_END
# STEP_END

# STEP_START agg2
search = Search(r, index_name="idx:bicycle")
aggregate_request = AggregateRequest(query='*') \
    .load('price') \
    .apply(price_category='@price<1000') \
    .group_by('@condition', reducers.sum('@price_category').alias('num_affordable'))
res = search.aggregate(aggregate_request)
print(len(res.rows)) # >>> 3
print(res.rows) # >>>
#[['condition', 'refurbished', 'num_affordable', '1'],
# ['condition', 'used', 'num_affordable', '1'],
# ['condition', 'new', 'num_affordable', '3']]
# REMOVE_START
assert len(res.rows) == 3
# REMOVE_END
# STEP_END

# STEP_START agg3
search = Search(r, index_name="idx:bicycle")
aggregate_request = AggregateRequest(query='*') \
    .apply(type="'bicycle'") \
    .group_by('@type', reducers.count().alias('num_total'))
res = search.aggregate(aggregate_request)
print(len(res.rows)) # >>> 1
print(res.rows) # >>> [['type', 'bicycle', 'num_total', '10']]
# REMOVE_START
assert len(res.rows) == 1
# REMOVE_END
# STEP_END

# STEP_START agg4
search = Search(r, index_name="idx:bicycle")
aggregate_request = AggregateRequest(query='*') \
    .load('__key') \
    .group_by('@condition', reducers.tolist('__key').alias('bicycles'))
res = search.aggregate(aggregate_request)
print(len(res.rows)) # >>> 3
print(res.rows) # >>>
#[['condition', 'refurbished', 'bicycles', ['bicycle:9']],
# ['condition', 'used', 'bicycles', ['bicycle:1', 'bicycle:2', 'bicycle:3', 'bicycle:4']],
# ['condition', 'new', 'bicycles', ['bicycle:5', 'bicycle:6', 'bicycle:7', 'bicycle:0', 'bicycle:8']]]
# REMOVE_START
assert len(res.rows) == 3
# REMOVE_END
# STEP_END

# REMOVE_START
# destroy index and data
r.ft("idx:bicycle").dropindex(delete_documents=True)
# REMOVE_END
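
Note: as the inline output above shows, search.aggregate() returns each result row as a flat ['field', value, 'field', value, ...] list. If you want to read the rows as mappings while experimenting with this doctest, a small helper along these lines works; it is only an illustrative sketch, not part of the committed file, and rows_to_dicts is a made-up name.

# Illustrative helper (not in the commit): pair the alternating
# field/value entries of each aggregation row into a dict.
def rows_to_dicts(rows):
    return [dict(zip(row[::2], row[1::2])) for row in rows]

# With the agg1 result above, for example:
# rows_to_dicts(res.rows)[0]
# -> {'__key': 'bicycle:0', 'price': '270', 'discounted': '243'}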

doctests/query_combined.py

Lines changed: 124 additions & 0 deletions
@@ -0,0 +1,124 @@
# EXAMPLE: query_combined
# HIDE_START
import json
import numpy as np
import redis
import warnings
from redis.commands.json.path import Path
from redis.commands.search.field import NumericField, TagField, TextField, VectorField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query
from sentence_transformers import SentenceTransformer


def embed_text(model, text):
    return np.array(model.encode(text)).astype(np.float32).tobytes()

warnings.filterwarnings("ignore", category=FutureWarning, message=r".*clean_up_tokenization_spaces.*")
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
query = "Bike for small kids"
query_vector = embed_text(model, query)

r = redis.Redis(decode_responses=True)

# create index
schema = (
    TextField("$.description", no_stem=True, as_name="model"),
    TagField("$.condition", as_name="condition"),
    NumericField("$.price", as_name="price"),
    VectorField(
        "$.description_embeddings",
        "FLAT",
        {
            "TYPE": "FLOAT32",
            "DIM": 384,
            "DISTANCE_METRIC": "COSINE",
        },
        as_name="vector",
    ),
)

index = r.ft("idx:bicycle")
index.create_index(
    schema,
    definition=IndexDefinition(prefix=["bicycle:"], index_type=IndexType.JSON),
)

# load data
with open("data/query_vector.json") as f:
    bicycles = json.load(f)

pipeline = r.pipeline(transaction=False)
for bid, bicycle in enumerate(bicycles):
    pipeline.json().set(f'bicycle:{bid}', Path.root_path(), bicycle)
pipeline.execute()
# HIDE_END

# STEP_START combined1
q = Query("@price:[500 1000] @condition:{new}")
res = index.search(q)
print(res.total) # >>> 1
# REMOVE_START
assert res.total == 1
# REMOVE_END
# STEP_END

# STEP_START combined2
q = Query("kids @price:[500 1000] @condition:{used}")
res = index.search(q)
print(res.total) # >>> 1
# REMOVE_START
assert res.total == 1
# REMOVE_END
# STEP_END

# STEP_START combined3
q = Query("(kids | small) @condition:{used}")
res = index.search(q)
print(res.total) # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END

# STEP_START combined4
q = Query("@description:(kids | small) @condition:{used}")
res = index.search(q)
print(res.total) # >>> 0
# REMOVE_START
assert res.total == 0
# REMOVE_END
# STEP_END

# STEP_START combined5
q = Query("@description:(kids | small) @condition:{new | used}")
res = index.search(q)
print(res.total) # >>> 0
# REMOVE_START
assert res.total == 0
# REMOVE_END
# STEP_END

# STEP_START combined6
q = Query("@price:[500 1000] -@condition:{new}")
res = index.search(q)
print(res.total) # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END

# STEP_START combined7
q = Query("(@price:[500 1000] -@condition:{new})=>[KNN 3 @vector $query_vector]").dialect(2)
# pass the query vector as a query parameter
res = index.search(q, {'query_vector': query_vector})
print(res.total) # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END

# REMOVE_START
# destroy index and data
r.ft("idx:bicycle").dropindex(delete_documents=True)
# REMOVE_END
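
The combined7 step runs a hybrid query (numeric and tag filters plus a KNN clause) but only prints the match count. As a rough illustration of inspecting the nearest matches themselves, the sketch below aliases the KNN distance and sorts on it; it is not part of the committed file, it assumes the index and query_vector defined above, and vector_dist is just an illustrative alias name.

# Illustrative variation (not in the commit): alias the KNN distance and
# sort on it so the closest matches inside the filter come back first.
q = (
    Query("(@price:[500 1000] -@condition:{new})=>[KNN 3 @vector $query_vector AS vector_dist]")
    .sort_by("vector_dist")
    .return_fields("vector_dist", "price", "condition")
    .dialect(2)
)
res = index.search(q, {"query_vector": query_vector})
for doc in res.docs:
    print(doc.id, doc.vector_dist, doc.price, doc.condition)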
