diff --git a/content/integrate/redisvl/api/searchindex.md b/content/integrate/redisvl/api/searchindex.md index 3d805d1477..5e00c89f49 100644 --- a/content/integrate/redisvl/api/searchindex.md +++ b/content/integrate/redisvl/api/searchindex.md @@ -14,7 +14,7 @@ type: integration ## SearchIndex -### `class SearchIndex(schema, redis_client=None, redis_url=None, connection_args={}, **kwargs)` +### `class SearchIndex(schema, redis_client=None, redis_url=None, connection_kwargs=None, **kwargs)` A search index class for interacting with Redis as a vector database. @@ -26,8 +26,7 @@ settings and field configurations. from redisvl.index import SearchIndex # initialize the index object with schema from file -index = SearchIndex.from_yaml("schemas/schema.yaml") -index.connect(redis_url="redis://localhost:6379") +index = SearchIndex.from_yaml("schemas/schema.yaml", redis_url="redis://localhost:6379") # create the index index.create(overwrite=True) @@ -49,7 +48,7 @@ kwargs. instantiated redis client. * **redis_url** (*Optional* *[* *str* *]*) – The URL of the Redis server to connect to. - * **connection_args** (*Dict* *[* *str* *,* *Any* *]* *,* *optional*) – Redis client connection + * **connection_kwargs** (*Dict* *[* *str* *,* *Any* *]* *,* *optional*) – Redis client connection args. #### `aggregate(*args, **kwargs)` @@ -85,13 +84,13 @@ extra options specific to the Redis connection. * **Parameters:** **redis_url** (*Optional* *[* *str* *]* *,* *optional*) – The URL of the Redis server to - connect to. If not provided, the method defaults to using the - REDIS_URL environment variable. + connect to. * **Raises:** * **redis.exceptions.ConnectionError** – If the connection to the Redis server fails. * **ValueError** – If the Redis URL is not provided nor accessible through the REDIS_URL environment variable. + * **ModuleNotFoundError** – If required Redis modules are not installed. 
```python index.connect(redis_url="redis://localhost:6379") @@ -158,6 +157,16 @@ Check if the index exists in Redis. * **Return type:** bool +#### `expire_keys(keys, ttl)` + +Set the expiration time for a specific entry or entries in Redis. + +* **Parameters:** + * **keys** (*Union* *[* *str* *,* *List* *[* *str* *]* *]*) – The entry ID or IDs to set the expiration for. + * **ttl** (*int*) – The time-to-live in seconds. +* **Return type:** + int | *List*[int] + #### `fetch(id)` Fetch an object from Redis by id. @@ -210,6 +219,9 @@ Initialize from an existing search index in Redis by index name. instantiated redis client. * **redis_url** (*Optional* *[* *str* *]*) – The URL of the Redis server to connect to. +* **Raises:** + * **ValueError** – If redis_url or redis_client is not provided. + * **RedisModuleVersionError** – If required Redis modules are not installed. #### `classmethod from_yaml(schema_path, **kwargs)` @@ -438,7 +450,7 @@ hash or json. ## AsyncSearchIndex -### `class AsyncSearchIndex(schema, **kwargs)` +### `class AsyncSearchIndex(schema, *, redis_url=None, redis_client=None, connection_kwargs=None, **kwargs)` A search index class for interacting with Redis as a vector database in async-mode. @@ -451,8 +463,10 @@ various settings and field configurations. from redisvl.index import AsyncSearchIndex # initialize the index object with schema from file -index = AsyncSearchIndex.from_yaml("schemas/schema.yaml") -await index.connect(redis_url="redis://localhost:6379") +index = AsyncSearchIndex.from_yaml( + "schemas/schema.yaml", + redis_url="redis://localhost:6379" +) # create the index await index.create(overwrite=True) @@ -468,7 +482,11 @@ Initialize the RedisVL async search index with a schema. * **Parameters:** * **schema** ([*IndexSchema*]({{< relref "schema/#indexschema" >}})) – Index schema object. 
- * **connection_args** (*Dict* *[* *str* *,* *Any* *]* *,* *optional*) – Redis client connection + * **redis_url** (*Optional* *[* *str* *]* *,* *optional*) – The URL of the Redis server to + connect to. + * **redis_client** (*Optional* *[* *aredis.Redis* *]*) – An + instantiated redis client. + * **connection_kwargs** (*Optional* *[* *Dict* *[* *str* *,* *Any* *]* *]*) – Redis client connection args. #### `async aggregate(*args, **kwargs)` @@ -494,27 +512,12 @@ available and in-place for future insertions or updates. * **Return type:** int -#### `async connect(redis_url=None, **kwargs)` - -Connect to a Redis instance using the provided redis_url, falling -back to the REDIS_URL environment variable (if available). +#### `connect(redis_url=None, **kwargs)` -Note: Additional keyword arguments (\*\*kwargs) can be used to provide -extra options specific to the Redis connection. +[DEPRECATED] Connect to a Redis instance. Use connection parameters in \_\_init_\_. * **Parameters:** - **redis_url** (*Optional* *[* *str* *]* *,* *optional*) – The URL of the Redis server to - connect to. If not provided, the method defaults to using the - REDIS_URL environment variable. -* **Raises:** - * **redis.exceptions.ConnectionError** – If the connection to the Redis - server fails. - * **ValueError** – If the Redis URL is not provided nor accessible - through the REDIS_URL environment variable. - -```python -index.connect(redis_url="redis://localhost:6379") -``` + **redis_url** (*str* *|* *None*) #### `async create(overwrite=False, drop=False)` @@ -553,9 +556,9 @@ Delete the search index. * **Raises:** **redis.exceptions.ResponseError** – If the index does not exist. -#### `disconnect()` +#### `async disconnect()` -Disconnect and cleanup the underlying async redis connection. +Disconnect from the Redis database. #### `async drop_keys(keys)` @@ -577,6 +580,16 @@ Check if the index exists in Redis. 
* **Return type:** bool +#### `async expire_keys(keys, ttl)` + +Set the expiration time for a specific entry or entries in Redis. + +* **Parameters:** + * **keys** (*Union* *[* *str* *,* *List* *[* *str* *]* *]*) – The entry ID or IDs to set the expiration for. + * **ttl** (*int*) – The time-to-live in seconds. +* **Return type:** + int | *List*[int] + #### `async fetch(id)` Asynchronously etch an object from Redis by id. The id is typically @@ -805,29 +818,13 @@ to the redis-py ft.search() method. * **Return type:** Result -#### `async set_client(redis_client)` +#### `set_client(redis_client)` -Manually set the Redis client to use with the search index. - -This method configures the search index to use a specific -Async Redis client. It is useful for cases where an external, -custom-configured client is preferred instead of creating a new one. +[DEPRECATED] Manually set the Redis client to use with the search index. +This method is deprecated; please provide connection parameters in \_\_init_\_. * **Parameters:** - **redis_client** (*aredis.Redis*) – An Async Redis - client instance to be used for the connection. -* **Raises:** - **TypeError** – If the provided client is not valid. 
- -```python -import redis.asyncio as aredis -from redisvl.index import AsyncSearchIndex - -# async Redis client and index -client = aredis.Redis.from_url("redis://localhost:6379") -index = AsyncSearchIndex.from_yaml("schemas/schema.yaml") -await index.set_client(client) -``` + **redis_client** (*Redis* *|* *Redis*) #### `property client: Redis | None` diff --git a/content/integrate/redisvl/overview/cli.md b/content/integrate/redisvl/overview/cli.md index 1d1653aeda..565c3094a4 100644 --- a/content/integrate/redisvl/overview/cli.md +++ b/content/integrate/redisvl/overview/cli.md @@ -19,7 +19,7 @@ Before running this notebook, be sure to !rvl version ``` - 18:12:25 [RedisVL] INFO RedisVL version 0.3.9 + 16:19:10 [RedisVL] INFO RedisVL version 0.4.0 ## Commands diff --git a/content/integrate/redisvl/user_guide/_index.md b/content/integrate/redisvl/user_guide/_index.md index a133211c3b..8baf1e07e6 100644 --- a/content/integrate/redisvl/user_guide/_index.md +++ b/content/integrate/redisvl/user_guide/_index.md @@ -11,48 +11,52 @@ User guides provide helpful resources for using RedisVL and its different compon -* [Getting Started with RedisVL](01_getting_started/) - * [Define an `IndexSchema`](01_getting_started/#define-an-indexschema) - * [Sample Dataset Preparation](01_getting_started/#sample-dataset-preparation) - * [Create a `SearchIndex`](01_getting_started/#create-a-searchindex) - * [Inspect with the `rvl` CLI](01_getting_started/#inspect-with-the-rvl-cli) - * [Load Data to `SearchIndex`](01_getting_started/#load-data-to-searchindex) - * [Creating `VectorQuery` Objects](01_getting_started/#creating-vectorquery-objects) - * [Using an Asynchronous Redis Client](01_getting_started/#using-an-asynchronous-redis-client) - * [Updating a schema](01_getting_started/#updating-a-schema) - * [Check Index Stats](01_getting_started/#check-index-stats) - * [Cleanup](01_getting_started/#cleanup) -* [Querying with RedisVL](02_hybrid_queries/) - * [Hybrid 
Queries](02_hybrid_queries/#hybrid-queries) - * [Combining Filters](02_hybrid_queries/#combining-filters) - * [Non-vector Queries](02_hybrid_queries/#non-vector-queries) - * [Count Queries](02_hybrid_queries/#count-queries) - * [Range Queries](02_hybrid_queries/#range-queries) - * [Advanced Query Modifiers](02_hybrid_queries/#advanced-query-modifiers) -* [Semantic Caching for LLMs](03_llmcache/) - * [Initializing `SemanticCache`](03_llmcache/#initializing-semanticcache) - * [Basic Cache Usage](03_llmcache/#basic-cache-usage) - * [Customize the Distance Threshhold](03_llmcache/#customize-the-distance-threshhold) - * [Utilize TTL](03_llmcache/#utilize-ttl) - * [Simple Performance Testing](03_llmcache/#simple-performance-testing) - * [Cache Access Controls, Tags & Filters](03_llmcache/#cache-access-controls-tags-filters) -* [Vectorizers](04_vectorizers/) - * [Creating Text Embeddings](04_vectorizers/#creating-text-embeddings) - * [Search with Provider Embeddings](04_vectorizers/#search-with-provider-embeddings) - * [Selecting your float data type](04_vectorizers/#selecting-your-float-data-type) -* [Hash vs JSON Storage](05_hash_vs_json/) - * [Hash or JSON – how to choose?](05_hash_vs_json/#hash-or-json-how-to-choose) - * [Cleanup](05_hash_vs_json/#cleanup) -* [Rerankers](06_rerankers/) - * [Simple Reranking](06_rerankers/#simple-reranking) -* [LLM Session Memory](07_session_manager/) - * [Managing multiple users and conversations](07_session_manager/#managing-multiple-users-and-conversations) - * [Semantic conversation memory](07_session_manager/#semantic-conversation-memory) - * [Conversation control](07_session_manager/#conversation-control) -* [Semantic Routing](08_semantic_router/) - * [Define the Routes](08_semantic_router/#define-the-routes) - * [Initialize the SemanticRouter](08_semantic_router/#initialize-the-semanticrouter) - * [Simple routing](08_semantic_router/#simple-routing) - * [Update the routing config](08_semantic_router/#update-the-routing-config) - 
* [Router serialization](08_semantic_router/#router-serialization) - * [Clean up the router](08_semantic_router/#clean-up-the-router) +* [Getting Started with RedisVL](getting_started/) + * [Define an `IndexSchema`](getting_started/#define-an-indexschema) + * [Sample Dataset Preparation](getting_started/#sample-dataset-preparation) + * [Create a `SearchIndex`](getting_started/#create-a-searchindex) + * [Inspect with the `rvl` CLI](getting_started/#inspect-with-the-rvl-cli) + * [Load Data to `SearchIndex`](getting_started/#load-data-to-searchindex) + * [Creating `VectorQuery` Objects](getting_started/#creating-vectorquery-objects) + * [Using an Asynchronous Redis Client](getting_started/#using-an-asynchronous-redis-client) + * [Updating a schema](getting_started/#updating-a-schema) + * [Check Index Stats](getting_started/#check-index-stats) + * [Cleanup](getting_started/#cleanup) +* [Querying with RedisVL](hybrid_queries/) + * [Hybrid Queries](hybrid_queries/#hybrid-queries) + * [Combining Filters](hybrid_queries/#combining-filters) + * [Non-vector Queries](hybrid_queries/#non-vector-queries) + * [Count Queries](hybrid_queries/#count-queries) + * [Range Queries](hybrid_queries/#range-queries) + * [Advanced Query Modifiers](hybrid_queries/#advanced-query-modifiers) +* [Semantic Caching for LLMs](llmcache/) + * [Initializing `SemanticCache`](llmcache/#initializing-semanticcache) + * [Basic Cache Usage](llmcache/#basic-cache-usage) + * [Customize the Distance Threshhold](llmcache/#customize-the-distance-threshhold) + * [Utilize TTL](llmcache/#utilize-ttl) + * [Simple Performance Testing](llmcache/#simple-performance-testing) + * [Cache Access Controls, Tags & Filters](llmcache/#cache-access-controls-tags-filters) +* [Vectorizers](vectorizers/) + * [Creating Text Embeddings](vectorizers/#creating-text-embeddings) + * [Search with Provider Embeddings](vectorizers/#search-with-provider-embeddings) + * [Selecting your float data 
type](vectorizers/#selecting-your-float-data-type) +* [Hash vs JSON Storage](hash_vs_json/) + * [Hash or JSON – how to choose?](hash_vs_json/#hash-or-json-how-to-choose) + * [Cleanup](hash_vs_json/#cleanup) +* [Working with nested data in JSON](hash_vs_json/#working-with-nested-data-in-json) + * [Full JSON Path support](hash_vs_json/#full-json-path-support) + * [As an example:](hash_vs_json/#as-an-example) +* [Cleanup](hash_vs_json/#id1) +* [Rerankers](rerankers/) + * [Simple Reranking](rerankers/#simple-reranking) +* [LLM Session Memory](session_manager/) + * [Managing multiple users and conversations](session_manager/#managing-multiple-users-and-conversations) + * [Semantic conversation memory](session_manager/#semantic-conversation-memory) + * [Conversation control](session_manager/#conversation-control) +* [Semantic Routing](semantic_router/) + * [Define the Routes](semantic_router/#define-the-routes) + * [Initialize the SemanticRouter](semantic_router/#initialize-the-semanticrouter) + * [Simple routing](semantic_router/#simple-routing) + * [Update the routing config](semantic_router/#update-the-routing-config) + * [Router serialization](semantic_router/#router-serialization) + * [Clean up the router](semantic_router/#clean-up-the-router) diff --git a/content/integrate/redisvl/user_guide/getting_started.md b/content/integrate/redisvl/user_guide/getting_started.md index 6032bdb897..dd02b3c86c 100644 --- a/content/integrate/redisvl/user_guide/getting_started.md +++ b/content/integrate/redisvl/user_guide/getting_started.md @@ -148,7 +148,7 @@ index = SearchIndex.from_dict(schema) Now we also need to facilitate a Redis connection. 
There are a few ways to do this: - Create & manage your own client connection (recommended) -- Provide a simple Redis URL and let RedisVL connect on your behalf +- Provide a Redis URL and let RedisVL connect on your behalf (by default, it will connect to "redis://localhost:6379") ### Bring your own Redis connection instance @@ -159,9 +159,14 @@ This is ideal in scenarios where you have custom settings on the connection inst from redis import Redis client = Redis.from_url("redis://localhost:6379") +index = SearchIndex.from_dict(schema, redis_client=client) + +# alternatively, provide an async Redis client object to enable async index operations +# from redis.asyncio import Redis +# from redisvl.index import AsyncSearchIndex +# client = Redis.from_url("redis://localhost:6379") +# index = AsyncSearchIndex.from_dict(schema, redis_client=client) -index.set_client(client) -# optionally provide an async Redis client object to enable async index operations ``` @@ -177,8 +182,10 @@ This is ideal for simple cases: ```python -index.connect("redis://localhost:6379") -# optionally use an async client by passing use_async=True +index = SearchIndex.from_dict(schema, redis_url="redis://localhost:6379") + +# If you don't specify a client or Redis URL, the index will attempt to +# connect to Redis at the default address ("redis://localhost:6379"). ``` @@ -299,8 +306,11 @@ results = index.query(query) result_print(results) ``` + *=>[KNN 3 @user_embedding $vector AS vector_distance] RETURN 6 user age job credit_score vector_distance vector_distance SORTBY vector_distance ASC DIALECT 2 LIMIT 0 3 -
vector_distanceuseragejobcredit_score
0john1engineerhigh
0mary2doctorlow
0.0566299557686tyler9engineerhigh
+ + +<table><tr><th>vector_distance</th><th>user</th><th>age</th><th>job</th><th>credit_score</th></tr><tr><td>0</td><td>john</td><td>1</td><td>engineer</td><td>high</td></tr><tr><td>0</td><td>mary</td><td>2</td><td>doctor</td><td>low</td></tr><tr><td>0.0566299557686</td><td>tyler</td><td>9</td><td>engineer</td><td>high</td></tr></table> ## Using an Asynchronous Redis Client @@ -337,8 +347,7 @@ from redis.asyncio import Redis client = Redis.from_url("redis://localhost:6379") -index = AsyncSearchIndex.from_dict(schema) -await index.set_client(client) +index = AsyncSearchIndex.from_dict(schema, redis_client=client) ``` @@ -393,7 +402,7 @@ index.schema.add_fields([ await index.create(overwrite=True, drop=False) ``` - 11:53:25 redisvl.index.index INFO Index already exists, overwriting. + 11:28:32 redisvl.index.index INFO Index already exists, overwriting. @@ -404,7 +413,7 @@ result_print(results) ``` -
vector_distanceuseragejobcredit_score
0john1engineerhigh
0mary2doctorlow
0.0566299557686tyler9engineerhigh
+
vector_distanceuseragejobcredit_score
0mary2doctorlow
0john1engineerhigh
0.0566299557686tyler9engineerhigh
## Check Index Stats @@ -426,19 +435,19 @@ Use the `rvl` CLI to check the stats for the index: │ num_records │ 22 │ │ percent_indexed │ 1 │ │ hash_indexing_failures │ 0 │ - │ number_of_uses │ 5 │ - │ bytes_per_record_avg │ 50.9091 │ + │ number_of_uses │ 2 │ + │ bytes_per_record_avg │ 47.8 │ │ doc_table_size_mb │ 0.000423431 │ - │ inverted_sz_mb │ 0.00106812 │ + │ inverted_sz_mb │ 0.000911713 │ │ key_table_size_mb │ 0.000165939 │ - │ offset_bits_per_record_avg │ 8 │ - │ offset_vectors_sz_mb │ 5.72205e-06 │ - │ offsets_per_term_avg │ 0.272727 │ - │ records_per_doc_avg │ 5.5 │ + │ offset_bits_per_record_avg │ nan │ + │ offset_vectors_sz_mb │ 0 │ + │ offsets_per_term_avg │ 0 │ + │ records_per_doc_avg │ 5 │ │ sortable_values_size_mb │ 0 │ - │ total_indexing_time │ 0.197 │ - │ total_inverted_index_blocks │ 12 │ - │ vector_index_sz_mb │ 0.0201416 │ + │ total_indexing_time │ 0.239 │ + │ total_inverted_index_blocks │ 11 │ + │ vector_index_sz_mb │ 0.235603 │ ╰─────────────────────────────┴─────────────╯ diff --git a/content/integrate/redisvl/user_guide/hash_vs_json.md b/content/integrate/redisvl/user_guide/hash_vs_json.md index ade92835b1..b141ffe93c 100644 --- a/content/integrate/redisvl/user_guide/hash_vs_json.md +++ b/content/integrate/redisvl/user_guide/hash_vs_json.md @@ -102,10 +102,7 @@ hash_schema = { ```python # construct a search index from the hash schema -hindex = SearchIndex.from_dict(hash_schema) - -# connect to local redis instance -hindex.connect("redis://localhost:6379") +hindex = SearchIndex.from_dict(hash_schema, redis_url="redis://localhost:6379") # create the index (no data yet) hindex.create(overwrite=True) @@ -193,10 +190,12 @@ from redisvl.query.filter import Tag, Text, Num t = (Tag("credit_score") == "high") & (Text("job") % "enginee*") & (Num("age") > 17) -v = VectorQuery([0.1, 0.1, 0.5], - "user_embedding", - return_fields=["user", "credit_score", "age", "job", "office_location"], - filter_expression=t) +v = VectorQuery( + vector=[0.1, 0.1, 0.5], + 
vector_field_name="user_embedding", + return_fields=["user", "credit_score", "age", "job", "office_location"], + filter_expression=t +) results = hindex.query(v) @@ -216,30 +215,12 @@ hindex.delete() ``` ### Working with JSON -Redis also supports native **JSON** objects. These can be multi-level (nested) objects, with full JSONPath support for updating/retrieving sub elements: - -```python -{ - "name": "bike", - "metadata": { - "model": "Deimos", - "brand": "Ergonom", - "type": "Enduro bikes", - "price": 4972, - } -} -``` JSON is best suited for use cases with the following characteristics: - Ease of use and data model flexibility are top concerns - Application data is already native JSON - Replacing another document storage/db solution -#### Full JSON Path support -Because Redis enables full JSON path support, when creating an index schema, elements need to be indexed and selected by their path with the desired `name` AND `path` that points to where the data is located within the objects. - -By default, RedisVL will assume the path as `$.{name}` if not provided in JSON fields schema. - ```python # define the json index schema @@ -273,10 +254,7 @@ json_schema = { ```python # construct a search index from the json schema -jindex = SearchIndex.from_dict(json_schema) - -# connect to local redis instance -jindex.connect("redis://localhost:6379") +jindex = SearchIndex.from_dict(json_schema, redis_url="redis://localhost:6379") # create the index (no data yet) jindex.create(overwrite=True) @@ -297,8 +275,6 @@ Vectorized data stored in JSON must be stored as a pure array (python list) of f ```python -import numpy as np - json_data = data.copy() for d in json_data: @@ -344,3 +320,168 @@ result_print(jindex.query(v)) ```python jindex.delete() ``` + +# Working with nested data in JSON + +Redis also supports native **JSON** objects. 
These can be multi-level (nested) objects, with full JSONPath support for updating/retrieving sub elements: + +```json +{ + "name": "Specialized Stump jumper", + "metadata": { + "model": "Stumpjumper", + "brand": "Specialized", + "type": "Enduro bikes", + "price": 3000 + } +} +``` + +#### Full JSON Path support +Because Redis enables full JSON path support, when creating an index schema, elements need to be indexed and selected by their path with the desired `name` AND `path` that points to where the data is located within the objects. + +By default, RedisVL will assume the path is `$.{name}` if it is not provided in the JSON fields schema. For nested fields, provide the path as `$.object.attribute`. + +### As an example: + + +```python +from redisvl.utils.vectorize import HFTextVectorizer + +emb_model = HFTextVectorizer() + +bike_data = [ + { + "name": "Specialized Stump jumper", + "metadata": { + "model": "Stumpjumper", + "brand": "Specialized", + "type": "Enduro bikes", + "price": 3000 + }, + "description": "The Specialized Stumpjumper is a versatile enduro bike that dominates both climbs and descents. Features a FACT 11m carbon fiber frame, FOX FLOAT suspension with 160mm travel, and SRAM X01 Eagle drivetrain. The asymmetric frame design and internal storage compartment make it a practical choice for all-day adventures." + }, + { + "name": "bike_2", + "metadata": { + "model": "Slash", + "brand": "Trek", + "type": "Enduro bikes", + "price": 5000 + }, + "description": "Trek's Slash is built for aggressive enduro riding and racing. Featuring Trek's Alpha Aluminum frame with RE:aktiv suspension technology, 160mm travel, and Knock Block frame protection. Equipped with Bontrager components and a Shimano XT drivetrain, this bike excels on technical trails and enduro race courses." 
+ } +] + +bike_data = [{**d, "bike_embedding": emb_model.embed(d["description"])} for d in bike_data] + +bike_schema = { + "index": { + "name": "bike-json", + "prefix": "bike-json", + "storage_type": "json", # JSON storage type + }, + "fields": [ + { + "name": "model", + "type": "tag", + "path": "$.metadata.model" # note the '$' + }, + { + "name": "brand", + "type": "tag", + "path": "$.metadata.brand" + }, + { + "name": "price", + "type": "numeric", + "path": "$.metadata.price" + }, + { + "name": "bike_embedding", + "type": "vector", + "attrs": { + "dims": len(bike_data[0]["bike_embedding"]), + "distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float32" + } + + } + ], +} +``` + + /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. + warnings.warn( + + + +```python +# construct a search index from the json schema +bike_index = SearchIndex.from_dict(bike_schema, redis_url="redis://localhost:6379") + +# create the index (no data yet) +bike_index.create(overwrite=True) +``` + + +```python +bike_index.load(bike_data) +``` + + + + + ['bike-json:de92cb9955434575b20f4e87a30b03d5', + 'bike-json:054ab3718b984532b924946fa5ce00c6'] + + + + +```python +from redisvl.query import VectorQuery + +vec = emb_model.embed("I'd like a bike for aggressive riding") + +v = VectorQuery( + vector=vec, + vector_field_name="bike_embedding", + return_fields=[ + "brand", + "name", + "$.metadata.type" + ] +) + + +results = bike_index.query(v) +``` + +**Note:** As shown in the example, if you want to retrieve a field from a JSON object that was not indexed, you will also need to supply its full path, as with `$.metadata.type`. 
+ + +```python +results +``` + + + + + [{'id': 'bike-json:054ab3718b984532b924946fa5ce00c6', + 'vector_distance': '0.519989073277', + 'brand': 'Trek', + '$.metadata.type': 'Enduro bikes'}, + {'id': 'bike-json:de92cb9955434575b20f4e87a30b03d5', + 'vector_distance': '0.657624483109', + 'brand': 'Specialized', + '$.metadata.type': 'Enduro bikes'}] + + + +# Cleanup + + +```python +bike_index.delete() +``` diff --git a/content/integrate/redisvl/user_guide/hybrid_queries.md b/content/integrate/redisvl/user_guide/hybrid_queries.md index 15bfd11ccb..25b2ea8b2b 100644 --- a/content/integrate/redisvl/user_guide/hybrid_queries.md +++ b/content/integrate/redisvl/user_guide/hybrid_queries.md @@ -60,23 +60,33 @@ schema = { from redisvl.index import SearchIndex # construct a search index from the schema -index = SearchIndex.from_dict(schema) - -# connect to local redis instance -index.connect("redis://localhost:6379") +index = SearchIndex.from_dict(schema, redis_url="redis://localhost:6379") # create the index (no data yet) index.create(overwrite=True) ``` + 13:02:18 redisvl.index.index INFO Index already exists, overwriting. + + ```python # use the CLI to see the created index !rvl index listall ``` - 14:16:51 [RedisVL] INFO Indices: - 14:16:51 [RedisVL] INFO 1. user_queries + 13:02:25 [RedisVL] INFO Indices: + 13:02:25 [RedisVL] INFO 1. float64_cache + 13:02:25 [RedisVL] INFO 2. float64_session + 13:02:25 [RedisVL] INFO 3. float16_cache + 13:02:25 [RedisVL] INFO 4. float16_session + 13:02:25 [RedisVL] INFO 5. float32_session + 13:02:25 [RedisVL] INFO 6. float32_cache + 13:02:25 [RedisVL] INFO 7. bfloat_cache + 13:02:25 [RedisVL] INFO 8. user_queries + 13:02:25 [RedisVL] INFO 9. student tutor + 13:02:25 [RedisVL] INFO 10. tutor + 13:02:25 [RedisVL] INFO 11. bfloat_session @@ -112,7 +122,7 @@ result_print(results) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
@@ -125,7 +135,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0.217882037163taimurlow15CEO-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
@@ -138,7 +148,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
@@ -151,7 +161,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
What about scenarios where you might want to dynamically generate a list of tags? Have no fear. RedisVL allows you to do this gracefully without having to check for the **empty case**. The **empty case** is when you attempt to run a Tag filter on a field with no defined values to match: @@ -170,7 +180,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
### Numeric Filters @@ -188,7 +198,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
@@ -201,7 +211,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
@@ -214,7 +224,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
### Text Filters @@ -233,7 +243,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
@@ -246,7 +256,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
@@ -259,7 +269,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
@@ -272,7 +282,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
@@ -285,7 +295,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
@@ -298,7 +308,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
Use raw query strings as input. Below we use the `~` flag to indicate that the full text query is optional. We also choose the BM25 scorer and return document scores along with the result. @@ -314,15 +324,15 @@ index.query(v) - [{'id': 'user_queries_docs:409ff48274724984ba14865db0495fc5', - 'score': 0.9090908893868948, + [{'id': 'user_queries_docs:01JMJJHE28ZW4F33ZNRKXRHYCS', + 'score': 1.8181817787737895, 'vector_distance': '0', 'user': 'john', 'credit_score': 'high', 'age': '18', 'job': 'engineer', 'office_location': '-122.4194,37.7749'}, - {'id': 'user_queries_docs:69cb262c303a4147b213dfdec8bd4b01', + {'id': 'user_queries_docs:01JMJJHE2899024DYPXT6424N9', 'score': 0.0, 'vector_distance': '0', 'user': 'derrick', @@ -330,15 +340,47 @@ index.query(v) 'age': '14', 'job': 'doctor', 'office_location': '-122.4194,37.7749'}, - {'id': 'user_queries_docs:562263669ff74a0295c515018d151d7b', - 'score': 0.9090908893868948, + {'id': 'user_queries_docs:01JMJJPEYCQ89ZQW6QR27J72WT', + 'score': 1.8181817787737895, + 'vector_distance': '0', + 'user': 'john', + 'credit_score': 'high', + 'age': '18', + 'job': 'engineer', + 'office_location': '-122.4194,37.7749'}, + {'id': 'user_queries_docs:01JMJJPEYD544WB1TKDBJ3Z3J9', + 'score': 0.0, + 'vector_distance': '0', + 'user': 'derrick', + 'credit_score': 'low', + 'age': '14', + 'job': 'doctor', + 'office_location': '-122.4194,37.7749'}, + {'id': 'user_queries_docs:01JMJJHE28B5R6T00DH37A7KSJ', + 'score': 1.8181817787737895, + 'vector_distance': '0.109129190445', + 'user': 'tyler', + 'credit_score': 'high', + 'age': '100', + 'job': 'engineer', + 'office_location': '-122.0839,37.3861'}, + {'id': 'user_queries_docs:01JMJJPEYDPF9S5328WHCQN0ND', + 'score': 1.8181817787737895, 'vector_distance': '0.109129190445', 'user': 'tyler', 'credit_score': 'high', 'age': '100', 'job': 'engineer', 'office_location': '-122.0839,37.3861'}, - {'id': 'user_queries_docs:94176145f9de4e288ca2460cd5d1188e', + {'id': 'user_queries_docs:01JMJJHE28G5F943YGWMB1ZX1V', + 
'score': 0.0, + 'vector_distance': '0.158808946609', + 'user': 'tim', + 'credit_score': 'high', + 'age': '12', + 'job': 'dermatologist', + 'office_location': '-122.0839,37.3861'}, + {'id': 'user_queries_docs:01JMJJPEYDKA9ARKHRK1D7KPXQ', 'score': 0.0, 'vector_distance': '0.158808946609', 'user': 'tim', @@ -346,7 +388,7 @@ index.query(v) 'age': '12', 'job': 'dermatologist', 'office_location': '-122.0839,37.3861'}, - {'id': 'user_queries_docs:d0bcf6842862410583901004b6b3aeba', + {'id': 'user_queries_docs:01JMJJHE28NR7KF0EZEA433T2J', 'score': 0.0, 'vector_distance': '0.217882037163', 'user': 'taimur', @@ -354,21 +396,13 @@ index.query(v) 'age': '15', 'job': 'CEO', 'office_location': '-122.0839,37.3861'}, - {'id': 'user_queries_docs:3dec0e9f2db04e19bff224c5a2a0ba3c', + {'id': 'user_queries_docs:01JMJJPEYD9EAVGJ2AZ8K9VX7Q', 'score': 0.0, - 'vector_distance': '0.266666650772', - 'user': 'nancy', - 'credit_score': 'high', - 'age': '94', - 'job': 'doctor', - 'office_location': '-122.4194,37.7749'}, - {'id': 'user_queries_docs:93ee6c0e4ccb42f6b7af7858ea6a6408', - 'score': 0.0, - 'vector_distance': '0.653301358223', - 'user': 'joe', - 'credit_score': 'medium', - 'age': '35', - 'job': 'dentist', + 'vector_distance': '0.217882037163', + 'user': 'taimur', + 'credit_score': 'low', + 'age': '15', + 'job': 'CEO', 'office_location': '-122.0839,37.3861'}] @@ -389,7 +423,7 @@ result_print(index.query(v)) ``` -
scorevector_distanceusercredit_scoreagejoboffice_location
0.45454544469344740johnhigh18engineer-122.4194,37.7749
0.45454544469344740derricklow14doctor-122.4194,37.7749
0.45454544469344740.266666650772nancyhigh94doctor-122.4194,37.7749
+
scorevector_distanceusercredit_scoreagejoboffice_location
0.45454544469344740johnhigh18engineer-122.4194,37.7749
0.45454544469344740derricklow14doctor-122.4194,37.7749
0.45454544469344740johnhigh18engineer-122.4194,37.7749
0.45454544469344740derricklow14doctor-122.4194,37.7749
0.45454544469344740.266666650772nancyhigh94doctor-122.4194,37.7749
0.45454544469344740.266666650772nancyhigh94doctor-122.4194,37.7749
@@ -402,7 +436,7 @@ result_print(index.query(v)) ``` -
scorevector_distanceusercredit_scoreagejoboffice_location
0.45454544469344740johnhigh18engineer-122.4194,37.7749
0.45454544469344740derricklow14doctor-122.4194,37.7749
0.45454544469344740.109129190445tylerhigh100engineer-122.0839,37.3861
0.45454544469344740.158808946609timhigh12dermatologist-122.0839,37.3861
0.45454544469344740.217882037163taimurlow15CEO-122.0839,37.3861
0.45454544469344740.266666650772nancyhigh94doctor-122.4194,37.7749
0.45454544469344740.653301358223joemedium35dentist-122.0839,37.3861
+
scorevector_distanceusercredit_scoreagejoboffice_location
0.45454544469344740johnhigh18engineer-122.4194,37.7749
0.45454544469344740derricklow14doctor-122.4194,37.7749
0.45454544469344740johnhigh18engineer-122.4194,37.7749
0.45454544469344740derricklow14doctor-122.4194,37.7749
0.45454544469344740.109129190445tylerhigh100engineer-122.0839,37.3861
0.45454544469344740.109129190445tylerhigh100engineer-122.0839,37.3861
0.45454544469344740.158808946609timhigh12dermatologist-122.0839,37.3861
0.45454544469344740.158808946609timhigh12dermatologist-122.0839,37.3861
0.45454544469344740.217882037163taimurlow15CEO-122.0839,37.3861
0.45454544469344740.217882037163taimurlow15CEO-122.0839,37.3861
@@ -415,7 +449,7 @@ result_print(index.query(v)) ``` -
scorevector_distanceusercredit_scoreagejoboffice_location
0.00.109129190445tylerhigh100engineer-122.0839,37.3861
0.00.158808946609timhigh12dermatologist-122.0839,37.3861
0.00.217882037163taimurlow15CEO-122.0839,37.3861
0.00.653301358223joemedium35dentist-122.0839,37.3861
+
scorevector_distanceusercredit_scoreagejoboffice_location
0.00.109129190445tylerhigh100engineer-122.0839,37.3861
0.00.109129190445tylerhigh100engineer-122.0839,37.3861
0.00.158808946609timhigh12dermatologist-122.0839,37.3861
0.00.158808946609timhigh12dermatologist-122.0839,37.3861
0.00.217882037163taimurlow15CEO-122.0839,37.3861
0.00.217882037163taimurlow15CEO-122.0839,37.3861
0.00.653301358223joemedium35dentist-122.0839,37.3861
0.00.653301358223joemedium35dentist-122.0839,37.3861
## Combining Filters @@ -442,7 +476,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
### Union ("or") @@ -461,7 +495,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
### Dynamic Combination @@ -497,7 +531,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
@@ -509,7 +543,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
+
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
@@ -521,7 +555,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
@@ -533,7 +567,7 @@ result_print(index.query(v)) ``` -
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
+
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
## Non-vector Queries @@ -557,7 +591,7 @@ result_print(results) ``` -
usercredit_scoreagejob
derricklow14doctor
taimurlow15CEO
+
usercredit_scoreagejob
derricklow14doctor
taimurlow15CEO
derricklow14doctor
taimurlow15CEO
## Count Queries @@ -577,7 +611,7 @@ count = index.query(filter_query) print(f"{count} records match the filter expression {str(has_low_credit)} for the given index.") ``` - 2 records match the filter expression @credit_score:{low} for the given index. + 4 records match the filter expression @credit_score:{low} for the given index. ## Range Queries @@ -602,7 +636,7 @@ result_print(results) ``` -
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
0.109129190445tylerhigh100engineer
0.158808946609timhigh12dermatologist
+
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
0johnhigh18engineer
0derricklow14doctor
0.109129190445tylerhigh100engineer
0.109129190445tylerhigh100engineer
0.158808946609timhigh12dermatologist
0.158808946609timhigh12dermatologist
We can also change the distance threshold of the query object between uses if we like. Here we will set ``distance_threshold=0.1``. This means that the query will return all matches that are within a vector distance of 0.1 of the query vector. This is a small distance, so we expect to get fewer matches than before. @@ -615,7 +649,7 @@ result_print(index.query(range_query)) ``` -
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
+
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
0johnhigh18engineer
0derricklow14doctor
Range queries can also be used with filters like any other query type. The following limits the results to only include records with a ``job`` of ``engineer`` while also being within the vector range (aka distance). @@ -630,7 +664,7 @@ result_print(index.query(range_query)) ``` -
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
+
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0johnhigh18engineer
## Advanced Query Modifiers @@ -653,7 +687,7 @@ result_print(result) ``` -
vector_distanceageusercredit_scorejoboffice_location
0.109129190445100tylerhighengineer-122.0839,37.3861
018johnhighengineer-122.4194,37.7749
+
vector_distanceageusercredit_scorejoboffice_location
0.109129190445100tylerhighengineer-122.0839,37.3861
0.109129190445100tylerhighengineer-122.0839,37.3861
018johnhighengineer-122.4194,37.7749
018johnhighengineer-122.4194,37.7749
### Raw Redis Query String @@ -715,10 +749,14 @@ for r in results.docs: print(r.__dict__) ``` - {'id': 'user_queries_docs:409ff48274724984ba14865db0495fc5', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\x00\x00\x00?'} - {'id': 'user_queries_docs:3dec0e9f2db04e19bff224c5a2a0ba3c', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\x00\x00\x00?'} - {'id': 'user_queries_docs:562263669ff74a0295c515018d151d7b', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\x00\x00\x00?'} - {'id': 'user_queries_docs:94176145f9de4e288ca2460cd5d1188e', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\x00\x00\x00?'} + {'id': 'user_queries_docs:01JMJJHE28G5F943YGWMB1ZX1V', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\x00\x00\x00?'} + {'id': 'user_queries_docs:01JMJJHE28ZW4F33ZNRKXRHYCS', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\x00\x00\x00?'} + {'id': 'user_queries_docs:01JMJJHE28B5R6T00DH37A7KSJ', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\x00\x00\x00?'} + {'id': 'user_queries_docs:01JMJJHE28EX13NEE7BGBM8FH3', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\x00\x00\x00?'} + {'id': 'user_queries_docs:01JMJJPEYCQ89ZQW6QR27J72WT', 'payload': None, 'user': 
'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\x00\x00\x00?'} + {'id': 'user_queries_docs:01JMJJPEYDAN0M3V7EQEVPS6HX', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\x00\x00\x00?'} + {'id': 'user_queries_docs:01JMJJPEYDPF9S5328WHCQN0ND', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\x00\x00\x00?'} + {'id': 'user_queries_docs:01JMJJPEYDKA9ARKHRK1D7KPXQ', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\x00\x00\x00?'} diff --git a/content/integrate/redisvl/user_guide/vectorizers.md b/content/integrate/redisvl/user_guide/vectorizers.md index 516861aca3..fe538de131 100644 --- a/content/integrate/redisvl/user_guide/vectorizers.md +++ b/content/integrate/redisvl/user_guide/vectorizers.md @@ -75,6 +75,25 @@ print("Vector dimensions: ", len(test)) test[:10] ``` + Vector dimensions: 1536 + + + + + + [-0.0011391325388103724, + -0.003206387162208557, + 0.002380132209509611, + -0.004501554183661938, + -0.010328996926546097, + 0.012922565452754498, + -0.005491119809448719, + -0.0029864837415516376, + -0.007327961269766092, + -0.03365817293524742] + + + ```python # Create many embeddings at once @@ -89,6 +108,22 @@ embeddings[0][:10] ``` + + + [-0.017466850578784943, + 1.8471690054866485e-05, + 0.00129731057677418, + -0.02555876597762108, + -0.019842341542243958, + 0.01603139191865921, + -0.0037347301840782166, + 0.0009670283179730177, + 0.006618348415941, + -0.02497442066669464] + + + + ```python # openai also supports asynchronous requests, which we can use to speed up the vectorization process. 
embeddings = await oai.aembed_many(sentences) @@ -96,6 +131,9 @@ print("Number of Embeddings:", len(embeddings)) ``` + Number of Embeddings: 3 + + ### Azure OpenAI The ``AzureOpenAITextVectorizer`` is a variation of the OpenAI vectorizer that calls OpenAI models within Azure. If you've already installed ``openai``, then you're ready to use Azure OpenAI. @@ -132,6 +170,66 @@ test[:10] ``` + --------------------------------------------------------------------------- + + ValueError Traceback (most recent call last) + + Cell In[7], line 4 + 1 from redisvl.utils.vectorize import AzureOpenAITextVectorizer + 3 # create a vectorizer + ----> 4 az_oai = AzureOpenAITextVectorizer( + 5 model=deployment_name, # Must be your CUSTOM deployment name + 6 api_config={ + 7 "api_key": api_key, + 8 "api_version": api_version, + 9 "azure_endpoint": azure_endpoint + 10 }, + 11 ) + 13 test = az_oai.embed("This is a test sentence.") + 14 print("Vector dimensions: ", len(test)) + + + File ~/src/redis-vl-python/redisvl/utils/vectorize/text/azureopenai.py:78, in AzureOpenAITextVectorizer.__init__(self, model, api_config, dtype) + 54 def __init__( + 55 self, + 56 model: str = "text-embedding-ada-002", + 57 api_config: Optional[Dict] = None, + 58 dtype: str = "float32", + 59 ): + 60 """Initialize the AzureOpenAI vectorizer. + 61 + 62 Args: + (...) + 76 ValueError: If an invalid dtype is provided. + 77 """ + ---> 78 self._initialize_clients(api_config) + 79 super().__init__(model=model, dims=self._set_model_dims(model), dtype=dtype) + + + File ~/src/redis-vl-python/redisvl/utils/vectorize/text/azureopenai.py:106, in AzureOpenAITextVectorizer._initialize_clients(self, api_config) + 99 azure_endpoint = ( + 100 api_config.pop("azure_endpoint") + 101 if api_config + 102 else os.getenv("AZURE_OPENAI_ENDPOINT") + 103 ) + 105 if not azure_endpoint: + --> 106 raise ValueError( + 107 "AzureOpenAI API endpoint is required. 
" + 108 "Provide it in api_config or set the AZURE_OPENAI_ENDPOINT\ + 109 environment variable." + 110 ) + 112 api_version = ( + 113 api_config.pop("api_version") + 114 if api_config + 115 else os.getenv("OPENAI_API_VERSION") + 116 ) + 118 if not api_version: + + + ValueError: AzureOpenAI API endpoint is required. Provide it in api_config or set the AZURE_OPENAI_ENDPOINT environment variable. + + + ```python # Just like OpenAI, AzureOpenAI supports batching embeddings and asynchronous requests. sentences = [ @@ -423,10 +521,7 @@ fields: from redisvl.index import SearchIndex # construct a search index from the schema -index = SearchIndex.from_yaml("./schema.yaml") - -# connect to local redis instance -index.connect("redis://localhost:6379") +index = SearchIndex.from_yaml("./schema.yaml", redis_url="redis://localhost:6379") # create the index (no data yet) index.create(overwrite=True)