diff --git a/.gitignore b/.gitignore index b6e40e93..311d909e 100644 --- a/.gitignore +++ b/.gitignore @@ -221,3 +221,8 @@ libs/redis/docs/.Trash* .python-version .idea/* java-recipes/.* + +python-recipes/vector-search/beir_datasets/ +python-recipes/vector-search/datasets + +.python-version diff --git a/.python-version b/.python-version deleted file mode 100644 index b6d8b761..00000000 --- a/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.11.8 diff --git a/python-recipes/redisvl-release/0.5.0_release_overview.ipynb b/python-recipes/redisvl-release/0.5.0_release_overview.ipynb new file mode 100644 index 00000000..8add4eab --- /dev/null +++ b/python-recipes/redisvl-release/0.5.0_release_overview.ipynb @@ -0,0 +1,681 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "# RedisVL 0.5.0 - Release overview\n", + "\n", + "This notebook provides an overview of what's new with the 0.5.0 release of redisvl. It also highlights changes and potential enhancements for existing usage.\n", + "\n", + "\"Open\n", + "\n", + "# What's new?\n", + "\n", + "- Hybrid query and text query classes\n", + "- Threshold optimizer classes\n", + "- Schema validation\n", + "- Timestamp filters\n", + "- Batched queries\n", + "- Vector normalization\n", + "- Hybrid policy on knn with filters\n", + "\n", + "# Env setup\n", + "\n", + "## Install Redis Stack\n", + "\n", + "#### For Colab\n", + "Use the shell script below to download, extract, and install [Redis Stack](https://redis.io/docs/getting-started/install-stack/) directly from the Redis package archive." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# NBVAL_SKIP\n", + "%%sh\n", + "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\n", + "echo \"deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/redis.list\n", + "sudo apt-get update > /dev/null 2>&1\n", + "sudo apt-get install redis-stack-server > /dev/null 2>&1\n", + "redis-stack-server --daemonize yes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### For Alternative Environments\n", + "There are many ways to get the necessary redis-stack instance running\n", + "1. On cloud, deploy a [FREE instance of Redis in the cloud](https://redis.com/try-free/). Or, if you have your\n", + "own version of Redis Enterprise running, that works too!\n", + "2. Per OS, [see the docs](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/)\n", + "3. With docker: `docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest`\n", + "\n", + "### Define the Redis Connection URL\n", + "\n", + "By default this notebook connects to the local instance of Redis Stack. **If you have your own Redis Enterprise instance** - replace REDIS_PASSWORD, REDIS_HOST and REDIS_PORT values with your own." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Replace values below with your own if using Redis Cloud instance\n", + "REDIS_HOST = os.getenv(\"REDIS_HOST\", \"localhost\") # ex: \"redis-18374.c253.us-central1-1.gce.cloud.redislabs.com\"\n", + "REDIS_PORT = os.getenv(\"REDIS_PORT\", \"6379\") # ex: 18374\n", + "REDIS_PASSWORD = os.getenv(\"REDIS_PASSWORD\", \"\") # ex: \"1TNxTEdYRDgIDKM2gDfasupCADXXXX\"\n", + "\n", + "# If SSL is enabled on the endpoint, use rediss:// as the URL prefix\n", + "REDIS_URL = f\"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Install redisvl 0.5.0" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting git+https://github.com/redis/redis-vl-python.git@0.5.0\n", + " Cloning https://github.com/redis/redis-vl-python.git (to revision 0.5.0) to /private/var/folders/_g/rr4lnxxx1_z7m78lz89dhvsm0000gp/T/pip-req-build-8zytawrt\n", + " Running command git clone --filter=blob:none --quiet https://github.com/redis/redis-vl-python.git /private/var/folders/_g/rr4lnxxx1_z7m78lz89dhvsm0000gp/T/pip-req-build-8zytawrt\n", + " Running command git checkout -b 0.5.0 --track origin/0.5.0\n", + " Switched to a new branch '0.5.0'\n", + " branch '0.5.0' set up to track 'origin/0.5.0'.\n", + " Resolved https://github.com/redis/redis-vl-python.git to commit 7ffe89e27e4783fe38c94c7b09ba436e9614ac51\n", + " Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... 
\u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: coloredlogs<16.0,>=15.0 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from redisvl==0.4.1) (15.0.1)\n", + "Requirement already satisfied: jsonpath-ng<2.0.0,>=1.5.0 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from redisvl==0.4.1) (1.7.0)\n", + "Requirement already satisfied: ml-dtypes<0.5.0,>=0.4.0 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from redisvl==0.4.1) (0.4.1)\n", + "Requirement already satisfied: numpy<2,>=1 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from redisvl==0.4.1) (1.26.4)\n", + "Requirement already satisfied: pydantic<3,>=2 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from redisvl==0.4.1) (2.10.6)\n", + "Requirement already satisfied: python-ulid<4.0.0,>=3.0.0 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from redisvl==0.4.1) (3.0.0)\n", + "Requirement already satisfied: pyyaml<7.0,>=5.4 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from redisvl==0.4.1) (6.0.1)\n", + "Requirement already satisfied: redis<6.0,>=5.0 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from redisvl==0.4.1) (5.2.1)\n", + "Requirement already satisfied: tabulate<0.10.0,>=0.9.0 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from redisvl==0.4.1) (0.9.0)\n", + "Requirement already satisfied: tenacity>=8.2.2 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from redisvl==0.4.1) (8.5.0)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from coloredlogs<16.0,>=15.0->redisvl==0.4.1) (10.0)\n", + "Requirement already satisfied: ply in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from 
jsonpath-ng<2.0.0,>=1.5.0->redisvl==0.4.1) (3.11)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from pydantic<3,>=2->redisvl==0.4.1) (0.6.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from pydantic<3,>=2->redisvl==0.4.1) (2.27.2)\n", + "Requirement already satisfied: typing-extensions>=4.12.2 in /Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages (from pydantic<3,>=2->redisvl==0.4.1) (4.12.2)\n", + "Building wheels for collected packages: redisvl\n", + " Building wheel for redisvl (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for redisvl: filename=redisvl-0.4.1-py3-none-any.whl size=124143 sha256=3bc67b2ba793ce48a3a039f69d054dbea177276b73924c9cb44814e864e68971\n", + " Stored in directory: /private/var/folders/_g/rr4lnxxx1_z7m78lz89dhvsm0000gp/T/pip-ephem-wheel-cache-ai8zqeop/wheels/95/dc/1e/d8dc251e38989044675dae0b596a2dee10cbfdecac5c62ccdf\n", + "Successfully built redisvl\n", + "Installing collected packages: redisvl\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "llama-index-vector-stores-redis 0.4.0 requires redisvl<0.4.0,>=0.3.4, but you have redisvl 0.4.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed redisvl-0.4.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install git+https://github.com/redis/redis-vl-python.git@0.5.0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hybrid query and text query classes\n", + "\n", + "In 0.5.0 we introduced classes to make it easier to perform lexical search in Redis, both standalone and combined with vector search.\n", + "\n", + "> TODO: update hybrid search notebook to use the class and make sure it works the same" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Threshold optimization\n", + "\n", + "In redisvl 0.5.0 we added the ability to quickly configure either your semantic cache or semantic router with test data examples. This requires a bit of setup, so it is covered in dedicated notebooks.\n", + "\n", + "See [semantic-cache/02_semantic_cache_optimization.ipynb](../semantic-cache/02_semantic_cache_optimization.ipynb) and [semantic-router/01_routing_optimization.ipynb](../semantic-router/01_routing_optimization.ipynb) for the full implementation details. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Schema validation\n", + "\n", + "This feature makes it easier to make sure your data is in the right format." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m09:35:07\u001b[0m \u001b[34mredisvl.index.index\u001b[0m \u001b[1;30mINFO\u001b[0m Index already exists, overwriting.\n" + ] + }, + { + "data": { + "text/plain": [ + "['cars:01JQRS067CVA87WKDVE4GXB9Y7', 'cars:01JQRS0699VDN8WB82VWHWFJ7B']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from redisvl.index import SearchIndex\n", + "\n", + "# sample schema\n", + "car_schema = {\n", + " \"index\": {\n", + " \"name\": \"cars\",\n", + " \"prefix\": \"cars\",\n", + " \"storage_type\": \"json\",\n", + " },\n", + " \"fields\": [\n", + " {\"name\": \"make\", \"type\": \"text\"},\n", + " {\"name\": \"model\", \"type\": \"text\"},\n", + " {\"name\": \"description\", \"type\": \"text\"},\n", + " {\"name\": \"mpg\", \"type\": \"numeric\"},\n", + " {\n", + " \"name\": \"car_embedding\",\n", + " \"type\": \"vector\",\n", + " \"attrs\": {\n", + " \"dims\": 3,\n", + " \"distance_metric\": \"cosine\",\n", + " \"algorithm\": \"flat\",\n", + " \"datatype\": \"float32\"\n", + " }\n", + "\n", + " }\n", + " ],\n", + "}\n", + "\n", + "sample_data_bad = [\n", + " {\n", + " \"make\": \"Toyota\",\n", + " \"model\": \"Camry\",\n", + " \"description\": \"A reliable sedan with great fuel economy.\",\n", + " \"mpg\": 28,\n", + " \"car_embedding\": [0.1, 0.2, 0.3]\n", + " },\n", + " {\n", + " # missing make and model\n", + " \"description\": \"A luxury SUV with advanced technology.\",\n", + " \"mpg\": 22,\n", + " \"car_embedding\": [0.4, 0.5, 0.6]\n", + " }\n", + "]\n", + "\n", + "# this should now throw an error\n", + "index = SearchIndex.from_dict(car_schema, redis_url=REDIS_URL, validate_on_load=True)\n", + "index.create(overwrite=True)\n", + "index.load(sample_data_bad)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Timestamp 
filters\n", + "\n", + "In Redis, datetime values are stored as numeric epoch timestamps. The `Timestamp` filter makes these fields easier to query by handling the conversion for you." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m09:48:49\u001b[0m \u001b[34mredisvl.index.index\u001b[0m \u001b[1;30mINFO\u001b[0m Index already exists, overwriting.\n" + ] + }, + { + "data": { + "text/plain": [ + "['jobs:01JQRSS9E2ENS2J2NSHEJS0THA',\n", + " 'jobs:01JQRSS9E2E7WXW5CQEB5VZZG8',\n", + " 'jobs:01JQRSS9E2J9YTY5DSFSF5HT0T']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# populate example \n", + "from redisvl.utils.vectorize import HFTextVectorizer\n", + "from redisvl.index import SearchIndex\n", + "import datetime as dt\n", + "\n", + "emb_model = HFTextVectorizer()\n", + "\n", + "job_data = [\n", + " {\n", + " \"job_title\": \"Software Engineer\",\n", + " \"job_description\": \"Develop and maintain web applications using JavaScript, React, and Node.js.\",\n", + " \"posted\": (dt.datetime.now() - dt.timedelta(days=1)).timestamp() # day ago\n", + " },\n", + " {\n", + " \"job_title\": \"Data Analyst\",\n", + " \"job_description\": \"Analyze large datasets to provide business insights and create data visualizations.\",\n", + " \"posted\": (dt.datetime.now() - dt.timedelta(days=7)).timestamp() # week ago\n", + " },\n", + " {\n", + " \"job_title\": \"Marketing Manager\",\n", + " \"job_description\": 
\"Develop and implement marketing strategies to drive brand awareness and customer engagement.\",\n", + " \"posted\": (dt.datetime.now() - dt.timedelta(days=30)).timestamp() # month ago\n", + " }\n", + "]\n", + "\n", + "job_data = [{**job, \"job_embedding\": emb_model.embed(job[\"job_description\"], as_buffer=True)} for job in job_data]\n", + "\n", + "\n", + "job_schema = {\n", + " \"index\": {\n", + " \"name\": \"jobs\",\n", + " \"prefix\": \"jobs\",\n", + " \"storage_type\": \"hash\", # default setting -- HASH\n", + " },\n", + " \"fields\": [\n", + " {\"name\": \"job_title\", \"type\": \"text\"},\n", + " {\"name\": \"job_description\", \"type\": \"text\"},\n", + " {\"name\": \"posted\", \"type\": \"numeric\"},\n", + " {\n", + " \"name\": \"job_embedding\",\n", + " \"type\": \"vector\",\n", + " \"attrs\": {\n", + " \"dims\": 768,\n", + " \"distance_metric\": \"cosine\",\n", + " \"algorithm\": \"flat\",\n", + " \"datatype\": \"float32\"\n", + " }\n", + "\n", + " }\n", + " ],\n", + "}\n", + "\n", + "index = SearchIndex.from_dict(job_schema, redis_url=REDIS_URL)\n", + "index.create(overwrite=True, drop=True)\n", + "index.load(job_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filter by Datetime" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': 'jobs:01JQRSS9E2ENS2J2NSHEJS0THA',\n", + " 'job_title': 'Software Engineer',\n", + " 'job_description': 'Develop and maintain web applications using JavaScript, React, and Node.js.',\n", + " 'posted': '1743428929.91'},\n", + " {'id': 'jobs:01JQRSS9E2E7WXW5CQEB5VZZG8',\n", + " 'job_title': 'Data Analyst',\n", + " 'job_description': 'Analyze large datasets to provide business insights and create data visualizations.',\n", + " 'posted': '1742910529.91'},\n", + " {'id': 'jobs:01JQRSS9E2J9YTY5DSFSF5HT0T',\n", + " 'job_title': 'Marketing Manager',\n", + " 'job_description': 'Develop and implement marketing 
strategies to drive brand awareness and customer engagement.',\n", + " 'posted': '1740926929.91'}]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from redisvl.query import FilterQuery\n", + "from redisvl.query.filter import Timestamp\n", + "\n", + "now = dt.datetime.now()\n", + "\n", + "# find all jobs\n", + "ts = Timestamp(\"posted\") < now\n", + "\n", + "filter_query = FilterQuery(\n", + " return_fields=[\"job_title\", \"job_description\", \"posted\"], \n", + " filter_expression=ts,\n", + " num_results=10,\n", + ")\n", + "res = index.query(filter_query)\n", + "res" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': 'jobs:01JQRSS9E2ENS2J2NSHEJS0THA',\n", + " 'job_title': 'Software Engineer',\n", + " 'job_description': 'Develop and maintain web applications using JavaScript, React, and Node.js.',\n", + " 'posted': '1743428929.91'}]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# jobs posted in the last 3 days => 1 job\n", + "ts = Timestamp(\"posted\") > now - dt.timedelta(days=3)\n", + "\n", + "filter_query = FilterQuery(\n", + " return_fields=[\"job_title\", \"job_description\", \"posted\"], \n", + " filter_expression=ts,\n", + " num_results=10,\n", + ")\n", + "res = index.query(filter_query)\n", + "res" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': 'jobs:01JQRSS9E2E7WXW5CQEB5VZZG8',\n", + " 'job_title': 'Data Analyst',\n", + " 'job_description': 'Analyze large datasets to provide business insights and create data visualizations.',\n", + " 'posted': '1742910529.91'}]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# more than 3 days ago but less than 14 days ago => 1 job\n", + "ts = 
Timestamp(\"posted\").between(\n", + " now - dt.timedelta(days=14),\n", + " now - dt.timedelta(days=3),\n", + ")\n", + "\n", + "filter_query = FilterQuery(\n", + " return_fields=[\"job_title\", \"job_description\", \"posted\"], \n", + " filter_expression=ts,\n", + " num_results=10,\n", + ")\n", + "\n", + "res = index.query(filter_query)\n", + "res" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Batch search\n", + "\n", + "This enhancement allows you to speed up the execution of queries by reducing the impact of network latency." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time taken for 100 queries: 0.21 seconds\n" + ] + } + ], + "source": [ + "import time\n", + "num_queries = 100\n", + "\n", + "start = time.time()\n", + "for i in range(num_queries):\n", + " # run the same filter query \n", + " res = index.query(filter_query)\n", + "end = time.time()\n", + "print(f\"Time taken for {num_queries} queries: {end - start:.2f} seconds\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time taken for 100 batched queries: 0.01 seconds\n" + ] + } + ], + "source": [ + "batched_queries = [filter_query] * num_queries\n", + "\n", + "start = time.time()\n", + "\n", + "index.batch_search(batched_queries, batch_size=10)\n", + "\n", + "end = time.time()\n", + "print(f\"Time taken for {num_queries} batched queries: {end - start:.2f} seconds\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Vector normalization\n", + "\n", + "By default Redis returns vector cosine *distance* when performing a search which returns a value between 0 and 2 where 0 would be a perfect match. 
Sometimes you may instead want a *similarity* score between 0 and 1, where 1 is a perfect match; setting `normalize_vector_distance=True` on the query does the conversion for you. Additionally, when this flag is set for L2 distance, the Euclidean distance is normalized to a value between 0 and 1 as well. \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': 'jobs:01JQPY6H4MZHY7YHZP8WRVH27K',\n", + " 'vector_distance': '0.7090711295605',\n", + " 'job_title': 'Software Engineer',\n", + " 'job_description': 'Develop and maintain web applications using JavaScript, React, and Node.js.',\n", + " 'posted': '1743366449.24'},\n", + " {'id': 'jobs:01JQRQZAXREMGYPHTRFMK72NK3',\n", + " 'vector_distance': '0.7090711295605',\n", + " 'job_title': 'Software Engineer',\n", + " 'job_description': 'Develop and maintain web applications using JavaScript, React, and Node.js.',\n", + " 'posted': '1743427030.59'},\n", + " {'id': 'jobs:01JQPY6H4M4MVKC4S9R4EQ69KA',\n", + " 'vector_distance': '0.6049451231955',\n", + " 'job_title': 'Data Analyst',\n", + " 'job_description': 'Analyze large datasets to provide business insights and create data visualizations.',\n", + " 'posted': '1742848049.24'},\n", + " {'id': 'jobs:01JQRQZAXRK36XRRPK0A2XJD4D',\n", + " 'vector_distance': '0.6049451231955',\n", + " 'job_title': 'Data Analyst',\n", + " 'job_description': 'Analyze large datasets to provide business insights and create data visualizations.',\n", + " 'posted': '1742908630.59'},\n", + " {'id': 'jobs:01JQPY6H4MJQX9KD739SWXZHBG',\n", + " 'vector_distance': '0.553376108408',\n", + " 'job_title': 'Marketing Manager',\n", + " 'job_description': 'Develop and implement marketing strategies to drive brand awareness and customer engagement.',\n", + " 'posted': '1740864449.24'},\n", + " {'id': 'jobs:01JQRQZAXREPHMVZCRFWC2N8DV',\n", + " 'vector_distance': '0.553376108408',\n", + " 'job_title': 'Marketing Manager',\n", + " 
'job_description': 'Develop and implement marketing strategies to drive brand awareness and customer engagement.',\n", + " 'posted': '1740925030.59'}]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from redisvl.query import VectorQuery\n", + "\n", + "query = VectorQuery(\n", + " vector=emb_model.embed(\"Software Engineer\", as_buffer=True),\n", + " vector_field_name=\"job_embedding\",\n", + " return_fields=[\"job_title\", \"job_description\", \"posted\"],\n", + " normalize_vector_distance=True,\n", + ")\n", + "\n", + "res = index.query(query)\n", + "res" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': 'jobs:01JQPY6H4MZHY7YHZP8WRVH27K',\n", + " 'vector_distance': '0.581857740879',\n", + " 'job_title': 'Software Engineer',\n", + " 'job_description': 'Develop and maintain web applications using JavaScript, React, and Node.js.',\n", + " 'posted': '1743366449.24'},\n", + " {'id': 'jobs:01JQRQZAXREMGYPHTRFMK72NK3',\n", + " 'vector_distance': '0.581857740879',\n", + " 'job_title': 'Software Engineer',\n", + " 'job_description': 'Develop and maintain web applications using JavaScript, React, and Node.js.',\n", + " 'posted': '1743427030.59'},\n", + " {'id': 'jobs:01JQPY6H4M4MVKC4S9R4EQ69KA',\n", + " 'vector_distance': '0.790109753609',\n", + " 'job_title': 'Data Analyst',\n", + " 'job_description': 'Analyze large datasets to provide business insights and create data visualizations.',\n", + " 'posted': '1742848049.24'},\n", + " {'id': 'jobs:01JQRQZAXRK36XRRPK0A2XJD4D',\n", + " 'vector_distance': '0.790109753609',\n", + " 'job_title': 'Data Analyst',\n", + " 'job_description': 'Analyze large datasets to provide business insights and create data visualizations.',\n", + " 'posted': '1742908630.59'},\n", + " {'id': 'jobs:01JQPY6H4MJQX9KD739SWXZHBG',\n", + " 'vector_distance': '0.893247783184',\n", + " 'job_title': 'Marketing 
Manager',\n", + " 'job_description': 'Develop and implement marketing strategies to drive brand awareness and customer engagement.',\n", + " 'posted': '1740864449.24'},\n", + " {'id': 'jobs:01JQRQZAXREPHMVZCRFWC2N8DV',\n", + " 'vector_distance': '0.893247783184',\n", + " 'job_title': 'Marketing Manager',\n", + " 'job_description': 'Develop and implement marketing strategies to drive brand awareness and customer engagement.',\n", + " 'posted': '1740925030.59'}]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from redisvl.query import VectorQuery\n", + "\n", + "query = VectorQuery(\n", + " vector=emb_model.embed(\"Software Engineer\", as_buffer=True),\n", + " vector_field_name=\"job_embedding\",\n", + " return_fields=[\"job_title\", \"job_description\", \"posted\"],\n", + " normalize_vector_distance=False,\n", + ")\n", + "\n", + "res = index.query(query)\n", + "res" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}