+{"cells":[{"cell_type":"code","execution_count":1,"id":"fff9da03-af7c-436e-ac56-6b7a8fe1d453","metadata":{"trusted":true},"outputs":[],"source":["import tiledb\n","from tiledb.cloud import client\n","import tiledb.vector_search as vs\n","from tiledb.vector_search.utils import *\n","\n","import sklearn\n","import numpy as np"]},{"cell_type":"code","execution_count":2,"id":"b83f2a9e-9af4-46bb-9513-fa5e302a4647","metadata":{"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["--2023-10-06 11:03:33-- https://github.com/TileDB-Inc/TileDB-Vector-Search/releases/download/0.0.1/siftsmall.tgz\n","Resolving github.com (github.com)... 140.82.112.4\n","Connecting to github.com (github.com)|140.82.112.4|:443... connected.\n","HTTP request sent, awaiting response... 302 Found\n","Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/627523373/b1990696-797c-4876-86c9-24cb101f7922?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231006%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231006T110333Z&X-Amz-Expires=300&X-Amz-Signature=7be26420dc408c0519e72dbc3ced4d62439d4fbefd61b40ccba35a28cb3422fa&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=627523373&response-content-disposition=attachment%3B%20filename%3Dsiftsmall.tgz&response-content-type=application%2Foctet-stream [following]\n","--2023-10-06 11:03:33-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/627523373/b1990696-797c-4876-86c9-24cb101f7922?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231006%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231006T110333Z&X-Amz-Expires=300&X-Amz-Signature=7be26420dc408c0519e72dbc3ced4d62439d4fbefd61b40ccba35a28cb3422fa&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=627523373&response-content-disposition=attachment%3B%20filename%3Dsiftsmall.tgz&response-content-type=application%2Foctet-stream\n","Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.108.133, ...\n","Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.109.133|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 5313773 (5.1M) [application/octet-stream]\n","Saving to: ‘siftsmall.tgz.10’\n","\n","siftsmall.tgz.10 100%[===================>] 5.07M --.-KB/s in 0.02s \n","\n","2023-10-06 11:03:33 (318 MB/s) - ‘siftsmall.tgz.10’ saved [5313773/5313773]\n","\n"]}],"source":["!cd /tmp && wget https://github.com/TileDB-Inc/TileDB-Vector-Search/releases/download/0.0.1/siftsmall.tgz\n","!cd /tmp && tar xf siftsmall.tgz"]},{"cell_type":"code","execution_count":3,"id":"d8e5151b-9672-4001-97eb-d56c76c1b8be","metadata":{"trusted":true},"outputs":[],"source":["def delete_if_exists(uri):\n"," try:\n"," group = tiledb.Group(uri, \"m\")\n"," except tiledb.TileDBError as err:\n"," message = str(err)\n"," if \"group does not exist\" in message:\n"," return\n"," else:\n"," raise err\n"," group.delete()\n"]},{"cell_type":"code","execution_count":4,"id":"b65fbb15-ec7a-4868-95f3-2a7da577b9cf","metadata":{"trusted":true},"outputs":[],"source":["namespace=client.default_user().username\n","\n","# Use this in staging notebook\n","# index_uri = f\"tiledb://TileDB-Inc/s3://tiledb-unittest/groups/unit-tests/vector_search/{namespace}/sift10k_flat\"\n","# ivf_index_uri = f\"tiledb://TileDB-Inc/s3://tiledb-unittest/groups/unit-tests/vector_search/{namespace}/sift10k_ivf_flat\"\n","\n","# Use this for local tests\n","index_uri = f\"tiledb://{namespace}/s3://tiledb-unittest/groups/unit-tests/vector_search/{namespace}/sift10k_flat\"\n","ivf_index_uri = f\"tiledb://{namespace}/s3://tiledb-unittest/groups/unit-tests/vector_search/{namespace}/sift10k_ivf_flat\"\n","\n","source_uri = \"/tmp/siftsmall_base.fvecs\"\n"," \n","delete_if_exists(index_uri)\n","delete_if_exists(ivf_index_uri)"]},{"cell_type":"code","execution_count":5,"id":"42ab8a5d-7888-47ef-b663-9fe84780332e","metadata":{"trusted":true},"outputs":[],"source":["flat_index = vs.ingest(\n"," index_type = \"FLAT\",\n"," index_uri = index_uri,\n"," source_uri = source_uri,\n",")"]},{"cell_type":"code","execution_count":6,"id":"5cdd3663-4b16-4731-a61d-9fd6be49c418","metadata":{"trusted":true},"outputs":[],"source":["ivf_flat_index = vs.ingest(\n"," index_type=\"IVF_FLAT\",\n"," source_uri=source_uri,\n"," index_uri=ivf_index_uri,\n",")"]},{"cell_type":"code","execution_count":7,"id":"87c535e9-d10b-436e-a098-4a376b74711f","metadata":{"trusted":true},"outputs":[],"source":["# Get query vectors with ground truth\n","query_vectors = load_fvecs(\"/tmp/siftsmall_query.fvecs\")\n","ground_truth = load_ivecs(\"/tmp/siftsmall_groundtruth.ivecs\")"]},{"cell_type":"code","execution_count":8,"id":"0efa4265-aabf-4f5f-8ea2-1c7081d70e47","metadata":{"trusted":true},"outputs":[],"source":["def accuracy(result, gt):\n"," found = 0\n"," total = 0\n"," i = 0\n"," for r in result:\n"," total += len(r)\n"," found += len(np.intersect1d(r, gt[i]))\n"," i += 1\n"," return found / total"]},{"cell_type":"code","execution_count":9,"id":"cdc67d8d-cea3-4017-b42b-26952a2a84a6","metadata":{"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Accuracy: 1.0\n"]}],"source":["# Return the 100 most similar vectors to the query vectors with FLAT\n","result_d, result_i = flat_index.query(query_vectors, k=100)\n","ac = accuracy(result_i, ground_truth)\n","print(f\"Accuracy: {ac}\")\n","assert ac == 1.0\n","\n"]},{"cell_type":"code","execution_count":10,"id":"5217e86d-a30d-4a16-b39b-c31d123786be","metadata":{"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Accuracy: 0.9204\n"]}],"source":["# Return the 100 most similar vectors to the query vectors with IVF_FLAT\n","# (you can set the nprobe parameter)\n","result_ivf_d, result_ivf_i = ivf_flat_index.query(query_vectors, nprobe=10, k=100)\n","ac = accuracy(result_ivf_i, ground_truth)\n","print(f\"Accuracy: {ac}\")\n","assert ac >= 0.85"]},{"cell_type":"code","execution_count":null,"id":"7fee6898-9369-43dd-92fa-067bac9df452","metadata":{"trusted":true},"outputs":[],"source":["# Test distributed query\n","result_ivf_d, result_ivf_i = ivf_flat_index.query(query_vectors, nprobe=10, k=100, mode=tiledb.cloud.dag.Mode.BATCH, num_partitions=2)\n","ac = accuracy(result_ivf_i, ground_truth)\n","print(f\"Accuracy: {ac}\")\n","assert ac >= 0.85"]}],"metadata":{"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.9.18"}},"nbformat":4,"nbformat_minor":5}
0 commit comments