Skip to content

Commit 39f94c3

Browse files
Merge pull request #146 from TileDB-Inc/npapa/cloud_unit_tests
Add unit tests for cloud and demo notebooks
2 parents 37c9c9b + 93c6df7 commit 39f94c3

File tree

12 files changed

+142
-6
lines changed

12 files changed

+142
-6
lines changed

.github/workflows/ci_python.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,8 @@ jobs:
3535
#pip uninstall -y tiledb.vector_search
3636
#pip install -e .
3737
#pytest
38+
pip install -r test/ipynb/requirements.txt
39+
pytest --nbmake test/ipynb
40+
env:
41+
TILEDB_REST_TOKEN: ${{ secrets.TILEDB_CLOUD_HELPER_VAR }}
3842
shell: bash -el {0}

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,15 @@ development-build instructions. For large new
4242
features, please open an issue to discuss goals and approach in order
4343
to ensure a smooth PR integration and review process. All contributions
4444
must be licensed under the repository's [MIT License](../LICENSE).
45+
46+
# Testing
47+
48+
* Unit tests: `pytest`
49+
* Demo notebooks:
50+
* ```
51+
pip install -r test/ipynb/requirements.txt
52+
pytest --nbmake test/ipynb
53+
```
54+
* Credentials:
55+
* Some tests run on TileDB Cloud using your current environment variable `TILEDB_REST_TOKEN` -- you will need a valid API token for the tests to pass
56+
* For continuous integration, the token is configured for the `unittest` user and all tests should pass

apis/python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ dependencies = [
2626
]
2727

2828
[project.optional-dependencies]
29-
test = ["pytest"]
29+
test = ["nbmake", "pytest"]
3030

3131

3232
[project.urls]

apis/python/src/tiledb/vector_search/index.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,15 @@ def consolidate_updates(self):
236236
self.group.remove(self.update_arrays_uri)
237237
self.group.close()
238238
return new_index
239+
240+
@staticmethod
def delete_index(uri, config=None):
    """Delete the index group stored at ``uri``; do nothing if it is absent.

    Args:
        uri: URI of the TileDB group that holds the index.
        config: Optional TileDB configuration forwarded to ``tiledb.Group``
            when the group is opened. Defaults to ``None``.

    Raises:
        tiledb.TileDBError: If opening the group fails for any reason other
            than the group not existing.
    """
    try:
        # The group is opened in "m" mode before it is deleted.
        group = tiledb.Group(uri, "m", config=config)
    except tiledb.TileDBError as err:
        # A missing group is recognised by the error text; treat it as
        # "nothing to delete" so the call can be repeated safely.
        if "group does not exist" in str(err):
            return
        # Bare ``raise`` re-raises the active exception with its original
        # traceback untouched.
        raise
    group.delete()

apis/python/src/tiledb/vector_search/ingestion.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,11 +1743,12 @@ def consolidate_and_vacuum(
17431743
conf = tiledb.Config(config)
17441744
conf["sm.consolidation.mode"] = mode
17451745
conf["sm.vacuum.mode"] = mode
1746-
tiledb.consolidate(group[PARTS_ARRAY_NAME].uri, config=conf)
1747-
tiledb.vacuum(group[PARTS_ARRAY_NAME].uri, config=conf)
1748-
if index_type == "IVF_FLAT":
1749-
tiledb.consolidate(group[IDS_ARRAY_NAME].uri, config=conf)
1750-
tiledb.vacuum(group[IDS_ARRAY_NAME].uri, config=conf)
1746+
ids_uri = group[IDS_ARRAY_NAME].uri
1747+
parts_uri = group[PARTS_ARRAY_NAME].uri
1748+
tiledb.consolidate(parts_uri, config=conf)
1749+
tiledb.vacuum(parts_uri, config=conf)
1750+
tiledb.consolidate(ids_uri, config=conf)
1751+
tiledb.vacuum(ids_uri, config=conf)
17511752

17521753
# TODO remove temp data for tiledb URIs
17531754
if not index_group_uri.startswith("tiledb://"):

apis/python/test/common.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import numpy as np
44

5+
import random
6+
import string
57
import tiledb
68

79

@@ -183,3 +185,8 @@ def accuracy(result, gt, external_ids_offset=0, updated_ids=None):
183185
total += len(temp_result)
184186
found += len(np.intersect1d(temp_result, gt[i]))
185187
return found / total
188+
189+
# Generate random names for test array uris
190+
def random_name(name: str) -> str:
    """Return a name for a test resource, made unique by a random suffix.

    The result has the form ``zzz_unittest_<name>_<suffix>`` where
    ``<suffix>`` is ten random ASCII letters.

    Args:
        name: Human-readable label embedded in the generated name.

    Returns:
        The generated name.
    """
    # SystemRandom draws from the OS entropy source, so the suffix stays
    # unpredictable even if a test has called ``random.seed()``; with the
    # module-level generator, seeded runs would all produce the same name.
    rng = random.SystemRandom()
    suffix = "".join(rng.choices(string.ascii_letters, k=10))
    return f"zzz_unittest_{name}_{suffix}"

apis/python/test/ipynb/README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Cloud Notebook tests
2+
3+
Run all tests:
4+
```
5+
pip install -r test/ipynb/requirements.txt
6+
pytest --nbmake test/ipynb
7+
```
8+
9+
These tests use [nbmake](https://github.com/treebeardtech/nbmake) to execute the notebooks.
10+
Credentials:
11+
* Local tests: use `TILEDB_REST_TOKEN` -- you will need a valid API token for the tests to pass
12+
* For continuous integration, the token is configured for the `unittest` user and all tests should pass
13+
14+
When changes to these files are merged, make sure that you also propagate them to the notebooks registered on TileDB Cloud:
15+
* [tiledb_101_vector_search.ipynb](https://cloud.tiledb.com/notebooks/details/TileDB-Inc/b05dd4b4-ba1c-41c6-a3c9-a1de70abb039/preview)
16+
* [staging-vector-search-checks-py.ipynb](https://console.dev.tiledb.io/notebooks/details/TileDB-Inc/299dd052-6b45-4943-88ae-37639b7b4b48/preview)
17+
* [image-search-dashboard](https://cloud.tiledb.com/notebooks/details/TileDB-Inc/9289229a-1742-4f99-9b86-0bb1339b31a0/preview)

apis/python/test/ipynb/image-search-dashboard.ipynb

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
efficientnet
2+
ipywidgets
3+
panel
4+
tensorflow
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"cells":[{"cell_type":"code","execution_count":1,"id":"fff9da03-af7c-436e-ac56-6b7a8fe1d453","metadata":{"trusted":true},"outputs":[],"source":["import tiledb\n","from tiledb.cloud import client\n","import tiledb.vector_search as vs\n","from tiledb.vector_search.utils import *\n","\n","import sklearn\n","import numpy as np"]},{"cell_type":"code","execution_count":2,"id":"b83f2a9e-9af4-46bb-9513-fa5e302a4647","metadata":{"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["--2023-10-06 11:03:33-- https://github.com/TileDB-Inc/TileDB-Vector-Search/releases/download/0.0.1/siftsmall.tgz\n","Resolving github.com (github.com)... 140.82.112.4\n","Connecting to github.com (github.com)|140.82.112.4|:443... connected.\n","HTTP request sent, awaiting response... 302 Found\n","Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/627523373/b1990696-797c-4876-86c9-24cb101f7922?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231006%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231006T110333Z&X-Amz-Expires=300&X-Amz-Signature=7be26420dc408c0519e72dbc3ced4d62439d4fbefd61b40ccba35a28cb3422fa&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=627523373&response-content-disposition=attachment%3B%20filename%3Dsiftsmall.tgz&response-content-type=application%2Foctet-stream [following]\n","--2023-10-06 11:03:33-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/627523373/b1990696-797c-4876-86c9-24cb101f7922?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231006%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231006T110333Z&X-Amz-Expires=300&X-Amz-Signature=7be26420dc408c0519e72dbc3ced4d62439d4fbefd61b40ccba35a28cb3422fa&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=627523373&response-content-disposition=attachment%3B%20filename%3Dsiftsmall.tgz&response-content-type=application%2Foctet-stream\n","Resolving objects.githubusercontent.com 
(objects.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.108.133, ...\n","Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.109.133|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 5313773 (5.1M) [application/octet-stream]\n","Saving to: ‘siftsmall.tgz.10’\n","\n","siftsmall.tgz.10 100%[===================>] 5.07M --.-KB/s in 0.02s \n","\n","2023-10-06 11:03:33 (318 MB/s) - ‘siftsmall.tgz.10’ saved [5313773/5313773]\n","\n"]}],"source":["!cd /tmp && wget https://github.com/TileDB-Inc/TileDB-Vector-Search/releases/download/0.0.1/siftsmall.tgz\n","!cd /tmp && tar xf siftsmall.tgz"]},{"cell_type":"code","execution_count":3,"id":"d8e5151b-9672-4001-97eb-d56c76c1b8be","metadata":{"trusted":true},"outputs":[],"source":["def delete_if_exists(uri):\n"," try:\n"," group = tiledb.Group(uri, \"m\")\n"," except tiledb.TileDBError as err:\n"," message = str(err)\n"," if \"group does not exist\" in message:\n"," return\n"," else:\n"," raise err\n"," group.delete()\n"]},{"cell_type":"code","execution_count":4,"id":"b65fbb15-ec7a-4868-95f3-2a7da577b9cf","metadata":{"trusted":true},"outputs":[],"source":["namespace=client.default_user().username\n","\n","# Use this in staging notebook\n","# index_uri = f\"tiledb://TileDB-Inc/s3://tiledb-unittest/groups/unit-tests/vector_search/{namespace}/sift10k_flat\"\n","# ivf_index_uri = f\"tiledb://TileDB-Inc/s3://tiledb-unittest/groups/unit-tests/vector_search/{namespace}/sift10k_ivf_flat\"\n","\n","# Use this for local tests\n","index_uri = f\"tiledb://{namespace}/s3://tiledb-unittest/groups/unit-tests/vector_search/{namespace}/sift10k_flat\"\n","ivf_index_uri = f\"tiledb://{namespace}/s3://tiledb-unittest/groups/unit-tests/vector_search/{namespace}/sift10k_ivf_flat\"\n","\n","source_uri = \"/tmp/siftsmall_base.fvecs\"\n"," 
\n","delete_if_exists(index_uri)\n","delete_if_exists(ivf_index_uri)"]},{"cell_type":"code","execution_count":5,"id":"42ab8a5d-7888-47ef-b663-9fe84780332e","metadata":{"trusted":true},"outputs":[],"source":["flat_index = vs.ingest(\n"," index_type = \"FLAT\",\n"," index_uri = index_uri,\n"," source_uri = source_uri,\n",")"]},{"cell_type":"code","execution_count":6,"id":"5cdd3663-4b16-4731-a61d-9fd6be49c418","metadata":{"trusted":true},"outputs":[],"source":["ivf_flat_index = vs.ingest(\n"," index_type=\"IVF_FLAT\",\n"," source_uri=source_uri,\n"," index_uri=ivf_index_uri,\n",")"]},{"cell_type":"code","execution_count":7,"id":"87c535e9-d10b-436e-a098-4a376b74711f","metadata":{"trusted":true},"outputs":[],"source":["# Get query vectors with ground truth\n","query_vectors = load_fvecs(\"/tmp/siftsmall_query.fvecs\")\n","ground_truth = load_ivecs(\"/tmp/siftsmall_groundtruth.ivecs\")"]},{"cell_type":"code","execution_count":8,"id":"0efa4265-aabf-4f5f-8ea2-1c7081d70e47","metadata":{"trusted":true},"outputs":[],"source":["def accuracy(result, gt):\n"," found = 0\n"," total = 0\n"," i = 0\n"," for r in result:\n"," total += len(r)\n"," found += len(np.intersect1d(r, gt[i]))\n"," i += 1\n"," return found / total"]},{"cell_type":"code","execution_count":9,"id":"cdc67d8d-cea3-4017-b42b-26952a2a84a6","metadata":{"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Accuracy: 1.0\n"]}],"source":["# Return the 100 most similar vectors to the query vectors with FLAT\n","result_d, result_i = flat_index.query(query_vectors, k=100)\n","ac = accuracy(result_i, ground_truth)\n","print(f\"Accuracy: {ac}\")\n","assert ac == 1.0\n","\n"]},{"cell_type":"code","execution_count":10,"id":"5217e86d-a30d-4a16-b39b-c31d123786be","metadata":{"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Accuracy: 0.9204\n"]}],"source":["# Return the 100 most similar vectors to the query vectors with IVF_FLAT\n","# (you can set the nprobe 
parameter)\n","result_ivf_d, result_ivf_i = ivf_flat_index.query(query_vectors, nprobe=10, k=100)\n","ac = accuracy(result_ivf_i, ground_truth)\n","print(f\"Accuracy: {ac}\")\n","assert ac >= 0.85"]},{"cell_type":"code","execution_count":null,"id":"7fee6898-9369-43dd-92fa-067bac9df452","metadata":{"trusted":true},"outputs":[],"source":["# Test distributed query\n","result_ivf_d, result_ivf_i = ivf_flat_index.query(query_vectors, nprobe=10, k=100, mode=tiledb.cloud.dag.Mode.BATCH, num_partitions=2)\n","ac = accuracy(result_ivf_i, ground_truth)\n","print(f\"Accuracy: {ac}\")\n","assert ac >= 0.85"]}],"metadata":{"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.9.18"}},"nbformat":4,"nbformat_minor":5}

0 commit comments

Comments
 (0)