Skip to content

Commit ee73170

Browse files
authored
Fix/run Python tests and pip build; add array_to_matrix function (#30)
1 parent 74ced60 commit ee73170

File tree

14 files changed

+119
-36
lines changed

14 files changed

+119
-36
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,6 @@ jobs:
1919
- name: Configure CMake
2020
run: cmake -S ./src -B ./src/build -DCMAKE_BUILD_TYPE=Debug
2121
- name: Build
22-
run: cmake --build ./src/build
22+
run: cmake --build ./src/build -j3
2323
- name: Run Tests
2424
run: cmake --build ./src/build --target check-ci

.github/workflows/ci_python.yml

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,14 @@ on:
77
branches: [main]
88

99
jobs:
10-
Run-Tests:
10+
run-tests:
1111
strategy:
1212
matrix:
1313
os: [ubuntu-latest]
1414
python-version: ["3.9"]
1515
runs-on: ${{ matrix.os }}
1616
steps:
17-
- name: Install OpenBLAS
18-
run: sudo apt install libopenblas-dev
17+
- uses: actions/checkout@v3
1918
- name: Set up Python ${{ matrix.python-version }}
2019
uses: actions/setup-python@v2
2120
with:
@@ -25,10 +24,12 @@ jobs:
2524
which python
2625
which pip
2726
python --version
28-
- name: Install pybind11
29-
run: pip install "pybind11[global]"
30-
- uses: actions/checkout@v3
31-
- name: Configure CMake
32-
run: cmake -S ./src -B ./src/build -DCMAKE_BUILD_TYPE=Debug -DTILEDB_VS_PYTHON=ON
33-
- name: Build
34-
run: cmake --build ./src/build
27+
- name: Build and test python
28+
run: |
29+
cd apis/python
30+
pip install .[test]
31+
pytest -k "not ingest" # TODO: requires token
32+
#pip uninstall -y tiledb.vector_search
33+
#pip install -e .
34+
#pytest -k "not ingest" # TODO: requires token
35+
shell: bash -el {0}

apis/python/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ repository = "https://github.com/TileDB-Inc/tiledb-vector-search"
3333
[build-system]
3434
requires = [
3535
"setuptools>=42", "wheel", "setuptools_scm>=6",
36-
"scikit-build>=0.13", "pybind11"
36+
"scikit-build>=0.13", "pybind11[global]",
37+
"cmake", "ninja"
3738
]
3839
build-backend = "setuptools.build_meta"
3940

apis/python/setup.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ def get_cmake_overrides():
77

88
conf = list()
99

10+
tiledb_dir = os.environ.get("TILEDB_DIR", None)
11+
if tiledb_dir:
12+
cmake_args.append(f"-DTileDB_DIR={tiledb_dir}")
13+
1014
key = "CMAKE_OSX_DEPLOYMENT_TARGET"
1115
val = os.environ.get(key, default=None)
1216
if val:

apis/python/src/tiledb/vector_search/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from .ingestion import ingest
33
from .module import load_as_array
44
from .module import load_as_matrix
5-
from .module import query_vq
5+
from .module import query_vq, query_kmeans, validate_top_k, array_to_matrix
66

77
__all__ = [
88
"FlatIndex",
@@ -11,4 +11,6 @@
1111
"ingest",
1212
"query_vq",
1313
"query_kmeans",
14+
"validate_top_k",
15+
"array_to_matrix",
1416
]

apis/python/src/tiledb/vector_search/module.cc

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,22 @@ static void declare_kmeans_query(py::module& m, const std::string& suffix) {
102102
}, py::keep_alive<1,2>());
103103
}
104104

105+
template <typename T>
106+
static void declare_pyarray_to_matrix(py::module& m, const std::string& suffix) {
107+
m.def(("pyarray_copyto_matrix" + suffix).c_str(),
108+
[](py::array_t<T> arr) -> ColMajorMatrix<T> {
109+
py::buffer_info info = arr.request();
110+
if (info.ndim != 2)
111+
throw std::runtime_error("Number of dimensions must be two");
112+
if (info.format != py::format_descriptor<T>::format())
113+
throw std::runtime_error("Mismatched buffer format!");
114+
115+
auto data = std::unique_ptr<T[]>{new T[info.shape[0] * info.shape[1]]};
116+
auto r = ColMajorMatrix<T>(std::move(data), info.shape[0], info.shape[1]);
117+
return r;
118+
});
119+
}
120+
105121

106122
// Declarations for typed subclasses of ColMajorMatrix
107123
template <typename P>
@@ -133,41 +149,48 @@ PYBIND11_MODULE(_tiledbvspy, m) {
133149
}
134150
));
135151

136-
/* Vector */
152+
/* === Vector === */
153+
137154
declareVector<uint32_t>(m, "_u32");
138155
declareVector<uint64_t>(m, "_u64");
139156
declareVector<float>(m, "_f32");
140157
declareVector<double>(m, "_f64");
141158

142159
m.def("read_vector_u32", &read_vector<uint32_t>, "Read a vector from TileDB");
143160
m.def("read_vector_u64", &read_vector<uint64_t>, "Read a vector from TileDB");
161+
162+
144163
/* === Matrix === */
145164

146165
// template specializations
147-
//declareTdbMatrix<float>(m, "_f32");
148-
149166
declareColMajorMatrix<uint8_t>(m, "_u8");
150167
declareColMajorMatrix<float>(m, "_f32");
151168
declareColMajorMatrix<double>(m, "_f64");
152169
declareColMajorMatrix<int32_t>(m, "_i32");
153170
declareColMajorMatrix<int64_t>(m, "_i64");
154-
// declareColMajorMatrix<uint64_t>(m, "_u64");
155-
declareColMajorMatrix<size_t>(m, "_szt");
171+
declareColMajorMatrix<uint32_t>(m, "_u32");
172+
declareColMajorMatrix<uint64_t>(m, "_u64");
173+
if constexpr (!std::is_same<uint64_t, unsigned long>::value) {
174+
// Required for a return type, but these types are equivalent on linux :/
175+
declareColMajorMatrix<unsigned long>(m, "_ul");
176+
}
156177

157178
declareColMajorMatrixSubclass<tdbColMajorMatrix<uint8_t>>(
158179
m, "tdbColMajorMatrix", "_u8");
159-
declareColMajorMatrixSubclass<tdbColMajorMatrix<size_t>>(
160-
m, "tdbColMajorMatrix", "_szt");
180+
declareColMajorMatrixSubclass<tdbColMajorMatrix<uint64_t>>(
181+
m, "tdbColMajorMatrix", "_u64");
161182
declareColMajorMatrixSubclass<tdbColMajorMatrix<float>>(
162183
m, "tdbColMajorMatrix", "_f32");
163184
declareColMajorMatrixSubclass<tdbColMajorMatrix<int32_t>>(
164185
m, "tdbColMajorMatrix", "_i32");
165186
declareColMajorMatrixSubclass<tdbColMajorMatrix<int64_t>>(
166187
m, "tdbColMajorMatrix", "_i64");
167-
// declareColMajorMatrixSubclass<tdbColMajorMatrix<uint64_t>>(
168-
// m, "tdbColMajorMatrix", "_u64");
169-
170188

189+
// Converters from pyarray to matrix
190+
declare_pyarray_to_matrix<uint8_t>(m, "_u8");
191+
declare_pyarray_to_matrix<uint64_t>(m, "_u64");
192+
declare_pyarray_to_matrix<float>(m, "_f32");
193+
declare_pyarray_to_matrix<double>(m, "_f64");
171194

172195
/* Query API */
173196

@@ -176,7 +199,7 @@ PYBIND11_MODULE(_tiledbvspy, m) {
176199
const ColMajorMatrix<float>& query_vectors,
177200
int k,
178201
bool nth,
179-
size_t nthreads) {
202+
size_t nthreads) -> ColMajorMatrix<uint64_t> {
180203
auto r = detail::flat::vq_query_heap(data, query_vectors, k, nthreads);
181204
return r;
182205
});
@@ -186,13 +209,13 @@ PYBIND11_MODULE(_tiledbvspy, m) {
186209
const ColMajorMatrix<float>& query_vectors,
187210
int k,
188211
bool nth,
189-
size_t nthreads) {
212+
size_t nthreads) -> ColMajorMatrix<uint64_t> {
190213
auto r = detail::flat::vq_query_heap(data, query_vectors, k, nthreads);
191214
return r;
192215
});
193216

194-
m.def("validate_top_k",
195-
[](const ColMajorMatrix<size_t>& top_k,
217+
m.def("validate_top_k_u64",
218+
[](const ColMajorMatrix<uint64_t>& top_k,
196219
const ColMajorMatrix<int32_t>& ground_truth) -> bool {
197220
return validate_top_k(top_k, ground_truth);
198221
});

apis/python/src/tiledb/vector_search/module.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import tiledb
66
from tiledb.vector_search._tiledbvspy import *
7+
from tiledb.vector_search import _tiledbvspy as cc
78

89

910
def load_as_matrix(path: str, nqueries: int = 0, config: Dict = {}):
@@ -146,3 +147,25 @@ def query_kmeans(
146147
return kmeans_query_u8(*args)
147148
else:
148149
raise TypeError("Unknown type!")
150+
151+
152+
def validate_top_k(results: np.ndarray, ground_truth: np.ndarray):
    """Dispatch top-k validation to the typed extension function.

    Only ``uint64`` results are handled; they are forwarded, together with
    ``ground_truth``, to ``cc.validate_top_k_u64`` and its return value is
    passed back unchanged.

    Raises:
        TypeError: if ``results.dtype`` is not ``uint64``.
    """
    # Guard first so the single supported path reads straight through.
    if results.dtype != np.uint64:
        raise TypeError("Unknown type for validate_top_k!")
    return cc.validate_top_k_u64(results, ground_truth)
157+
158+
159+
def array_to_matrix(array: np.ndarray):
    """Copy a NumPy array into the extension module's matrix type.

    The array's dtype selects the typed converter
    ``pyarray_copyto_matrix_<suffix>`` from the compiled module.

    Args:
        array: NumPy array with dtype float32, float64, uint8 or uint64.

    Returns:
        Whatever the selected ``pyarray_copyto_matrix_*`` converter returns.

    Raises:
        TypeError: if no converter is available for ``array.dtype``.
    """
    # NOTE(review): the extension module registers converters for u8, u64,
    # f32 and f64 only; no `_i32` converter is visible, so int32 is reported
    # as unsupported here instead of failing with a NameError. Add it to this
    # table once the binding is registered.
    suffix_by_dtype = {
        np.dtype(np.float32): "_f32",
        np.dtype(np.float64): "_f64",
        np.dtype(np.uint8): "_u8",
        np.dtype(np.uint64): "_u64",
    }
    suffix = suffix_by_dtype.get(array.dtype)
    if suffix is None:
        raise TypeError("Unsupported type!")
    return getattr(cc, "pyarray_copyto_matrix" + suffix)(array)

apis/python/test/test_api.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import tiledb.vector_search as vs
66
from tiledb.vector_search import _tiledbvspy as vspy
77

8+
import pytest
89

910
def test_load_matrix(tmpdir):
1011
p = str(tmpdir.mkdir("test").join("test.tdb"))
@@ -24,7 +25,10 @@ def test_load_matrix(tmpdir):
2425
assert np.array_equal(orig_matrix[0, 0], data[0, 0])
2526

2627

27-
def test_flat_query(tmpdir):
28+
@pytest.mark.skipif(
29+
not os.path.exists(os.path.expanduser("~/work/proj/vector-search/datasets/sift-andrew/")),
30+
reason="requires sift dataset")
31+
def test_flat_query():
2832
# db_uri = "s3://tiledb-andrew/sift/sift_base"
2933
# probe_uri = "s3://tiledb-andrew/sift/sift_query"
3034
# g_uri = "s3://tiledb-andrew/sift/sift_groundtruth"
@@ -54,4 +58,4 @@ def test_flat_query(tmpdir):
5458
assert np.array_equal(np.sort(ra[:k], axis=0), np.sort(g[:k, :nqueries], axis=0))
5559

5660
g_m = vs.load_as_matrix(g_uri)
57-
assert vspy.validate_top_k(r, g_m)
61+
assert vspy.validate_top_k_u64(r, g_m)

apis/python/test/test_index.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,27 @@
33
import tiledb.vector_search as vs
44
from tiledb.vector_search.index import FlatIndex
55

6+
import numpy as np
7+
import os
8+
import pytest
69

7-
def test_flat_index(tmpdir):
10+
## only run this test if the dataset is available
@pytest.mark.skipif(
    not os.path.exists(os.path.expanduser("~/work/proj/vector-search/datasets/sift-andrew/")),
    reason="requires sift dataset")
def test_flat_index_local():
    """Query a FlatIndex over the local SIFT dataset and validate the top-k result.

    Loads the first ``nquery`` columns of the query array, runs them through
    ``FlatIndex.query`` and checks the result against the ground-truth matrix.
    """
    group_uri = "~/work/proj/vector-search/datasets/sift-andrew/"
    query_uri = "~/work/proj/vector-search/datasets/sift-andrew/sift_query"
    ground_truth_uri = "~/work/proj/vector-search/datasets/sift-andrew/sift_groundtruth"

    nquery = 10
    query_vectors = vs.load_as_array(query_uri)[:, :nquery]

    index = FlatIndex(group_uri, dtype="float32", parts_name="sift_base")
    result = index.query(query_vectors)
    assert isinstance(result, np.ndarray)

    ground_truth = vs.load_as_matrix(ground_truth_uri, nquery)
    result_m = vs.array_to_matrix(result)
    # Assert the outcome: without it a failed validation would pass silently.
    assert vs.validate_top_k(result_m, ground_truth)

environment.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
name: tiledb-vs-env
2+
3+
dependencies:
4+
- python=3.10
5+
- numpy
6+
- tiledb
7+
- tiledb-py
8+
- pybind11
9+
- cmake
10+
- openblas
11+
- ninja
12+
- pytest

0 commit comments

Comments
 (0)