Skip to content

Commit 23f201a

Browse files
authored
Fix non-contiguous queries when using Vamana (#343)
1 parent 589958e commit 23f201a

File tree

6 files changed: +51 additions, -34 deletions

6 files changed

+51
-34
lines changed

apis/python/src/tiledb/vector_search/vamana_index.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,11 @@ def query_internal(
9191

9292
if queries.ndim == 1:
9393
queries = np.array([queries])
94+
queries = np.transpose(queries)
95+
if not queries.flags.f_contiguous:
96+
queries = queries.copy(order="F")
97+
queries_feature_vector_array = vspy.FeatureVectorArray(queries)
9498

95-
queries_feature_vector_array = vspy.FeatureVectorArray(np.transpose(queries))
9699
distances, ids = self.index.query(queries_feature_vector_array, k, opt_l)
97100

98101
return np.array(distances, copy=False), np.array(ids, copy=False)

apis/python/test/test_index.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,8 @@ def test_index_with_incorrect_num_of_query_columns_simple(tmp_path):
353353
queries = load_fvecs(queries_uri)
354354
index.query(queries, k=10)
355355

356+
Index.delete_index(uri=index_uri, config={})
357+
356358

357359
def test_index_with_incorrect_num_of_query_columns_complex(tmp_path):
358360
vfs = tiledb.VFS()

apis/python/test/test_ingestion.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -265,11 +265,6 @@ def test_ingestion_fvec(tmp_path):
265265
gt_i, gt_d = get_groundtruth_ivec(gt_uri, k=k, nqueries=nqueries)
266266

267267
for index_type, index_class in zip(INDEXES, INDEX_CLASSES):
268-
# TODO(paris): Fix Vamana bug and re-enable:
269-
# RuntimeError: IndexError: index 100 is out of bounds for axis 0 with size 100
270-
if index_type == "VAMANA":
271-
continue
272-
273268
index_uri = os.path.join(tmp_path, f"array_{index_type}")
274269
index = ingest(
275270
index_type=index_type,
@@ -319,11 +314,6 @@ def test_ingestion_numpy(tmp_path):
319314
gt_i, gt_d = get_groundtruth_ivec(gt_uri, k=k, nqueries=nqueries)
320315

321316
for index_type, index_class in zip(INDEXES, INDEX_CLASSES):
322-
# TODO(paris): Fix Vamana bug and re-enable:
323-
# RuntimeError: IndexError: index 100 is out of bounds for axis 0 with size 100
324-
if index_type == "VAMANA":
325-
continue
326-
327317
index_uri = os.path.join(tmp_path, f"array_{index_type}")
328318
index = ingest(
329319
index_type=index_type,
@@ -424,11 +414,6 @@ def test_ingestion_multiple_workers(tmp_path):
424414
gt_i, gt_d = get_groundtruth_ivec(gt_uri, k=k, nqueries=nqueries)
425415

426416
for index_type, index_class in zip(INDEXES, INDEX_CLASSES):
427-
# TODO(paris): Fix Vamana bug and re-enable:
428-
# RuntimeError: Invalid key when getting the URI: adjacency_scores_array_name. Name does not exist: adjacency_scores
429-
if index_type == "VAMANA":
430-
continue
431-
432417
index_uri = os.path.join(tmp_path, f"array_{index_type}")
433418
index = ingest(
434419
index_type=index_type,
@@ -485,11 +470,6 @@ def test_ingestion_external_ids_numpy(tmp_path):
485470
)
486471

487472
for index_type, index_class in zip(INDEXES, INDEX_CLASSES):
488-
# # TODO(paris): Fix Vamana bug and re-enable:
489-
# # RuntimeError: Invalid key when getting the URI: adjacency_scores_array_name. Name does not exist: adjacency_scores
490-
if index_type == "VAMANA":
491-
continue
492-
493473
index_uri = os.path.join(tmp_path, f"array_{index_type}")
494474
index = ingest(
495475
index_type=index_type,
@@ -530,11 +510,6 @@ def test_ingestion_with_updates(tmp_path):
530510
gt_i, gt_d = get_groundtruth(dataset_dir, k)
531511

532512
for index_type, index_class in zip(INDEXES, INDEX_CLASSES):
533-
# TODO(paris): Fix Vamana bug and re-enable:
534-
# RuntimeError: Invalid key when getting the URI: adjacency_scores_array_name. Name does not exist: adjacency_scores
535-
if index_type == "VAMANA":
536-
continue
537-
538513
index_uri = os.path.join(tmp_path, f"array_{index_type}")
539514
index = ingest(
540515
index_type=index_type,

apis/python/test/test_type_erased_module.py

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from array_paths import *
55

66
from tiledb.vector_search import _tiledbvspy as vspy
7+
from tiledb.vector_search.utils import load_fvecs
78

89
ctx = vspy.Ctx({})
910

@@ -80,6 +81,8 @@ def test_numpy_to_feature_vector_array():
8081
assert a.shape == (10000, 128)
8182
assert b.dimension() == 128
8283
assert b.num_vectors() == 10000
84+
assert a.shape == np.array(b).shape
85+
assert np.array_equal(a, np.array(b))
8386

8487
a = np.array(np.random.rand(128, 10000), dtype=np.float32, order="F")
8588
b = vspy.FeatureVectorArray(a)
@@ -88,6 +91,9 @@ def test_numpy_to_feature_vector_array():
8891
assert a.shape == (128, 10000)
8992
assert b.dimension() == 128
9093
assert b.num_vectors() == 10000
94+
# TODO(paris): This should work, but it doesn't.
95+
# assert a.shape == np.array(b).shape
96+
# assert np.array_equal(a, np.array(b))
9197

9298
a = np.array(np.random.rand(10000, 128), dtype=np.float32)
9399
b = vspy.FeatureVectorArray(a.T)
@@ -96,6 +102,8 @@ def test_numpy_to_feature_vector_array():
96102
assert a.shape == (10000, 128)
97103
assert b.dimension() == 128
98104
assert b.num_vectors() == 10000
105+
assert a.shape == np.array(b).shape
106+
assert np.array_equal(a, np.array(b))
99107

100108
a = np.array(np.random.rand(1000000, 128), dtype=np.uint8)
101109
b = vspy.FeatureVectorArray(a)
@@ -104,17 +112,44 @@ def test_numpy_to_feature_vector_array():
104112
assert a.shape == (1000000, 128)
105113
assert b.dimension() == 128
106114
assert b.num_vectors() == 1000000
115+
assert a.shape == np.array(b).shape
116+
assert np.array_equal(a, np.array(b))
107117

108118
a = np.array(np.random.rand(10000, 128), dtype=np.float32)
109119
b = vspy.FeatureVectorArray(a)
110120
logging.info(a.shape)
111121
logging.info((b.dimension(), b.num_vectors()))
112-
113-
c = np.array(b)
114-
logging.info(c.shape)
115-
116-
assert a.shape == c.shape
117-
assert (a == c).all()
122+
assert a.shape == np.array(b).shape
123+
assert np.array_equal(a, np.array(b))
124+
125+
a = np.array(np.arange(1, 16, dtype=np.float32).reshape(3, 5), dtype=np.float32)
126+
assert a.shape == (3, 5)
127+
assert a.flags.f_contiguous is False
128+
assert a.flags.c_contiguous is True
129+
a = np.transpose(a)
130+
assert a.shape == (5, 3)
131+
assert a.flags.f_contiguous is True
132+
assert a.flags.c_contiguous is False
133+
b = vspy.FeatureVectorArray(a)
134+
# NOTE(paris): It is strange that we have to transpose this output array to have it match the input array. Should investigate this and fix it.
135+
assert a.shape == np.transpose(np.array(b)).shape
136+
assert np.array_equal(a, np.transpose(np.array(b)))
137+
138+
n = 99
139+
a = load_fvecs(siftsmall_query_file)[0:n]
140+
assert a.shape == (n, 128)
141+
assert a.flags.f_contiguous is False
142+
assert a.flags.c_contiguous is False
143+
a = np.transpose(a)
144+
assert a.shape == (128, n)
145+
assert a.flags.f_contiguous is False
146+
assert a.flags.c_contiguous is False
147+
# NOTE(paris): load_fvecs() returns a view of an array, which is not contiguous, so make it contiguous. Ideally we would handle this in FeatureVectorArray().
148+
a = np.asfortranarray(a)
149+
b = vspy.FeatureVectorArray(a)
150+
# NOTE(paris): It is strange that we have to transpose this output array to have it match the input array. Should investigate this and fix it.
151+
assert a.shape == np.transpose(np.array(b)).shape
152+
assert np.array_equal(a, np.transpose(np.array(b)))
118153

119154

120155
def test_construct_IndexFlatL2():

src/include/detail/linalg/matrix.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,8 @@ void debug_slice(
508508
auto rowsEnd = std::min(dimension(A), static_cast<size_t>(max_size));
509509
auto colsEnd = std::min(num_vectors(A), static_cast<size_t>(max_size));
510510

511-
std::cout << "# " << msg << std::endl;
511+
std::cout << "# " << msg << " (" << dimension(A) << " rows x "
512+
<< num_vectors(A) << " cols)" << std::endl;
512513
for (size_t i = 0; i < rowsEnd; ++i) {
513514
std::cout << "# ";
514515
for (size_t j = 0; j < colsEnd; ++j) {

src/include/detail/linalg/matrix_with_ids.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,8 @@ void debug_slice_with_ids(
178178
auto rowsEnd = std::min(dimension(A), static_cast<size_t>(max_size));
179179
auto colsEnd = std::min(num_vectors(A), static_cast<size_t>(max_size));
180180

181-
std::cout << "# " << msg << std::endl;
181+
std::cout << "# " << msg << " (" << num_vectors(A) << " vectors)"
182+
<< std::endl;
182183
for (size_t i = 0; i < rowsEnd; ++i) {
183184
std::cout << "# ";
184185
for (size_t j = 0; j < colsEnd; ++j) {

0 commit comments

Comments
 (0)