Skip to content

Commit 8b4a53e

Browse files
authored
IVF_PQ re-ranking (#502)
1 parent ba8c7b4 commit 8b4a53e

24 files changed

+1545
-137
lines changed

apis/python/src/tiledb/vector_search/ivf_pq_index.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def query_internal(
8989
self,
9090
queries: np.ndarray,
9191
k: int = 10,
92+
k_factor: float = 1.0,
9293
nprobe: Optional[int] = 100,
9394
**kwargs,
9495
):
@@ -101,6 +102,13 @@ def query_internal(
101102
2D array of query vectors. This can be used as a batch query interface by passing multiple queries in one call.
102103
k: int
103104
Number of results to return per query vector.
105+
k_factor: float
106+
To improve accuracy, IVF_PQ can search for more vectors than requested and then
107+
perform re-ranking using the original non-PQ-encoded vectors. This can be slightly
108+
slower, but is more accurate. k_factor is the factor by which to increase the number
109+
of vectors searched. 1 means we search for exactly `k` vectors. 10 means we search for
110+
`10*k` vectors.
111+
Defaults to 1.0.
104112
nprobe: int
105113
Number of partitions to check per query.
106114
Use this parameter to trade-off accuracy for latency and cost.
@@ -120,11 +128,11 @@ def query_internal(
120128

121129
if self.memory_budget == -1:
122130
distances, ids = self.index.query_infinite_ram(
123-
queries_feature_vector_array, k, nprobe
131+
queries_feature_vector_array, k, nprobe, k_factor
124132
)
125133
else:
126134
distances, ids = self.index.query_finite_ram(
127-
queries_feature_vector_array, k, nprobe, self.memory_budget
135+
queries_feature_vector_array, k, nprobe, self.memory_budget, k_factor
128136
)
129137

130138
return np.array(distances, copy=False), np.array(ids, copy=False)

apis/python/src/tiledb/vector_search/type_erased_module.cc

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -498,29 +498,38 @@ void init_type_erased_module(py::module_& m) {
498498
[](IndexIVFPQ& index,
499499
const FeatureVectorArray& vectors,
500500
size_t top_k,
501-
size_t nprobe) {
502-
auto r =
503-
index.query(QueryType::InfiniteRAM, vectors, top_k, nprobe);
501+
size_t nprobe,
502+
float k_factor) {
503+
auto r = index.query(
504+
QueryType::InfiniteRAM, vectors, top_k, nprobe, 0, k_factor);
504505
return make_python_pair(std::move(r));
505506
},
506507
py::arg("vectors"),
507508
py::arg("top_k"),
508-
py::arg("nprobe"))
509+
py::arg("nprobe"),
510+
py::arg("k_factor") = 1.f)
509511
.def(
510512
"query_finite_ram",
511513
[](IndexIVFPQ& index,
512514
const FeatureVectorArray& vectors,
513515
size_t top_k,
514516
size_t nprobe,
515-
size_t memory_budget) {
517+
size_t memory_budget,
518+
float k_factor) {
516519
auto r = index.query(
517-
QueryType::FiniteRAM, vectors, top_k, nprobe, memory_budget);
520+
QueryType::FiniteRAM,
521+
vectors,
522+
top_k,
523+
nprobe,
524+
memory_budget,
525+
k_factor);
518526
return make_python_pair(std::move(r));
519527
},
520528
py::arg("vectors"),
521529
py::arg("top_k"),
522530
py::arg("nprobe"),
523-
py::arg("memory_budget"))
531+
py::arg("memory_budget"),
532+
py::arg("k_factor") = 1.f)
524533
.def(
525534
"write_index",
526535
[](IndexIVFPQ& index,

apis/python/test/test_index.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,9 @@ def test_ivf_pq_index(tmp_path):
329329
assert distances[0][0] == MAX_FLOAT32
330330
assert ids[0][0] == MAX_UINT64
331331
query_and_check_distances(index, queries, 1, [[MAX_FLOAT32]], [[MAX_UINT64]])
332+
query_and_check_distances(
333+
index, queries, 1, [[MAX_FLOAT32]], [[MAX_UINT64]], k_factor=2.0
334+
)
332335
check_default_metadata(uri, vector_type, STORAGE_VERSION, "IVF_PQ")
333336

334337
update_vectors = np.empty([5], dtype=object)
@@ -344,6 +347,14 @@ def test_ivf_pq_index(tmp_path):
344347
query_and_check_distances(
345348
index, np.array([[2, 2, 2]], dtype=np.float32), 2, [[0, 3]], [[2, 1]]
346349
)
350+
query_and_check_distances(
351+
index,
352+
np.array([[2, 2, 2]], dtype=np.float32),
353+
2,
354+
[[0, 3]],
355+
[[2, 1]],
356+
k_factor=2.0,
357+
)
347358

348359
index = index.consolidate_updates()
349360

@@ -366,6 +377,14 @@ def test_ivf_pq_index(tmp_path):
366377
[[0], [0]],
367378
[[i], [i]],
368379
)
380+
query_and_check_distances(
381+
index,
382+
np.array([[i, i, i], [i, i, i]], dtype=np.float32),
383+
1,
384+
[[0], [0]],
385+
[[i], [i]],
386+
k_factor=2.0,
387+
)
369388

370389
# Test that we can query with k > 1.
371390
query_and_check_distances(
@@ -380,6 +399,14 @@ def test_ivf_pq_index(tmp_path):
380399
[[0, 3], [0, 3]],
381400
[[0, 1], [4, 3]],
382401
)
402+
query_and_check_distances(
403+
index,
404+
np.array([[0, 0, 0], [4, 4, 4]], dtype=np.float32),
405+
2,
406+
[[0, 3], [0, 3]],
407+
[[0, 1], [4, 3]],
408+
k_factor=2.0,
409+
)
383410

384411
vfs = tiledb.VFS()
385412
assert vfs.dir_size(uri) > 0

apis/python/test/test_ingestion.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,17 @@ def test_ivf_pq_ingestion_u8(tmp_path):
245245
queries,
246246
k=k,
247247
nprobe=nprobe,
248-
use_nuv_implementation=True,
249248
)
250-
assert accuracy(result, gt_i) > MINIMUM_ACCURACY
249+
query_accuracy = accuracy(result, gt_i)
250+
assert query_accuracy > MINIMUM_ACCURACY
251+
252+
_, result = index_ram.query(
253+
queries,
254+
k=k,
255+
k_factor=2,
256+
nprobe=nprobe,
257+
)
258+
assert accuracy(result, gt_i) > query_accuracy + 0.1
251259

252260
_, result = index_ram.query(
253261
queries,

src/include/api/ivf_flat_index.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ class IndexIVFFlat {
614614
(float*)vectors.data(),
615615
extents(vectors)[0],
616616
extents(vectors)[1]}; // @todo ??
617-
auto [s, t] = impl_index_.query_infinite_ram(qspan, k_nn, nprobe);
617+
auto [s, t, _] = impl_index_.query_infinite_ram(qspan, k_nn, nprobe);
618618
auto x = FeatureVectorArray{std::move(s)};
619619
auto y = FeatureVectorArray{std::move(t)};
620620
return {std::move(x), std::move(y)};
@@ -624,7 +624,7 @@ class IndexIVFFlat {
624624
(uint8_t*)vectors.data(),
625625
extents(vectors)[0],
626626
extents(vectors)[1]}; // @todo ??
627-
auto [s, t] = impl_index_.query_infinite_ram(qspan, k_nn, nprobe);
627+
auto [s, t, _] = impl_index_.query_infinite_ram(qspan, k_nn, nprobe);
628628
auto x = FeatureVectorArray{std::move(s)};
629629
auto y = FeatureVectorArray{std::move(t)};
630630
return {std::move(x), std::move(y)};
@@ -651,7 +651,7 @@ class IndexIVFFlat {
651651
(float*)vectors.data(),
652652
extents(vectors)[0],
653653
extents(vectors)[1]}; // @todo ??
654-
auto [s, t] =
654+
auto [s, t, _] =
655655
impl_index_.query_finite_ram(qspan, k_nn, nprobe, upper_bound);
656656
auto x = FeatureVectorArray{std::move(s)};
657657
auto y = FeatureVectorArray{std::move(t)};
@@ -662,7 +662,7 @@ class IndexIVFFlat {
662662
(uint8_t*)vectors.data(),
663663
extents(vectors)[0],
664664
extents(vectors)[1]}; // @todo ??
665-
auto [s, t] =
665+
auto [s, t, _] =
666666
impl_index_.query_finite_ram(qspan, k_nn, nprobe, upper_bound);
667667
auto x = FeatureVectorArray{std::move(s)};
668668
auto y = FeatureVectorArray{std::move(t)};

src/include/api/ivf_pq_index.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -248,11 +248,13 @@ class IndexIVFPQ {
248248
const QueryVectorArray& vectors,
249249
size_t top_k,
250250
size_t nprobe,
251-
size_t upper_bound = 0) {
251+
size_t upper_bound = 0,
252+
float k_factor = 1.f) {
252253
if (!index_) {
253254
throw std::runtime_error("Cannot query() because there is no index.");
254255
}
255-
return index_->query(queryType, vectors, top_k, nprobe, upper_bound);
256+
return index_->query(
257+
queryType, vectors, top_k, nprobe, upper_bound, k_factor);
256258
}
257259

258260
void write_index(
@@ -398,7 +400,8 @@ class IndexIVFPQ {
398400
const QueryVectorArray& vectors,
399401
size_t top_k,
400402
size_t nprobe,
401-
size_t upper_bound) = 0;
403+
size_t upper_bound,
404+
float k_factor) = 0;
402405

403406
virtual void write_index(
404407
const tiledb::Context& ctx,
@@ -508,7 +511,8 @@ class IndexIVFPQ {
508511
const QueryVectorArray& vectors,
509512
size_t top_k,
510513
size_t nprobe,
511-
size_t upper_bound) override {
514+
size_t upper_bound,
515+
float k_factor) override {
512516
// @todo using index_type = size_t;
513517
auto dtype = vectors.feature_type();
514518

@@ -519,8 +523,8 @@ class IndexIVFPQ {
519523
(float*)vectors.data(),
520524
extents(vectors)[0],
521525
extents(vectors)[1]}; // @todo ??
522-
auto [s, t] =
523-
impl_index_.query(queryType, qspan, top_k, nprobe, upper_bound);
526+
auto [s, t] = impl_index_.query(
527+
queryType, qspan, top_k, nprobe, upper_bound, k_factor);
524528
auto x = FeatureVectorArray{std::move(s)};
525529
auto y = FeatureVectorArray{std::move(t)};
526530
return {std::move(x), std::move(y)};
@@ -530,8 +534,8 @@ class IndexIVFPQ {
530534
(uint8_t*)vectors.data(),
531535
extents(vectors)[0],
532536
extents(vectors)[1]}; // @todo ??
533-
auto [s, t] =
534-
impl_index_.query(queryType, qspan, top_k, nprobe, upper_bound);
537+
auto [s, t] = impl_index_.query(
538+
queryType, qspan, top_k, nprobe, upper_bound, k_factor);
535539
auto x = FeatureVectorArray{std::move(s)};
536540
auto y = FeatureVectorArray{std::move(t)};
537541
return {std::move(x), std::move(y)};

src/include/detail/ivf/dist_qv.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,15 @@ auto dist_qv_finite_ram_part(
124124
fixed_min_pair_heap<score_type, shuffled_ids_type>(k_nn));
125125

126126
size_t part_offset = 0;
127+
size_t indices_offset = 0;
127128
while (partitioned_vectors.load()) {
128129
_i.start();
129130
auto current_part_size = ::num_partitions(partitioned_vectors);
130131
size_t parts_per_thread = (current_part_size + nthreads - 1) / nthreads;
131132

132-
std::vector<std::future<decltype(min_scores)>> futs;
133+
std::vector<std::future<std::vector<
134+
fixed_min_triplet_heap<score_type, shuffled_ids_type, size_t>>>>
135+
futs;
133136
futs.reserve(nthreads);
134137

135138
for (size_t n = 0; n < nthreads; ++n) {
@@ -148,7 +151,8 @@ auto dist_qv_finite_ram_part(
148151
k_nn,
149152
first_part,
150153
last_part,
151-
part_offset]() {
154+
part_offset,
155+
indices_offset]() {
152156
return apply_query(
153157
partitioned_vectors,
154158
std::optional<std::vector<int>>{},
@@ -159,6 +163,7 @@ auto dist_qv_finite_ram_part(
159163
first_part,
160164
last_part,
161165
part_offset,
166+
indices_offset,
162167
distance);
163168
}));
164169
}
@@ -167,13 +172,14 @@ auto dist_qv_finite_ram_part(
167172
auto min_n = futs[n].get();
168173

169174
for (size_t j = 0; j < num_queries; ++j) {
170-
for (auto&& [e, f] : min_n[j]) {
175+
for (auto&& [e, f, _] : min_n[j]) {
171176
min_scores[j].insert(e, f);
172177
}
173178
}
174179
}
175180

176181
part_offset += current_part_size;
182+
indices_offset += num_vectors(partitioned_vectors);
177183
_i.stop();
178184
}
179185
return min_scores;

0 commit comments

Comments
 (0)