Skip to content

Commit b730620

Browse files
authored
Add on-disk rescoring to disk BBQ (#135778)
Adds an `on_disk_rescore: true` option to disk BBQ that uses direct IO when accessing raw vectors during rescoring
1 parent aae6742 commit b730620

File tree

16 files changed

+587
-53
lines changed

16 files changed

+587
-53
lines changed

docs/changelog/135778.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 135778
2+
summary: "Add `on_disk_rescore: true` option to disk BBQ to rescore vectors on-disk without loading into memory"
3+
area: Vector Search
4+
type: feature
5+
issues: []

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/180_update_dense_vector_type.yml

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1682,6 +1682,109 @@ setup:
16821682
- match: { test_index.mappings.properties.embedding.index_options.confidence_interval: 0.3 }
16831683

16841684
---
1685+
"Allowed dense vector updates on same type but different other index_options, bbq_disk":
1686+
- requires:
1687+
cluster_features: "mapper.vectors.diskbbq_on_disk_rescoring"
1688+
reason: 'diskbbq needs to support on-disk rescoring'
1689+
- requires:
1690+
test_runner_features: [ contains ]
1691+
- do:
1692+
indices.create:
1693+
index: test_index
1694+
1695+
- do:
1696+
indices.put_mapping:
1697+
index: test_index
1698+
body:
1699+
properties:
1700+
embedding:
1701+
type: dense_vector
1702+
dims: 64
1703+
index_options:
1704+
type: bbq_disk
1705+
1706+
- do:
1707+
indices.get_mapping:
1708+
index: test_index
1709+
1710+
- match: { test_index.mappings.properties.embedding.type: dense_vector }
1711+
- match: { test_index.mappings.properties.embedding.index_options.type: bbq_disk }
1712+
1713+
- do:
1714+
index:
1715+
index: test_index
1716+
id: "1"
1717+
body:
1718+
embedding: [0.077, 0.32 , -0.205, 0.63 , 0.032, 0.201, 0.167, -0.313,
1719+
0.176, 0.531, -0.375, 0.334, -0.046, 0.078, -0.349, 0.272,
1720+
0.307, -0.083, 0.504, 0.255, -0.404, 0.289, -0.226, -0.132,
1721+
-0.216, 0.49 , 0.039, 0.507, -0.307, 0.107, 0.09 , -0.265,
1722+
-0.285, 0.336, -0.272, 0.369, -0.282, 0.086, -0.132, 0.475,
1723+
-0.224, 0.203, 0.439, 0.064, 0.246, -0.396, 0.297, 0.242,
1724+
-0.028, 0.321, -0.022, -0.009, -0.001 , 0.031, -0.533, 0.45,
1725+
-0.683, 1.331, 0.194, -0.157, -0.1 , -0.279, -0.098, -0.176]
1726+
- do:
1727+
indices.flush:
1728+
index: test_index
1729+
1730+
- do:
1731+
indices.put_mapping:
1732+
index: test_index
1733+
body:
1734+
properties:
1735+
embedding:
1736+
type: dense_vector
1737+
dims: 64
1738+
index_options:
1739+
type: bbq_disk
1740+
on_disk_rescore: true
1741+
1742+
- do:
1743+
indices.get_mapping:
1744+
index: test_index
1745+
1746+
- match: { test_index.mappings.properties.embedding.type: dense_vector }
1747+
- match: { test_index.mappings.properties.embedding.index_options.type: bbq_disk }
1748+
- match: { test_index.mappings.properties.embedding.index_options.on_disk_rescore: true }
1749+
1750+
- do:
1751+
index:
1752+
index: test_index
1753+
id: "2"
1754+
body:
1755+
embedding: [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348,
1756+
-0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048,
1757+
0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438,
1758+
-0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138,
1759+
-0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429,
1760+
-0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166,
1761+
0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569,
1762+
-0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013]
1763+
- do:
1764+
indices.flush:
1765+
index: test_index
1766+
- do:
1767+
indices.refresh: { }
1768+
- do:
1769+
search:
1770+
index: test_index
1771+
body:
1772+
knn:
1773+
field: embedding
1774+
query_vector: [ 0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393,
1775+
0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015,
1776+
0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259,
1777+
-0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 ,
1778+
-0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232,
1779+
-0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034,
1780+
-0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582,
1781+
-0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158 ]
1782+
k: 2
1783+
num_candidates: 2
1784+
1785+
- match: { hits.hits.0._id: "1" }
1786+
- match: { hits.hits.1._id: "2" }
1787+
---
16851788
"Test create and update dense vector mapping to int4 with per-doc indexing and flush":
16861789
- requires:
16871790
cluster_features: "gte_v8.16.0"

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/46_knn_search_bbq_ivf.yml

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,98 @@ setup:
306306
- match: { hits.hits.0._score: $rescore_score0 }
307307
- match: { hits.hits.1._score: $rescore_score1 }
308308
- match: { hits.hits.2._score: $rescore_score2 }
309+
310+
---
311+
"Test index configured rescore vector with on-disk rescore":
312+
- requires:
313+
cluster_features: [ "mapper.vectors.diskbbq_on_disk_rescoring" ]
314+
reason: Needs on_disk_rescoring feature for DiskBBQ
315+
- skip:
316+
features: "headers"
317+
- do:
318+
indices.create:
319+
index: bbq_on_disk_rescore_ivf
320+
body:
321+
settings:
322+
index:
323+
number_of_shards: 1
324+
mappings:
325+
properties:
326+
vector:
327+
type: dense_vector
328+
dims: 64
329+
index: true
330+
similarity: max_inner_product
331+
index_options:
332+
type: bbq_disk
333+
on_disk_rescore: true
334+
rescore_vector:
335+
oversample: 1.5
336+
337+
- do:
338+
bulk:
339+
index: bbq_on_disk_rescore_ivf
340+
refresh: true
341+
body: |
342+
{ "index": {"_id": "1"}}
343+
{ "vector": [0.077, 0.32 , -0.205, 0.63 , 0.032, 0.201, 0.167, -0.313, 0.176, 0.531, -0.375, 0.334, -0.046, 0.078, -0.349, 0.272, 0.307, -0.083, 0.504, 0.255, -0.404, 0.289, -0.226, -0.132, -0.216, 0.49 , 0.039, 0.507, -0.307, 0.107, 0.09 , -0.265, -0.285, 0.336, -0.272, 0.369, -0.282, 0.086, -0.132, 0.475, -0.224, 0.203, 0.439, 0.064, 0.246, -0.396, 0.297, 0.242, -0.028, 0.321, -0.022, -0.009, -0.001 , 0.031, -0.533, 0.45, -0.683, 1.331, 0.194, -0.157, -0.1 , -0.279, -0.098, -0.176] }
344+
{ "index": {"_id": "2"}}
345+
{ "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] }
346+
{ "index": {"_id": "3"}}
347+
{ "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] }
348+
349+
- do:
350+
headers:
351+
Content-Type: application/json
352+
search:
353+
rest_total_hits_as_int: true
354+
index: bbq_on_disk_rescore_ivf
355+
body:
356+
knn:
357+
field: vector
358+
query_vector: [ 0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393,
359+
0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015,
360+
0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259,
361+
-0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 ,
362+
-0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232,
363+
-0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034,
364+
-0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582,
365+
-0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158 ]
366+
k: 3
367+
num_candidates: 3
368+
369+
- match: { hits.total: 3 }
370+
- set: { hits.hits.0._score: rescore_score0 }
371+
- set: { hits.hits.1._score: rescore_score1 }
372+
- set: { hits.hits.2._score: rescore_score2 }
373+
374+
- do:
375+
headers:
376+
Content-Type: application/json
377+
search:
378+
rest_total_hits_as_int: true
379+
index: bbq_on_disk_rescore_ivf
380+
body:
381+
query:
382+
script_score:
383+
query: { match_all: { } }
384+
script:
385+
source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 1 / (1 + -1 * similarity) : similarity + 1"
386+
params:
387+
query_vector: [ 0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393,
388+
0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015,
389+
0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259,
390+
-0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 ,
391+
-0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232,
392+
-0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034,
393+
-0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582,
394+
-0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158 ]
395+
396+
# Compare scores rather than hit IDs: IDs may change depending on how things are distributed, and docs 2 and 3 share an identical vector, so their scores tie
397+
- match: { hits.total: 3 }
398+
- match: { hits.hits.0._score: $rescore_score0 }
399+
- match: { hits.hits.1._score: $rescore_score1 }
400+
- match: { hits.hits.2._score: $rescore_score2 }
309401
---
310402
"Test index configured rescore vector updateable and settable to 0":
311403
- do:

0 commit comments

Comments
 (0)