Skip to content

Commit 5e859d9

Browse files
authored
Even better(er) binary quantization (#117994)
This measurably improves BBQ by changing the underlying algorithm to an optimized per-vector scalar quantization. This is a brand new way to quantize vectors: instead of using a global set of upper and lower quantile bands, the bands are optimized and calculated for each individual vector. Additionally, vectors are centered on a common centroid. This allows for an almost 32x reduction in memory and even better recall than before, at the cost of slightly increased indexing time. This new approach is also easily generalizable to other bit sizes (e.g. 2 bits). While not taken advantage of yet, we may update our scalar quantized indices in the future to use this new algorithm, giving significant boosts in recall. Compared with current BBQ, the recall gains range from 2% to almost 10% for certain datasets, with an additional 5-10% indexing cost when indexing with HNSW.
1 parent 0586cbf commit 5e859d9

32 files changed

+3501
-137
lines changed

docs/changelog/117994.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 117994
2+
summary: Even better(er) binary quantization
3+
area: Vector Search
4+
type: enhancement
5+
issues: []

rest-api-spec/build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,6 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
6767
task.skipTest("logsdb/20_source_mapping/include/exclude is supported with stored _source", "no longer serialize source_mode")
6868
task.skipTest("logsdb/20_source_mapping/synthetic _source is default", "no longer serialize source_mode")
6969
task.skipTest("search/520_fetch_fields/fetch _seq_no via fields", "error code is changed from 5xx to 400 in 9.0")
70+
task.skipTest("search.vectors/41_knn_search_bbq_hnsw/Test knn search", "Scoring has changed in latest versions")
71+
task.skipTest("search.vectors/42_knn_search_bbq_flat/Test knn search", "Scoring has changed in latest versions")
7072
})

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,11 @@ setup:
1111
number_of_shards: 1
1212
mappings:
1313
properties:
14-
name:
15-
type: keyword
1614
vector:
1715
type: dense_vector
1816
dims: 64
1917
index: true
20-
similarity: l2_norm
21-
index_options:
22-
type: bbq_hnsw
23-
another_vector:
24-
type: dense_vector
25-
dims: 64
26-
index: true
27-
similarity: l2_norm
18+
similarity: max_inner_product
2819
index_options:
2920
type: bbq_hnsw
3021

@@ -33,9 +24,14 @@ setup:
3324
index: bbq_hnsw
3425
id: "1"
3526
body:
36-
name: cow.jpg
37-
vector: [300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0]
38-
another_vector: [115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0]
27+
vector: [0.077, 0.32 , -0.205, 0.63 , 0.032, 0.201, 0.167, -0.313,
28+
0.176, 0.531, -0.375, 0.334, -0.046, 0.078, -0.349, 0.272,
29+
0.307, -0.083, 0.504, 0.255, -0.404, 0.289, -0.226, -0.132,
30+
-0.216, 0.49 , 0.039, 0.507, -0.307, 0.107, 0.09 , -0.265,
31+
-0.285, 0.336, -0.272, 0.369, -0.282, 0.086, -0.132, 0.475,
32+
-0.224, 0.203, 0.439, 0.064, 0.246, -0.396, 0.297, 0.242,
33+
-0.028, 0.321, -0.022, -0.009, -0.001 , 0.031, -0.533, 0.45,
34+
-0.683, 1.331, 0.194, -0.157, -0.1 , -0.279, -0.098, -0.176]
3935
# Flush in order to provoke a merge later
4036
- do:
4137
indices.flush:
@@ -46,9 +42,14 @@ setup:
4642
index: bbq_hnsw
4743
id: "2"
4844
body:
49-
name: moose.jpg
50-
vector: [100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0]
51-
another_vector: [50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120]
45+
vector: [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348,
46+
-0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048,
47+
0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438,
48+
-0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138,
49+
-0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429,
50+
-0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166,
51+
0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569,
52+
-0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013]
5253
# Flush in order to provoke a merge later
5354
- do:
5455
indices.flush:
@@ -60,8 +61,14 @@ setup:
6061
id: "3"
6162
body:
6263
name: rabbit.jpg
63-
vector: [111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0]
64-
another_vector: [11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0]
64+
vector: [0.139, 0.178, -0.117, 0.399, 0.014, -0.139, 0.347, -0.33 ,
65+
0.139, 0.34 , -0.052, -0.052, -0.249, 0.327, -0.288, 0.049,
66+
0.464, 0.338, 0.516, 0.247, -0.104, 0.259, -0.209, -0.246,
67+
-0.11 , 0.323, 0.091, 0.442, -0.254, 0.195, -0.109, -0.058,
68+
-0.279, 0.402, -0.107, 0.308, -0.273, 0.019, 0.082, 0.399,
69+
-0.658, -0.03 , 0.276, 0.041, 0.187, -0.331, 0.165, 0.017,
70+
0.171, -0.203, -0.198, 0.115, -0.007, 0.337, -0.444, 0.615,
71+
-0.657, 1.285, 0.2 , -0.062, 0.038, 0.089, -0.068, -0.058]
6572
# Flush in order to provoke a merge later
6673
- do:
6774
indices.flush:
@@ -73,20 +80,33 @@ setup:
7380
max_num_segments: 1
7481
---
7582
"Test knn search":
83+
- requires:
84+
capabilities:
85+
- method: POST
86+
path: /_search
87+
capabilities: [ optimized_scalar_quantization_bbq ]
88+
test_runner_features: capabilities
89+
reason: "BBQ scoring improved and changed with optimized_scalar_quantization_bbq"
7690
- do:
7791
search:
7892
index: bbq_hnsw
7993
body:
8094
knn:
8195
field: vector
82-
query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
96+
query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393,
97+
0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015,
98+
0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259,
99+
-0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 ,
100+
-0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232,
101+
-0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034,
102+
-0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582,
103+
-0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158]
83104
k: 3
84105
num_candidates: 3
85106

86-
# Depending on how things are distributed, docs 2 and 3 might be swapped
87-
# here we verify that are last hit is always the worst one
88-
- match: { hits.hits.2._id: "1" }
89-
107+
- match: { hits.hits.0._id: "1" }
108+
- match: { hits.hits.1._id: "3" }
109+
- match: { hits.hits.2._id: "2" }
90110
---
91111
"Test bad quantization parameters":
92112
- do:

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,11 @@ setup:
1111
number_of_shards: 1
1212
mappings:
1313
properties:
14-
name:
15-
type: keyword
1614
vector:
1715
type: dense_vector
1816
dims: 64
1917
index: true
20-
similarity: l2_norm
21-
index_options:
22-
type: bbq_flat
23-
another_vector:
24-
type: dense_vector
25-
dims: 64
26-
index: true
27-
similarity: l2_norm
18+
similarity: max_inner_product
2819
index_options:
2920
type: bbq_flat
3021

@@ -33,9 +24,14 @@ setup:
3324
index: bbq_flat
3425
id: "1"
3526
body:
36-
name: cow.jpg
37-
vector: [300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0]
38-
another_vector: [115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0]
27+
vector: [0.077, 0.32 , -0.205, 0.63 , 0.032, 0.201, 0.167, -0.313,
28+
0.176, 0.531, -0.375, 0.334, -0.046, 0.078, -0.349, 0.272,
29+
0.307, -0.083, 0.504, 0.255, -0.404, 0.289, -0.226, -0.132,
30+
-0.216, 0.49 , 0.039, 0.507, -0.307, 0.107, 0.09 , -0.265,
31+
-0.285, 0.336, -0.272, 0.369, -0.282, 0.086, -0.132, 0.475,
32+
-0.224, 0.203, 0.439, 0.064, 0.246, -0.396, 0.297, 0.242,
33+
-0.028, 0.321, -0.022, -0.009, -0.001 , 0.031, -0.533, 0.45,
34+
-0.683, 1.331, 0.194, -0.157, -0.1 , -0.279, -0.098, -0.176]
3935
# Flush in order to provoke a merge later
4036
- do:
4137
indices.flush:
@@ -46,9 +42,14 @@ setup:
4642
index: bbq_flat
4743
id: "2"
4844
body:
49-
name: moose.jpg
50-
vector: [100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0]
51-
another_vector: [50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120]
45+
vector: [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348,
46+
-0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048,
47+
0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438,
48+
-0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138,
49+
-0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429,
50+
-0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166,
51+
0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569,
52+
-0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013]
5253
# Flush in order to provoke a merge later
5354
- do:
5455
indices.flush:
@@ -59,9 +60,14 @@ setup:
5960
index: bbq_flat
6061
id: "3"
6162
body:
62-
name: rabbit.jpg
63-
vector: [111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0]
64-
another_vector: [11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0]
63+
vector: [0.139, 0.178, -0.117, 0.399, 0.014, -0.139, 0.347, -0.33 ,
64+
0.139, 0.34 , -0.052, -0.052, -0.249, 0.327, -0.288, 0.049,
65+
0.464, 0.338, 0.516, 0.247, -0.104, 0.259, -0.209, -0.246,
66+
-0.11 , 0.323, 0.091, 0.442, -0.254, 0.195, -0.109, -0.058,
67+
-0.279, 0.402, -0.107, 0.308, -0.273, 0.019, 0.082, 0.399,
68+
-0.658, -0.03 , 0.276, 0.041, 0.187, -0.331, 0.165, 0.017,
69+
0.171, -0.203, -0.198, 0.115, -0.007, 0.337, -0.444, 0.615,
70+
-0.657, 1.285, 0.2 , -0.062, 0.038, 0.089, -0.068, -0.058]
6571
# Flush in order to provoke a merge later
6672
- do:
6773
indices.flush:
@@ -73,19 +79,33 @@ setup:
7379
max_num_segments: 1
7480
---
7581
"Test knn search":
82+
- requires:
83+
capabilities:
84+
- method: POST
85+
path: /_search
86+
capabilities: [ optimized_scalar_quantization_bbq ]
87+
test_runner_features: capabilities
88+
reason: "BBQ scoring improved and changed with optimized_scalar_quantization_bbq"
7689
- do:
7790
search:
7891
index: bbq_flat
7992
body:
8093
knn:
8194
field: vector
82-
query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
95+
query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393,
96+
0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015,
97+
0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259,
98+
-0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 ,
99+
-0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232,
100+
-0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034,
101+
-0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582,
102+
-0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158]
83103
k: 3
84104
num_candidates: 3
85105

86-
# Depending on how things are distributed, docs 2 and 3 might be swapped
87-
# here we verify that are last hit is always the worst one
88-
- match: { hits.hits.2._id: "1" }
106+
- match: { hits.hits.0._id: "1" }
107+
- match: { hits.hits.1._id: "3" }
108+
- match: { hits.hits.2._id: "2" }
89109
---
90110
"Test bad parameters":
91111
- do:

server/src/main/java/module-info.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,9 @@
459459
org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat,
460460
org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat,
461461
org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat,
462-
org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat;
462+
org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat,
463+
org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat,
464+
org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat;
463465

464466
provides org.apache.lucene.codecs.Codec
465467
with

server/src/main/java/org/elasticsearch/index/codec/vectors/BQVectorUtils.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,20 @@ public static boolean isUnitVector(float[] v) {
4040
return Math.abs(l1norm - 1.0d) <= EPSILON;
4141
}
4242

43+
/**
 * Packs a vector of single-bit values into bytes, eight values per output byte.
 *
 * <p>Values are written most-significant bit first: the first value of each group of
 * eight lands in bit 7 of the output byte and the eighth in bit 0. Only the lowest bit
 * of each input value is used. If {@code vector.length} is not a multiple of 8, the
 * final byte holds the remaining values in its high bits and its unused low bits are 0.
 * An empty {@code vector} writes nothing.
 *
 * @param vector the values to pack; each element is expected to be 0 or 1 (checked only by assertion)
 * @param packed the destination array; one byte is written per started group of eight values,
 *               so it needs at least {@code (vector.length + 7) / 8} bytes (checked only by assertion)
 */
public static void packAsBinary(byte[] vector, byte[] packed) {
44+
for (int i = 0; i < vector.length;) { // i is advanced inside the inner loop
45+
byte result = 0;
46+
for (int j = 7; j >= 0 && i < vector.length; j--) { // j is the target bit position, high to low
47+
assert vector[i] == 0 || vector[i] == 1;
48+
result |= (byte) ((vector[i] & 1) << j);
49+
++i;
50+
}
51+
int index = ((i + 7) / 8) - 1; // i is the number of values consumed so far, so this is the byte just filled
52+
assert index < packed.length;
53+
packed[index] = result;
54+
}
55+
}
56+
4357
/**
 * Rounds {@code value} up to a multiple of {@code bucket} using integer arithmetic.
 *
 * <p>For a non-negative {@code value} and a positive {@code bucket} the result is the
 * smallest multiple of {@code bucket} that is greater than or equal to {@code value}.
 *
 * @param value  the value to round up
 * @param bucket the step size; it is used as a divisor, so it must be non-zero
 * @return {@code ((value + (bucket - 1)) / bucket) * bucket}
 */
public static int discretize(int value, int bucket) {
4458
return ((value + (bucket - 1)) / bucket) * bucket;
4559
}

0 commit comments

Comments
 (0)