Commit 09d023f

Fix bbq quantization algorithm bug for differently distributed components (#126778) (#126794)
We had a silly bug when quantizing vectors in bbq: the initial quantile optimization parameters were scaled incorrectly given the vector component distribution. For distributions where this has a major impact, recall was abysmal and rendered the quantization technique useless. For modern, well-distributed components, this change is almost a no-op.
1 parent 33bdadf · commit 09d023f
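In code terms, the fix swaps the order of the mean shift and the standard-deviation scaling when seeding the quantization interval. The excerpt below is lifted and lightly condensed from the patch (clamp, MINIMUM_MSE_GRID, vecMean, vecStd, min and max are the names used in OptimizedScalarQuantizer):

    // Before: the per-vector mean was added to the grid value and the sum was scaled by the
    // standard deviation, i.e. (g + mean) * std, which distorts the interval whenever the mean is non-zero.
    intervalScratch[0] = (float) clamp((MINIMUM_MSE_GRID[bits - 1][0] + vecMean) * vecStd, min, max);

    // After: scale the grid value by the standard deviation first, then shift by the mean,
    // i.e. g * std + mean, placing the endpoint g standard deviations away from the component mean.
    intervalScratch[0] = (float) clamp(MINIMUM_MSE_GRID[bits - 1][0] * vecStd + vecMean, min, max);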

File tree: 3 files changed (+65, −4 lines)

docs/changelog/126778.yaml

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+pr: 126778
+summary: Fix bbq quantization algorithm but for differently distributed components
+area: Vector Search
+type: bug
+issues: []

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/OptimizedScalarQuantizer.java

Lines changed: 4 additions & 4 deletions

@@ -75,8 +75,8 @@ public QuantizationResult[] multiScalarQuantize(float[] vector, byte[][] destina
             assert bits[i] > 0 && bits[i] <= 8;
             int points = (1 << bits[i]);
             // Linearly scale the interval to the standard deviation of the vector, ensuring we are within the min/max bounds
-            intervalScratch[0] = (float) clamp((MINIMUM_MSE_GRID[bits[i] - 1][0] + vecMean) * vecStd, min, max);
-            intervalScratch[1] = (float) clamp((MINIMUM_MSE_GRID[bits[i] - 1][1] + vecMean) * vecStd, min, max);
+            intervalScratch[0] = (float) clamp(MINIMUM_MSE_GRID[bits[i] - 1][0] * vecStd + vecMean, min, max);
+            intervalScratch[1] = (float) clamp(MINIMUM_MSE_GRID[bits[i] - 1][1] * vecStd + vecMean, min, max);
             optimizeIntervals(intervalScratch, vector, norm2, points);
             float nSteps = ((1 << bits[i]) - 1);
             float a = intervalScratch[0];
@@ -128,8 +128,8 @@ public QuantizationResult scalarQuantize(float[] vector, byte[] destination, byt
         vecVar /= vector.length;
         double vecStd = Math.sqrt(vecVar);
         // Linearly scale the interval to the standard deviation of the vector, ensuring we are within the min/max bounds
-        intervalScratch[0] = (float) clamp((MINIMUM_MSE_GRID[bits - 1][0] + vecMean) * vecStd, min, max);
-        intervalScratch[1] = (float) clamp((MINIMUM_MSE_GRID[bits - 1][1] + vecMean) * vecStd, min, max);
+        intervalScratch[0] = (float) clamp(MINIMUM_MSE_GRID[bits - 1][0] * vecStd + vecMean, min, max);
+        intervalScratch[1] = (float) clamp(MINIMUM_MSE_GRID[bits - 1][1] * vecStd + vecMean, min, max);
         optimizeIntervals(intervalScratch, vector, norm2, points);
         float nSteps = ((1 << bits) - 1);
         // Now we have the optimized intervals, quantize the vector
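For intuition, with illustrative numbers that are not taken from the commit: suppose vecMean = 0.5, vecStd = 0.1 and the grid endpoint is -0.8. The old expression evaluates to (-0.8 + 0.5) * 0.1 = -0.03, while the corrected one evaluates to -0.8 * 0.1 + 0.5 = 0.42, i.e. 0.8 standard deviations below the component mean; only the latter tracks the actual component distribution. The two expressions agree exactly only when vecMean is 0 or vecStd is 1, which is why the change is nearly a no-op for well-centered, well-distributed components.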

server/src/test/java/org/elasticsearch/index/codec/vectors/es818/OptimizedScalarQuantizerTests.java

Lines changed: 56 additions & 0 deletions

@@ -19,6 +19,62 @@ public class OptimizedScalarQuantizerTests extends ESTestCase {
 
     static final byte[] ALL_BITS = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 };
 
+    static float[] deQuantize(byte[] quantized, byte bits, float[] interval, float[] centroid) {
+        float[] dequantized = new float[quantized.length];
+        float a = interval[0];
+        float b = interval[1];
+        int nSteps = (1 << bits) - 1;
+        double step = (b - a) / nSteps;
+        for (int h = 0; h < quantized.length; h++) {
+            double xi = (double) (quantized[h] & 0xFF) * step + a;
+            dequantized[h] = (float) (xi + centroid[h]);
+        }
+        return dequantized;
+    }
+
+    public void testQuantizationQuality() {
+        int dims = 16;
+        int numVectors = 32;
+        float[][] vectors = new float[numVectors][];
+        float[] centroid = new float[dims];
+        for (int i = 0; i < numVectors; ++i) {
+            vectors[i] = new float[dims];
+            for (int j = 0; j < dims; ++j) {
+                vectors[i][j] = randomFloat();
+                centroid[j] += vectors[i][j];
+            }
+        }
+        for (int j = 0; j < dims; ++j) {
+            centroid[j] /= numVectors;
+        }
+        // similarity doesn't matter for this test
+        OptimizedScalarQuantizer osq = new OptimizedScalarQuantizer(VectorSimilarityFunction.DOT_PRODUCT);
+        float[] scratch = new float[dims];
+        for (byte bit : ALL_BITS) {
+            float eps = (1f / (float) (1 << (bit)));
+            byte[] destination = new byte[dims];
+            for (int i = 0; i < numVectors; ++i) {
+                System.arraycopy(vectors[i], 0, scratch, 0, dims);
+                OptimizedScalarQuantizer.QuantizationResult result = osq.scalarQuantize(scratch, destination, bit, centroid);
+                assertValidResults(result);
+                assertValidQuantizedRange(destination, bit);
+
+                float[] dequantized = deQuantize(
+                    destination,
+                    bit,
+                    new float[] { result.lowerInterval(), result.upperInterval() },
+                    centroid
+                );
+                float mae = 0;
+                for (int k = 0; k < dims; ++k) {
+                    mae += Math.abs(dequantized[k] - vectors[i][k]);
+                }
+                mae /= dims;
+                assertTrue("bits: " + bit + " mae: " + mae + " > eps: " + eps, mae <= eps);
+            }
+        }
+    }
+
     public void testAbusiveEdgeCases() {
         // large zero array
         for (VectorSimilarityFunction vectorSimilarityFunction : VectorSimilarityFunction.values()) {
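For readers of the new test: deQuantize inverts a uniform scalar quantizer over the interval [a, b] and then re-adds the centroid. A minimal sketch of the forward direction is below; quantizeComponent is a hypothetical helper written for illustration, not part of the patch, and the real scalarQuantize additionally runs optimizeIntervals and reports the resulting interval bounds through QuantizationResult.

    // Hypothetical helper (illustration only): the nearest-level rounding that the
    // test's deQuantize reverses for a single vector component.
    static byte quantizeComponent(float value, float centroidValue, float a, float b, byte bits) {
        int nSteps = (1 << bits) - 1;                   // number of steps between the 2^bits levels
        double step = (b - a) / nSteps;                 // width of one quantization step
        double centered = value - centroidValue;        // center the component on the centroid
        long level = Math.round((centered - a) / step); // snap to the nearest grid level
        level = Math.max(0, Math.min(nSteps, level));   // clamp into [0, 2^bits - 1]
        return (byte) level;                            // stored and read back as an unsigned byte
    }

Since randomFloat() draws components from [0, 1), the optimized interval is roughly unit-wide, so one quantization step is about 1 / (2^bits - 1); the test's mean-absolute-error tolerance of eps = 1 / 2^bits is of the same order.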
