Skip to content

Commit f4fae60

Browse files
committed
quantize: recompute norm when centroid is updated
When RaBitQuantizedVectorSet.Clear was called with a new centroid, the norm was not being recomputed. This commit fixes that bug.

Epic: CRDB-42943
Release note: None
1 parent 2f72cf4 commit f4fae60

File tree

2 files changed

+21
-6
lines changed

2 files changed

+21
-6
lines changed

pkg/sql/vecindex/cspann/quantize/rabitqpb.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,9 @@ func (vs *RaBitQuantizedVectorSet) Clone() QuantizedVectorSet {
145145
// Clear implements the QuantizedVectorSet interface
146146
func (vs *RaBitQuantizedVectorSet) Clear(centroid vector.T) {
147147
if buildutil.CrdbTestBuild {
148+
if vs.Centroid == nil {
149+
panic(errors.New("Clear cannot be called on an uninitialized vector set"))
150+
}
148151
for i := range len(vs.CodeCounts) {
149152
vs.CodeCounts[i] = 0xBADF00D
150153
}
@@ -157,18 +160,21 @@ func (vs *RaBitQuantizedVectorSet) Clear(centroid vector.T) {
157160
// RaBitQCodeSet.Clear takes care of scribbling memory for vs.Codes.
158161
}
159162

163+
// Recompute the centroid norm for Cosine and InnerProduct metrics, but only
164+
// if a new centroid is provided.
165+
if vs.Metric != vecpb.L2SquaredDistance {
166+
if &vs.Centroid[0] != &centroid[0] {
167+
vs.CentroidNorm = num32.Norm(centroid)
168+
}
169+
}
170+
160171
// vs.Centroid is immutable, so do not try to reuse its memory.
161172
vs.Centroid = centroid
162173
vs.Codes.Clear()
163174
vs.CodeCounts = vs.CodeCounts[:0]
164175
vs.CentroidDistances = vs.CentroidDistances[:0]
165176
vs.QuantizedDotProducts = vs.QuantizedDotProducts[:0]
166177
vs.CentroidDotProducts = vs.CentroidDotProducts[:0]
167-
if vs.Metric != vecpb.L2SquaredDistance {
168-
if &vs.Centroid[0] != &centroid[0] {
169-
vs.CentroidNorm = num32.Norm(centroid)
170-
}
171-
}
172178
}
173179

174180
// AddUndefined adds the given number of quantized vectors to this set. The new

pkg/sql/vecindex/cspann/quantize/rabitqpb_test.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,15 @@ func TestRaBitQuantizedVectorSet(t *testing.T) {
105105
require.Equal(t, []float32{10}, cloned.CentroidDistances)
106106
require.Equal(t, []float32{10}, cloned.QuantizedDotProducts)
107107

108+
// Clear the set and ensure that norm is not updated.
109+
quantizedSet.Clear(quantizedSet.Centroid)
110+
require.Equal(t, float32(0), quantizedSet.CentroidNorm)
111+
108112
// Test InnerProduct distance metric, which uses the CentroidDotProducts
109113
// field (L2Squared does not use it).
110-
quantizedSet.Clear(quantizedSet.Centroid)
111114
quantizedSet.Metric = vecpb.InnerProductDistance
115+
quantizedSet.Clear(quantizedSet.Centroid)
116+
require.Equal(t, float32(0), quantizedSet.CentroidNorm)
112117
quantizedSet.AddUndefined(2)
113118
copy(quantizedSet.Codes.At(1), []uint64{1, 2, 3})
114119
quantizedSet.CodeCounts[1] = 15
@@ -124,4 +129,8 @@ func TestRaBitQuantizedVectorSet(t *testing.T) {
124129
require.Len(t, cloned.CentroidDotProducts, 2)
125130
cloned.Clear(quantizedSet.Centroid)
126131
require.Len(t, cloned.CentroidDotProducts, 0)
132+
133+
// Update the centroid and ensure that norm is updated.
134+
quantizedSet.Clear([]float32{2, 3, 6})
135+
require.Equal(t, float32(7), quantizedSet.CentroidNorm)
127136
}

0 commit comments

Comments
 (0)