Skip to content

Commit 1b8f532

Browse files
committed
vecindex: return minimum number of results from Search
When vector deletion is enabled in TestVecIndexConcurrency, the test occasionally fails under stress because a search returns zero results. The cause is that a search with a LIMIT of 1 was fetching only 2 candidate results (the limit scaled by DeletedMultiplier = 1.2 and rounded up). If both of those results point to deleted primary key rows, then zero results get returned.

The fix is to return at least 10 candidate vectors from searches. While all 10 vectors could still be deleted, this at least makes the scenario much less common. The longer-term fix is to switch to an iterator execution model, where we continue to "pull" additional vectors until we get the desired number of results.

Epic: CRDB-42943

Release note: None
1 parent 62c553c commit 1b8f532

File tree

11 files changed

+110
-30
lines changed

11 files changed

+110
-30
lines changed

pkg/sql/vecindex/cspann/index.go

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package cspann
88
import (
99
"bytes"
1010
"context"
11+
"math"
1112
"math/rand"
1213
"runtime"
1314
"strconv"
@@ -28,13 +29,33 @@ import (
2829
// of search results that will be reranked with the original full-size vectors.
2930
const RerankMultiplier = 10
3031

31-
// DeletedMultiplier increases the number of results that will be reranked, in
32+
// DeletedMinCount sets a minimum number of results that will be reranked, in
3233
// order to account for vectors that may have been deleted in the primary index.
34+
const DeletedMinCount = 10
35+
36+
// DeletedMultiplier increases the number of results that will be reranked by
37+
// this factor, in order to account for vectors that may have been deleted in
38+
// the primary index.
3339
const DeletedMultiplier = 1.2
3440

3541
// MaxQualitySamples specifies the max value of the QualitySamples index option.
3642
const MaxQualitySamples = 32
3743

44+
// IncreaseRerankResults returns good values for maxResults and maxExtraResults
45+
// that have a high probability of returning the desired number of results, even
46+
// when there are deleted results. Deleted results will be filtered out by the
47+
// rerank process, so we need to make sure there are additional results that can
48+
// be returned instead.
49+
//
// The returned maxResults is desiredMaxResults scaled by DeletedMultiplier and
// rounded up, but never less than DeletedMinCount. The returned maxExtraResults
// is desiredMaxResults multiplied by RerankMultiplier; it is derived from the
// caller's desired count rather than from the inflated maxResults.
//
50+
// TODO(andyk): Switch the index to use a search iterator so the caller can keep
51+
// requesting further results rather than guessing at how many additional
52+
// results might be needed.
53+
func IncreaseRerankResults(desiredMaxResults int) (maxResults, maxExtraResults int) {
54+
// Scale the request by DeletedMultiplier (rounding up), then enforce the
// DeletedMinCount floor so that small limits still get enough candidates.
maxResults = max(int(math.Ceil(float64(desiredMaxResults)*DeletedMultiplier)), DeletedMinCount)
55+
// The extra-results budget is based on the caller's requested count.
maxExtraResults = desiredMaxResults * RerankMultiplier
56+
return maxResults, maxExtraResults
57+
}
58+
3859
// IndexOptions specifies options that control how the index will be built, as
3960
// well as default options for how it will be searched. A given search operation
4061
// can specify SearchOptions to override the default behavior.

pkg/sql/vecindex/cspann/searcher.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,10 @@ func (s *searcher) Next(ctx context.Context) (ok bool, err error) {
120120
maxResults = s.searchSet.MaxResults
121121
maxExtraResults = s.searchSet.MaxExtraResults
122122
if !s.idxCtx.options.SkipRerank {
123-
maxResults = int(math.Ceil(float64(maxResults) * DeletedMultiplier))
124-
maxExtraResults = maxResults * RerankMultiplier
123+
maxResults, maxExtraResults = IncreaseRerankResults(maxResults)
124+
if s.searchSet.MaxExtraResults > maxExtraResults {
125+
maxExtraResults = s.searchSet.MaxExtraResults
126+
}
125127
}
126128
if s.idxCtx.level != LeafLevel && s.idxCtx.options.UpdateStats {
127129
maxResults = max(maxResults, s.idx.options.QualitySamples)

pkg/sql/vecindex/cspann/testdata/read-only.ddt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ vec4: 2 (centroid=1.25)
3737
vec10: 4 (centroid=1.75)
3838
vec5: 4 (centroid=2.02)
3939
vec8: 4 (centroid=2.25)
40-
13 leaf vectors, 16 vectors, 5 full vectors, 4 partitions
40+
13 leaf vectors, 16 vectors, 13 full vectors, 4 partitions
4141

4242
format-tree
4343
----
@@ -71,7 +71,7 @@ search max-results=3 beam-size=3
7171
vec9: 18 (centroid=4.95)
7272
vec11: 25 (centroid=2.36)
7373
vec5: 26 (centroid=2.02)
74-
13 leaf vectors, 16 vectors, 10 full vectors, 4 partitions
74+
13 leaf vectors, 16 vectors, 11 full vectors, 4 partitions
7575

7676
format-tree
7777
----

pkg/sql/vecindex/cspann/testdata/search-features.ddt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ CV stats:
1414
search max-results=1 use-feature=5000 beam-size=1
1515
----
1616
vec771: 0.5624 (centroid=0.63)
17-
23 leaf vectors, 43 vectors, 2 full vectors, 4 partitions
17+
23 leaf vectors, 43 vectors, 12 full vectors, 4 partitions
1818

1919
# Search for additional results.
2020
search max-results=6 use-feature=5000 beam-size=1
@@ -36,7 +36,7 @@ vec640: 0.6525 (centroid=0.52)
3636
vec329: 0.6871 (centroid=0.52)
3737
vec95: 0.7008 (centroid=0.61)
3838
vec386: 0.7301 (centroid=0.61)
39-
85 leaf vectors, 141 vectors, 13 full vectors, 13 partitions
39+
85 leaf vectors, 141 vectors, 18 full vectors, 13 partitions
4040

4141
# Turn off re-ranking, which results in increased inaccuracy.
4242
search max-results=6 use-feature=5000 beam-size=4 skip-rerank

pkg/sql/vecindex/cspann/testdata/search.ddt

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,3 +217,64 @@ vec2: 28.1958 ±10.12 (centroid=1.58)
217217
vec3: 45.6359 ±15.23 (centroid=1.41)
218218
vec5: 74.3641 ±15.23 (centroid=1.41)
219219
4 leaf vectors, 7 vectors, 0 full vectors, 3 partitions
220+
221+
# ----------------------------------------------------------------------
222+
# Search tree with many deleted vectors.
223+
# ----------------------------------------------------------------------
224+
new-index min-partition-size=1 max-partition-size=4 beam-size=3
225+
vec1: (0, 0)
226+
vec2: (100, 100)
227+
vec3: (200, 200)
228+
vec4: (300, 300)
229+
vec5: (400, 400)
230+
vec6: (500, 500)
231+
----
232+
• 1 (0, 0)
233+
234+
├───• 5 (450, 450)
235+
│ │
236+
│ ├───• vec5 (400, 400)
237+
│ └───• vec6 (500, 500)
238+
239+
├───• 3 (50, 50)
240+
│ │
241+
│ ├───• vec1 (0, 0)
242+
│ └───• vec2 (100, 100)
243+
244+
└───• 4 (250, 250)
245+
246+
├───• vec3 (200, 200)
247+
└───• vec4 (300, 300)
248+
249+
# Delete all but one vector.
250+
delete not-found
251+
vec1
252+
vec2
253+
vec3
254+
vec4
255+
vec5
256+
----
257+
• 1 (0, 0)
258+
259+
├───• 5 (450, 450)
260+
│ │
261+
│ ├───• vec5 (MISSING)
262+
│ └───• vec6 (500, 500)
263+
264+
├───• 3 (50, 50)
265+
│ │
266+
│ ├───• vec1 (MISSING)
267+
│ └───• vec2 (MISSING)
268+
269+
└───• 4 (250, 250)
270+
271+
├───• vec3 (MISSING)
272+
└───• vec4 (MISSING)
273+
274+
# Search for the vector that's farthest from vec6, so it's last in the result
275+
# set. This ensures that DeletedMinCount is working as intended.
276+
search max-results=1
277+
(0, 0)
278+
----
279+
vec6: 500000 (centroid=70.71)
280+
6 leaf vectors, 9 vectors, 6 full vectors, 4 partitions

pkg/sql/vecindex/cspann/testdata/split-non-root-step.ddt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,7 @@ search beam-size=16
781781
(2, -4)
782782
----
783783
vec5: 0 (centroid=1)
784-
7 leaf vectors, 17 vectors, 3 full vectors, 11 partitions
784+
7 leaf vectors, 17 vectors, 5 full vectors, 11 partitions
785785

786786
format-tree
787787
----
@@ -820,7 +820,7 @@ search beam-size=16
820820
(2, -4)
821821
----
822822
vec5: 0 (centroid=1)
823-
5 leaf vectors, 17 vectors, 3 full vectors, 13 partitions
823+
5 leaf vectors, 17 vectors, 5 full vectors, 13 partitions
824824

825825
format-tree
826826
----
@@ -863,7 +863,7 @@ search beam-size=16
863863
(2, -4)
864864
----
865865
vec5: 0 (centroid=1)
866-
5 leaf vectors, 19 vectors, 3 full vectors, 15 partitions
866+
5 leaf vectors, 19 vectors, 5 full vectors, 15 partitions
867867

868868
format-tree
869869
----
@@ -1042,7 +1042,7 @@ search
10421042
vec2: 0 (centroid=0)
10431043
vec6: 0 (centroid=0)
10441044
vec7: 0 (centroid=0)
1045-
5 leaf vectors, 12 vectors, 4 full vectors, 6 partitions
1045+
5 leaf vectors, 12 vectors, 5 full vectors, 6 partitions
10461046

10471047
format-tree
10481048
----

pkg/sql/vecindex/cspann/testdata/split-root-step.ddt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ vec7: 5 (centroid=1.67)
255255
vec8: 5 (centroid=2.24)
256256
vec3: 26 (centroid=1.05)
257257
vec4: 26 (centroid=1.05)
258-
9 leaf vectors, 13 vectors, 6 full vectors, 5 partitions
258+
9 leaf vectors, 13 vectors, 9 full vectors, 5 partitions
259259

260260
# Move to the DrainingForSplit state, where sub-partition #6 has vectors.
261261
force-split partition-key=1 steps=4
@@ -308,7 +308,7 @@ vec7: 5 (centroid=1.67)
308308
vec8: 5 (centroid=2.24)
309309
vec3: 26 (centroid=1.05)
310310
vec4: 26 (centroid=1.05)
311-
9 leaf vectors, 15 vectors, 6 full vectors, 7 partitions
311+
9 leaf vectors, 15 vectors, 9 full vectors, 7 partitions
312312

313313
# Move to the point where partition #1 children have been cleared.
314314
force-split partition-key=1 steps=2
@@ -324,7 +324,7 @@ vec7: 5 (centroid=1.67)
324324
vec8: 5 (centroid=2.24)
325325
vec3: 26 (centroid=1.05)
326326
vec4: 26 (centroid=1.05)
327-
9 leaf vectors, 13 vectors, 6 full vectors, 7 partitions
327+
9 leaf vectors, 13 vectors, 9 full vectors, 7 partitions
328328

329329
# Move to the AddingLevel state, where root partition's level has increased.
330330
force-split partition-key=1 steps=1
@@ -340,7 +340,7 @@ vec7: 5 (centroid=1.67)
340340
vec8: 5 (centroid=2.24)
341341
vec3: 26 (centroid=1.05)
342342
vec4: 26 (centroid=1.05)
343-
9 leaf vectors, 13 vectors, 6 full vectors, 7 partitions
343+
9 leaf vectors, 13 vectors, 9 full vectors, 7 partitions
344344

345345
# Move to point where sub-partitions #6 and #7 have been added to the root.
346346
force-split partition-key=1 steps=4
@@ -381,7 +381,7 @@ vec7: 5 (centroid=1.67)
381381
vec8: 5 (centroid=2.24)
382382
vec3: 26 (centroid=1.05)
383383
vec4: 26 (centroid=1.05)
384-
9 leaf vectors, 15 vectors, 6 full vectors, 7 partitions
384+
9 leaf vectors, 15 vectors, 9 full vectors, 7 partitions
385385

386386
# Finish the split.
387387
force-split partition-key=1 steps=1
@@ -422,7 +422,7 @@ vec7: 5 (centroid=1.67)
422422
vec8: 5 (centroid=2.24)
423423
vec3: 26 (centroid=1.05)
424424
vec4: 26 (centroid=1.05)
425-
9 leaf vectors, 15 vectors, 6 full vectors, 7 partitions
425+
9 leaf vectors, 15 vectors, 9 full vectors, 7 partitions
426426

427427
# ----------------------------------------------------------------------
428428
# Insert into the tree when the root is in splitting states.

pkg/sql/vecindex/cspann/testdata/split.ddt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ search max-results=3 beam-size=3
147147
vec7: 2 (centroid=2.12)
148148
vec10: 5 (centroid=0.71)
149149
vec12: 9 (centroid=0.71)
150-
6 leaf vectors, 14 vectors, 5 full vectors, 6 partitions
150+
6 leaf vectors, 14 vectors, 6 full vectors, 6 partitions
151151

152152
# ----------------------------------------------------------------------
153153
# Test linking nearby vectors from other partitions.

pkg/sql/vecindex/cspann/testdata/tree-key.ddt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ search max-results=1 tree=1
156156
(1, 2)
157157
----
158158
vec6: 5 (centroid=2.36)
159-
6 leaf vectors, 8 vectors, 2 full vectors, 3 partitions
159+
6 leaf vectors, 8 vectors, 6 full vectors, 3 partitions
160160

161161
# Vector should now be gone from the index.
162162
format-tree tree=1

pkg/sql/vecindex/searcher.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ package vecindex
77

88
import (
99
"context"
10-
"math"
1110

1211
"github.com/cockroachdb/cockroach/pkg/kv"
1312
"github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -48,8 +47,7 @@ func (s *Searcher) Init(idx *cspann.Index, txn *kv.Txn, baseBeamSize, maxResults
4847
BaseBeamSize: baseBeamSize,
4948
SkipRerank: true,
5049
}
51-
s.searchSet.MaxResults = int(math.Ceil(float64(maxResults) * cspann.DeletedMultiplier))
52-
s.searchSet.MaxExtraResults = s.searchSet.MaxResults * cspann.RerankMultiplier
50+
s.searchSet.MaxResults, s.searchSet.MaxExtraResults = cspann.IncreaseRerankResults(maxResults)
5351

5452
// If the index is deterministic, then synchronously run the background worker
5553
// to process any pending fixups.

0 commit comments

Comments
 (0)