Skip to content

Commit 5acff53

Browse files
craig[bot]DrewKimball
andcommitted
Merge #144681
144681: sql: support vector indexes with external row data r=andy-kimball a=DrewKimball #### cspann: implement "read-only" mode This commit adds support for read-only indexes to the cspann library. If `readOnly=true` is supplied to `cspann.NewIndex`, fixups will be disabled for that index instance, as will merging into the global index stats. Informs #144679 Release note: None #### sql: support vector indexes with external row data This commit fixes a few cases where vector-indexing logic was not correctly using the table's `ExternalRowData` if it was set. It also initializes `cspann.VectorIndex` instances in read-only mode if the table with the index has external row data. Fixes #144679 Release note (bug fix): Fixed a bug in alpha versions of 25.2 as well as `v25.2.0-beta.1` that would cause vector indexes to return incorrect or no results from a standby reader in a PCR setup. Co-authored-by: Drew Kimball <[email protected]>
2 parents 18b42de + 89d6c57 commit 5acff53

File tree

19 files changed

+347
-62
lines changed

19 files changed

+347
-62
lines changed

pkg/ccl/testccl/sqlccl/BUILD.bazel

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@ go_test(
4141
"//pkg/settings/cluster",
4242
"//pkg/spanconfig",
4343
"//pkg/sql",
44-
"//pkg/sql/catalog/lease",
45-
"//pkg/sql/catalog/replication",
4644
"//pkg/sql/gcjob",
4745
"//pkg/sql/isql",
4846
"//pkg/sql/lexbase",

pkg/ccl/testccl/sqlccl/standby_read_test.go

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,12 @@ import (
1010
"testing"
1111

1212
"github.com/cockroachdb/cockroach/pkg/base"
13-
"github.com/cockroachdb/cockroach/pkg/sql"
14-
"github.com/cockroachdb/cockroach/pkg/sql/catalog/lease"
15-
"github.com/cockroachdb/cockroach/pkg/sql/catalog/replication"
16-
"github.com/cockroachdb/cockroach/pkg/testutils"
1713
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
1814
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
1915
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
16+
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
2017
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
2118
"github.com/cockroachdb/cockroach/pkg/util/log"
22-
"github.com/cockroachdb/errors"
2319
"github.com/stretchr/testify/require"
2420
)
2521

@@ -73,33 +69,14 @@ func TestStandbyRead(t *testing.T) {
7369

7470
srcRunner := sqlutils.MakeSQLRunner(srcDB)
7571
dstRunner := sqlutils.MakeSQLRunner(dstDB)
76-
dstInternal := dstTenant.InternalDB().(*sql.InternalDB)
7772

7873
dstRunner.Exec(t, `SET CLUSTER SETTING sql.defaults.distsql = always`)
7974
dstRunner.Exec(t, `SET distsql = always`)
8075

81-
waitForReplication := func() {
82-
now := ts.Clock().Now()
83-
err := replication.SetupOrAdvanceStandbyReaderCatalog(
84-
ctx, serverutils.TestTenantID(), now, dstInternal, dstTenant.ClusterSettings(),
85-
)
86-
if err != nil {
87-
t.Fatal(err)
88-
}
89-
now = ts.Clock().Now()
90-
lm := dstTenant.LeaseManager().(*lease.Manager)
91-
testutils.SucceedsSoon(t, func() error {
92-
if lm.GetSafeReplicationTS().Less(now) {
93-
return errors.AssertionFailedf("waiting for descriptor close timestamp to catch up")
94-
}
95-
return nil
96-
})
97-
}
98-
9976
for _, tc := range testcases {
10077
var runner *sqlutils.SQLRunner
10178
if tc.standby {
102-
waitForReplication()
79+
testcluster.WaitForStandbyTenantReplication(t, ctx, ts.Clock(), dstTenant)
10380
runner = dstRunner
10481
} else {
10582
runner = srcRunner

pkg/sql/opt_exec_factory.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1865,7 +1865,9 @@ func (ef *execFactory) ConstructVectorSearch(
18651865

18661866
// Encode the prefix constraint as a list of roachpb.Keys.
18671867
var sb span.Builder
1868-
sb.Init(ef.planner.EvalContext(), ef.planner.ExecCfg().Codec, tabDesc, indexDesc)
1868+
sb.InitAllowingExternalRowData(
1869+
ef.planner.EvalContext(), ef.planner.ExecCfg().Codec, tabDesc, indexDesc,
1870+
)
18691871
prefixKeys, err := sb.KeysFromVectorPrefixConstraint(ef.ctx, prefixConstraint)
18701872
if err != nil {
18711873
return nil, err

pkg/sql/vecindex/BUILD.bazel

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ go_library(
4343
go_test(
4444
name = "vecindex_test",
4545
srcs = [
46+
"main_test.go",
4647
"manager_test.go",
4748
"searcher_test.go",
4849
"vecindex_test.go",
@@ -51,6 +52,8 @@ go_test(
5152
embed = [":vecindex"],
5253
deps = [
5354
"//pkg/base",
55+
"//pkg/ccl",
56+
"//pkg/ccl/storageccl",
5457
"//pkg/keys",
5558
"//pkg/roachpb",
5659
"//pkg/security/securityassets",
@@ -78,7 +81,9 @@ go_test(
7881
"//pkg/sql/vecindex/vecpb",
7982
"//pkg/sql/vecindex/vecstore",
8083
"//pkg/testutils/serverutils",
84+
"//pkg/testutils/skip",
8185
"//pkg/testutils/sqlutils",
86+
"//pkg/testutils/testcluster",
8287
"//pkg/util/encoding",
8388
"//pkg/util/leaktest",
8489
"//pkg/util/log",

pkg/sql/vecindex/cspann/fixup_processor.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,12 @@ func (fp *FixupProcessor) Process(discard bool) {
378378
// already a duplicate fixup that's pending. It also starts a background worker
379379
// to process the fixup, if needed and allowed.
380380
func (fp *FixupProcessor) addFixup(ctx context.Context, fixup fixup) {
381+
if fp.index.options.ReadOnly {
382+
// Don't enqueue fixups if the index is read-only.
383+
log.VEvent(ctx, 2, "discarding fixup because index is read-only")
384+
return
385+
}
386+
381387
fp.mu.Lock()
382388
defer fp.mu.Unlock()
383389

pkg/sql/vecindex/cspann/index.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ type IndexOptions struct {
6161
// index is initialized with the same seed and fixups happen serially, the
6262
// index will behave deterministically. This is useful for testing.
6363
IsDeterministic bool
64+
// ReadOnly is true if the index cannot be modified. If true, the index will
65+
// not perform any fixups, even if a too-large or too-small partition is
66+
// discovered during a search.
67+
ReadOnly bool
6468
// MaxWorkers specifies the maximum number of background workers that can be
6569
// created to process fixups for this vector index instance.
6670
MaxWorkers int

pkg/sql/vecindex/cspann/index_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,9 @@ func (s *testState) makeNewIndex(d *datadriven.TestData) {
653653

654654
case "beam-size":
655655
s.Options.BaseBeamSize = s.parseInt(arg)
656+
657+
case "read-only":
658+
s.Options.ReadOnly = s.parseFlag(arg)
656659
}
657660
}
658661

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# ----------------------------------------------------------------------
2+
# Test a read-only index.
3+
# ----------------------------------------------------------------------
4+
load-index min-partition-size=1 max-partition-size=4 beam-size=2 read-only
5+
• 1 (0, 0)
6+
7+
├───• 2 (8.5, 5.5)
8+
│ │
9+
│ ├───• vec6 (14, 1)
10+
│ ├───• vec2 (3, 10)
11+
│ ├───• vec9 (5, 2)
12+
│ ├───• vec12 (10, 10)
13+
│ └───• vec14 (8, 12)
14+
15+
├───• 5 (1, 6.25)
16+
│ │
17+
│ ├───• vec5 (3, 6)
18+
│ ├───• vec4 (2, 7)
19+
│ ├───• vec3 (-2, 8)
20+
│ ├───• vec8 (1, 4)
21+
│ ├───• vec10 (1, 8)
22+
│ └───• vec11 (3, 5)
23+
24+
└───• 4 (0, -0.5)
25+
26+
├───• vec7 (0, 0)
27+
└───• vec1 (0, -1)
28+
29+
----
30+
Loaded 13 vectors.
31+
32+
# Searching the index should not trigger fixups.
33+
search max-results=2 beam-size=3
34+
(1, 6)
35+
----
36+
vec4: 2 (centroid=1.25)
37+
vec10: 4 (centroid=1.75)
38+
vec5: 4 (centroid=2.02)
39+
vec8: 4 (centroid=2.25)
40+
13 leaf vectors, 16 vectors, 5 full vectors, 4 partitions
41+
42+
format-tree
43+
----
44+
• 1 (0, 0)
45+
46+
├───• 2 (8.5, 5.5)
47+
│ │
48+
│ ├───• vec6 (14, 1)
49+
│ ├───• vec2 (3, 10)
50+
│ ├───• vec9 (5, 2)
51+
│ ├───• vec12 (10, 10)
52+
│ └───• vec14 (8, 12)
53+
54+
├───• 5 (1, 6.25)
55+
│ │
56+
│ ├───• vec5 (3, 6)
57+
│ ├───• vec4 (2, 7)
58+
│ ├───• vec3 (-2, 8)
59+
│ ├───• vec8 (1, 4)
60+
│ ├───• vec10 (1, 8)
61+
│ └───• vec11 (3, 5)
62+
63+
└───• 4 (0, -0.5)
64+
65+
├───• vec7 (0, 0)
66+
└───• vec1 (0, -1)
67+
68+
search max-results=3 beam-size=3
69+
(8, 5)
70+
----
71+
vec9: 18 (centroid=4.95)
72+
vec11: 25 (centroid=2.36)
73+
vec5: 26 (centroid=2.02)
74+
13 leaf vectors, 16 vectors, 10 full vectors, 4 partitions
75+
76+
format-tree
77+
----
78+
• 1 (0, 0)
79+
80+
├───• 2 (8.5, 5.5)
81+
│ │
82+
│ ├───• vec6 (14, 1)
83+
│ ├───• vec2 (3, 10)
84+
│ ├───• vec9 (5, 2)
85+
│ ├───• vec12 (10, 10)
86+
│ └───• vec14 (8, 12)
87+
88+
├───• 5 (1, 6.25)
89+
│ │
90+
│ ├───• vec5 (3, 6)
91+
│ ├───• vec4 (2, 7)
92+
│ ├───• vec3 (-2, 8)
93+
│ ├───• vec8 (1, 4)
94+
│ ├───• vec10 (1, 8)
95+
│ └───• vec11 (3, 5)
96+
97+
└───• 4 (0, -0.5)
98+
99+
├───• vec7 (0, 0)
100+
└───• vec1 (0, -1)
101+
102+
metrics
103+
----
104+
0 successful splits
105+
0 pending splits/merges

pkg/sql/vecindex/main_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package vecindex_test
7+
8+
import (
9+
"os"
10+
"testing"
11+
12+
"github.com/cockroachdb/cockroach/pkg/ccl"
13+
_ "github.com/cockroachdb/cockroach/pkg/ccl/storageccl"
14+
"github.com/cockroachdb/cockroach/pkg/security/securityassets"
15+
"github.com/cockroachdb/cockroach/pkg/security/securitytest"
16+
"github.com/cockroachdb/cockroach/pkg/server"
17+
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
18+
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
19+
"github.com/cockroachdb/cockroach/pkg/util/randutil"
20+
)
21+
22+
func TestMain(m *testing.M) {
23+
defer ccl.TestingEnableEnterprise()()
24+
securityassets.SetLoader(securitytest.EmbeddedAssets)
25+
randutil.SeedForTests()
26+
serverutils.InitTestServerFactory(server.TestServerFactory)
27+
serverutils.InitTestClusterFactory(testcluster.TestClusterFactory)
28+
os.Exit(m.Run())
29+
}
30+
31+
//go:generate ../../util/leaktest/add-leaktest.sh *_test.go

pkg/sql/vecindex/manager.go

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,13 +171,17 @@ func (m *Manager) GetWithDesc(
171171
// TODO(drewk): use the config to populate the index options as well.
172172
config := index.GetVecConfig()
173173
quantizer := quantize.NewRaBitQuantizer(int(config.Dims), config.Seed)
174-
store, err := vecstore.NewWithColumnID(m.db, quantizer, m.codec, desc, index.GetID(), index.VectorColumnID())
174+
store, err := vecstore.NewWithColumnID(
175+
ctx, m.db, quantizer, m.codec, desc, index.GetID(), index.VectorColumnID(),
176+
)
175177
if err != nil {
176178
return nil, err
177179
}
178180

179181
return cspann.NewIndex(
180-
m.ctx, store, quantizer, config.Seed, m.getIndexOptions(config), m.stopper)
182+
m.ctx, store, quantizer, config.Seed,
183+
m.getIndexOptions(config, store.ReadOnly()), m.stopper,
184+
)
181185
},
182186
)
183187
}
@@ -207,12 +211,14 @@ func (m *Manager) Get(
207211
// passed to cspann.NewIndex, and we don't want that to be the context of
208212
// the Get call.
209213
return cspann.NewIndex(
210-
m.ctx, store, quantizer, config.Seed, m.getIndexOptions(config), m.stopper)
214+
m.ctx, store, quantizer, config.Seed,
215+
m.getIndexOptions(config, store.ReadOnly()), m.stopper,
216+
)
211217
},
212218
)
213219
}
214220

215-
func (m *Manager) getIndexOptions(config vecpb.Config) *cspann.IndexOptions {
221+
func (m *Manager) getIndexOptions(config vecpb.Config, readOnly bool) *cspann.IndexOptions {
216222
return &cspann.IndexOptions{
217223
MinPartitionSize: int(config.MinPartitionSize),
218224
MaxPartitionSize: int(config.MaxPartitionSize),
@@ -222,6 +228,7 @@ func (m *Manager) getIndexOptions(config vecpb.Config) *cspann.IndexOptions {
222228
return StalledOpTimeoutSetting.Get(m.sv)
223229
},
224230
IsDeterministic: config.IsDeterministic,
231+
ReadOnly: readOnly,
225232
}
226233
}
227234

0 commit comments

Comments
 (0)