Skip to content

Commit 2354426

Browse files
committed
vecindex: fix determinism violations
Fix a couple of cases where the vector index does not behave deterministically even when configured to do so. In one case, partition keys were not generated deterministically. In the other case, the seed used in crdb_test builds was different from the seed used otherwise.

Epic: CRDB-42943

Release note: None
1 parent 53a36eb commit 2354426

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

pkg/sql/vecindex/manager.go

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/cspann/quantize"
2020
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/vecpb"
2121
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/vecstore"
22-
"github.com/cockroachdb/cockroach/pkg/util/buildutil"
2322
"github.com/cockroachdb/cockroach/pkg/util/log"
2423
"github.com/cockroachdb/cockroach/pkg/util/metric"
2524
"github.com/cockroachdb/cockroach/pkg/util/stop"
@@ -136,7 +135,8 @@ func (m *Manager) Get(
136135
}
137136
// TODO(drewk): use the config to populate the index options as well.
138137
quantizer := quantize.NewRaBitQuantizer(int(config.Dims), config.Seed, config.DistanceMetric)
139-
store, err := vecstore.New(ctx, m.db, quantizer, m.codec, tableID, indexID)
138+
store, err := vecstore.New(
139+
ctx, m.db, quantizer, m.codec, tableID, indexID, config.IsDeterministic)
140140
if err != nil {
141141
return nil, err
142142
}
@@ -220,13 +220,6 @@ func (m *Manager) getVecConfig(
220220
if config.Dims <= 0 {
221221
return vecpb.Config{}, errInvalidVecConfig
222222
}
223-
// TODO(mw5h, drewk): this should be a session setting in create index rather
224-
// than an override like this.
225-
if buildutil.CrdbTestBuild {
226-
// This is a test build, so let's use a fixed seed for the random projection to
227-
// avoid test flakes.
228-
config.Seed = 0xdeadcafe
229-
}
230223
return config, nil
231224
}
232225

pkg/sql/vecindex/vecstore/store.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package vecstore
88
import (
99
"context"
1010
"slices"
11+
"sync/atomic"
1112

1213
"github.com/cockroachdb/cockroach/pkg/keys"
1314
"github.com/cockroachdb/cockroach/pkg/kv"
@@ -38,6 +39,12 @@ type Store struct {
3839
// readOnly is true if the store does not accept writes.
3940
readOnly bool
4041

42+
// isDeterministic is true if the store should generate new partition keys
43+
// using a deterministic algorithm.
44+
isDeterministic bool
45+
// keyGen generates new partition keys deterministically.
46+
keyGen atomic.Uint64
47+
4148
codec keys.SQLCodec
4249
tableID catid.DescID
4350
indexID catid.IndexID
@@ -98,6 +105,7 @@ func New(
98105
defaultCodec keys.SQLCodec,
99106
tableID catid.DescID,
100107
indexID catid.IndexID,
108+
isDeterministic bool,
101109
) (ps *Store, err error) {
102110
ps = &Store{
103111
db: db,
@@ -110,6 +118,10 @@ func New(
110118
minConsistency: kvpb.INCONSISTENT,
111119
emptyVec: make(vector.T, quantizer.GetDims()),
112120
}
121+
if isDeterministic {
122+
ps.isDeterministic = true
123+
ps.keyGen.Store(1)
124+
}
113125

114126
err = db.DescsTxn(ctx, func(ctx context.Context, txn descs.Txn) error {
115127
tableDesc, err := txn.Descriptors().ByIDWithLeased(txn.KV()).Get().Table(ctx, tableID)
@@ -196,6 +208,11 @@ func (s *Store) RunTransaction(ctx context.Context, fn func(txn cspann.Txn) erro
196208
// MakePartitionKey is part of the cspann.Store interface. It allocates a new
197209
// unique partition key.
198210
func (s *Store) MakePartitionKey() cspann.PartitionKey {
211+
if s.isDeterministic {
212+
// Generate new partition keys by incrementing an atomic.
213+
return cspann.PartitionKey(s.keyGen.Add(1))
214+
}
215+
199216
instanceID := s.kv.Context().NodeID.SQLInstanceID()
200217
return cspann.PartitionKey(unique.GenerateUniqueUnorderedID(unique.ProcessUniqueID(instanceID)))
201218
}

0 commit comments

Comments
 (0)