Skip to content

Commit 44df6bf

Browse files
committed
optbuilder: disable mutation of vector indexes while backfilling
Merging concurrent writes on vector indexes while backfill is in progress is somewhat more complicated problem than can be solved in the 25.2 timeframe. In order to ensure data integrity, we will disable mutation on vector indexes while backfill is in progress. Informs: #144443 Release note: None
1 parent 02738e6 commit 44df6bf

File tree

13 files changed

+167
-10
lines changed

13 files changed

+167
-10
lines changed

pkg/sql/alter_primary_key.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,15 @@ func (p *planner) AlterPrimaryKey(
5454
alterPKNode tree.AlterTableAlterPrimaryKey,
5555
alterPrimaryKeyLocalitySwap *alterPrimaryKeyLocalitySwap,
5656
) error {
57+
// Check if sql_safe_updates is enabled and the table has vector indexes
58+
if p.EvalContext().SessionData().SafeUpdates {
59+
for _, idx := range tableDesc.AllIndexes() {
60+
if idx.GetType() == idxtype.VECTOR {
61+
return pgerror.DangerousStatementf("ALTER PRIMARY KEY on a table with vector indexes will disable writes to the table while the index is being rebuilt")
62+
}
63+
}
64+
}
65+
5766
if err := paramparse.ValidateUniqueConstraintParams(
5867
alterPKNode.StorageParams,
5968
paramparse.UniqueConstraintParamContext{

pkg/sql/backfill/backfill_test.go

Lines changed: 90 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package backfill_test
77

88
import (
99
"context"
10+
"sync"
1011
"testing"
1112

1213
"github.com/cockroachdb/cockroach/pkg/base"
@@ -54,16 +55,18 @@ func TestVectorColumnAndIndexBackfill(t *testing.T) {
5455
sqlDB.Exec(t, `
5556
CREATE TABLE vectors (
5657
id INT PRIMARY KEY,
57-
vec VECTOR(3)
58+
vec VECTOR(3),
59+
data INT
5860
)
5961
`)
6062

6163
// Insert 200 rows with random vector data
6264
sqlDB.Exec(t, `
63-
INSERT INTO vectors (id, vec)
65+
INSERT INTO vectors (id, vec, data)
6466
SELECT
6567
generate_series(1, 200) as id,
66-
ARRAY[random(), random(), random()]::vector(3) as vec
68+
ARRAY[random(), random(), random()]::vector(3) as vec,
69+
generate_series(1, 200) as data
6770
`)
6871

6972
// Create a vector index on the vector column
@@ -88,3 +91,87 @@ func TestVectorColumnAndIndexBackfill(t *testing.T) {
8891
// be 200 in most cases.
8992
require.Greater(t, matchCount, 190, "Expected to find at least 190 similar vectors")
9093
}
94+
95+
func TestConcurrentOperationsDuringVectorIndexCreation(t *testing.T) {
96+
defer leaktest.AfterTest(t)()
97+
defer log.Scope(t).Close(t)
98+
99+
// Channel to block the backfill process
100+
blockBackfill := make(chan struct{})
101+
backfillBlocked := make(chan struct{})
102+
103+
ctx := context.Background()
104+
srv, db, _ := serverutils.StartServer(t, base.TestServerArgs{
105+
Knobs: base.TestingKnobs{
106+
DistSQL: &execinfra.TestingKnobs{
107+
// Block the backfill process after the first batch
108+
RunAfterBackfillChunk: func() {
109+
backfillBlocked <- struct{}{}
110+
<-blockBackfill
111+
},
112+
},
113+
},
114+
})
115+
defer srv.Stopper().Stop(ctx)
116+
sqlDB := sqlutils.MakeSQLRunner(db)
117+
118+
// Create a table with a vector column
119+
sqlDB.Exec(t, `
120+
CREATE TABLE vectors (
121+
id INT PRIMARY KEY,
122+
vec VECTOR(3),
123+
data INT
124+
)
125+
`)
126+
127+
// Insert some initial data
128+
sqlDB.Exec(t, `
129+
INSERT INTO vectors (id, vec, data) VALUES
130+
(1, '[1, 2, 3]', 100),
131+
(2, '[4, 5, 6]', 200),
132+
(3, '[7, 8, 9]', 300)
133+
`)
134+
135+
wg := sync.WaitGroup{}
136+
wg.Add(1)
137+
138+
// Start creating the vector index in a goroutine
139+
var createIndexErr error
140+
go func() {
141+
defer wg.Done()
142+
_, createIndexErr = db.ExecContext(ctx, `CREATE VECTOR INDEX vec_idx ON vectors (vec)`)
143+
}()
144+
145+
// Wait for the backfill to be blocked
146+
<-backfillBlocked
147+
148+
// Attempt concurrent operations while the index is being created
149+
// These should fail with the appropriate error
150+
_, err := db.ExecContext(ctx, `UPDATE vectors SET vec = '[10, 11, 12]', data = 150 WHERE id = 1`)
151+
require.Error(t, err)
152+
require.Contains(t, err.Error(), "Cannot write to a vector index while it is being built")
153+
154+
_, err = db.ExecContext(ctx, `INSERT INTO vectors (id, vec, data) VALUES (4, '[13, 14, 15]', 400)`)
155+
require.Error(t, err)
156+
require.Contains(t, err.Error(), "Cannot write to a vector index while it is being built")
157+
158+
_, err = db.ExecContext(ctx, `DELETE FROM vectors WHERE id = 2`)
159+
require.Error(t, err)
160+
require.Contains(t, err.Error(), "Cannot write to a vector index while it is being built")
161+
162+
// This update should succeed since it only modifies the data column
163+
_, err = db.ExecContext(ctx, `UPDATE vectors SET data = 250 WHERE id = 2`)
164+
require.NoError(t, err, "Updating only the data column should succeed during index creation")
165+
166+
// Unblock the backfill process
167+
close(blockBackfill)
168+
169+
wg.Wait()
170+
// Wait for the index creation to complete
171+
require.NoError(t, createIndexErr)
172+
173+
// Verify the index was created successfully
174+
var id int
175+
sqlDB.QueryRow(t, `SELECT id FROM vectors@vec_idx ORDER BY vec <-> '[1, 2, 3]' LIMIT 1`).Scan(&id)
176+
require.Equal(t, 1, id)
177+
}

pkg/sql/catalog/tabledesc/index.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -464,12 +464,7 @@ func (w index) CreatedAt() time.Time {
464464
return timeutil.Unix(0, w.desc.CreatedAtNanos)
465465
}
466466

467-
// IsTemporaryIndexForBackfill returns true iff the index is
468-
// an index being used as the temporary index being used by an
469-
// in-progress index backfill.
470-
//
471-
// TODO(ssd): This could be its own boolean or we could store the ID
472-
// of the index it is a temporary index for.
467+
// IsTemporaryIndexForBackfill implements the cat.Index interface.
473468
func (w index) IsTemporaryIndexForBackfill() bool {
474469
return w.desc.UseDeletePreservingEncoding
475470
}

pkg/sql/create_index.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ func (p *planner) CreateIndex(ctx context.Context, n *tree.CreateIndex) (planNod
5757
); err != nil {
5858
return nil, err
5959
}
60+
61+
// Check if sql_safe_updates is enabled and this is a vector index
62+
if n.Type == idxtype.VECTOR && p.EvalContext().SessionData().SafeUpdates {
63+
return nil, pgerror.DangerousStatementf("CREATE VECTOR INDEX will disable writes to the table while the index is being built")
64+
}
65+
6066
_, tableDesc, err := p.ResolveMutableTableDescriptor(
6167
ctx, &n.Table, true /*required*/, tree.ResolveRequireTableOrViewDesc,
6268
)

pkg/sql/logictest/testdata/logic_test/vector_index

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,30 @@
22
# CREATE TABLE/INDEX tests.
33
# ------------------------------------------------------------------------------
44

5+
# Test guardrail for vector index creation.
6+
# TODO(mw5h): remove these two statements once online modifications are supported.
7+
statement ok
8+
SET sql_safe_updates = true
9+
510
# Simple vector index.
611
statement ok
712
CREATE TABLE simple (
813
a INT PRIMARY KEY,
14+
b INT NOT NULL,
915
vec1 VECTOR(3),
1016
VECTOR INDEX (vec1),
1117
FAMILY (a, vec1)
1218
)
1319

20+
statement error pgcode 01000 pq: rejected \(sql_safe_updates = true\): CREATE VECTOR INDEX will disable writes to the table while the index is being built
21+
CREATE VECTOR INDEX ON simple (vec1)
22+
23+
statement error pgcode 01000 pq: rejected \(sql_safe_updates = true\): ALTER PRIMARY KEY on a table with vector indexes will disable writes to the table while the index is being rebuilt
24+
ALTER TABLE simple ALTER PRIMARY KEY USING COLUMNS (b)
25+
26+
statement ok
27+
SET sql_safe_updates = false
28+
1429
statement ok
1530
CREATE VECTOR INDEX ON simple (vec1)
1631

@@ -23,12 +38,13 @@ SHOW CREATE TABLE simple
2338
----
2439
simple CREATE TABLE public.simple (
2540
a INT8 NOT NULL,
41+
b INT8 NOT NULL,
2642
vec1 VECTOR(3) NULL,
2743
CONSTRAINT simple_pkey PRIMARY KEY (a ASC),
2844
VECTOR INDEX simple_vec1_idx (vec1),
2945
VECTOR INDEX simple_vec1_idx1 (vec1),
3046
VECTOR INDEX simple_vec1_idx2 (vec1),
31-
FAMILY fam_0_a_vec1 (a, vec1)
47+
FAMILY fam_0_a_vec1_b (a, vec1, b)
3248
)
3349

3450
statement ok

pkg/sql/opt/cat/index.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,10 @@ type Index interface {
204204
// Partition returns the ith PARTITION BY LIST partition within the index
205205
// definition, where i < PartitionCount.
206206
Partition(i int) Partition
207+
208+
// IsTemporaryIndexForBackfill returns true iff the index is an index being
209+
// used as the temporary index being used by an in-progress index backfill.
210+
IsTemporaryIndexForBackfill() bool
207211
}
208212

209213
// IndexColumn describes a single column that is part of an index definition.

pkg/sql/opt/exec/explain/plan_gist_factory.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,4 +795,8 @@ func (u *unknownIndex) Partition(i int) cat.Partition {
795795
panic(errors.AssertionFailedf("not implemented"))
796796
}
797797

798+
func (u *unknownIndex) IsTemporaryIndexForBackfill() bool {
799+
return false
800+
}
801+
798802
var _ cat.Index = &unknownIndex{}

pkg/sql/opt/indexrec/hypothetical_index.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,11 @@ func (hi *hypotheticalIndex) Partition(i int) cat.Partition {
244244
return nil
245245
}
246246

247+
// IsTemporaryIndexForBackfill is part of the cat.Index interface.
248+
func (hi *hypotheticalIndex) IsTemporaryIndexForBackfill() bool {
249+
return false
250+
}
251+
247252
// hasSameExplicitCols checks whether the given existing index has identical
248253
// explicit columns as the hypothetical index. To be identical, they need to
249254
// have the exact same list, length, and order. If the index is inverted, it

pkg/sql/opt/optbuilder/mutation_builder.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,6 +1218,9 @@ func (mb *mutationBuilder) projectVectorIndexColsImpl(op opt.Operator) {
12181218
func (mb *mutationBuilder) buildVectorMutationSearch(
12191219
input memo.RelExpr, index cat.Index, partitionCol, quantizedVecCol opt.ColumnID, isIndexPut bool,
12201220
) memo.RelExpr {
1221+
if index.IsTemporaryIndexForBackfill() {
1222+
panic(unimplemented.NewWithIssue(144443, "Cannot write to a vector index while it is being built"))
1223+
}
12211224
getCol := func(colOrd int) (colID opt.ColumnID) {
12221225
// Check in turn if the column is being upserted, inserted, updated, or
12231226
// fetched.

pkg/sql/opt/testutils/testcat/test_catalog.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,6 +1434,10 @@ func (ti *Index) Partition(i int) cat.Partition {
14341434
return &ti.partitions[i]
14351435
}
14361436

1437+
func (ti *Index) IsTemporaryIndexForBackfill() bool {
1438+
return false
1439+
}
1440+
14371441
// SetPartitions manually sets the partitions.
14381442
func (ti *Index) SetPartitions(partitions []Partition) {
14391443
ti.partitions = partitions

0 commit comments

Comments
 (0)