Skip to content

Commit f8b83ac

Browse files
DrewKimballyuzefovich
authored andcommitted
execbuilder: fix locked indexes info in an edge case
For buffered writes project, in 5e968e6 we started propagating information about which indexes have been locked which allows us to use non-locking Puts and Dels when mutating those indexes. That change had an incorrect assumption that any indexes we lock belong to the same table as the one being mutated. However, we just saw a user report where this wasn't the case - in some cases we might perform the initial scan on a different table (in the example, it was a FK referencing us), so we could incorrectly treat some indexes as having been locked when they haven't. With buffered writes enabled, this could lead to difference in blocking behavior (but no corruption or correctness issues since we still would find the concurrent write on COMMIT); with buffered writes disabled, the locked indexes info doesn't have any impact, so we could only hit an internal error when a given index ordinal doesn't exist in the target table. Furthermore, there is no point in locking an index of a different table since we might not actually modify it (this will depend on whether there exists a CASCADE action and which action it is). Release note (bug fix): CockroachDB previously could hit an internal error when performing a DELETE, an UPDATE, or an UPSERT statement where the initial scan of the mutation is locking and is on a table different from the one being mutated. As a workaround, one could do `SET enable_implicit_select_for_update = false`, but note that it might increase contention, so use with caution. The bug was introduced in 25.2 version and is now fixed.
1 parent 59789f4 commit f8b83ac

File tree

3 files changed

+202
-13
lines changed

3 files changed

+202
-13
lines changed

pkg/sql/opt/exec/execbuilder/mutation.go

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,7 @@ func (b *Builder) shouldApplyImplicitLockingToMutationInput(
11741174
}()
11751175
}
11761176

1177+
md := b.mem.Metadata()
11771178
switch t := mutExpr.(type) {
11781179
case *memo.InsertExpr:
11791180
// Unlike with the other three mutation expressions, it never makes
@@ -1183,16 +1184,14 @@ func (b *Builder) shouldApplyImplicitLockingToMutationInput(
11831184
return 0, nil, nil
11841185

11851186
case *memo.UpdateExpr:
1186-
md := b.mem.Metadata()
11871187
tableID, toLockIndexes := shouldApplyImplicitLockingToUpdateOrDeleteInput(md, t.Input, t.Table)
11881188
return tableID, toLockIndexes.Ordered(), nil
11891189

11901190
case *memo.UpsertExpr:
1191-
tableID, toLockIndexes := shouldApplyImplicitLockingToUpsertInput(t)
1191+
tableID, toLockIndexes := shouldApplyImplicitLockingToUpsertInput(md, t)
11921192
return tableID, toLockIndexes.Ordered(), nil
11931193

11941194
case *memo.DeleteExpr:
1195-
md := b.mem.Metadata()
11961195
tableID, toLockIndexes := shouldApplyImplicitLockingToUpdateOrDeleteInput(md, t.Input, t.Table)
11971196
return tableID, toLockIndexes.Ordered(), nil
11981197

@@ -1243,10 +1242,11 @@ func shouldApplyImplicitLockingToUpdateOrDeleteInput(
12431242
input = idxJoin.Input
12441243
toLockIndexes.Add(cat.PrimaryIndex)
12451244
}
1245+
var toLock opt.TableID
12461246
switch t := input.(type) {
12471247
case *memo.ScanExpr:
12481248
toLockIndexes.Add(t.Index)
1249-
return t.Table, toLockIndexes
1249+
toLock = t.Table
12501250
case *memo.LookupJoinExpr:
12511251
toLockIndexes.Add(t.Index)
12521252
if innerJoin, ok := t.Input.(*memo.LookupJoinExpr); ok && innerJoin.Table == t.Table {
@@ -1257,24 +1257,30 @@ func shouldApplyImplicitLockingToUpdateOrDeleteInput(
12571257
t = innerJoin
12581258
toLockIndexes.Add(t.Index)
12591259
}
1260-
mutStableID := md.Table(tabID).ID()
1261-
lookupStableID := md.Table(t.Table).ID()
1262-
// Only lock rows read in the lookup join if the lookup table is the
1263-
// same as the table being updated. Also, don't lock rows if there is an
1264-
// ON condition so that we don't lock rows that won't be updated.
1265-
if mutStableID == lookupStableID && t.On.IsTrue() && t.Input.Op() == opt.ValuesOp {
1266-
return t.Table, toLockIndexes
1260+
// Don't lock rows if there is an ON condition so that we don't lock rows
1261+
// that won't be updated.
1262+
if !t.On.IsTrue() || t.Input.Op() != opt.ValuesOp {
1263+
return 0, intsets.Fast{}
12671264
}
1265+
toLock = t.Table
1266+
default:
1267+
return 0, intsets.Fast{}
12681268
}
1269-
return 0, intsets.Fast{}
1269+
if md.Table(tabID).ID() != md.Table(toLock).ID() {
1270+
// Make sure that this is the table being updated.
1271+
return 0, intsets.Fast{}
1272+
}
1273+
return toLock, toLockIndexes
12701274
}
12711275

12721276
// tryApplyImplicitLockingToUpsertInput determines whether or not the builder
12731277
// should apply a FOR UPDATE row-level locking mode to the initial row scan of
12741278
// an UPSERT statement. If the builder should lock the initial row scan, it
12751279
// returns the TableID of the scan (as well as ordinals of indexes to lock),
12761280
// otherwise it returns 0.
1277-
func shouldApplyImplicitLockingToUpsertInput(ups *memo.UpsertExpr) (opt.TableID, intsets.Fast) {
1281+
func shouldApplyImplicitLockingToUpsertInput(
1282+
md *opt.Metadata, ups *memo.UpsertExpr,
1283+
) (opt.TableID, intsets.Fast) {
12781284
var toLockIndexes intsets.Fast
12791285
// Try to match the Upsert's input expression against the pattern:
12801286
//
@@ -1309,6 +1315,10 @@ func shouldApplyImplicitLockingToUpsertInput(ups *memo.UpsertExpr) (opt.TableID,
13091315
default:
13101316
return 0, intsets.Fast{}
13111317
}
1318+
if md.Table(ups.Table).ID() != md.Table(toLock).ID() {
1319+
// Make sure that this is the table being updated.
1320+
return 0, intsets.Fast{}
1321+
}
13121322
input = unwrapProjectExprs(input)
13131323
if _, ok := input.(*memo.ValuesExpr); ok {
13141324
return toLock, toLockIndexes

pkg/sql/opt/exec/execbuilder/testdata/delete

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -974,3 +974,94 @@ RESET optimizer_use_lock_elision_multiple_families;
974974
RESET buffered_writes_use_locking_on_non_unique_indexes;
975975

976976
subtest end
977+
978+
# Regression test for incorrectly treating a locked index in one table as being
979+
# locked in another. Furthermore, we shouldn't be locking an index in a
980+
# different table altogether.
981+
subtest locking_other_table
982+
983+
statement ok
984+
CREATE TABLE legal_entity (
985+
id TEXT PRIMARY KEY
986+
);
987+
INSERT INTO legal_entity VALUES ('foo');
988+
989+
statement ok
990+
CREATE TABLE entity_address (
991+
id TEXT PRIMARY KEY
992+
);
993+
INSERT INTO entity_address VALUES ('bar');
994+
995+
# Retry to ensure that referenced tables are visible.
996+
retry
997+
statement ok
998+
CREATE TABLE legal_entity_address (
999+
id INT PRIMARY KEY,
1000+
legal_entity_id text NOT NULL REFERENCES legal_entity(id) ON DELETE CASCADE,
1001+
entity_address_id text NOT NULL REFERENCES entity_address(id) ON DELETE CASCADE,
1002+
UNIQUE (legal_entity_id, entity_address_id),
1003+
FAMILY (legal_entity_id, entity_address_id)
1004+
);
1005+
1006+
statement ok
1007+
INSERT INTO legal_entity_address VALUES (1, 'foo', 'bar');
1008+
1009+
query T
1010+
EXPLAIN
1011+
DELETE FROM entity_address WHERE id IN (
1012+
SELECT entity_address_id FROM legal_entity_address WHERE legal_entity_id = 'foo'
1013+
);
1014+
----
1015+
distribution: local
1016+
vectorized: true
1017+
·
1018+
• root
1019+
1020+
├── • delete
1021+
│ │ from: entity_address
1022+
│ │
1023+
│ └── • buffer
1024+
│ │ label: buffer 1
1025+
│ │
1026+
│ └── • render
1027+
│ │
1028+
│ └── • scan
1029+
│ missing stats
1030+
│ table: legal_entity_address@legal_entity_address_legal_entity_id_entity_address_id_key
1031+
│ spans: [/'foo' - /'foo']
1032+
1033+
└── • fk-cascade
1034+
│ fk: legal_entity_address_entity_address_id_fkey
1035+
1036+
└── • delete
1037+
│ from: legal_entity_address
1038+
1039+
└── • hash join
1040+
│ equality: (entity_address_id) = (id)
1041+
│ right cols are key
1042+
1043+
├── • scan
1044+
│ missing stats
1045+
│ table: legal_entity_address@legal_entity_address_pkey
1046+
│ spans: FULL SCAN
1047+
1048+
└── • distinct
1049+
│ estimated row count: 10
1050+
│ distinct on: id
1051+
1052+
└── • scan buffer
1053+
estimated row count: 100
1054+
label: buffer 1000000
1055+
1056+
query T kvtrace
1057+
DELETE FROM entity_address WHERE id IN (
1058+
SELECT entity_address_id FROM legal_entity_address WHERE legal_entity_id = 'foo'
1059+
);
1060+
----
1061+
Scan /Table/120/2/"foo"{-/PrefixEnd}
1062+
Del (locking) /Table/119/1/"bar"/0
1063+
Scan /Table/120/{1-2}
1064+
Del (locking) /Table/120/1/1/0
1065+
Del (locking) /Table/120/2/"foo"/"bar"/0
1066+
1067+
subtest end

pkg/sql/opt/exec/execbuilder/testdata/update

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1833,3 +1833,91 @@ RESET optimizer_use_lock_elision_multiple_families;
18331833
RESET use_cputs_on_non_unique_indexes;
18341834

18351835
subtest end
1836+
1837+
# Regression test for incorrectly treating a locked index in one table as being
1838+
# locked in another. Furthermore, we shouldn't be locking an index in a
1839+
# different table altogether.
1840+
subtest locking_other_table
1841+
1842+
statement ok
1843+
CREATE TABLE legal_entity (
1844+
id TEXT PRIMARY KEY
1845+
);
1846+
INSERT INTO legal_entity VALUES ('foo');
1847+
1848+
statement ok
1849+
CREATE TABLE entity_address (
1850+
id TEXT PRIMARY KEY
1851+
);
1852+
INSERT INTO entity_address VALUES ('bar');
1853+
1854+
# Retry to ensure that referenced tables are visible.
1855+
retry
1856+
statement ok
1857+
CREATE TABLE legal_entity_address (
1858+
legal_entity_id text NOT NULL REFERENCES legal_entity(id),
1859+
entity_address_id text NOT NULL REFERENCES entity_address(id),
1860+
UNIQUE (legal_entity_id, entity_address_id),
1861+
FAMILY (legal_entity_id, entity_address_id)
1862+
);
1863+
1864+
statement ok
1865+
INSERT INTO legal_entity_address VALUES ('foo', 'bar');
1866+
1867+
query T
1868+
EXPLAIN
1869+
UPDATE entity_address SET id = 'bar' WHERE id IN (
1870+
SELECT entity_address_id FROM legal_entity_address WHERE legal_entity_id = 'foo'
1871+
);
1872+
----
1873+
distribution: local
1874+
vectorized: true
1875+
·
1876+
• root
1877+
1878+
├── • update
1879+
│ │ table: entity_address
1880+
│ │ set: id
1881+
│ │
1882+
│ └── • buffer
1883+
│ │ label: buffer 1
1884+
│ │
1885+
│ └── • render
1886+
│ │
1887+
│ └── • render
1888+
│ │
1889+
│ └── • scan
1890+
│ missing stats
1891+
│ table: legal_entity_address@legal_entity_address_legal_entity_id_entity_address_id_key
1892+
│ spans: [/'foo' - /'foo']
1893+
1894+
└── • constraint-check
1895+
1896+
└── • error if rows
1897+
1898+
└── • hash join (right semi)
1899+
│ equality: (entity_address_id) = (id)
1900+
│ right cols are key
1901+
1902+
├── • scan
1903+
│ missing stats
1904+
│ table: legal_entity_address@legal_entity_address_pkey
1905+
│ spans: FULL SCAN
1906+
1907+
└── • except all
1908+
1909+
├── • scan buffer
1910+
│ label: buffer 1
1911+
1912+
└── • scan buffer
1913+
label: buffer 1
1914+
1915+
query T kvtrace
1916+
UPDATE entity_address SET id = 'bar' WHERE id IN (
1917+
SELECT entity_address_id FROM legal_entity_address WHERE legal_entity_id = 'foo'
1918+
);
1919+
----
1920+
Scan /Table/123/2/"foo"{-/PrefixEnd}
1921+
Put (locking) /Table/122/1/"bar"/0 -> /TUPLE/
1922+
1923+
subtest end

0 commit comments

Comments
 (0)