Skip to content

Commit 9e1e159

Browse files
craig[bot]mw5h
andcommitted
Merge #150291
150291: opt: take exclusive locks for foreign key cascades under read-committed r=mw5h a=mw5h Previously foreign key cascades would never take locks on the rows they modified. Under serializable isolation, this presents no difficulty as locks are purely advisory at that isolation level. However, under repeatable read and read committed isolations, locks are necessary to avoid incorrect enforcement. For example, consider this timing: t1: R-C statement that deletes from parent starts. t2: transaction that writes to child commits. This row references the parent that the previous statement is deleting, but still sees it because that statement hasn't done anything yet. t3: R-C statement deletes child keys, but has a timestamp of t1, so doesn't see the row inserted by t2. After the R-C transaction commits, we're left with a child table that has a row it should not. A similar problem exists for update cascades. Fortunately, the fix is relatively simple. By taking an exclusive lock on the child rows before we delete or update them, we force the KV to check write intents. KV will see the write at t2 and either bump t1's timestamp or force it to restart. Fixes: #150282 Release note (bug fix): A but that would allow a race condition in foreign key cascades under read committed and repeatable read isolations has been fixed. Co-authored-by: Matt White <[email protected]>
2 parents 9d5a1fa + 8385685 commit 9e1e159

File tree

3 files changed

+212
-3
lines changed

3 files changed

+212
-3
lines changed

pkg/ccl/logictestccl/testdata/logic_test/fk_read_committed

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,81 @@ INSERT INTO e VALUES (2)
9898

9999
statement ok
100100
UPDATE a SET a = 2 WHERE a = 1
101+
102+
subtest fk_cascade_race_150282
103+
104+
statement ok
105+
CREATE TABLE parent_150282 (
106+
p INT PRIMARY KEY,
107+
i INT,
108+
j INT,
109+
INDEX (i),
110+
INDEX (j)
111+
);
112+
113+
statement ok
114+
CREATE TABLE child_150282 (
115+
c INT PRIMARY KEY,
116+
p INT REFERENCES parent_150282 (p) ON DELETE CASCADE ON UPDATE CASCADE,
117+
INDEX (p)
118+
);
119+
120+
statement ok
121+
INSERT INTO parent_150282 VALUES (1, 2, 3);
122+
123+
statement ok
124+
BEGIN ISOLATION LEVEL READ COMMITTED;
125+
126+
statement ok
127+
SELECT 1;
128+
129+
statement async fk_delete
130+
WITH sleep AS (SELECT pg_sleep(1)) DELETE FROM parent_150282@parent_150282_i_idx WHERE i = 2;
131+
132+
statement ok
133+
INSERT INTO child_150282 VALUES (4, 1);
134+
135+
awaitstatement fk_delete
136+
137+
statement ok
138+
COMMIT;
139+
140+
query III
141+
SELECT * FROM parent_150282;
142+
----
143+
144+
query II
145+
SELECT * FROM child_150282;
146+
----
147+
148+
statement ok
149+
INSERT INTO parent_150282 VALUES (1, 2, 3);
150+
151+
statement ok
152+
BEGIN ISOLATION LEVEL READ COMMITTED;
153+
154+
statement ok
155+
SELECT 1;
156+
157+
statement async fk_update
158+
WITH sleep AS (SELECT pg_sleep(1)) UPDATE parent_150282 SET p = 4 WHERE i = 2;
159+
160+
statement ok
161+
INSERT INTO child_150282 VALUES (4, 1);
162+
163+
awaitstatement fk_update
164+
165+
statement ok
166+
COMMIT;
167+
168+
query III
169+
SELECT * FROM parent_150282;
170+
----
171+
4 2 3
172+
173+
query II
174+
SELECT * FROM child_150282;
175+
----
176+
4 4
177+
178+
subtest end

pkg/sql/opt/exec/execbuilder/testdata/fk_read_committed

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ CREATE TABLE jars (j INT PRIMARY KEY)
66
statement ok
77
CREATE TABLE cookies (c INT PRIMARY KEY, j INT REFERENCES jars (j), FAMILY (c, j))
88

9+
statement ok
10+
CREATE TABLE gumballs (g INT PRIMARY KEY, j INT REFERENCES jars (j) ON DELETE CASCADE ON UPDATE CASCADE, FAMILY (g, j))
11+
912
statement ok
1013
SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL READ COMMITTED
1114

@@ -243,6 +246,73 @@ vectorized: true
243246
│ spans: FULL SCAN
244247
│ locking strength: for update
245248
249+
├── • fk-cascade
250+
│ │ fk: gumballs_j_fkey
251+
│ │
252+
│ └── • root
253+
│ │ columns: ()
254+
│ │
255+
│ ├── • update
256+
│ │ │ columns: ()
257+
│ │ │ estimated row count: 0 (missing stats)
258+
│ │ │ table: gumballs
259+
│ │ │ set: j
260+
│ │ │
261+
│ │ └── • buffer
262+
│ │ │ columns: (g, j, j_new, j)
263+
│ │ │ label: buffer 1
264+
│ │ │
265+
│ │ └── • hash join (inner)
266+
│ │ │ columns: (g, j, j_new, j)
267+
│ │ │ estimated row count: 327 (missing stats)
268+
│ │ │ equality: (j) = (j)
269+
│ │ │
270+
│ │ ├── • scan
271+
│ │ │ columns: (g, j)
272+
│ │ │ estimated row count: 1,000 (missing stats)
273+
│ │ │ table: gumballs@gumballs_pkey
274+
│ │ │ spans: FULL SCAN
275+
│ │ │ locking strength: for update
276+
│ │ │ locking durability: guaranteed
277+
│ │ │
278+
│ │ └── • filter
279+
│ │ │ columns: (j, j_new)
280+
│ │ │ estimated row count: 33
281+
│ │ │ filter: j IS DISTINCT FROM j_new
282+
│ │ │
283+
│ │ └── • scan buffer
284+
│ │ columns: (j, j_new)
285+
│ │ estimated row count: 100
286+
│ │ label: buffer 1000000
287+
│ │
288+
│ └── • constraint-check
289+
│ │
290+
│ └── • error if rows
291+
│ │ columns: ()
292+
│ │
293+
│ └── • lookup join (anti)
294+
│ │ columns: (j_new)
295+
│ │ estimated row count: 0 (missing stats)
296+
│ │ table: jars@jars_pkey
297+
│ │ equality: (j_new) = (j)
298+
│ │ equality cols are key
299+
│ │ locking strength: for share
300+
│ │ locking durability: guaranteed
301+
│ │ parallel
302+
│ │
303+
│ └── • filter
304+
│ │ columns: (j_new)
305+
│ │ estimated row count: 323 (missing stats)
306+
│ │ filter: j_new IS NOT NULL
307+
│ │
308+
│ └── • project
309+
│ │ columns: (j_new)
310+
│ │
311+
│ └── • scan buffer
312+
│ columns: (g, j, j_new, j)
313+
│ estimated row count: 327 (missing stats)
314+
│ label: buffer 1
315+
246316
└── • constraint-check
247317
248318
└── • error if rows
@@ -330,6 +400,30 @@ vectorized: true
330400
│ spans: /1/0
331401
│ locking strength: for update
332402
403+
├── • fk-cascade
404+
│ │ fk: gumballs_j_fkey
405+
│ │
406+
│ └── • delete
407+
│ │ columns: ()
408+
│ │ estimated row count: 0 (missing stats)
409+
│ │ from: gumballs
410+
│ │
411+
│ └── • project
412+
│ │ columns: (g)
413+
│ │
414+
│ └── • filter
415+
│ │ columns: (g, j)
416+
│ │ estimated row count: 10 (missing stats)
417+
│ │ filter: j = 1
418+
│ │
419+
│ └── • scan
420+
│ columns: (g, j)
421+
│ estimated row count: 1,000 (missing stats)
422+
│ table: gumballs@gumballs_pkey
423+
│ spans: FULL SCAN
424+
│ locking strength: for update
425+
│ locking durability: guaranteed
426+
333427
└── • constraint-check
334428
335429
└── • error if rows

pkg/sql/opt/optbuilder/fk_cascade.go

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"context"
1010
"fmt"
1111

12+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/isolation"
1213
"github.com/cockroachdb/cockroach/pkg/sql/opt"
1314
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
1415
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
@@ -346,6 +347,18 @@ func (cb *onDeleteFastCascadeBuilder) Build(
346347
indexFlags = &tree.IndexFlags{AvoidFullScan: true}
347348
}
348349

350+
locking := noRowLocking
351+
if b.evalCtx.TxnIsoLevel != isolation.Serializable {
352+
locking = lockingSpec{
353+
&lockingItem{
354+
item: &tree.LockingItem{
355+
Strength: tree.ForUpdate,
356+
WaitPolicy: tree.LockWaitBlock,
357+
},
358+
},
359+
}
360+
}
361+
349362
// Build the input to the delete mutation, which is simply a Scan with a
350363
// Select on top. The scan is exempt from RLS to maintain data integrity.
351364
mb.fetchScope = b.buildScan(
@@ -356,7 +369,7 @@ func (cb *onDeleteFastCascadeBuilder) Build(
356369
includeInverted: false,
357370
}),
358371
indexFlags,
359-
noRowLocking,
372+
locking,
360373
b.allocScope(),
361374
true, /* disableNotVisibleIndex */
362375
cat.PolicyScopeExempt,
@@ -604,6 +617,18 @@ func (b *Builder) buildDeleteCascadeMutationInput(
604617
indexFlags = &tree.IndexFlags{AvoidFullScan: true}
605618
}
606619

620+
locking := noRowLocking
621+
if b.evalCtx.TxnIsoLevel != isolation.Serializable {
622+
locking = lockingSpec{
623+
&lockingItem{
624+
item: &tree.LockingItem{
625+
Strength: tree.ForUpdate,
626+
WaitPolicy: tree.LockWaitBlock,
627+
},
628+
},
629+
}
630+
}
631+
607632
// The scan is exempt from RLS to maintain data integrity.
608633
outScope = b.buildScan(
609634
b.addTable(childTable, childTableAlias),
@@ -613,7 +638,7 @@ func (b *Builder) buildDeleteCascadeMutationInput(
613638
includeInverted: false,
614639
}),
615640
indexFlags,
616-
noRowLocking,
641+
locking,
617642
b.allocScope(),
618643
true, /* disableNotVisibleIndex */
619644
cat.PolicyScopeExempt,
@@ -880,6 +905,18 @@ func (b *Builder) buildUpdateCascadeMutationInput(
880905
indexFlags = &tree.IndexFlags{AvoidFullScan: true}
881906
}
882907

908+
locking := noRowLocking
909+
if b.evalCtx.TxnIsoLevel != isolation.Serializable {
910+
locking = lockingSpec{
911+
&lockingItem{
912+
item: &tree.LockingItem{
913+
Strength: tree.ForUpdate,
914+
WaitPolicy: tree.LockWaitBlock,
915+
},
916+
},
917+
}
918+
}
919+
883920
// The scan is exempt from RLS to maintain data integrity.
884921
outScope = b.buildScan(
885922
b.addTable(childTable, childTableAlias),
@@ -889,7 +926,7 @@ func (b *Builder) buildUpdateCascadeMutationInput(
889926
includeInverted: false,
890927
}),
891928
indexFlags,
892-
noRowLocking,
929+
locking,
893930
b.allocScope(),
894931
true, /* disableNotVisibleIndex */
895932
cat.PolicyScopeExempt,

0 commit comments

Comments
 (0)