Skip to content

Commit edcfa98

Browse files
committed
concurrency: don't consider lock acquisitions at higher epochs idempotent
Stateful transaction retries that result from an IntentMissingError happen at the same WriteTimestamp as the previous attempt. Thus, sequences like the one included in the test: - Req0: Put@epo=0 (not included below since it has no lock table side effects) - Req1: Get(Exclusive, Unreplicated)@epo=0 - Req2: Get(Shared, Replicated)@epo=0 - Req3: Put@epo=0 - IntentMissingError on some unrelated key. - Req4: Put@epo=1 (retry of Req0) Would not result in the unreplicated lock being cleared from the lock table, even though it would be cleared on most other types of retries. Fixes #148635 Release note: None
1 parent 0777857 commit edcfa98

File tree

3 files changed

+153
-11
lines changed

3 files changed

+153
-11
lines changed

pkg/kv/kvserver/concurrency/lock_table.go

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1587,16 +1587,24 @@ func (tl *txnLock) isIdempotentLockAcquisition(acq *roachpb.LockAcquisition) boo
15871587
tl.unreplicatedInfo.ts.Equal(acq.Txn.WriteTimestamp) && tl.unreplicatedInfo.ignoredSequenceNumbersEqual(acq)
15881588
case lock.Replicated:
15891589
// Lock is being re-acquired with the same strength.
1590-
return tl.replicatedInfo.held(acq.Strength) &&
1591-
// NB: Lock re-acquisitions at different timestamps are not considered
1592-
// idempotent. Strictly speaking, we could tighten this condition in a
1593-
// few ways:
1594-
// 1. We could consider lock re-acquisition at a lower timestamp idempotent,
1595-
// as a lock's timestamp at a given durability can never regress.
1596-
// 2. We could only check the timestamp if the lock is being acquired with
1597-
// strength lock.Intent, as we disregard the timestamp for all other lock
1598-
// strengths when dealing with replicated locks.
1599-
tl.replicatedInfo.ts.Equal(acq.Txn.WriteTimestamp)
1590+
isIdempotent := tl.replicatedInfo.held(acq.Strength)
1591+
// NB: Lock re-acquisitions at different timestamps are not considered
1592+
// idempotent. Strictly speaking, we could tighten this condition in a
1593+
// few ways:
1594+
//
1595+
// 1. We could consider lock re-acquisition at a lower timestamp idempotent,
1596+
// as a lock's timestamp at a given durability can never regress.
1597+
//
1598+
// 2. We could only check the timestamp if the lock is being acquired with
1599+
// strength lock.Intent, as we disregard the timestamp for all other lock
1600+
// strengths when dealing with replicated locks.
1601+
isIdempotent = isIdempotent && tl.replicatedInfo.ts.Equal(acq.Txn.WriteTimestamp)
1602+
// Lock acquisitions at a new epoch are not considered idempotent. In most
1603+
// cases of an epoch bump, the WriteTimestamp also changes. But in the case
1604+
// of an IntentMissingError, the WriteTimestamp isn't forwarded, but the
1605+
// on-disk intent has still changed.
1606+
isIdempotent = isIdempotent && tl.txn.Epoch == acq.Txn.Epoch
1607+
return isIdempotent
16001608
default:
16011609
panic(fmt.Sprintf("unknown lock durability: %s", acq.Durability))
16021610
}
@@ -3690,7 +3698,7 @@ func (kl *keyLocks) tryFreeLockOnReplicatedAcquire(
36903698
// Bail if there's an epoch number mismatch, as we can't compare sequence
36913699
// numbers across epochs. Note that we're not making any effort to forget
36923700
// unreplicated locks if the replicated lock acquisition corresponds to a
3693-
// newer epoch -- we defer to acquireLock to handle epcoh bumps instead.
3701+
// newer epoch -- we defer to acquireLock to handle epoch bumps instead.
36943702
if tl.txn.Epoch != acq.Txn.Epoch {
36953703
return false /* freed */, false /* mustGC */
36963704
}

pkg/kv/kvserver/concurrency/lock_table_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,22 @@ func TestLockTableBasic(t *testing.T) {
450450
}
451451
return lt.String()
452452

453+
case "update-txn-not-observed":
454+
var txnName string
455+
d.ScanArgs(t, "txn", &txnName)
456+
txnMeta, ok := txnsByName[txnName]
457+
if !ok {
458+
d.Fatalf(t, "unknown txn %s", txnName)
459+
}
460+
ts := scanTimestamp(t, d)
461+
var epoch int
462+
d.ScanArgs(t, "epoch", &epoch)
463+
txnMeta = &enginepb.TxnMeta{ID: txnMeta.ID, Sequence: txnMeta.Sequence}
464+
txnMeta.Epoch = enginepb.TxnEpoch(epoch)
465+
txnMeta.WriteTimestamp = ts
466+
txnsByName[txnName] = txnMeta
467+
return ""
468+
453469
case "add-discovered":
454470
var reqName string
455471
d.ScanArgs(t, "r", &reqName)
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# -------------------------------------------------------------
2+
# This test is a regression test for an issue in which a
3+
# series of acquires resulted in the lock table holding an unreplicated
4+
# lock belonging to an older epoch.
5+
#
6+
# The actual test failure involved a longer and more complicated
7+
# sequence of acquisition, here we've reduce it the following
8+
# operations on a single key:
9+
#
10+
# Req0: Put@epo=0 (not included below since it has no lock table side effects)
11+
# Req1: Get(Exclusive, Unreplicated)@epo=0
12+
# Req2: Get(Shared, Replicated)@epo=0
13+
# Req3: Put@epo=0
14+
#
15+
# Then we bump the transaction Epoch to 1 but leave the WriteTimestamp
16+
# the same -- simulating an IntentMissingError on some unrelated
17+
# key.
18+
#
19+
# Finally, we send what would be the first request in the retrying
20+
# transaction:
21+
#
22+
# Req4: Put@epo=1
23+
#
24+
# Previously, this sequence would leave the unreplicated lock from
25+
# epoch=0 in our lock table. In most cases this does not cause
26+
# problems, but if we want to flush the lock table to disk, we would
27+
# fail with an error.
28+
# -------------------------------------------------------------
29+
30+
new-lock-table maxlocks=10000
31+
----
32+
33+
new-txn txn=txn1 ts=10,1 epoch=0 seq=1
34+
----
35+
36+
new-request r=req1 txn=txn1 ts=10,1 spans=intent@a
37+
----
38+
39+
scan r=req1
40+
----
41+
start-waiting: false
42+
43+
acquire r=req1 k=a durability=u strength=exclusive
44+
----
45+
num=1
46+
lock: "a"
47+
holder: txn: 00000000-0000-0000-0000-000000000001 epoch: 0, iso: Serializable, ts: 10.000000000,1, info: unrepl [(str: Exclusive seq: 1)]
48+
49+
dequeue r=req1
50+
----
51+
num=1
52+
lock: "a"
53+
holder: txn: 00000000-0000-0000-0000-000000000001 epoch: 0, iso: Serializable, ts: 10.000000000,1, info: unrepl [(str: Exclusive seq: 1)]
54+
55+
56+
new-request r=req2 txn=txn1 ts=10,1 spans=shared@a
57+
----
58+
59+
scan r=req2
60+
----
61+
start-waiting: false
62+
63+
acquire r=req2 k=a durability=r strength=shared
64+
----
65+
num=1
66+
lock: "a"
67+
holder: txn: 00000000-0000-0000-0000-000000000001 epoch: 0, iso: Serializable, ts: 10.000000000,1, info: repl [Shared], unrepl [(str: Exclusive seq: 1)]
68+
69+
dequeue r=req2
70+
----
71+
num=1
72+
lock: "a"
73+
holder: txn: 00000000-0000-0000-0000-000000000001 epoch: 0, iso: Serializable, ts: 10.000000000,1, info: repl [Shared], unrepl [(str: Exclusive seq: 1)]
74+
75+
76+
new-request r=req3 txn=txn1 ts=10,1 spans=intent@a
77+
----
78+
79+
scan r=req3
80+
----
81+
start-waiting: false
82+
83+
acquire r=req3 k=a durability=r strength=intent
84+
----
85+
num=1
86+
lock: "a"
87+
holder: txn: 00000000-0000-0000-0000-000000000001 epoch: 0, iso: Serializable, ts: 10.000000000,1, info: repl [Intent, Shared], unrepl [(str: Exclusive seq: 1)]
88+
queued locking requests:
89+
active: false req: 3 promoting: true, strength: Intent, txn: 00000000-0000-0000-0000-000000000001
90+
91+
dequeue r=req3
92+
----
93+
num=1
94+
lock: "a"
95+
holder: txn: 00000000-0000-0000-0000-000000000001 epoch: 0, iso: Serializable, ts: 10.000000000,1, info: repl [Intent, Shared], unrepl [(str: Exclusive seq: 1)]
96+
97+
# Update Epoch but not the WriteTimestamp like an IntentMissingError.
98+
update-txn-not-observed txn=txn1 ts=10,1 epoch=1 seq=1
99+
----
100+
101+
new-request r=req4 txn=txn1 ts=10,1 spans=intent@a
102+
----
103+
104+
scan r=req4
105+
----
106+
start-waiting: false
107+
108+
acquire r=req4 k=a durability=r strength=intent
109+
----
110+
num=1
111+
lock: "a"
112+
holder: txn: 00000000-0000-0000-0000-000000000001 epoch: 1, iso: Serializable, ts: 10.000000000,1, info: repl [Intent, Shared]
113+
114+
dequeue r=req4
115+
----
116+
num=1
117+
lock: "a"
118+
holder: txn: 00000000-0000-0000-0000-000000000001 epoch: 1, iso: Serializable, ts: 10.000000000,1, info: repl [Intent, Shared]

0 commit comments

Comments
 (0)