Skip to content

Commit 74479fb

Browse files
craig[bot]pav-kv
andcommitted
Merge #157189
157189: sql: fix lease duration in TestReacquireLeaseOnRestart r=yuzefovich a=pav-kv The test jumps the clock forward in order to expire KV leases. However, the `DefaultDescriptorLeaseDuration` constant that it chooses seems to be by mistake taken as the KV lease duration. Use the `RangeLeaseDuration` from the server config instead. This also fixes early SQL pod termination when multi-tenancy is enabled. Currently, the lifetime of the SQL pod is the same as that of the session, which has a TTL of 40s (`server.sqlliveness.ttl` setting). The 10 minute clock jump in this test was causing early SQL pod termination and flakes. Resolves #156333 Co-authored-by: Pavel Kalinnikov <[email protected]>
2 parents c484d09 + 109bdbb commit 74479fb

File tree

1 file changed

+34
-42
lines changed

1 file changed

+34
-42
lines changed

pkg/sql/txn_restart_test.go

Lines changed: 34 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1265,40 +1265,42 @@ func TestReacquireLeaseOnRestart(t *testing.T) {
12651265
clientTestingKnobs := &kvcoord.ClientTestingKnobs{
12661266
MaxTxnRefreshAttempts: refreshAttempts,
12671267
}
1268+
var params base.TestServerArgs
1269+
params.RaftConfig.SetDefaults()
1270+
leaseDuration := params.RaftConfig.RangeLeaseDuration
12681271

12691272
testKey := []byte("test_key")
12701273
var s serverutils.ApplicationLayerInterface
12711274
var nodeID roachpb.NodeID
1272-
var clockUpdate, restartDone int32
1275+
var clockUpdate atomic.Bool
1276+
var restartDone atomic.Int32
12731277
testingResponseFilter := func(
12741278
ctx context.Context, ba *kvpb.BatchRequest, br *kvpb.BatchResponse,
12751279
) *kvpb.Error {
12761280
for _, ru := range ba.Requests {
1277-
if req := ru.GetGet(); req != nil {
1278-
if bytes.Contains(req.Key, testKey) {
1279-
if atomic.LoadInt32(&clockUpdate) == 0 {
1280-
atomic.AddInt32(&clockUpdate, 1)
1281-
// Hack to advance the transaction timestamp on a
1282-
// transaction restart.
1283-
const advancement = 2 * base.DefaultDescriptorLeaseDuration
1284-
now := s.Clock().NowAsClockTimestamp()
1285-
now.WallTime += advancement.Nanoseconds()
1286-
s.Clock().Update(now)
1287-
}
1281+
if req := ru.GetGet(); req == nil || !bytes.Contains(req.Key, testKey) {
1282+
continue
1283+
}
1284+
if !clockUpdate.Load() {
1285+
clockUpdate.Store(true)
1286+
// Hack to advance the transaction timestamp on a transaction restart.
1287+
advancement := 2 * leaseDuration
1288+
now := s.Clock().NowAsClockTimestamp()
1289+
now.WallTime += advancement.Nanoseconds()
1290+
s.Clock().Update(now)
1291+
}
12881292

1289-
// Allow a set number of restarts so that the auto retry on
1290-
// the first few uncertainty interval errors also fails.
1291-
if atomic.LoadInt32(&restartDone) <= refreshAttempts {
1292-
atomic.AddInt32(&restartDone, 1)
1293-
// Return ReadWithinUncertaintyIntervalError to update
1294-
// the transaction timestamp on retry.
1295-
txn := ba.Txn.Clone()
1296-
txn.ResetObservedTimestamps()
1297-
now := s.Clock().NowAsClockTimestamp()
1298-
txn.UpdateObservedTimestamp(nodeID, now)
1299-
return kvpb.NewErrorWithTxn(kvpb.NewReadWithinUncertaintyIntervalError(now.ToTimestamp(), now, txn, now.ToTimestamp(), now), txn)
1300-
}
1301-
}
1293+
// Allow a set number of restarts so that the auto retry on the first few
1294+
// uncertainty interval errors also fails.
1295+
if restartDone.Load() <= refreshAttempts {
1296+
restartDone.Add(1)
1297+
// Return ReadWithinUncertaintyIntervalError to update the transaction
1298+
// timestamp on retry.
1299+
txn := ba.Txn.Clone()
1300+
txn.ResetObservedTimestamps()
1301+
now := s.Clock().NowAsClockTimestamp()
1302+
txn.UpdateObservedTimestamp(nodeID, now)
1303+
return kvpb.NewErrorWithTxn(kvpb.NewReadWithinUncertaintyIntervalError(now.ToTimestamp(), now, txn, now.ToTimestamp(), now), txn)
13021304
}
13031305
}
13041306
return nil
@@ -1310,40 +1312,30 @@ func TestReacquireLeaseOnRestart(t *testing.T) {
13101312
DisableMaxOffsetCheck: true,
13111313
}
13121314

1313-
var params base.TestServerArgs
13141315
params.Knobs.Store = storeTestingKnobs
13151316
params.Knobs.KVClient = clientTestingKnobs
1316-
params.DefaultTestTenant = base.TestDoesNotWorkWithExternalProcessMode(156333)
13171317
srv, sqlDB, _ := serverutils.StartServer(t, params)
13181318
defer srv.Stopper().Stop(context.Background())
13191319
s = srv.ApplicationLayer()
13201320
nodeID = srv.NodeID()
13211321

13221322
sqlDB.SetMaxOpenConns(1)
1323-
if _, err := sqlDB.Exec(`
1323+
_, err := sqlDB.Exec(`
13241324
CREATE DATABASE t;
13251325
CREATE TABLE t.test (k TEXT PRIMARY KEY, v TEXT);
13261326
INSERT INTO t.test (k, v) VALUES ('test_key', 'test_val');
1327-
`); err != nil {
1328-
t.Fatal(err)
1329-
}
1327+
`)
1328+
require.NoError(t, err)
13301329
// Acquire the lease and enable the auto-retry. The first few read attempts
13311330
// will trigger ReadWithinUncertaintyIntervalError and advance the
13321331
// transaction timestamp due to txnSpanRefresher-initiated span refreshes.
13331332
// The transaction timestamp will exceed the lease expiration time, and the
13341333
// last read attempt will re-acquire the lease.
1335-
if _, err := sqlDB.Exec(`
1336-
SELECT * from t.test WHERE k = 'test_key';
1337-
`); err != nil {
1338-
t.Fatal(err)
1339-
}
1334+
_, err = sqlDB.Exec(`SELECT * from t.test WHERE k = 'test_key';`)
1335+
require.NoError(t, err)
13401336

1341-
if u := atomic.LoadInt32(&clockUpdate); u != 1 {
1342-
t.Errorf("expected exacltly one clock update, but got %d", u)
1343-
}
1344-
if u, e := atomic.LoadInt32(&restartDone), int32(refreshAttempts+1); u != e {
1345-
t.Errorf("expected exactly %d restarts, but got %d", e, u)
1346-
}
1337+
require.True(t, clockUpdate.Load())
1338+
require.Equal(t, int32(refreshAttempts)+1, restartDone.Load())
13471339
}
13481340

13491341
// Verifies that the uncommitted descriptor cache is flushed on a txn restart.

0 commit comments

Comments
 (0)