Skip to content

Commit a7f0434

Browse files
craig[bot]fqaziyuzefovich
committed
108133: sql: release save point did not retry for two version invariant errors r=fqazi a=fqazi Previously, when release save point was issued for cockroach_restart this would drive a commit, which could run into retryable errors. Like the two version invariant error, which is not tagged as a user facing re triable error, so the client doesn't know to retry. To address this, this patch converts two version invariant errors in this case into user facing retryable errors; Fixes: cockroachdb#107171 Release note (bug fix): Release save point could incorrectly emit "cannot publish new versions for descriptors" instead of a retryable error to applications. 108461: sql: fix recently introduced minor bug around PREPARE r=yuzefovich a=yuzefovich In just merged 4a3e787, we had a minor bug that in case `addPreparedStmt` call fails, we don't restore the original placeholders, which can then lead to panics. Fixes: cockroachdb#108421. Release note: None Co-authored-by: Faizan Qazi <[email protected]> Co-authored-by: Yahor Yuzefovich <[email protected]>
3 parents 1925449 + 0a1be2f + 2042afe commit a7f0434

File tree

4 files changed

+156
-13
lines changed

4 files changed

+156
-13
lines changed

pkg/sql/catalog/lease/lease_test.go

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,121 @@ CREATE TABLE t.kv (k CHAR PRIMARY KEY, v CHAR);
14791479
}
14801480
}
14811481

1482+
// Tests that when a transaction has already returned results
1483+
// to the user and the transaction continues on to make a schema change,
1484+
// whenever the table lease two version invariant is violated and the
1485+
// transaction needs to be restarted, a retryable error is returned to the
1486+
// user. This specific version validations that release savepoint does the
1487+
// same with cockroach_restart, which commits on release.
1488+
func TestTwoVersionInvariantRetryErrorWitSavePoint(t *testing.T) {
1489+
defer leaktest.AfterTest(t)()
1490+
var violations int64
1491+
var params base.TestServerArgs
1492+
params.Knobs = base.TestingKnobs{
1493+
// Disable execution of schema changers after the schema change
1494+
// transaction commits. This is to prevent executing the default
1495+
// WaitForOneVersion() code that holds up a schema change
1496+
// transaction until the new version has been published to the
1497+
// entire cluster.
1498+
SQLSchemaChanger: &sql.SchemaChangerTestingKnobs{
1499+
SchemaChangeJobNoOp: func() bool {
1500+
return true
1501+
},
1502+
TwoVersionLeaseViolation: func() {
1503+
atomic.AddInt64(&violations, 1)
1504+
},
1505+
},
1506+
}
1507+
s, sqlDB, kvDB := serverutils.StartServer(t, params)
1508+
defer s.Stopper().Stop(context.Background())
1509+
1510+
if _, err := sqlDB.Exec(`
1511+
CREATE DATABASE t;
1512+
CREATE TABLE t.kv (k CHAR PRIMARY KEY, v CHAR);
1513+
INSERT INTO t.kv VALUES ('a', 'b');
1514+
`); err != nil {
1515+
t.Fatal(err)
1516+
}
1517+
1518+
tableDesc := desctestutils.TestingGetPublicTableDescriptor(kvDB, keys.SystemSQLCodec, "t", "kv")
1519+
if tableDesc.GetVersion() != 1 {
1520+
t.Fatalf("invalid version %d", tableDesc.GetVersion())
1521+
}
1522+
1523+
tx, err := sqlDB.Begin()
1524+
if err != nil {
1525+
t.Fatal(err)
1526+
}
1527+
1528+
// Grab a lease on the table.
1529+
rows, err := tx.Query("SELECT * FROM t.kv")
1530+
if err != nil {
1531+
t.Fatal(err)
1532+
}
1533+
if err := rows.Close(); err != nil {
1534+
t.Fatal(err)
1535+
}
1536+
1537+
// Modify the table descriptor increments the version.
1538+
if _, err := sqlDB.Exec(`ALTER TABLE t.kv RENAME to t.kv1`); err != nil {
1539+
t.Fatal(err)
1540+
}
1541+
1542+
txRetry, err := sqlDB.Begin()
1543+
if err != nil {
1544+
t.Fatal(err)
1545+
}
1546+
_, err = txRetry.Exec("SAVEPOINT cockroach_restart;")
1547+
if err != nil {
1548+
t.Fatal(err)
1549+
}
1550+
1551+
// Read some data using the transaction so that it cannot be
1552+
// retried internally
1553+
rows, err = txRetry.Query(`SELECT 1`)
1554+
if err != nil {
1555+
t.Fatal(err)
1556+
}
1557+
if err := rows.Close(); err != nil {
1558+
t.Fatal(err)
1559+
}
1560+
1561+
if _, err := txRetry.Exec(`ALTER TABLE t.kv1 RENAME TO t.kv2`); err != nil {
1562+
t.Fatal(err)
1563+
}
1564+
1565+
var wg sync.WaitGroup
1566+
wg.Add(1)
1567+
go func() {
1568+
defer wg.Done()
1569+
// This can hang waiting for one version before tx.Commit() is
1570+
// called below, so it is executed in another goroutine.
1571+
_, err := txRetry.Exec("RELEASE SAVEPOINT cockroach_restart;")
1572+
if !testutils.IsError(err,
1573+
fmt.Sprintf(`TransactionRetryWithProtoRefreshError: cannot publish new versions for descriptors: \[\{kv1 %d 1\}\], old versions still in use`, tableDesc.GetID()),
1574+
) {
1575+
t.Errorf("err = %v", err)
1576+
}
1577+
err = txRetry.Rollback()
1578+
if err != nil {
1579+
t.Errorf("err = %v", err)
1580+
}
1581+
}()
1582+
1583+
// Make sure that txRetry does violate the two version lease invariant.
1584+
testutils.SucceedsSoon(t, func() error {
1585+
if atomic.LoadInt64(&violations) == 0 {
1586+
return errors.Errorf("didnt retry schema change")
1587+
}
1588+
return nil
1589+
})
1590+
// Commit the first transaction, unblocking txRetry.
1591+
if err := tx.Commit(); err != nil {
1592+
t.Fatal(err)
1593+
}
1594+
wg.Wait()
1595+
}
1596+
14821597
// Tests that when a transaction has already returned results
14831598
// to the user and the transaction continues on to make a schema change,
14841599
// whenever the table lease two version invariant is violated and the

pkg/sql/conn_executor.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3284,6 +3284,28 @@ func errIsRetriable(err error) bool {
32843284
descs.IsTwoVersionInvariantViolationError(err)
32853285
}
32863286

3287+
// convertRetriableErrorIntoUserVisibleError converts internal retriable
3288+
// errors into external, so that the client goes and retries this
3289+
// transaction. One example of this is two version invariant errors, which
3290+
// happens when a schema change is waiting for a schema change transition to
3291+
// propagate. When this happens, we either need to retry externally or internally,
3292+
// depending on if we are in an explicit transaction.
3293+
func (ex *connExecutor) convertRetriableErrorIntoUserVisibleError(
3294+
ctx context.Context, origErr error,
3295+
) (modifiedErr error, err error) {
3296+
if descs.IsTwoVersionInvariantViolationError(origErr) {
3297+
if resetErr := ex.resetTransactionOnSchemaChangeRetry(ctx); resetErr != nil {
3298+
return nil, resetErr
3299+
}
3300+
// Generating a forced retry error here, right after resetting the
3301+
// transaction is not exactly necessary, but it's a sound way to
3302+
// generate the only type of ClientVisibleRetryError we have.
3303+
return ex.state.mu.txn.GenerateForcedRetryableError(ctx, redact.Sprint(origErr)), nil
3304+
}
3305+
// Return the original error, this error will not be surfaced to the user.
3306+
return origErr, nil
3307+
}
3308+
32873309
// makeErrEvent takes an error and returns either an eventRetriableErr or an
32883310
// eventNonRetriableErr, depending on the error type.
32893311
func (ex *connExecutor) makeErrEvent(err error, stmt tree.Statement) (fsm.Event, fsm.EventPayload) {

pkg/sql/conn_executor_exec.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -903,16 +903,18 @@ func (ex *connExecutor) execStmtInOpenState(
903903
// PREPARE statement itself.
904904
oldPlaceholders := p.extendedEvalCtx.Placeholders
905905
p.extendedEvalCtx.Placeholders = nil
906+
defer func() {
907+
// The call to addPreparedStmt changed the planner stmt to the
908+
// statement being prepared. Set it back to the PREPARE statement,
909+
// so that it's logged correctly.
910+
p.stmt = stmt
911+
p.extendedEvalCtx.Placeholders = oldPlaceholders
912+
}()
906913
if _, err := ex.addPreparedStmt(
907914
ctx, name, prepStmt, typeHints, rawTypeHints, PreparedStatementOriginSQL,
908915
); err != nil {
909916
return makeErrEvent(err)
910917
}
911-
// The call to addPreparedStmt changed the planner stmt to the statement
912-
// being prepared. Set it back to the PREPARE statement, so that it's
913-
// logged correctly.
914-
p.stmt = stmt
915-
p.extendedEvalCtx.Placeholders = oldPlaceholders
916918
return nil, nil, nil
917919
}
918920

@@ -1307,14 +1309,12 @@ func (ex *connExecutor) commitSQLTransaction(
13071309
}
13081310
ex.phaseTimes.SetSessionPhaseTime(sessionphase.SessionStartTransactionCommit, timeutil.Now())
13091311
if err := commitFn(ctx); err != nil {
1310-
if descs.IsTwoVersionInvariantViolationError(err) {
1311-
if resetErr := ex.resetTransactionOnSchemaChangeRetry(ctx); resetErr != nil {
1312-
return ex.makeErrEvent(err, ast)
1313-
}
1314-
// Generating a forced retry error here, right after resetting the
1315-
// transaction is not exactly necessary, but it's a sound way to
1316-
// generate the only type of ClientVisibleRetryError we have.
1317-
err = ex.state.mu.txn.GenerateForcedRetryableError(ctx, redact.Sprint(err))
1312+
// For certain retryable errors, we should turn them into client visible
1313+
// errors, since the client needs to retry now.
1314+
var conversionError error
1315+
err, conversionError = ex.convertRetriableErrorIntoUserVisibleError(ctx, err)
1316+
if conversionError != nil {
1317+
return ex.makeErrEvent(conversionError, ast)
13181318
}
13191319
return ex.makeErrEvent(err, ast)
13201320
}

pkg/sql/conn_executor_savepoints.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,12 @@ func (ex *connExecutor) execRelease(
152152
// Committing the transaction failed. We'll go to state RestartWait if
153153
// it's a retriable error, or to state RollbackWait otherwise.
154154
if errIsRetriable(err) {
155+
// For certain retryable errors, we should turn them into client visible
156+
// errors, since the client needs to retry now.
157+
var conversionError error
158+
if err, conversionError = ex.convertRetriableErrorIntoUserVisibleError(ctx, err); conversionError != nil {
159+
return ex.makeErrEvent(conversionError, s)
160+
}
155161
// Add the savepoint back. We want to allow a ROLLBACK TO SAVEPOINT
156162
// cockroach_restart (that's the whole point of commitOnRelease).
157163
env.push(*entry)

0 commit comments

Comments
 (0)