Skip to content

Commit c25f350

Browse files
committed
sql: fix race in TestRollbackForeignKeyAddition
The test flaked due to canceling the wrong job. Adding a foreign key creates two jobs (one per table). But only one gets suspended. The test could mistakenly cancel the non-suspended job, causing a failure if the job had already completed. Now, it correctly selects the suspended job before canceling. Fixes #153570 Release note: none Epic: none
1 parent 0a3a203 commit c25f350

File tree

1 file changed

+60
-4
lines changed

1 file changed

+60
-4
lines changed

pkg/sql/schema_changer_test.go

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6356,9 +6356,11 @@ func TestRollbackForeignKeyAddition(t *testing.T) {
63566356
tdb.Exec(t, `CREATE TABLE db.t2 (a INT)`)
63576357
tdb.Exec(t, `SET use_declarative_schema_changer = off`)
63586358

6359+
const alterTableSQL = `ALTER TABLE db.public.t2 ADD FOREIGN KEY (a) REFERENCES db.public.t`
6360+
63596361
g := ctxgroup.WithContext(ctx)
63606362
g.GoCtx(func(ctx context.Context) error {
6361-
_, err := sqlDB.ExecContext(ctx, `ALTER TABLE db.t2 ADD FOREIGN KEY (a) REFERENCES db.t`)
6363+
_, err := sqlDB.ExecContext(ctx, alterTableSQL)
63626364
require.Regexp(t, "job canceled by user", err)
63636365
return nil
63646366
})
@@ -6367,9 +6369,54 @@ func TestRollbackForeignKeyAddition(t *testing.T) {
63676369

63686370
var jobID jobspb.JobID
63696371

6370-
// We filter by running because there's a bug where we create an extra
6371-
// no-op job for the referenced table (#57624).
6372-
require.NoError(t, sqlDB.QueryRow(`SELECT job_id FROM crdb_internal.jobs WHERE description LIKE '%ALTER TABLE%' AND status = 'running'`).Scan(&jobID))
6372+
// The ALTER creates two jobs, but we only end up pausing one with the
6373+
// RunBeforeBackfill callback. Capture the job ID of the one that is paused
6374+
// and allow the other one to complete.
6375+
testutils.SucceedsSoon(t, func() error {
6376+
rows, err := sqlDB.Query(`SELECT job_id, status FROM crdb_internal.jobs WHERE description = $1 ORDER BY job_id`, alterTableSQL)
6377+
if err != nil {
6378+
return err
6379+
}
6380+
defer rows.Close()
6381+
6382+
var jobs []struct {
6383+
id jobspb.JobID
6384+
status string
6385+
}
6386+
6387+
for rows.Next() {
6388+
var id jobspb.JobID
6389+
var status string
6390+
if err := rows.Scan(&id, &status); err != nil {
6391+
return err
6392+
}
6393+
jobs = append(jobs, struct {
6394+
id jobspb.JobID
6395+
status string
6396+
}{id, status})
6397+
}
6398+
6399+
if len(jobs) != 2 {
6400+
return errors.Errorf("expected 2 jobs, found %d", len(jobs))
6401+
}
6402+
6403+
var runningCount, succeededCount int
6404+
for _, job := range jobs {
6405+
switch job.status {
6406+
case "running":
6407+
runningCount++
6408+
jobID = job.id
6409+
case "succeeded":
6410+
succeededCount++
6411+
}
6412+
}
6413+
6414+
if runningCount != 1 || succeededCount != 1 {
6415+
return errors.Errorf("expected 1 running and 1 succeeded job, found %d running, %d succeeded", runningCount, succeededCount)
6416+
}
6417+
6418+
return nil
6419+
})
63736420
tdb.Exec(t, "CANCEL JOB $1", jobID)
63746421

63756422
close(continueNotification)
@@ -6381,6 +6428,15 @@ func TestRollbackForeignKeyAddition(t *testing.T) {
63816428
Scan(&status, &error)
63826429
require.Equal(t, status, jobs.StateCanceled)
63836430
require.Equal(t, error, "job canceled by user")
6431+
6432+
// Verify that descriptors are valid after job cancellation
6433+
rows, err := sqlDB.Query(`SELECT * FROM "".crdb_internal.invalid_objects`)
6434+
require.NoError(t, err)
6435+
defer rows.Close()
6436+
if rows.Next() {
6437+
t.Fatal("found catalog corruptions after job cancellation")
6438+
}
6439+
require.NoError(t, rows.Err())
63846440
}
63856441

63866442
// TestRevertingJobsOnDatabasesAndSchemas tests that schema change jobs on

0 commit comments

Comments
 (0)