Skip to content

Commit ac55715

Browse files
committed
sql,jobs: short-term fix for UndefinedColumn job_type error
In cockroachdb#106762 we noticed that if a query is executed with an AS OF SYSTEM TIME clause that picks a transaction timestamp before the job_type migration, then parts of the jobs infrastructure will attempt to query the job_type column even though it doesn't exist at the transaction's timestamp. As a short-term fix, when we encounter an `UndefinedColumn` error for the `job_type` column in `crdb_internal.jobs` we generate a synthetic retryable error so that the txn is pushed to a higher timestamp at which the upgrade will have completed and the `job_type` column will be visible. The longer-term fix is being tracked in cockroachdb#106764. We are intentionally approaching this issue with a whack-a-mole approach to stabilize the tests that are running into this issue. We think time is better spent designing and investing in the longer-term solution that will be tracked in cockroachdb#106764. Fixes: cockroachdb#107169 Informs: cockroachdb#106762 Release note: None
1 parent 451d761 commit ac55715

File tree

4 files changed

+35
-3
lines changed

4 files changed

+35
-3
lines changed

pkg/jobs/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ go_library(
4949
"//pkg/sql/catalog/descpb",
5050
"//pkg/sql/catalog/descs",
5151
"//pkg/sql/isql",
52+
"//pkg/sql/pgwire/pgcode",
5253
"//pkg/sql/pgwire/pgerror",
5354
"//pkg/sql/protoreflect",
5455
"//pkg/sql/sem/builtins",

pkg/jobs/utils.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ import (
1919
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
2020
"github.com/cockroachdb/cockroach/pkg/kv"
2121
"github.com/cockroachdb/cockroach/pkg/sql/isql"
22+
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
23+
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
2224
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
2325
"github.com/cockroachdb/errors"
2426
)
@@ -153,3 +155,33 @@ func JobExists(
153155
}
154156
return row != nil, nil
155157
}
158+
159+
// isJobTypeColumnDoesNotExistError returns true if the error is of the form
160+
// `column "job_type" does not exist`.
161+
func isJobTypeColumnDoesNotExistError(err error) bool {
162+
return pgerror.GetPGCode(err) == pgcode.UndefinedColumn &&
163+
strings.Contains(err.Error(), "column \"job_type\" does not exist")
164+
}
165+
166+
// MaybeGenerateForcedRetryableError returns a
167+
// TransactionRetryWithProtoRefreshError that will cause the txn to be retried
168+
// if the error is because of an undefined job_type column.
169+
//
170+
// In https://github.com/cockroachdb/cockroach/issues/106762 we noticed that if
171+
// a query is executed with an AS OF SYSTEM TIME clause that picks a transaction
172+
// timestamp before the job_type migration, then parts of the jobs
173+
// infrastructure will attempt to query the job_type column even though it
174+
// doesn't exist at the transaction's timestamp.
175+
//
176+
// As a short term fix, when we encounter an `UndefinedColumn` error we
177+
// generate a synthetic retryable error so that the txn is pushed to a
178+
// higher timestamp at which the upgrade will have completed and the
179+
// `job_type` column will be visible. The longer term fix is being tracked
180+
// in https://github.com/cockroachdb/cockroach/issues/106764.
181+
func MaybeGenerateForcedRetryableError(ctx context.Context, txn *kv.Txn, err error) error {
182+
if err != nil && isJobTypeColumnDoesNotExistError(err) {
183+
return txn.GenerateForcedRetryableError(ctx, "synthetic error "+
184+
"to push timestamp to after the `job_type` upgrade has run")
185+
}
186+
return err
187+
}

pkg/sql/crdb_internal.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,7 +1069,7 @@ func populateSystemJobsTableRows(
10691069
params...,
10701070
)
10711071
if err != nil {
1072-
return matched, err
1072+
return matched, jobs.MaybeGenerateForcedRetryableError(ctx, p.Txn(), err)
10731073
}
10741074

10751075
cleanup := func(ctx context.Context) {
@@ -1082,7 +1082,7 @@ func populateSystemJobsTableRows(
10821082
for {
10831083
hasNext, err := it.Next(ctx)
10841084
if !hasNext || err != nil {
1085-
return matched, err
1085+
return matched, jobs.MaybeGenerateForcedRetryableError(ctx, p.Txn(), err)
10861086
}
10871087

10881088
currentRow := it.Cur()

pkg/upgrade/upgrades/json_forward_indexes_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
)
2626

2727
func TestJSONForwardingIndexes(t *testing.T) {
28-
skip.WithIssue(t, 107169, "flaky test")
2928
var err error
3029
skip.UnderStressRace(t)
3130
defer leaktest.AfterTest(t)()

0 commit comments

Comments
 (0)