Skip to content

Commit 8b5e34a

Browse files
craig[bot], kev-cao, rickystewart
committed
149235: backup: fix flaky OR recovery test r=jeffswenson a=kev-cao

The online restore (OR) recovery test was flaking due to an insufficient timeout, and it also ran into errors because the default database was set offline. This patch fixes the test to account for these errors.

Epic: CRDB-51394
Fixes: #148424
Release note: None

149240: process-bep-file: set default server to `mesolite` r=jlinder a=rickystewart

We are migrating workloads from `tanzanite` to `mesolite` as part of merging the two clusters.

Epic: none
Release note: None
Release justification: Non-production code changes

Co-authored-by: Kevin Cao <[email protected]>
Co-authored-by: Ricky Stewart <[email protected]>
3 parents 9015a1c + 7a1e2bc + a3c26ab commit 8b5e34a

File tree

2 files changed

+8
-2
lines changed

2 files changed

+8
-2
lines changed

pkg/cmd/bazci/process-bep-file/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ var (
3131
eventStreamFile = flag.String("eventsfile", "", "eventstream file produced by bazel build --build_event_binary_file")
3232
jsonOutFile = flag.String("jsonoutfile", "", "if given, file path where to write the JSON test report")
3333

34-
serverName = flag.String("servername", "tanzanite", "URL of the EngFlow cluster")
34+
serverName = flag.String("servername", "mesolite", "URL of the EngFlow cluster")
3535
tlsClientCert = flag.String("cert", "", "TLS client certificate for accessing EngFlow, probably a .crt file")
3636
tlsClientKey = flag.String("key", "", "TLS client key for accessing EngFlow")
3737

pkg/cmd/roachtest/tests/backup_restore_roundtrip.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,12 @@ func testOnlineRestoreRecovery(ctx context.Context, t test.Test, c cluster.Clust
539539
)
540540
}
541541

542+
// defaultdb is going to be set offline by the failed download job, so we
543+
// need to switch to the system database first to avoid any errors.
544+
if _, err := dbConn.ExecContext(ctx, "USE system"); err != nil {
545+
return err
546+
}
547+
542548
if _, err := dbConn.ExecContext(
543549
ctx, "SET CLUSTER SETTING jobs.debug.pausepoints = ''",
544550
); err != nil {
@@ -558,7 +564,7 @@ func testOnlineRestoreRecovery(ctx context.Context, t test.Test, c cluster.Clust
558564
err, "waiting for download job %v to reach resumed state", downloadJobID,
559565
)
560566
}
561-
if err := WaitForFailed(ctx, dbConn, jobspb.JobID(downloadJobID), jobStatusWait); err != nil {
567+
if err := WaitForFailed(ctx, dbConn, jobspb.JobID(downloadJobID), 10*time.Minute); err != nil {
562568
return errors.Wrapf(
563569
err, "waiting for download job %v to reach failed state", downloadJobID,
564570
)

0 commit comments

Comments
 (0)