Skip to content

Commit 04db75f

Browse files
committed
roachtest: make gossip chaos test more resilient
See #153403 (comment). Via backport: Closes #153403. Epic: none
1 parent c2c042f commit 04db75f

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

pkg/cmd/roachtest/tests/gossip.go

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/roachprod"
 	"github.com/cockroachdb/cockroach/pkg/roachprod/install"
 	"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
+	"github.com/cockroachdb/cockroach/pkg/testutils"
 	"github.com/cockroachdb/cockroach/pkg/util"
 	"github.com/cockroachdb/cockroach/pkg/util/allstacks"
 	"github.com/cockroachdb/cockroach/pkg/util/retry"
@@ -60,7 +61,17 @@ SELECT node_id
 	tBeforePing := timeutil.Now()
 	db := c.Conn(ctx, t.L(), node)
 	defer db.Close()
-	require.NoError(t, db.Ping())
+
+	testutils.SucceedsSoon(t, func() error {
+		// Having just shut down a node, the sql user table may be in the
+		// process of failing over, and if we're unlucky and try to open a new
+		// conn here, we can sometimes hit an internal 10s timeout should the
+		// failover take longer than usual.
+		//
+		// See https://github.com/cockroachdb/cockroach/issues/153403#issuecomment-3296381756.
+		return db.Ping()
+	})
+
 	tAfterPing := timeutil.Now()
 	if pingDur := tAfterPing.Sub(tBeforePing); pingDur > 20*time.Second {
 		t.L().Printf("sql connection ready after %.2fs", pingDur.Seconds())

0 commit comments

Comments
 (0)