Skip to content

Commit 7170345

Browse files
committed
backup: improve datadriven test cleanup
Previously, the datadriven test harness would tear down clusters in order. This makes it difficult to troubleshoot stuck tear downs because there are goroutines for a running server mixed in with goroutines for a server with a stuck shutdown. Release note: none Informs: #145079
1 parent 8b550fa commit 7170345

File tree

1 file changed

+24
-15
lines changed

1 file changed

+24
-15
lines changed

pkg/backup/datadriven_test.go

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import (
4545
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
4646
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
4747
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
48+
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
4849
"github.com/cockroachdb/datadriven"
4950
"github.com/cockroachdb/errors"
5051
"github.com/lib/pq"
@@ -91,10 +92,9 @@ type sqlDBKey struct {
9192

9293
type datadrivenTestState struct {
9394
// cluster maps the user defined cluster name to its cluster
94-
clusters map[string]serverutils.TestClusterInterface
95+
clusters map[string]serverutils.TestClusterInterface
96+
clusterCleanup []func()
9597

96-
// firstNode maps the cluster name to the first node in the cluster
97-
firstNode map[string]serverutils.TestServerInterface
9898
dataDirs map[string]string
9999
sqlDBs map[sqlDBKey]*gosql.DB
100100
jobTags map[string]jobspb.JobID
@@ -107,7 +107,6 @@ type datadrivenTestState struct {
107107
func newDatadrivenTestState() datadrivenTestState {
108108
return datadrivenTestState{
109109
clusters: make(map[string]serverutils.TestClusterInterface),
110-
firstNode: make(map[string]serverutils.TestServerInterface),
111110
dataDirs: make(map[string]string),
112111
sqlDBs: make(map[sqlDBKey]*gosql.DB),
113112
jobTags: make(map[string]jobspb.JobID),
@@ -120,16 +119,27 @@ func (d *datadrivenTestState) cleanup(ctx context.Context, t *testing.T) {
120119
// While the testCluster cleanupFns would close the dbConn and clusters, close
121120
// them manually to ensure all queries finish on tests that share these
122121
// resources.
122+
for _, f := range d.cleanupFns {
123+
f()
124+
}
125+
123126
for _, db := range d.sqlDBs {
124127
backuptestutils.CheckForInvalidDescriptors(t, db)
125128
db.Close()
126129
}
127-
for _, s := range d.firstNode {
128-
s.Stopper().Stop(ctx)
129-
}
130-
for _, f := range d.cleanupFns {
131-
f()
130+
131+
// Clean up the clusters in parallel so that if one gets stuck we don't see
132+
// goroutines for running clusters mixed in with goroutines for the stuck
133+
// cluster.
134+
group := ctxgroup.WithContext(ctx)
135+
for _, cleanup := range d.clusterCleanup {
136+
group.Go(func() error {
137+
cleanup()
138+
return nil
139+
})
132140
}
141+
require.NoError(t, group.Wait())
142+
133143
d.noticeBuffer = nil
134144
}
135145

@@ -223,9 +233,8 @@ func (d *datadrivenTestState) addCluster(t *testing.T, cfg clusterCfg) error {
223233
var cleanup func()
224234
tc, _, cfg.iodir, cleanup = backuptestutils.StartBackupRestoreTestCluster(t, clusterSize, opts...)
225235
d.clusters[cfg.name] = tc
226-
d.firstNode[cfg.name] = tc.Server(0)
227236
d.dataDirs[cfg.name] = cfg.iodir
228-
d.cleanupFns = append(d.cleanupFns, cleanup)
237+
d.clusterCleanup = append(d.clusterCleanup, cleanup)
229238

230239
return nil
231240
}
@@ -262,15 +271,15 @@ func (d *datadrivenTestState) getSQLDBForVC(
262271
serverutils.User(user),
263272
}
264273

265-
s := d.firstNode[name].ApplicationLayer()
274+
s := d.clusters[name].ApplicationLayer(0)
266275
switch vc {
267276
case "default":
268277
// Nothing to do.
269278
case "system":
270279
// We use the system layer since in the case of
271280
// external SQL server's the application layer can't
272281
// route to the system tenant.
273-
s = d.firstNode[name].SystemLayer()
282+
s = d.clusters[name].SystemLayer(0)
274283
default:
275284
opts = append(opts, serverutils.DBName("cluster:"+vc))
276285
}
@@ -956,7 +965,7 @@ func runTestDataDriven(t *testing.T, testFilePathFromWorkspace string) {
956965
return ""
957966

958967
case "create-dummy-system-table":
959-
al := ds.firstNode[lastCreatedCluster].ApplicationLayer()
968+
al := ds.clusters[lastCreatedCluster].ApplicationLayer(0)
960969
db := al.DB()
961970
execCfg := al.ExecutorConfig().(sql.ExecutorConfig)
962971
codec := execCfg.Codec
@@ -1037,7 +1046,7 @@ func handleKVRequest(
10371046
},
10381047
UseRangeTombstone: true,
10391048
}
1040-
if _, err := kv.SendWrapped(ctx, ds.firstNode[cluster].SystemLayer().DistSenderI().(*kvcoord.DistSender), &dr); err != nil {
1049+
if _, err := kv.SendWrapped(ctx, ds.clusters[cluster].SystemLayer(0).DistSenderI().(*kvcoord.DistSender), &dr); err != nil {
10411050
t.Fatal(err)
10421051
}
10431052
} else {

0 commit comments

Comments
 (0)