Skip to content

Commit 3e7991d

Browse files
craig[bot] and Andrew Baptist committed
108115: roachtest: don't reuse clusters that call dmsetup r=erikgrinaker,srosenberg a=andrewbaptist Certain tests need to modify the block device, and they are prone to failures during setup because the device is still busy. Ideally we would figure out what is still holding onto the disk handle, but it is safer to simply not reuse clusters that do this, by adding `spec.ReuseNone()`. Fixes: cockroachdb#107865 Epic: none Release note: None Co-authored-by: Andrew Baptist <[email protected]>
2 parents f1623e0 + 704e6e9 commit 3e7991d

File tree

1 file changed

+11
-6
lines changed

1 file changed

+11
-6
lines changed

pkg/cmd/roachtest/tests/failover.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ func registerFailover(r registry.Registry) {
8080
Owner: registry.OwnerKV,
8181
Benchmark: true,
8282
Timeout: 60 * time.Minute,
83-
Cluster: r.MakeClusterSpec(10, spec.CPU(2), spec.PreferLocalSSD(false)), // uses disk stalls
83+
Cluster: r.MakeClusterSpec(10, spec.CPU(2), spec.PreferLocalSSD(false), spec.ReuseNone()), // uses disk stalls
8484
Leases: leases,
8585
SkipPostValidations: registry.PostValidationNoDeadNodes, // cleanup kills nodes
8686
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
@@ -122,12 +122,17 @@ func registerFailover(r registry.Registry) {
122122
for _, failureMode := range allFailureModes {
123123
failureMode := failureMode // pin loop variable
124124

125-
var usePD bool
125+
clusterOpts := make([]spec.Option, 0)
126+
clusterOpts = append(clusterOpts, spec.CPU(2))
127+
126128
var postValidation registry.PostValidation
127129
if failureMode == failureModeDiskStall {
128130
// Use PDs in an attempt to work around flakes encountered when using
129131
// SSDs. See #97968.
130-
usePD = true
132+
clusterOpts = append(clusterOpts, spec.PreferLocalSSD(false))
133+
// Don't reuse the cluster for tests that call dmsetup to avoid
134+
// spurious flakes from previous runs. See #107865
135+
clusterOpts = append(clusterOpts, spec.ReuseNone())
131136
postValidation = registry.PostValidationNoDeadNodes
132137
}
133138
r.Add(registry.TestSpec{
@@ -136,7 +141,7 @@ func registerFailover(r registry.Registry) {
136141
Benchmark: true,
137142
Timeout: 30 * time.Minute,
138143
SkipPostValidations: postValidation,
139-
Cluster: r.MakeClusterSpec(7, spec.CPU(2), spec.PreferLocalSSD(!usePD)),
144+
Cluster: r.MakeClusterSpec(7, clusterOpts...),
140145
Leases: leases,
141146
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
142147
runFailoverNonSystem(ctx, t, c, failureMode)
@@ -149,7 +154,7 @@ func registerFailover(r registry.Registry) {
149154
Benchmark: true,
150155
Timeout: 30 * time.Minute,
151156
SkipPostValidations: postValidation,
152-
Cluster: r.MakeClusterSpec(5, spec.CPU(2), spec.PreferLocalSSD(!usePD)),
157+
Cluster: r.MakeClusterSpec(5, clusterOpts...),
153158
Leases: leases,
154159
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
155160
runFailoverLiveness(ctx, t, c, failureMode)
@@ -162,7 +167,7 @@ func registerFailover(r registry.Registry) {
162167
Benchmark: true,
163168
Timeout: 30 * time.Minute,
164169
SkipPostValidations: postValidation,
165-
Cluster: r.MakeClusterSpec(7, spec.CPU(2), spec.PreferLocalSSD(!usePD)),
170+
Cluster: r.MakeClusterSpec(7, clusterOpts...),
166171
Leases: leases,
167172
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
168173
runFailoverSystemNonLiveness(ctx, t, c, failureMode)

0 commit comments

Comments
 (0)