Skip to content

Commit 2e0c144

Browse files
committed
failureinjection: add disableStateValidation option to Failers
Long-running clusters such as DRT perform setup and cleanup in separate CLI calls, so state cannot be maintained between them. This change adds an option to disable state validation.
1 parent 6d90a52 commit 2e0c144

17 files changed

+47
-31
lines changed

pkg/cmd/roachtest/cluster.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3341,11 +3341,12 @@ func (c *clusterImpl) GetFailer(
33413341
l *logger.Logger,
33423342
nodes option.NodeListOption,
33433343
failureModeName string,
3344+
disableStateValidation bool,
33443345
opts ...failures.ClusterOptionFunc,
33453346
) (*failures.Failer, error) {
33463347
fr := failures.GetFailureRegistry()
33473348
clusterOpts := append(opts, failures.Secure(c.IsSecure()), failures.LocalCertsPath(c.localCertsDir))
3348-
failer, err := fr.GetFailer(c.MakeNodes(nodes), failureModeName, l, clusterOpts...)
3349+
failer, err := fr.GetFailer(c.MakeNodes(nodes), failureModeName, l, disableStateValidation, clusterOpts...)
33493350
if err != nil {
33503351
return nil, err
33513352
}

pkg/cmd/roachtest/cluster/cluster_interface.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,5 +222,5 @@ type Cluster interface {
222222

223223
RegisterClusterHook(hookName string, hookType option.ClusterHookType, timeout time.Duration, hook func(context.Context) error)
224224

225-
GetFailer(l *logger.Logger, nodes option.NodeListOption, failureModeName string, opts ...failures.ClusterOptionFunc) (*failures.Failer, error)
225+
GetFailer(l *logger.Logger, nodes option.NodeListOption, failureModeName string, disableStateValidation bool, opts ...failures.ClusterOptionFunc) (*failures.Failer, error)
226226
}

pkg/cmd/roachtest/clusterstats/mock_cluster_generated_test.go

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/cmd/roachtest/operations/disk_stall.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ func runDiskStall(
4949

5050
nodes := c.All()
5151
nid := nodes[rng.Intn(len(nodes))]
52-
ds := roachtestutil.MakeDmsetupDiskStaller(o, c)
52+
// Disable state validation since we run dmsetup Setup() during cluster creation and not as part
53+
// of the operation.
54+
ds := roachtestutil.MakeDmsetupDiskStaller(o, c, true /* disableStateValidation */)
5355

5456
o.Status(fmt.Sprintf("stalling disk on node %d", nid))
5557
ds.Stall(ctx, c.Node(nid))

pkg/cmd/roachtest/roachtestutil/disk_stall.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ type cgroupDiskStaller struct {
6464
var _ DiskStaller = (*cgroupDiskStaller)(nil)
6565

6666
func MakeCgroupDiskStaller(
67-
f Fataler, c cluster.Cluster, stallReads bool, stallLogs bool,
67+
f Fataler, c cluster.Cluster, stallReads bool, stallLogs bool, disableStateValidation bool,
6868
) DiskStaller {
69-
diskStaller, err := c.GetFailer(f.L(), c.CRDBNodes(), failures.CgroupsDiskStallName)
69+
diskStaller, err := c.GetFailer(f.L(), c.CRDBNodes(), failures.CgroupsDiskStallName, disableStateValidation)
7070
if err != nil {
7171
f.Fatalf("failed to get failer: %s", err)
7272
}
@@ -158,8 +158,8 @@ type dmsetupDiskStaller struct {
158158

159159
var _ DiskStaller = (*dmsetupDiskStaller)(nil)
160160

161-
func MakeDmsetupDiskStaller(f Fataler, c cluster.Cluster) DiskStaller {
162-
diskStaller, err := c.GetFailer(f.L(), c.CRDBNodes(), failures.DmsetupDiskStallName)
161+
func MakeDmsetupDiskStaller(f Fataler, c cluster.Cluster, disableStateValidation bool) DiskStaller {
162+
diskStaller, err := c.GetFailer(f.L(), c.CRDBNodes(), failures.DmsetupDiskStallName, disableStateValidation)
163163
if err != nil {
164164
f.Fatalf("failed to get failer: %s", err)
165165
}

pkg/cmd/roachtest/roachtestutil/mixedversion/mutators.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ func (m networkPartitionMutator) Generate(
515515
nodeList := planner.currentContext.System.Descriptor.Nodes
516516

517517
failure := failures.GetFailureRegistry()
518-
f, err := failure.GetFailer(planner.cluster.Name(), failures.IPTablesNetworkPartitionName, planner.logger)
518+
f, err := failure.GetFailer(planner.cluster.Name(), failures.IPTablesNetworkPartitionName, planner.logger, false)
519519
if err != nil {
520520
return nil, fmt.Errorf("failed to get failer for %s: %w", failures.IPTablesNetworkPartitionName, err)
521521
}

pkg/cmd/roachtest/roachtestutil/utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ func SimulateMultiRegionCluster(
298298
regionToNodeMap failures.RegionToNodes,
299299
l *logger.Logger,
300300
) (func(), error) {
301-
latencyFailer, err := c.GetFailer(l, c.All(), failures.NetworkLatencyName)
301+
latencyFailer, err := c.GetFailer(l, c.All(), failures.NetworkLatencyName, false /* disableStateValidation */)
302302
if err != nil {
303303
return nil, err
304304
}

pkg/cmd/roachtest/tests/admission_control_disk_bandwidth_overload.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ func registerDiskBandwidthOverload(r registry.Registry) {
7272
const provisionedBandwidth = 128 << 20 // 128 MiB
7373
t.Status(fmt.Sprintf("limiting disk bandwidth to %d bytes/s", provisionedBandwidth))
7474
staller := roachtestutil.MakeCgroupDiskStaller(t, c,
75-
false /* readsToo */, false /* logsToo */)
75+
false /* readsToo */, false /* logsToo */, false /* disableStateValidation */)
7676
staller.Setup(ctx)
7777
staller.Slow(ctx, c.CRDBNodes(), provisionedBandwidth)
7878

pkg/cmd/roachtest/tests/admission_control_elastic_mixed_version.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ func registerElasticWorkloadMixedVersion(r registry.Registry) {
6666
setDiskBandwidth := func() {
6767
t.Status(fmt.Sprintf("limiting disk bandwidth to %d bytes/s", diskBand))
6868
staller := roachtestutil.MakeCgroupDiskStaller(t, c,
69-
false /* readsToo */, false /* logsToo */)
69+
false /* readsToo */, false /* logsToo */, false /* disableStateValidation */)
7070
staller.Setup(ctx)
7171
staller.Slow(ctx, c.CRDBNodes(), diskBand)
7272
}

pkg/cmd/roachtest/tests/admission_control_snapshot_overload_io.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ func runAdmissionControlSnapshotOverloadIO(
175175
const bandwidthLimit = 128 << 20 // 128 MiB
176176
t.Status(fmt.Sprintf("limiting disk bandwidth to %d bytes/s", bandwidthLimit))
177177
staller := roachtestutil.MakeCgroupDiskStaller(t, c,
178-
false /* readsToo */, false /* logsToo */)
178+
false /* readsToo */, false /* logsToo */, false /* disableStateValidation */)
179179
staller.Setup(ctx)
180180
staller.Slow(ctx, c.CRDBNodes(), bandwidthLimit)
181181

0 commit comments

Comments
 (0)