Skip to content

Commit 8ebacaf

Browse files
craig[bot] and DarrylWong committed
Merge #151605
151605: failureinjection: add disableStateValidation option to Failers r=shailendra-patel a=DarrylWong

Long-running clusters such as DRT perform setup and cleanup in separate CLI calls, so state cannot be maintained between them. This change adds an option to disable state validation.

Fixes: none
Epic: none
Release note: none

Co-authored-by: DarrylWong <[email protected]>
2 parents 3682fa3 + 2e0c144 commit 8ebacaf

17 files changed

+47
-31
lines changed

pkg/cmd/roachtest/cluster.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3341,11 +3341,12 @@ func (c *clusterImpl) GetFailer(
33413341
l *logger.Logger,
33423342
nodes option.NodeListOption,
33433343
failureModeName string,
3344+
disableStateValidation bool,
33443345
opts ...failures.ClusterOptionFunc,
33453346
) (*failures.Failer, error) {
33463347
fr := failures.GetFailureRegistry()
33473348
clusterOpts := append(opts, failures.Secure(c.IsSecure()), failures.LocalCertsPath(c.localCertsDir))
3348-
failer, err := fr.GetFailer(c.MakeNodes(nodes), failureModeName, l, clusterOpts...)
3349+
failer, err := fr.GetFailer(c.MakeNodes(nodes), failureModeName, l, disableStateValidation, clusterOpts...)
33493350
if err != nil {
33503351
return nil, err
33513352
}

pkg/cmd/roachtest/cluster/cluster_interface.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,5 +222,5 @@ type Cluster interface {
222222

223223
RegisterClusterHook(hookName string, hookType option.ClusterHookType, timeout time.Duration, hook func(context.Context) error)
224224

225-
GetFailer(l *logger.Logger, nodes option.NodeListOption, failureModeName string, opts ...failures.ClusterOptionFunc) (*failures.Failer, error)
225+
GetFailer(l *logger.Logger, nodes option.NodeListOption, failureModeName string, disableStateValidation bool, opts ...failures.ClusterOptionFunc) (*failures.Failer, error)
226226
}

pkg/cmd/roachtest/clusterstats/mock_cluster_generated_test.go

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/cmd/roachtest/operations/disk_stall.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ func runDiskStall(
4949

5050
nodes := c.All()
5151
nid := nodes[rng.Intn(len(nodes))]
52-
ds := roachtestutil.MakeDmsetupDiskStaller(o, c)
52+
// Disable state validation since dmsetup Setup() runs during cluster creation rather than as
53+
// part of this operation.
54+
ds := roachtestutil.MakeDmsetupDiskStaller(o, c, true /* disableStateValidation */)
5355

5456
o.Status(fmt.Sprintf("stalling disk on node %d", nid))
5557
ds.Stall(ctx, c.Node(nid))

pkg/cmd/roachtest/roachtestutil/disk_stall.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ type cgroupDiskStaller struct {
6464
var _ DiskStaller = (*cgroupDiskStaller)(nil)
6565

6666
func MakeCgroupDiskStaller(
67-
f Fataler, c cluster.Cluster, stallReads bool, stallLogs bool,
67+
f Fataler, c cluster.Cluster, stallReads bool, stallLogs bool, disableStateValidation bool,
6868
) DiskStaller {
69-
diskStaller, err := c.GetFailer(f.L(), c.CRDBNodes(), failures.CgroupsDiskStallName)
69+
diskStaller, err := c.GetFailer(f.L(), c.CRDBNodes(), failures.CgroupsDiskStallName, disableStateValidation)
7070
if err != nil {
7171
f.Fatalf("failed to get failer: %s", err)
7272
}
@@ -158,8 +158,8 @@ type dmsetupDiskStaller struct {
158158

159159
var _ DiskStaller = (*dmsetupDiskStaller)(nil)
160160

161-
func MakeDmsetupDiskStaller(f Fataler, c cluster.Cluster) DiskStaller {
162-
diskStaller, err := c.GetFailer(f.L(), c.CRDBNodes(), failures.DmsetupDiskStallName)
161+
func MakeDmsetupDiskStaller(f Fataler, c cluster.Cluster, disableStateValidation bool) DiskStaller {
162+
diskStaller, err := c.GetFailer(f.L(), c.CRDBNodes(), failures.DmsetupDiskStallName, disableStateValidation)
163163
if err != nil {
164164
f.Fatalf("failed to get failer: %s", err)
165165
}

pkg/cmd/roachtest/roachtestutil/mixedversion/mutators.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ func (m networkPartitionMutator) Generate(
515515
nodeList := planner.currentContext.System.Descriptor.Nodes
516516

517517
failure := failures.GetFailureRegistry()
518-
f, err := failure.GetFailer(planner.cluster.Name(), failures.IPTablesNetworkPartitionName, planner.logger)
518+
f, err := failure.GetFailer(planner.cluster.Name(), failures.IPTablesNetworkPartitionName, planner.logger, false)
519519
if err != nil {
520520
return nil, fmt.Errorf("failed to get failer for %s: %w", failures.IPTablesNetworkPartitionName, err)
521521
}

pkg/cmd/roachtest/roachtestutil/utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ func SimulateMultiRegionCluster(
298298
regionToNodeMap failures.RegionToNodes,
299299
l *logger.Logger,
300300
) (func(), error) {
301-
latencyFailer, err := c.GetFailer(l, c.All(), failures.NetworkLatencyName)
301+
latencyFailer, err := c.GetFailer(l, c.All(), failures.NetworkLatencyName, false /* disableStateValidation */)
302302
if err != nil {
303303
return nil, err
304304
}

pkg/cmd/roachtest/tests/admission_control_disk_bandwidth_overload.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ func registerDiskBandwidthOverload(r registry.Registry) {
7272
const provisionedBandwidth = 128 << 20 // 128 MiB
7373
t.Status(fmt.Sprintf("limiting disk bandwidth to %d bytes/s", provisionedBandwidth))
7474
staller := roachtestutil.MakeCgroupDiskStaller(t, c,
75-
false /* readsToo */, false /* logsToo */)
75+
false /* readsToo */, false /* logsToo */, false /* disableStateValidation */)
7676
staller.Setup(ctx)
7777
staller.Slow(ctx, c.CRDBNodes(), provisionedBandwidth)
7878

pkg/cmd/roachtest/tests/admission_control_elastic_mixed_version.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ func registerElasticWorkloadMixedVersion(r registry.Registry) {
6666
setDiskBandwidth := func() {
6767
t.Status(fmt.Sprintf("limiting disk bandwidth to %d bytes/s", diskBand))
6868
staller := roachtestutil.MakeCgroupDiskStaller(t, c,
69-
false /* readsToo */, false /* logsToo */)
69+
false /* readsToo */, false /* logsToo */, false /* disableStateValidation */)
7070
staller.Setup(ctx)
7171
staller.Slow(ctx, c.CRDBNodes(), diskBand)
7272
}

pkg/cmd/roachtest/tests/admission_control_snapshot_overload_io.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ func runAdmissionControlSnapshotOverloadIO(
175175
const bandwidthLimit = 128 << 20 // 128 MiB
176176
t.Status(fmt.Sprintf("limiting disk bandwidth to %d bytes/s", bandwidthLimit))
177177
staller := roachtestutil.MakeCgroupDiskStaller(t, c,
178-
false /* readsToo */, false /* logsToo */)
178+
false /* readsToo */, false /* logsToo */, false /* disableStateValidation */)
179179
staller.Setup(ctx)
180180
staller.Slow(ctx, c.CRDBNodes(), bandwidthLimit)
181181

0 commit comments

Comments
 (0)