Skip to content

Commit d44036d

Browse files
committed
roachtest: add reset failure mode smoke test
Epic: None Release note: None
1 parent 138b99d commit d44036d

File tree

1 file changed

+43
-0
lines changed

1 file changed

+43
-0
lines changed

pkg/cmd/roachtest/tests/failure_injection.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,48 @@ var processKillTests = func(c cluster.Cluster) []failureSmokeTest {
726726
return tests
727727
}
728728

729+
var resetVMTests = func(c cluster.Cluster) failureSmokeTest {
730+
rng, _ := randutil.NewPseudoRand()
731+
rebootedNode := c.CRDBNodes().SeededRandNode(rng)
732+
return failureSmokeTest{
733+
testName: failures.ResetVMFailureName,
734+
failureName: failures.ResetVMFailureName,
735+
args: failures.ResetVMArgs{
736+
Nodes: rebootedNode.InstallNodes(),
737+
},
738+
validateFailure: func(ctx context.Context, l *logger.Logger, c cluster.Cluster, f *failures.Failer) error {
739+
// Check that we aren't able to establish a SQL connection to the rebooted node.
740+
// waitForFailureToPropagate already does a similar check, but we do it here
741+
// to satisfy the smoke test framework since this is a fairly simple failure
742+
// mode with less to validate.
743+
return testutils.SucceedsSoonError(func() error {
744+
if ctx.Err() != nil {
745+
return ctx.Err()
746+
}
747+
748+
killedDB, err := c.ConnE(ctx, l, rebootedNode[0])
749+
if err == nil {
750+
defer killedDB.Close()
751+
if err := killedDB.Ping(); err == nil {
752+
return errors.Errorf("expected node %d to be dead, but it is alive", rebootedNode)
753+
} else {
754+
l.Printf("failed to connect to node %d: %v", rebootedNode, err)
755+
}
756+
} else {
757+
l.Printf("unable to establish SQL connection to node %d", rebootedNode)
758+
}
759+
return nil
760+
})
761+
},
762+
validateRecover: func(ctx context.Context, l *logger.Logger, c cluster.Cluster, f *failures.Failer) error {
763+
return nil
764+
},
765+
workload: func(ctx context.Context, c cluster.Cluster, args ...string) error {
766+
return defaultFailureSmokeTestWorkload(ctx, c, "--tolerate-errors")
767+
},
768+
}
769+
}
770+
729771
func defaultFailureSmokeTestWorkload(ctx context.Context, c cluster.Cluster, args ...string) error {
730772
workloadArgs := strings.Join(args, " ")
731773
cmd := roachtestutil.NewCommand("./cockroach workload run kv %s", workloadArgs).
@@ -772,6 +814,7 @@ func runFailureSmokeTest(ctx context.Context, t test.Test, c cluster.Cluster, no
772814
asymmetricOutgoingNetworkPartitionTest(c),
773815
latencyTest(c),
774816
dmsetupDiskStallTest(c),
817+
resetVMTests(c),
775818
}
776819
failureSmokeTests = append(failureSmokeTests, cgroupsDiskStallTests(c)...)
777820
failureSmokeTests = append(failureSmokeTests, processKillTests(c)...)

0 commit comments

Comments
 (0)