Skip to content

Commit a67cc34

Browse files
committed
roachtest: update reset VM smoke test
The reset VM failure now has the option to stop the processes before restarting the cluster. This change updates the test to exercise the new functionality.

Epic: None
Release note: None
1 parent a62909d commit a67cc34

File tree

1 file changed

+39
-34
lines changed

1 file changed

+39
-34
lines changed

pkg/cmd/roachtest/tests/failure_injection.go

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -789,46 +789,51 @@ var processKillTests = func(c cluster.Cluster) []failureSmokeTest {
789789
return tests
790790
}
791791

792-
var resetVMTests = func(c cluster.Cluster) failureSmokeTest {
792+
var resetVMTests = func(c cluster.Cluster) []failureSmokeTest {
793793
rng, _ := randutil.NewPseudoRand()
794794
rebootedNode := c.CRDBNodes().SeededRandNode(rng)
795-
return failureSmokeTest{
796-
testName: failures.ResetVMFailureName,
797-
failureName: failures.ResetVMFailureName,
798-
args: failures.ResetVMArgs{
799-
Nodes: rebootedNode.InstallNodes(),
800-
},
801-
validateFailure: func(ctx context.Context, l *logger.Logger, c cluster.Cluster, f *failures.Failer) error {
802-
// Check that we aren't able to establish a SQL connection to the rebooted node.
803-
// waitForFailureToPropagate already does a similar check, but we do it here
804-
// to satisfy the smoke test framework since this is a fairly simple failure
805-
// mode with less to validate.
806-
return testutils.SucceedsSoonError(func() error {
807-
if ctx.Err() != nil {
808-
return ctx.Err()
809-
}
795+
var tests []failureSmokeTest
796+
for _, stopCluster := range []bool{true, false} {
797+
tests = append(tests, failureSmokeTest{
798+
testName: fmt.Sprintf("%s/StopCluster=%t", failures.ResetVMFailureName, stopCluster),
799+
failureName: failures.ResetVMFailureName,
800+
args: failures.ResetVMArgs{
801+
Nodes: rebootedNode.InstallNodes(),
802+
StopProcesses: stopCluster,
803+
},
804+
validateFailure: func(ctx context.Context, l *logger.Logger, c cluster.Cluster, f *failures.Failer) error {
805+
// Check that we aren't able to establish a SQL connection to the rebooted node.
806+
// waitForFailureToPropagate already does a similar check, but we do it here
807+
// to satisfy the smoke test framework since this is a fairly simple failure
808+
// mode with less to validate.
809+
return testutils.SucceedsSoonError(func() error {
810+
if ctx.Err() != nil {
811+
return ctx.Err()
812+
}
810813

811-
killedDB, err := c.ConnE(ctx, l, rebootedNode[0])
812-
if err == nil {
813-
defer killedDB.Close()
814-
if err := killedDB.Ping(); err == nil {
815-
return errors.Errorf("expected node %d to be dead, but it is alive", rebootedNode)
814+
killedDB, err := c.ConnE(ctx, l, rebootedNode[0])
815+
if err == nil {
816+
defer killedDB.Close()
817+
if err := killedDB.Ping(); err == nil {
818+
return errors.Errorf("expected node %d to be dead, but it is alive", rebootedNode)
819+
} else {
820+
l.Printf("failed to connect to node %d: %v", rebootedNode, err)
821+
}
816822
} else {
817-
l.Printf("failed to connect to node %d: %v", rebootedNode, err)
823+
l.Printf("unable to establish SQL connection to node %d", rebootedNode)
818824
}
819-
} else {
820-
l.Printf("unable to establish SQL connection to node %d", rebootedNode)
821-
}
825+
return nil
826+
})
827+
},
828+
validateRecover: func(ctx context.Context, l *logger.Logger, c cluster.Cluster, f *failures.Failer) error {
822829
return nil
823-
})
824-
},
825-
validateRecover: func(ctx context.Context, l *logger.Logger, c cluster.Cluster, f *failures.Failer) error {
826-
return nil
827-
},
828-
workload: func(ctx context.Context, c cluster.Cluster, args ...string) error {
829-
return defaultFailureSmokeTestWorkload(ctx, c, "--tolerate-errors")
830-
},
830+
},
831+
workload: func(ctx context.Context, c cluster.Cluster, args ...string) error {
832+
return defaultFailureSmokeTestWorkload(ctx, c, "--tolerate-errors")
833+
},
834+
})
831835
}
836+
return tests
832837
}
833838

834839
func defaultFailureSmokeTestWorkload(ctx context.Context, c cluster.Cluster, args ...string) error {
@@ -873,11 +878,11 @@ func runFailureSmokeTest(ctx context.Context, t test.Test, c cluster.Cluster, no
873878
asymmetricOutgoingNetworkPartitionTest(c),
874879
latencyTest(c),
875880
dmsetupDiskStallTest(c),
876-
resetVMTests(c),
877881
cgroupStallLogsTest(c),
878882
}
879883
failureSmokeTests = append(failureSmokeTests, cgroupsDiskStallTests(c)...)
880884
failureSmokeTests = append(failureSmokeTests, processKillTests(c)...)
885+
failureSmokeTests = append(failureSmokeTests, resetVMTests(c)...)
881886

882887
// Randomize the order of the tests in case any of the failures have unexpected side
883888
// effects that may mask failures, e.g. a cgroups disk stall isn't properly recovered

0 commit comments

Comments
 (0)