@@ -789,46 +789,51 @@ var processKillTests = func(c cluster.Cluster) []failureSmokeTest {
789
789
return tests
790
790
}
791
791
792
- var resetVMTests = func (c cluster.Cluster ) failureSmokeTest {
792
+ var resetVMTests = func (c cluster.Cluster ) [] failureSmokeTest {
793
793
rng , _ := randutil .NewPseudoRand ()
794
794
rebootedNode := c .CRDBNodes ().SeededRandNode (rng )
795
- return failureSmokeTest {
796
- testName : failures .ResetVMFailureName ,
797
- failureName : failures .ResetVMFailureName ,
798
- args : failures.ResetVMArgs {
799
- Nodes : rebootedNode .InstallNodes (),
800
- },
801
- validateFailure : func (ctx context.Context , l * logger.Logger , c cluster.Cluster , f * failures.Failer ) error {
802
- // Check that we aren't able to establish a SQL connection to the rebooted node.
803
- // waitForFailureToPropagate already does a similar check, but we do it here
804
- // to satisfy the smoke test framework since this is a fairly simple failure
805
- // mode with less to validate.
806
- return testutils .SucceedsSoonError (func () error {
807
- if ctx .Err () != nil {
808
- return ctx .Err ()
809
- }
795
+ var tests []failureSmokeTest
796
+ for _ , stopCluster := range []bool {true , false } {
797
+ tests = append (tests , failureSmokeTest {
798
+ testName : fmt .Sprintf ("%s/StopCluster=%t" , failures .ResetVMFailureName , stopCluster ),
799
+ failureName : failures .ResetVMFailureName ,
800
+ args : failures.ResetVMArgs {
801
+ Nodes : rebootedNode .InstallNodes (),
802
+ StopProcesses : stopCluster ,
803
+ },
804
+ validateFailure : func (ctx context.Context , l * logger.Logger , c cluster.Cluster , f * failures.Failer ) error {
805
+ // Check that we aren't able to establish a SQL connection to the rebooted node.
806
+ // waitForFailureToPropagate already does a similar check, but we do it here
807
+ // to satisfy the smoke test framework since this is a fairly simple failure
808
+ // mode with less to validate.
809
+ return testutils .SucceedsSoonError (func () error {
810
+ if ctx .Err () != nil {
811
+ return ctx .Err ()
812
+ }
810
813
811
- killedDB , err := c .ConnE (ctx , l , rebootedNode [0 ])
812
- if err == nil {
813
- defer killedDB .Close ()
814
- if err := killedDB .Ping (); err == nil {
815
- return errors .Errorf ("expected node %d to be dead, but it is alive" , rebootedNode )
814
+ killedDB , err := c .ConnE (ctx , l , rebootedNode [0 ])
815
+ if err == nil {
816
+ defer killedDB .Close ()
817
+ if err := killedDB .Ping (); err == nil {
818
+ return errors .Errorf ("expected node %d to be dead, but it is alive" , rebootedNode )
819
+ } else {
820
+ l .Printf ("failed to connect to node %d: %v" , rebootedNode , err )
821
+ }
816
822
} else {
817
- l .Printf ("failed to connect to node %d: %v " , rebootedNode , err )
823
+ l .Printf ("unable to establish SQL connection to node %d" , rebootedNode )
818
824
}
819
- } else {
820
- l .Printf ("unable to establish SQL connection to node %d" , rebootedNode )
821
- }
825
+ return nil
826
+ })
827
+ },
828
+ validateRecover : func (ctx context.Context , l * logger.Logger , c cluster.Cluster , f * failures.Failer ) error {
822
829
return nil
823
- })
824
- },
825
- validateRecover : func (ctx context.Context , l * logger.Logger , c cluster.Cluster , f * failures.Failer ) error {
826
- return nil
827
- },
828
- workload : func (ctx context.Context , c cluster.Cluster , args ... string ) error {
829
- return defaultFailureSmokeTestWorkload (ctx , c , "--tolerate-errors" )
830
- },
830
+ },
831
+ workload : func (ctx context.Context , c cluster.Cluster , args ... string ) error {
832
+ return defaultFailureSmokeTestWorkload (ctx , c , "--tolerate-errors" )
833
+ },
834
+ })
831
835
}
836
+ return tests
832
837
}
833
838
834
839
func defaultFailureSmokeTestWorkload (ctx context.Context , c cluster.Cluster , args ... string ) error {
@@ -873,11 +878,11 @@ func runFailureSmokeTest(ctx context.Context, t test.Test, c cluster.Cluster, no
873
878
asymmetricOutgoingNetworkPartitionTest (c ),
874
879
latencyTest (c ),
875
880
dmsetupDiskStallTest (c ),
876
- resetVMTests (c ),
877
881
cgroupStallLogsTest (c ),
878
882
}
879
883
failureSmokeTests = append (failureSmokeTests , cgroupsDiskStallTests (c )... )
880
884
failureSmokeTests = append (failureSmokeTests , processKillTests (c )... )
885
+ failureSmokeTests = append (failureSmokeTests , resetVMTests (c )... )
881
886
882
887
// Randomize the order of the tests in case any of the failures have unexpected side
883
888
// effects that may mask failures, e.g. a cgroups disk stall isn't properly recovered
0 commit comments