@@ -726,6 +726,48 @@ var processKillTests = func(c cluster.Cluster) []failureSmokeTest {
726
726
return tests
727
727
}
728
728
729
+ var resetVMTests = func (c cluster.Cluster ) failureSmokeTest {
730
+ rng , _ := randutil .NewPseudoRand ()
731
+ rebootedNode := c .CRDBNodes ().SeededRandNode (rng )
732
+ return failureSmokeTest {
733
+ testName : failures .ResetVMFailureName ,
734
+ failureName : failures .ResetVMFailureName ,
735
+ args : failures.ResetVMArgs {
736
+ Nodes : rebootedNode .InstallNodes (),
737
+ },
738
+ validateFailure : func (ctx context.Context , l * logger.Logger , c cluster.Cluster , f * failures.Failer ) error {
739
+ // Check that we aren't able to establish a SQL connection to the rebooted node.
740
+ // waitForFailureToPropagate already does a similar check, but we do it here
741
+ // to satisfy the smoke test framework since this is a fairly simple failure
742
+ // mode with less to validate.
743
+ return testutils .SucceedsSoonError (func () error {
744
+ if ctx .Err () != nil {
745
+ return ctx .Err ()
746
+ }
747
+
748
+ killedDB , err := c .ConnE (ctx , l , rebootedNode [0 ])
749
+ if err == nil {
750
+ defer killedDB .Close ()
751
+ if err := killedDB .Ping (); err == nil {
752
+ return errors .Errorf ("expected node %d to be dead, but it is alive" , rebootedNode )
753
+ } else {
754
+ l .Printf ("failed to connect to node %d: %v" , rebootedNode , err )
755
+ }
756
+ } else {
757
+ l .Printf ("unable to establish SQL connection to node %d" , rebootedNode )
758
+ }
759
+ return nil
760
+ })
761
+ },
762
+ validateRecover : func (ctx context.Context , l * logger.Logger , c cluster.Cluster , f * failures.Failer ) error {
763
+ return nil
764
+ },
765
+ workload : func (ctx context.Context , c cluster.Cluster , args ... string ) error {
766
+ return defaultFailureSmokeTestWorkload (ctx , c , "--tolerate-errors" )
767
+ },
768
+ }
769
+ }
770
+
729
771
func defaultFailureSmokeTestWorkload (ctx context.Context , c cluster.Cluster , args ... string ) error {
730
772
workloadArgs := strings .Join (args , " " )
731
773
cmd := roachtestutil .NewCommand ("./cockroach workload run kv %s" , workloadArgs ).
@@ -772,6 +814,7 @@ func runFailureSmokeTest(ctx context.Context, t test.Test, c cluster.Cluster, no
772
814
asymmetricOutgoingNetworkPartitionTest (c ),
773
815
latencyTest (c ),
774
816
dmsetupDiskStallTest (c ),
817
+ resetVMTests (c ),
775
818
}
776
819
failureSmokeTests = append (failureSmokeTests , cgroupsDiskStallTests (c )... )
777
820
failureSmokeTests = append (failureSmokeTests , processKillTests (c )... )
0 commit comments