99 "context"
1010 "fmt"
1111 "math/rand"
12+ "time"
1213
1314 "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
1415 "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
@@ -35,14 +36,18 @@ type failureSmokeTest struct {
3536
3637func (t * failureSmokeTest ) run (
3738 ctx context.Context , l * logger.Logger , c cluster.Cluster , fr * failures.FailureRegistry ,
38- ) error {
39+ ) ( err error ) {
3940 // TODO(darryl): In the future, roachtests should interact with the failure injection library
4041 // through helper functions in roachtestutil so they don't have to interface with roachprod
4142 // directly.
42- failureMode , err := fr .GetFailureMode (c .MakeNodes (), t .failureName , l , c .IsSecure ())
43+ failureMode , err := fr .GetFailureMode (c .MakeNodes (c . CRDBNodes () ), t .failureName , l , c .IsSecure ())
4344 if err != nil {
4445 return err
4546 }
47+ // Make sure to cleanup the failure mode even if the test fails.
48+ defer func () {
49+ err = errors .CombineErrors (err , failureMode .Cleanup (ctx , l , t .args ))
50+ }()
4651 if err = failureMode .Setup (ctx , l , t .args ); err != nil {
4752 return err
4853 }
@@ -67,13 +72,7 @@ func (t *failureSmokeTest) run(
6772 return err
6873 }
6974
70- if err = t .validateRestore (ctx , l , c ); err != nil {
71- return err
72- }
73- if err = failureMode .Cleanup (ctx , l , t .args ); err != nil {
74- return err
75- }
76- return nil
75+ return t .validateRestore (ctx , l , c )
7776}
7877
7978func (t * failureSmokeTest ) noopRun (
@@ -233,6 +232,69 @@ var asymmetricOutgoingNetworkPartitionTest = func(c cluster.Cluster) failureSmok
233232 }
234233}
235234
235+ var latencyTest = func (c cluster.Cluster ) failureSmokeTest {
236+ nodes := c .CRDBNodes ()
237+ rand .Shuffle (len (nodes ), func (i , j int ) {
238+ nodes [i ], nodes [j ] = nodes [j ], nodes [i ]
239+ })
240+ srcNode := nodes [0 ]
241+ destNode := nodes [1 ]
242+ unaffectedNode := nodes [2 ]
243+ return failureSmokeTest {
244+ testName : "Network Latency" ,
245+ failureName : failures .NetworkLatencyName ,
246+ args : failures.NetworkLatencyArgs {
247+ ArtificialLatencies : []failures.ArtificialLatency {
248+ {
249+ Source : install.Nodes {install .Node (srcNode )},
250+ Destination : install.Nodes {install .Node (destNode )},
251+ Delay : 2 * time .Second ,
252+ },
253+ {
254+ Source : install.Nodes {install .Node (destNode )},
255+ Destination : install.Nodes {install .Node (srcNode )},
256+ Delay : 2 * time .Second ,
257+ },
258+ },
259+ },
260+ validateFailure : func (ctx context.Context , l * logger.Logger , c cluster.Cluster ) error {
261+ // Note that this is one way latency, since the sender doesn't have the matching port.
262+ delayedLatency , err := roachtestutil .PortLatency (ctx , l , c , c .Nodes (srcNode ), c .Nodes (destNode ))
263+ if err != nil {
264+ return err
265+ }
266+ normalLatency , err := roachtestutil .PortLatency (ctx , l , c , c .Nodes (unaffectedNode ), c .Nodes (destNode ))
267+ if err != nil {
268+ return err
269+ }
270+ if delayedLatency < normalLatency * 2 {
271+ return errors .Errorf ("expected latency between nodes with artificial latency (n%d and n%d) to be much higher than between nodes without (n%d and n%d)" , srcNode , destNode , unaffectedNode , destNode )
272+ }
273+ if delayedLatency < time .Second || delayedLatency > 3 * time .Second {
274+ return errors .Errorf ("expected latency between nodes with artificial latency (n%d and n%d) to be at least within 1s and 3s" , srcNode , destNode )
275+ }
276+ return nil
277+ },
278+ validateRestore : func (ctx context.Context , l * logger.Logger , c cluster.Cluster ) error {
279+ delayedLatency , err := roachtestutil .PortLatency (ctx , l , c , c .Nodes (srcNode ), c .Nodes (destNode ))
280+ if err != nil {
281+ return err
282+ }
283+ normalLatency , err := roachtestutil .PortLatency (ctx , l , c , c .Nodes (unaffectedNode ), c .Nodes (destNode ))
284+ if err != nil {
285+ return err
286+ }
287+ if delayedLatency > 2 * normalLatency {
288+ return errors .Errorf ("expected latency between nodes with artificial latency (n%d and n%d) to be close to latency between nodes without (n%d and n%d)" , srcNode , destNode , unaffectedNode , destNode )
289+ }
290+ if delayedLatency > 500 * time .Millisecond {
291+ return errors .Errorf ("expected latency between nodes with artificial latency (n%d and n%d) to have restored to at least less than 500ms" , srcNode , destNode )
292+ }
293+ return nil
294+ },
295+ }
296+ }
297+
236298func setupFailureSmokeTests (ctx context.Context , t test.Test , c cluster.Cluster ) error {
237299 // Download any dependencies needed.
238300 if err := c .Install (ctx , t .L (), c .CRDBNodes (), "nmap" ); err != nil {
@@ -258,6 +320,7 @@ func runFailureSmokeTest(ctx context.Context, t test.Test, c cluster.Cluster, no
258320 bidirectionalNetworkPartitionTest (c ),
259321 asymmetricIncomingNetworkPartitionTest (c ),
260322 asymmetricOutgoingNetworkPartitionTest (c ),
323+ latencyTest (c ),
261324 }
262325
263326 // Randomize the order of the tests in case any of the failures have unexpected side
@@ -284,7 +347,7 @@ func runFailureSmokeTest(ctx context.Context, t test.Test, c cluster.Cluster, no
284347
285348func registerFISmokeTest (r registry.Registry ) {
286349 r .Add (registry.TestSpec {
287- Name : "failure-injection- smoke-test" ,
350+ Name : "failure-injection/ smoke-test" ,
288351 Owner : registry .OwnerTestEng ,
289352 Cluster : r .MakeClusterSpec (4 , spec .WorkloadNode (), spec .CPU (2 ), spec .WorkloadNodeCPU (2 ), spec .ReuseNone ()),
290353 CompatibleClouds : registry .OnlyGCE ,
@@ -295,7 +358,7 @@ func registerFISmokeTest(r registry.Registry) {
295358 },
296359 })
297360 r .Add (registry.TestSpec {
298- Name : "failure-injection-noop- smoke-test" ,
361+ Name : "failure-injection/ smoke-test/noop " ,
299362 Owner : registry .OwnerTestEng ,
300363 Cluster : r .MakeClusterSpec (4 , spec .WorkloadNode (), spec .CPU (2 ), spec .WorkloadNodeCPU (2 ), spec .ReuseNone ()),
301364 CompatibleClouds : registry .OnlyGCE ,
0 commit comments