@@ -557,6 +557,71 @@ var cgroupsDiskStallTests = func(c cluster.Cluster) []failureSmokeTest {
557
557
return tests
558
558
}
559
559
560
+ var cgroupStallLogsTest = func (c cluster.Cluster ) failureSmokeTest {
561
+ nodes := c .CRDBNodes ()
562
+ rand .Shuffle (len (nodes ), func (i , j int ) {
563
+ nodes [i ], nodes [j ] = nodes [j ], nodes [i ]
564
+ })
565
+ // We only want to stall one node for this test. If we stall writes on a quorum
566
+ // of nodes, then auth-session login will fail even if logs are not stalled.
567
+ stalledNode := c .Node (nodes [0 ])
568
+ unaffectedNode := c .Node (nodes [1 ])
569
+
570
+ getSessionCookie := func (
571
+ ctx context.Context ,
572
+ l * logger.Logger ,
573
+ c cluster.Cluster ,
574
+ node option.NodeListOption ,
575
+ ) bool {
576
+ loginCmd := fmt .Sprintf (
577
+ "%s auth-session login root --url={pgurl%s} --certs-dir ./certs --only-cookie" ,
578
+ test .DefaultCockroachPath , node ,
579
+ )
580
+ err := c .RunE (ctx , option .WithNodes (node ), loginCmd )
581
+ return err == nil
582
+ }
583
+
584
+ return failureSmokeTest {
585
+ testName : fmt .Sprintf ("%s/WritesStalled=true/LogsStalled=true" , failures .CgroupsDiskStallName ),
586
+ failureName : failures .CgroupsDiskStallName ,
587
+ args : failures.DiskStallArgs {
588
+ StallWrites : true ,
589
+ RestartNodes : true ,
590
+ Nodes : stalledNode .InstallNodes (),
591
+ StallLogs : true ,
592
+ },
593
+ validateFailure : func (ctx context.Context , l * logger.Logger , c cluster.Cluster , f * failures.Failer ) error {
594
+ // Confirm symlink exists
595
+ if err := c .RunE (ctx , option .WithNodes (stalledNode ), "test -L logs" ); err != nil {
596
+ return errors .Wrapf (err , "`logs` is not a symlink on node %d" , stalledNode )
597
+ }
598
+
599
+ // The cockroach-sql-auth.log file is appended to each time an authenticated session event
600
+ // occurs, e.g. a client logging in. If we attempt to fetch a cookie from the stalled node,
601
+ // we should expect to see our request time out, as it will be unable to write to the log.
602
+ if getSessionCookie (ctx , l , c , stalledNode ) {
603
+ return errors .Errorf ("was able to successfully get session cookie from stalled node %d" , stalledNode )
604
+ }
605
+
606
+ // The unaffected node should be able to write to the log, so we should be able to
607
+ // get the session cookie with no issues.
608
+ if ! getSessionCookie (ctx , l , c , unaffectedNode ) {
609
+ return errors .Errorf ("was unable to get session cookie from unaffected node %d" , unaffectedNode )
610
+ }
611
+ return nil
612
+ },
613
+ validateRecover : func (ctx context.Context , l * logger.Logger , c cluster.Cluster , f * failures.Failer ) error {
614
+ if ! getSessionCookie (ctx , l , c , stalledNode ) {
615
+ return errors .Errorf ("was unable to get session cookie from stalled node %d" , stalledNode )
616
+ }
617
+ return nil
618
+ },
619
+ workload : func (ctx context.Context , c cluster.Cluster , args ... string ) error {
620
+ return nil
621
+ },
622
+ }
623
+ }
624
+
560
625
var dmsetupDiskStallTest = func (c cluster.Cluster ) failureSmokeTest {
561
626
rng , _ := randutil .NewPseudoRand ()
562
627
// SeededRandGroups only returns an error if the requested size is larger than the
@@ -776,9 +841,7 @@ func defaultFailureSmokeTestWorkload(ctx context.Context, c cluster.Cluster, arg
776
841
return c .RunE (ctx , option .WithNodes (c .WorkloadNode ()), cmd )
777
842
}
778
843
779
- func setupFailureSmokeTests (
780
- ctx context.Context , t test.Test , c cluster.Cluster , fr * failures.FailureRegistry ,
781
- ) error {
844
+ func setupFailureSmokeTests (ctx context.Context , t test.Test , c cluster.Cluster ) error {
782
845
// Download any dependencies needed.
783
846
if err := c .Install (ctx , t .L (), c .CRDBNodes (), "nmap" ); err != nil {
784
847
return err
@@ -804,7 +867,7 @@ func setupFailureSmokeTests(
804
867
func runFailureSmokeTest (ctx context.Context , t test.Test , c cluster.Cluster , noopFailer bool ) {
805
868
fr := failures .NewFailureRegistry ()
806
869
fr .Register ()
807
- if err := setupFailureSmokeTests (ctx , t , c , fr ); err != nil {
870
+ if err := setupFailureSmokeTests (ctx , t , c ); err != nil {
808
871
t .Error (err )
809
872
}
810
873
@@ -815,6 +878,7 @@ func runFailureSmokeTest(ctx context.Context, t test.Test, c cluster.Cluster, no
815
878
latencyTest (c ),
816
879
dmsetupDiskStallTest (c ),
817
880
resetVMTests (c ),
881
+ cgroupStallLogsTest (c ),
818
882
}
819
883
failureSmokeTests = append (failureSmokeTests , cgroupsDiskStallTests (c )... )
820
884
failureSmokeTests = append (failureSmokeTests , processKillTests (c )... )
0 commit comments