@@ -444,7 +444,7 @@ func registerDiskStalledWALFailoverWithProgress(r registry.Registry) {
444
444
func runDiskStalledWALFailoverWithProgress (ctx context.Context , t test.Test , c cluster.Cluster ) {
445
445
const (
446
446
testDuration = 1 * time .Hour
447
- // We'll issue short stalls every 10s to keep us in the failover state.
447
+ // We'll issue short stalls every 5s to keep us in the failover state.
448
448
stallInterval = 5 * time .Second
449
449
shortStallDur = 200 * time .Millisecond
450
450
// For each loop, each operation will start after a random wait between [30s, 150s).
@@ -560,13 +560,13 @@ func runDiskStalledWALFailoverWithProgress(ctx context.Context, t test.Test, c c
560
560
select {
561
561
case <- ctx .Done ():
562
562
t .Fatalf ("context done before workload started: %s" , ctx .Err ())
563
- case <- time .After (20 * time .Second ):
563
+ case <- time .After (30 * time .Second ):
564
564
t .Status ("starting QPS sampling" )
565
565
}
566
566
567
- // Calculate approx how many samples we can take before workload ends.
568
567
// We want to stop sampling 10s before workload ends to avoid sampling during shutdown.
569
- samplingDuration := operationDur - 30 * time .Second // 20s initial wait + 10s buffer at workload end
568
+ // We'll take approx. 14 samples with this configuration.
569
+ samplingDuration := operationDur - 40 * time .Second // 30s initial wait + 10s buffer at workload end
570
570
sampleCount := int (samplingDuration / sampleInterval )
571
571
572
572
sampleTimer := time .NewTicker (sampleInterval )
@@ -620,19 +620,24 @@ func runDiskStalledWALFailoverWithProgress(ctx context.Context, t test.Test, c c
620
620
t .Status ("starting disk stall" )
621
621
}
622
622
stallStart := timeutil .Now ()
623
- // Execute short 200ms stalls every 10s .
623
+ // Execute short 200ms stalls every 5s.
624
624
for timeutil .Since (stallStart ) < operationDur {
625
625
select {
626
626
case <- ctx .Done ():
627
627
t .Fatalf ("context done while stall induced: %s" , ctx .Err ())
628
628
case <- time .After (stallInterval ):
629
629
func () {
630
- s .Stall (ctx , c .Node (1 ))
631
630
t .Status ("short disk stall on n1" )
631
+ s .Stall (ctx , c .Node (1 ))
632
632
defer func () {
633
+ // NB: We use a background context in the deferred unstall command,
634
+ // otherwise on test failure our Unstall calls will be ignored. Leaving
635
+ // the disk stalled will prevent artifact collection, making debugging
636
+ // difficult.
633
637
ctx , cancel := context .WithTimeout (context .Background (), time .Minute )
634
638
defer cancel ()
635
639
s .Unstall (ctx , c .Node (1 ))
640
+ t .Status ("unstalled disk on n1" )
636
641
}()
637
642
select {
638
643
case <- ctx .Done ():
0 commit comments