Skip to content

Commit d37deb6

Browse files
committed
roachtest: fix failover disk stall
This fixes the failover disk staller to be compatible with the FI library. Specifically, it no longer attempts to recover (unstall the disks) in cleanup, as recovery is already handled by the FI framework. Additionally, it temporarily prevents concurrent disk stall failures from being injected, because the FI framework does not allow another failure to be injected until the previous one has been recovered. As a follow-up, concurrent disk stall failures should instead be merged and injected in one shot.
1 parent 7ca9db8 commit d37deb6

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

pkg/cmd/roachtest/tests/failover.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,18 +1594,24 @@ type diskStallFailer struct {
15941594
staller diskStaller
15951595
}
15961596

1597-
func (f *diskStallFailer) Mode() failureMode { return failureModeDiskStall }
1598-
func (f *diskStallFailer) String() string { return string(f.Mode()) }
1599-
func (f *diskStallFailer) CanUseLocal() bool { return false } // needs dmsetup
1600-
func (f *diskStallFailer) CanUseChaos() bool { return true }
1601-
func (f *diskStallFailer) CanRunWith(failureMode) bool { return true }
1597+
func (f *diskStallFailer) Mode() failureMode { return failureModeDiskStall }
1598+
func (f *diskStallFailer) String() string { return string(f.Mode()) }
1599+
func (f *diskStallFailer) CanUseLocal() bool { return false } // needs dmsetup
1600+
func (f *diskStallFailer) CanUseChaos() bool { return true }
1601+
1602+
// CanRunWith returns false for other disk stalls, as the FI library it uses
1603+
// does not allow concurrent failure modes to be injected without recovering
1604+
// from them first.
1605+
// TODO(darryl): This is a temporary workaround to reduce test failure noise.
1606+
// We should fix this by merging concurrent disk stall failures and injecting
1607+
// them in one shot.
1608+
func (f *diskStallFailer) CanRunWith(other failureMode) bool { return other != failureModeDiskStall }
16021609

16031610
func (f *diskStallFailer) Setup(ctx context.Context) {
16041611
f.staller.Setup(ctx)
16051612
}
16061613

16071614
func (f *diskStallFailer) Cleanup(ctx context.Context) {
1608-
f.staller.Unstall(ctx, f.c.All())
16091615
// We have to stop the cluster before cleaning up the staller.
16101616
f.m.ExpectDeaths(int32(f.c.Spec().NodeCount))
16111617
f.c.Stop(ctx, f.t.L(), option.DefaultStopOpts(), f.c.All())

0 commit comments

Comments
 (0)