@@ -7,12 +7,14 @@ package roachtestutil
7
7
8
8
import (
9
9
"context"
10
+ "time"
10
11
11
12
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
12
13
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
13
14
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec"
14
15
"github.com/cockroachdb/cockroach/pkg/roachprod/failureinjection/failures"
15
16
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
17
+ "github.com/cockroachdb/errors"
16
18
)
17
19
18
20
// TODO(darryl): Once the failure injection library is a first class citizen of roachtest,
@@ -22,8 +24,9 @@ type DiskStaller interface {
22
24
Setup (ctx context.Context )
23
25
Cleanup (ctx context.Context )
24
26
Stall (ctx context.Context , nodes option.NodeListOption )
27
+ StallCycle (ctx context.Context , nodes option.NodeListOption , stallDuration , unstallDuration time.Duration )
25
28
Slow (ctx context.Context , nodes option.NodeListOption , bytesPerSecond int )
26
- Unstall (ctx context.Context , nodes option.NodeListOption )
29
+ Unstall (ctx context.Context , nodes option.NodeListOption ) error
27
30
DataDir () string
28
31
LogDir () string
29
32
}
@@ -38,7 +41,11 @@ func (n NoopDiskStaller) LogDir() string
38
41
// Setup is a no-op: the noop staller has nothing to prepare.
func (n NoopDiskStaller) Setup(ctx context.Context) {
}
39
42
// Slow is a no-op: every argument is ignored and no throttling is applied.
func (n NoopDiskStaller) Slow(_ context.Context, _ option.NodeListOption, _ int) {
}
40
43
// Stall is a no-op: every argument is ignored and no disk is stalled.
func (n NoopDiskStaller) Stall(_ context.Context, _ option.NodeListOption) {
}
41
- func (n NoopDiskStaller ) Unstall (_ context.Context , _ option.NodeListOption ) {}
44
+ func (n NoopDiskStaller ) StallCycle (
45
+ _ context.Context , _ option.NodeListOption , _ , _ time.Duration ,
46
+ ) {
47
+ }
48
+ func (n NoopDiskStaller ) Unstall (_ context.Context , _ option.NodeListOption ) error { return nil }
42
49
43
50
type Fataler interface {
44
51
Fatal (args ... interface {})
@@ -103,6 +110,23 @@ func (s *cgroupDiskStaller) Stall(ctx context.Context, nodes option.NodeListOpti
103
110
}
104
111
}
105
112
113
+ func (s * cgroupDiskStaller ) StallCycle (
114
+ ctx context.Context , nodes option.NodeListOption , stallDuration , unstallDuration time.Duration ,
115
+ ) {
116
+ l := newDiskStallLogger (s .f .L (), nodes , "Stall" )
117
+ if err := s .Failer .Inject (ctx , l , failures.DiskStallArgs {
118
+ StallLogs : s .stallLogs ,
119
+ StallWrites : true ,
120
+ StallReads : s .stallReads ,
121
+ Nodes : nodes .InstallNodes (),
122
+ Cycle : true ,
123
+ CycleStallDuration : stallDuration ,
124
+ CycleUnstallDuration : unstallDuration ,
125
+ }); err != nil {
126
+ s .f .Fatalf ("failed to stall disk: %s" , err )
127
+ }
128
+ }
129
+
106
130
func (s * cgroupDiskStaller ) Slow (
107
131
ctx context.Context , nodes option.NodeListOption , bytesPerSecond int ,
108
132
) {
@@ -118,11 +142,12 @@ func (s *cgroupDiskStaller) Slow(
118
142
}
119
143
}
120
144
121
- func (s * cgroupDiskStaller ) Unstall (ctx context.Context , nodes option.NodeListOption ) {
145
+ func (s * cgroupDiskStaller ) Unstall (ctx context.Context , nodes option.NodeListOption ) error {
122
146
l := newDiskStallLogger (s .f .L (), nodes , "Unstall" )
123
- if err := s .Failer .Recover (ctx , l ); err != nil {
124
- s .f .Fatalf ("failed to unstall disk: %s" , err )
125
- }
147
+ // cgroup may fail when unstalling the disk, usually because the node already
148
+ // fataled and the cgroup is no longer available. Return the error and let
149
+ // the caller decide if a node fatal is expected or not.
150
+ return errors .Wrap (s .Failer .Recover (ctx , l ), "failed to unstall disk" )
126
151
}
127
152
128
153
type dmsetupDiskStaller struct {
@@ -168,18 +193,34 @@ func (s *dmsetupDiskStaller) Stall(ctx context.Context, nodes option.NodeListOpt
168
193
}
169
194
}
170
195
196
+ func (s * dmsetupDiskStaller ) StallCycle (
197
+ ctx context.Context , nodes option.NodeListOption , stallDuration , unstallDuration time.Duration ,
198
+ ) {
199
+ l := newDiskStallLogger (s .f .L (), nodes , "Stall" )
200
+ if err := s .Failer .Inject (ctx , l , failures.DiskStallArgs {
201
+ Nodes : nodes .InstallNodes (),
202
+ Cycle : true ,
203
+ CycleStallDuration : stallDuration ,
204
+ CycleUnstallDuration : unstallDuration ,
205
+ }); err != nil {
206
+ s .f .Fatalf ("failed to stall disk: %s" , err )
207
+ }
208
+ }
209
+
171
210
func (s * dmsetupDiskStaller ) Slow (
172
211
ctx context.Context , nodes option.NodeListOption , bytesPerSecond int ,
173
212
) {
174
213
// TODO(baptist): Consider https://github.com/kawamuray/ddi.
175
214
s .f .Fatal ("Slow is not supported for dmsetupDiskStaller" )
176
215
}
177
216
178
- func (s * dmsetupDiskStaller ) Unstall (ctx context.Context , nodes option.NodeListOption ) {
217
+ func (s * dmsetupDiskStaller ) Unstall (ctx context.Context , nodes option.NodeListOption ) error {
179
218
l := newDiskStallLogger (s .f .L (), nodes , "Unstall" )
219
+ // Any unstall error for dmsetup is unexpected and should fail the test.
180
220
if err := s .Failer .Recover (ctx , l ); err != nil {
181
221
s .f .Fatalf ("failed to unstall disk: %s" , err )
182
222
}
223
+ return nil
183
224
}
184
225
185
226
// DataDir returns the literal "{store-dir}" placeholder.
// NOTE(review): presumably expanded to the node's store directory elsewhere —
// confirm against callers.
func (s *dmsetupDiskStaller) DataDir() string {
	return "{store-dir}"
}
0 commit comments