Skip to content

Commit a529896

Browse files
committed
tapgarden: remove extra error sent during cancel
In this commit, we remove an extra error broadcast during caretaker cancellation that could prevent graceful shutdown. If the caretaker state machine has not reached BatchStateBroadcast, it sends potential errors to the planter on a channel with capacity of one. If cancellation is requested before reaching BatchStateBroadcast and fails internally, sending that error to the planter prevents an error from being sent by the main caretaker goroutine. We also unify cancel request handling.
1 parent 24b6986 commit a529896

File tree

1 file changed

+56
-25
lines changed

1 file changed

+56
-25
lines changed

tapgarden/caretaker.go

Lines changed: 56 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -175,18 +175,24 @@ func (b *BatchCaretaker) Stop() error {
175175
return stopErr
176176
}
177177

178-
// Cancel signals for a batch caretaker to stop advancing a batch if possible.
179-
// A batch can only be cancelled if it has not reached BatchStateBroadcast yet.
180-
func (b *BatchCaretaker) Cancel() CancelResp {
178+
// Cancel signals for a batch caretaker to stop advancing a batch. A batch can
179+
// only be cancelled if it has not reached BatchStateBroadcast yet. If
180+
// cancellation succeeds, we forward the batch state after cancellation. If the
181+
// batch could not be cancelled, the planter will handle caretaker shutdown and
182+
// batch state.
183+
func (b *BatchCaretaker) Cancel() error {
181184
ctx, cancel := b.WithCtxQuit()
182185
defer cancel()
183186

184-
batchKey := b.cfg.Batch.BatchKey.PubKey.SerializeCompressed()
187+
batchKey := b.batchKey[:]
185188
batchState := b.cfg.Batch.State()
189+
var cancelResp CancelResp
190+
186191
// This function can only be called before the caretaker state stepping
187192
// function, so the batch state read is the next state that has not yet
188193
// been executed. Seedlings are converted to asset sprouts in the Frozen
189194
// state, and broadcast in the Broadast state.
195+
log.Debugf("BatchCaretaker(%x): Trying to cancel", batchKey)
190196
switch batchState {
191197
// In the pending state, the batch seedlings have not sprouted yet.
192198
case BatchStatePending, BatchStateFrozen:
@@ -200,9 +206,7 @@ func (b *BatchCaretaker) Cancel() CancelResp {
200206
"cancel failed: %w", batchKey, batchState, err)
201207
}
202208

203-
b.cfg.BroadcastErrChan <- fmt.Errorf("caretaker canceled")
204-
205-
return CancelResp{&finalBatchState, err}
209+
cancelResp = CancelResp{&finalBatchState, err}
206210

207211
case BatchStateCommitted:
208212
finalBatchState := BatchStateSproutCancelled
@@ -215,15 +219,29 @@ func (b *BatchCaretaker) Cancel() CancelResp {
215219
"cancel failed: %w", batchKey, batchState, err)
216220
}
217221

218-
b.cfg.BroadcastErrChan <- fmt.Errorf("caretaker canceled")
219-
220-
return CancelResp{&finalBatchState, err}
222+
cancelResp = CancelResp{&finalBatchState, err}
221223

222224
default:
223225
err := fmt.Errorf("BatchCaretaker(%x), batch not cancellable",
224226
b.cfg.Batch.BatchKey.PubKey.SerializeCompressed())
225-
return CancelResp{nil, err}
227+
cancelResp = CancelResp{nil, err}
226228
}
229+
230+
b.cfg.CancelRespChan <- cancelResp
231+
232+
// If the batch was cancellable, the finalState of the cancel response
233+
// will be non-nil. If the cancellation failed, that error will be
234+
// handled by the planter. At this point, the caretaker should always
235+
// shut down gracefully.
236+
if cancelResp.finalState != nil {
237+
log.Infof("BatchCaretaker(%x), attempted batch cancellation, "+
238+
"shutting down", b.batchKey[:])
239+
240+
return nil
241+
}
242+
243+
return fmt.Errorf("BatchCaretaker(%x) cancellation failed",
244+
b.batchKey[:])
227245
}
228246

229247
// advanceStateUntil attempts to advance the internal state machine until the
@@ -243,22 +261,20 @@ func (b *BatchCaretaker) advanceStateUntil(currentState,
243261
return 0, fmt.Errorf("BatchCaretaker(%x), shutting "+
244262
"down", b.batchKey[:])
245263

264+
// If the batch was cancellable, the finalState of the cancel
265+
// response will be non-nil. If the cancellation failed, that
266+
// error will be handled by the planter. At this point, the
267+
// caretaker should always shut down gracefully.
246268
case <-b.cfg.CancelReqChan:
247-
cancelResp := b.Cancel()
248-
b.cfg.CancelRespChan <- cancelResp
249-
250-
// TODO(jhb): Use concrete error types for caretaker
251-
// shutdown cases
252-
// If the batch was cancellable, the finalState of the
253-
// cancel response will be non-nil. If the cancellation
254-
// failed, that error will be handled by the planter.
255-
// At this point, the caretaker should always shut down
256-
// gracefully.
257-
if cancelResp.finalState != nil {
269+
cancelErr := b.Cancel()
270+
if cancelErr == nil {
258271
return 0, fmt.Errorf("BatchCaretaker(%x), "+
259272
"attempted batch cancellation, "+
260273
"shutting down", b.batchKey[:])
261274
}
275+
276+
log.Info(cancelErr)
277+
262278
default:
263279
}
264280

@@ -362,7 +378,12 @@ func (b *BatchCaretaker) assetCultivator() {
362378
return
363379

364380
case <-b.cfg.CancelReqChan:
365-
b.cfg.CancelRespChan <- b.Cancel()
381+
cancelErr := b.Cancel()
382+
if cancelErr == nil {
383+
return
384+
}
385+
386+
log.Error(cancelErr)
366387

367388
case <-b.Quit:
368389
return
@@ -866,7 +887,12 @@ func (b *BatchCaretaker) stateStep(currentState BatchState) (BatchState, error)
866887
"done")
867888

868889
case <-b.cfg.CancelReqChan:
869-
b.cfg.CancelRespChan <- b.Cancel()
890+
cancelErr := b.Cancel()
891+
if cancelErr == nil {
892+
return
893+
}
894+
895+
log.Info(cancelErr)
870896

871897
case <-b.Quit:
872898
log.Debugf("Skipping TX confirmation, exiting")
@@ -887,7 +913,12 @@ func (b *BatchCaretaker) stateStep(currentState BatchState) (BatchState, error)
887913
"done")
888914

889915
case <-b.cfg.CancelReqChan:
890-
b.cfg.CancelRespChan <- b.Cancel()
916+
cancelErr := b.Cancel()
917+
if cancelErr == nil {
918+
return
919+
}
920+
921+
log.Info(cancelErr)
891922

892923
case <-b.Quit:
893924
log.Debugf("Skipping TX confirmation, exiting")

0 commit comments

Comments
 (0)