@@ -38,19 +38,18 @@ type Monitor struct {
3838}
3939
4040type Config struct {
41- // Max time to wait for other side to accept open channel request before attempting restart
41+ // Max time to wait for other side to accept open channel request before attempting restart.
42+ // Set to 0 to disable timeout.
4243 AcceptTimeout time.Duration
4344 // Debounce when restart is triggered by multiple errors
4445 RestartDebounce time.Duration
4546 // Backoff after restarting
4647 RestartBackoff time.Duration
4748 // Number of times to try to restart before failing
4849 MaxConsecutiveRestarts uint32
49- // Max time to wait for the peer to acknowledge a restart request.
50- // Note: Does not include the time taken to reconnect to the peer.
51- RestartAckTimeout time.Duration
5250 // Max time to wait for the responder to send a Complete message once all
53- // data has been sent
51+ // data has been sent.
52+ // Set to 0 to disable timeout.
5453 CompleteTimeout time.Duration
5554 // Called when a restart completes successfully
5655 OnRestartComplete func (id datatransfer.ChannelID )
@@ -74,17 +73,14 @@ func checkConfig(cfg *Config) {
7473 }
7574
7675 prefix := "data-transfer channel monitor config "
77- if cfg .AcceptTimeout <= 0 {
78- panic (fmt .Sprintf (prefix + "AcceptTimeout is %s but must be > 0" , cfg .AcceptTimeout ))
76+ if cfg .AcceptTimeout < 0 {
77+ panic (fmt .Sprintf (prefix + "AcceptTimeout is %s but must be >= 0" , cfg .AcceptTimeout ))
7978 }
8079 if cfg .MaxConsecutiveRestarts == 0 {
8180 panic (fmt .Sprintf (prefix + "MaxConsecutiveRestarts is %d but must be > 0" , cfg .MaxConsecutiveRestarts ))
8281 }
83- if cfg .RestartAckTimeout <= 0 {
84- panic (fmt .Sprintf (prefix + "RestartAckTimeout is %s but must be > 0" , cfg .RestartAckTimeout ))
85- }
86- if cfg .CompleteTimeout <= 0 {
87- panic (fmt .Sprintf (prefix + "CompleteTimeout is %s but must be > 0" , cfg .CompleteTimeout ))
82+ if cfg .CompleteTimeout < 0 {
83+ panic (fmt .Sprintf (prefix + "CompleteTimeout is %s but must be >= 0" , cfg .CompleteTimeout ))
8884 }
8985}
9086
@@ -275,6 +271,11 @@ func (mc *monitoredChannel) start() {
275271// an Accept to our open channel request before the accept timeout.
276272// Returns a function that can be used to cancel the timer.
277273func (mc * monitoredChannel ) watchForResponderAccept () func () {
274+ // Check if the accept timeout is disabled
275+ if mc .cfg .AcceptTimeout == 0 {
276+ return func () {}
277+ }
278+
278279 // Start a timer for the accept timeout
279280 timer := time .NewTimer (mc .cfg .AcceptTimeout )
280281
@@ -297,6 +298,11 @@ func (mc *monitoredChannel) watchForResponderAccept() func() {
297298
298299// Wait up to the configured timeout for the responder to send a Complete message
299300func (mc * monitoredChannel ) watchForResponderComplete () {
301+ // Check if the complete timeout is disabled
302+ if mc .cfg .CompleteTimeout == 0 {
303+ return
304+ }
305+
300306 // Start a timer for the complete timeout
301307 timer := time .NewTimer (mc .cfg .CompleteTimeout )
302308 defer timer .Stop ()
@@ -308,7 +314,7 @@ func (mc *monitoredChannel) watchForResponderComplete() {
308314 case <- timer .C :
309315 // Timer expired before we received a Complete message from the responder
310316 err := xerrors .Errorf ("%s: timed out waiting %s for Complete message from remote peer" ,
311- mc .chid , mc .cfg .AcceptTimeout )
317+ mc .chid , mc .cfg .CompleteTimeout )
312318 mc .closeChannelAndShutdown (err )
313319 }
314320}
@@ -414,8 +420,7 @@ func (mc *monitoredChannel) doRestartChannel() error {
414420 err := mc .sendRestartMessage (restartCount )
415421 if err != nil {
416422 log .Warnf ("%s: restart failed, trying again: %s" , mc .chid , err )
417- // If the restart message could not be sent, or there was a timeout
418- // waiting for the restart to be acknowledged, try again
423+ // If the restart message could not be sent, try again
419424 return mc .doRestartChannel ()
420425 }
421426 log .Infof ("%s: restart completed successfully" , mc .chid )
@@ -438,25 +443,12 @@ func (mc *monitoredChannel) sendRestartMessage(restartCount int) error {
438443 log .Infof ("%s: re-established connection to %s in %s" , mc .chid , p , time .Since (start ))
439444
440445 // Send a restart message for the channel
441- restartResult := mc .waitForRestartResponse ()
442446 log .Infof ("%s: sending restart message to %s (%d consecutive restarts)" , mc .chid , p , restartCount )
443447 err = mc .mgr .RestartDataTransferChannel (mc .ctx , mc .chid )
444448 if err != nil {
445449 return xerrors .Errorf ("%s: failed to send restart message to %s: %w" , mc .chid , p , err )
446450 }
447451
448- // The restart message is fire and forget, so we need to watch for a
449- // restart response to know that the restart message reached the peer.
450- select {
451- case <- mc .ctx .Done ():
452- return nil // channel shutdown so just bail out
453- case err = <- restartResult :
454- if err != nil {
455- return xerrors .Errorf ("%s: failed to send restart message to %s: %w" , mc .chid , p , err )
456- }
457- }
458- log .Infof ("%s: received restart response from %s" , mc .chid , p )
459-
460452 // The restart message was sent successfully.
461453 // If a restart backoff is configured, backoff after a restart before
462454 // attempting another.
@@ -490,47 +482,3 @@ func (mc *monitoredChannel) closeChannelAndShutdown(cherr error) {
490482 log .Errorf ("error closing data-transfer channel %s: %s" , mc .chid , err )
491483 }
492484}
493-
494- // Wait for the peer to send an acknowledgement to the restart request
495- func (mc * monitoredChannel ) waitForRestartResponse () chan error {
496- restartFired := make (chan struct {})
497- restarted := make (chan error , 1 )
498- timer := time .NewTimer (mc .cfg .RestartAckTimeout )
499-
500- unsub := mc .mgr .SubscribeToEvents (func (event datatransfer.Event , channelState datatransfer.ChannelState ) {
501- if channelState .ChannelID () != mc .chid {
502- return
503- }
504-
505- // The Restart event is fired when we receive an acknowledgement
506- // from the peer that it has received a restart request
507- if event .Code == datatransfer .Restart {
508- close (restartFired )
509- }
510- })
511-
512- go func () {
513- defer unsub ()
514- defer timer .Stop ()
515-
516- select {
517-
518- // Restart ack received from peer
519- case <- restartFired :
520- restarted <- nil
521-
522- // Channel monitor shutdown, just bail out
523- case <- mc .ctx .Done ():
524- restarted <- nil
525-
526- // Timer expired before receiving a restart ack from peer
527- case <- timer .C :
528- p := mc .chid .OtherParty (mc .mgr .PeerID ())
529- err := xerrors .Errorf ("did not receive response to restart request from %s after %s" ,
530- p , mc .cfg .RestartAckTimeout )
531- restarted <- err
532- }
533- }()
534-
535- return restarted
536- }
0 commit comments