@@ -160,7 +160,7 @@ type monitoredChannel struct {
160160 chid datatransfer.ChannelID
161161 cfg * Config
162162 unsub datatransfer.Unsubscribe
163- restartChannelDebounced func ()
163+ restartChannelDebounced func (error )
164164 onShutdown func (datatransfer.ChannelID )
165165 shutdownLk sync.Mutex
166166
@@ -187,8 +187,33 @@ func newMonitoredChannel(
187187 cfg : cfg ,
188188 onShutdown : onShutdown ,
189189 }
190+
191+ // "debounce" calls to restart channel, ie if there are multiple calls in a
192+ // short space of time, only send a message to restart the channel once
193+ var lk sync.Mutex
194+ var lastErr error
190195 debouncer := debounce .New (cfg .RestartDebounce )
191- mpc .restartChannelDebounced = func () { debouncer (mpc .restartChannel ) }
196+ mpc .restartChannelDebounced = func (err error ) {
197+ // Log the error at debug level
198+ log .Debug (err .Error ())
199+
200+ // Save the last error passed to restartChannelDebounced
201+ lk .Lock ()
202+ lastErr = err
203+ lk .Unlock ()
204+
205+ debouncer (func () {
206+ // Log only the last error passed to restartChannelDebounced at warning level
207+ lk .Lock ()
208+ log .Warnf ("%s" , lastErr )
209+ lk .Unlock ()
210+
211+ // Restart the channel
212+ mpc .restartChannel ()
213+ })
214+ }
215+
216+ // Start monitoring the channel
192217 mpc .start ()
193218 return mpc
194219}
@@ -247,13 +272,13 @@ func (mc *monitoredChannel) start() {
247272 case datatransfer .SendDataError :
248273 // If the transport layer reports an error sending data over the wire,
249274 // attempt to restart the channel
250- log . Warnf ("%s: data transfer transport send error, restarting data transfer" , mc .chid )
251- go mc .restartChannelDebounced ()
275+ err := xerrors . Errorf ("%s: data transfer transport send error, restarting data transfer" , mc .chid )
276+ go mc .restartChannelDebounced (err )
252277 case datatransfer .ReceiveDataError :
253278 // If the transport layer reports an error receiving data over the wire,
254279 // attempt to restart the channel
255- log . Warnf ("%s: data transfer transport receive error, restarting data transfer" , mc .chid )
256- go mc .restartChannelDebounced ()
280+ err := xerrors . Errorf ("%s: data transfer transport receive error, restarting data transfer" , mc .chid )
281+ go mc .restartChannelDebounced (err )
257282 case datatransfer .FinishTransfer :
258283 // The channel initiator has finished sending / receiving all data.
259284 // Watch to make sure that the responder sends a message to acknowledge
@@ -355,7 +380,7 @@ func (mc *monitoredChannel) restartChannel() {
355380
356381 // Check if channel is already being restarted
357382 if ! restartedAt .IsZero () {
358- log .Infof ("%s: restart called but already restarting channel, " +
383+ log .Debugf ("%s: restart called but already restarting channel, " +
359384 "waiting to restart again (since %s; restart backoff is %s)" ,
360385 mc .chid , time .Since (restartedAt ), mc .cfg .RestartBackoff )
361386 return
@@ -397,7 +422,7 @@ func (mc *monitoredChannel) restartChannel() {
397422 }
398423
399424 // There was a restart queued, restart again
400- log .Infof ("%s: restart was queued - restarting again" , mc .chid )
425+ log .Debugf ("%s: restart was queued - restarting again" , mc .chid )
401426 }
402427}
403428
@@ -416,7 +441,7 @@ func (mc *monitoredChannel) doRestartChannel() error {
416441 }
417442
418443 // Send the restart message
419- log .Infof ("%s: restarting (%d consecutive restarts)" , mc .chid , restartCount )
444+ log .Debugf ("%s: restarting (%d consecutive restarts)" , mc .chid , restartCount )
420445 err := mc .sendRestartMessage (restartCount )
421446 if err != nil {
422447 log .Warnf ("%s: restart failed, trying again: %s" , mc .chid , err )
@@ -433,17 +458,17 @@ func (mc *monitoredChannel) sendRestartMessage(restartCount int) error {
433458 // Note that at the networking layer there is logic to retry if a network
434459 // connection cannot be established, so this may take some time.
435460 p := mc .chid .OtherParty (mc .mgr .PeerID ())
436- log .Infof ("%s: re-establishing connection to %s" , mc .chid , p )
461+ log .Debugf ("%s: re-establishing connection to %s" , mc .chid , p )
437462 start := time .Now ()
438463 err := mc .mgr .ConnectTo (mc .ctx , p )
439464 if err != nil {
440465 return xerrors .Errorf ("%s: failed to reconnect to peer %s after %s: %w" ,
441466 mc .chid , p , time .Since (start ), err )
442467 }
443- log .Infof ("%s: re-established connection to %s in %s" , mc .chid , p , time .Since (start ))
468+ log .Debugf ("%s: re-established connection to %s in %s" , mc .chid , p , time .Since (start ))
444469
445470 // Send a restart message for the channel
446- log .Infof ("%s: sending restart message to %s (%d consecutive restarts)" , mc .chid , p , restartCount )
471+ log .Debugf ("%s: sending restart message to %s (%d consecutive restarts)" , mc .chid , p , restartCount )
447472 err = mc .mgr .RestartDataTransferChannel (mc .ctx , mc .chid )
448473 if err != nil {
449474 return xerrors .Errorf ("%s: failed to send restart message to %s: %w" , mc .chid , p , err )
@@ -453,11 +478,11 @@ func (mc *monitoredChannel) sendRestartMessage(restartCount int) error {
453478 // If a restart backoff is configured, backoff after a restart before
454479 // attempting another.
455480 if mc .cfg .RestartBackoff > 0 {
456- log .Infof ("%s: backing off %s before allowing any other restarts" ,
481+ log .Debugf ("%s: backing off %s before allowing any other restarts" ,
457482 mc .chid , mc .cfg .RestartBackoff )
458483 select {
459484 case <- time .After (mc .cfg .RestartBackoff ):
460- log .Infof ("%s: restart back-off of %s complete" , mc .chid , mc .cfg .RestartBackoff )
485+ log .Debugf ("%s: restart back-off of %s complete" , mc .chid , mc .cfg .RestartBackoff )
461486 case <- mc .ctx .Done ():
462487 return nil
463488 }
0 commit comments