@@ -69,11 +69,14 @@ func (r *Retryer) runRetryLoop(
6969 li := & LoopInfo {
7070 durationLimit : r .retryLimit ,
7171 }
72- funcinfos := lo .RepeatBy (
73- len ( r .callbacks ) ,
74- func (_ int ) * FuncInfo {
72+ funcinfos := lo .Map (
73+ r .callbacks ,
74+ func (cb retryCallbackInfo , _ int ) * FuncInfo {
7575 return & FuncInfo {
76- lastResetTime : msync .NewTypedAtomic (startTime ),
76+ lastReset : msync .NewTypedAtomic (lastResetInfo {
77+ time : startTime ,
78+ }),
79+ description : cb .description ,
7780 loopDescription : r .description ,
7881 loopInfo : li ,
7982 }
@@ -113,17 +116,25 @@ func (r *Retryer) runRetryLoop(
113116 defer ticker .Stop ()
114117
115118 for {
116- lastSuccessTime := funcinfos [i ].lastResetTime .Load ()
119+ lastReset := funcinfos [i ].lastReset .Load ()
117120
118121 select {
119122 case <- cbDoneChan :
120123 return
121124 case <- ticker .C :
122- if funcinfos [i ].lastResetTime .Load () == lastSuccessTime {
123- logger .Warn ().
125+ if funcinfos [i ].lastReset .Load () == lastReset {
126+ event := logger .Warn ().
124127 Str ("callbackDescription" , curCbInfo .description ).
125- Time ("lastSuccessAt" , lastSuccessTime ).
126- Str ("elapsedTime" , reportutils .DurationToHMS (time .Since (lastSuccessTime ))).
128+ Time ("noSuccessSince" , lastReset .time ).
129+ Uint64 ("successesSoFar" , lastReset .resetsSoFar )
130+
131+ if successDesc , hasDesc := lastReset .description .Get (); hasDesc {
132+ event .
133+ Str ("lastSuccessDescription" , successDesc )
134+ }
135+
136+ event .
137+ Str ("elapsedTime" , reportutils .DurationToHMS (time .Since (lastReset .time ))).
127138 Msg ("Operation has not reported success for a while." )
128139 }
129140 }
@@ -164,9 +175,11 @@ func (r *Retryer) runRetryLoop(
164175 }
165176
166177 failedFuncInfo := funcinfos [groupErr .funcNum ]
178+ descriptions := failedFuncInfo .GetDescriptions ()
179+ cbErr := groupErr .errFromCallback
167180
168181 // Not a transient error? Fail immediately.
169- if ! r .shouldRetryWithSleep (logger , sleepTime , * failedFuncInfo , groupErr . errFromCallback ) {
182+ if ! r .shouldRetryWithSleep (logger , sleepTime , descriptions , cbErr ) {
170183 return groupErr .errFromCallback
171184 }
172185
@@ -201,7 +214,7 @@ func (r *Retryer) runRetryLoop(
201214 // Set all of the funcs that did *not* fail as having just succeeded.
202215 for i , curInfo := range funcinfos {
203216 if i != groupErr .funcNum {
204- curInfo .lastResetTime .Store (now )
217+ curInfo .lastReset .Store (lastResetInfo { time : now } )
205218 }
206219 }
207220 }
@@ -235,7 +248,7 @@ func (r *Retryer) addDescriptionToEvent(event *zerolog.Event) *zerolog.Event {
235248func (r * Retryer ) shouldRetryWithSleep (
236249 logger * logger.Logger ,
237250 sleepTime time.Duration ,
238- funcinfo FuncInfo ,
251+ descriptions [] string ,
239252 err error ,
240253) bool {
241254 if err == nil {
@@ -250,26 +263,35 @@ func (r *Retryer) shouldRetryWithSleep(
250263 )
251264
252265 event := logger .WithLevel (
253- lo .Ternary (isTransient , zerolog .InfoLevel , zerolog .WarnLevel ),
266+ lo .Ternary (
267+ // If it’s transient, surface it as info.
268+ isTransient ,
269+ zerolog .InfoLevel ,
270+
271+ lo .Ternary (
272+ // Context cancellation is unimportant, so debug.
273+ errors .Is (err , context .Canceled ),
274+ zerolog .DebugLevel ,
275+
276+ // Other non-retryables are serious, so warn.
277+ zerolog .WarnLevel ,
278+ ),
279+ ),
254280 )
255281
256- if loopDesc , hasLoopDesc := r .description .Get (); hasLoopDesc {
257- event .Str ("operationDescription" , loopDesc )
258- }
259-
260- event .Str ("callbackDescription" , funcinfo .description ).
282+ event .Strs ("description" , descriptions ).
261283 Int ("error code" , util .GetErrorCode (err )).
262284 Err (err )
263285
264286 if isTransient {
265287 event .
266288 Stringer ("delay" , sleepTime ).
267- Msg ("Pausing before retrying after transient error ." )
289+ Msg ("Got retryable error. Pausing, then will retry ." )
268290
269291 return true
270292 }
271293
272- event .Msg ("Non-transient error occurred." )
294+ event .Msg ("Non-retryable error occurred." )
273295
274296 return false
275297}
0 commit comments