@@ -15,8 +15,20 @@ import (
1515
1616type RetryCallback = func (context.Context , * FuncInfo ) error
1717
18- // Run retries f() whenever a transient error happens, up to the retryer's
19- // configured duration limit.
18+ // Run() runs each given callback in parallel. If none of them fail,
19+ // then no error is returned.
20+ //
21+ // If one of them fails, the other callbacks' contexts are canceled.
22+ // If the error is non-transient, it's returned. If the error is transient,
23+ // though, then every function will be retried.
24+ //
25+ // The retries last until a function fails and it's exceeded the retryer's
26+ // limit without either a success or being canceled (i.e., because another
27+ // thread fails).
28+ //
29+ // Note that, if a given callback runs multiple potentially-retryable requests,
30+ // each successful request should be noted in the callback's FuncInfo.
31+ // See that struct's documentation for more details.
2032//
2133// IMPORTANT: This function should generally NOT be used within a transaction
2234// callback. It may be used within a transaction callback if and only if:
@@ -35,18 +47,6 @@ func (r *Retryer) Run(
3547 return r .runRetryLoop (ctx , logger , f )
3648}
3749
38- type errgroupErr struct {
39- funcNum int
40- err error
41- }
42-
43- func (ege errgroupErr ) Error () string {
44- return fmt .Sprintf ("func %d failed: %v" , ege .funcNum , ege .err )
45- }
46- func (ege errgroupErr ) Unwrap () error {
47- return ege .err
48- }
49-
5050// runRetryLoop contains the core logic for the retry loops.
5151func (r * Retryer ) runRetryLoop (
5252 ctx context.Context ,
@@ -82,8 +82,8 @@ func (r *Retryer) runRetryLoop(
8282
8383 if err != nil {
8484 return errgroupErr {
85- funcNum : i ,
86- err : err ,
85+ funcNum : i ,
86+ errFromCallback : err ,
8787 }
8888 }
8989
@@ -97,26 +97,25 @@ func (r *Retryer) runRetryLoop(
9797 return nil
9898 }
9999
100- // Not a transient error? Fail immediately.
101- if ! r .shouldRetryWithSleep (logger , sleepTime , err ) {
102- return err
103- }
104-
105- // Our error is transient. First we learn which function failed.
106- // We have to get this information from the error itself in order
107- // for it to be fully reliable.
100+ // Let's get the actual error from the function.
108101 groupErr := errgroupErr {}
109102 if ! errors .As (err , & groupErr ) {
110103 panic (fmt .Sprintf ("Error should be a %T, not %T: %v" , groupErr , err , err ))
111104 }
112- failedFuncInfo := funcinfos [groupErr .funcNum ]
113105
114- // If we've exhausted the allowed time then fail.
106+ // Not a transient error? Fail immediately.
107+ if ! r .shouldRetryWithSleep (logger , sleepTime , groupErr .errFromCallback ) {
108+ return groupErr .errFromCallback
109+ }
110+
111+ // Our error is transient. If we've exhausted the allowed time
112+ // then fail.
113+ failedFuncInfo := funcinfos [groupErr .funcNum ]
115114 if failedFuncInfo .GetDurationSoFar () > li .durationLimit {
116115 return RetryDurationLimitExceededErr {
117116 attempts : li .attemptNumber ,
118117 duration : failedFuncInfo .GetDurationSoFar (),
119- lastErr : groupErr .err ,
118+ lastErr : groupErr .errFromCallback ,
120119 }
121120 }
122121
0 commit comments