move things a bit & document

FGasper · FGasper · commit 34259294b509 · 2024-11-29T13:38:04.000-05:00
diff --git a/internal/partitions/partitions.go b/internal/partitions/partitions.go
@@ -613,7 +613,7 @@ func getMidIDBounds(
 
 			// Append the copied bound to the other mid _id bounds.
 			midIDBounds = append(midIDBounds, bound)
-			ri.IterationSuccess()
+			ri.NoteSuccess()
 		}
 
 		return cursor.Err()
diff --git a/internal/retry/error.go b/internal/retry/error.go
@@ -25,3 +25,15 @@ func (rde RetryDurationLimitExceededErr) Error() string {
 func (rde RetryDurationLimitExceededErr) Unwrap() error {
 	return rde.lastErr
 }
+
+// errgroupErr is an internal error type that we return from errgroup
+// callbacks. It allows us to know (reliably) which error is the one
+// that triggers the errgroup's failure.
+type errgroupErr struct {
+	funcNum         int
+	errFromCallback error
+}
+
+func (ege errgroupErr) Error() string {
+	return fmt.Sprintf("func %d failed: %v", ege.funcNum, ege.errFromCallback)
+}
diff --git a/internal/retry/retry.go b/internal/retry/retry.go
@@ -15,8 +15,20 @@ import (
 
 type RetryCallback = func(context.Context, *FuncInfo) error
 
-// Run retries f() whenever a transient error happens, up to the retryer's
-// configured duration limit.
+// Run() runs each given callback in parallel. If none of them fail,
+// then no error is returned.
+//
+// If one of them fails, the other callbacks' contexts are canceled.
+// If the error is non-transient, it's returned. If the error is transient,
+// though, then every function will be retried.
+//
+// Retries continue until a function fails after exceeding the retryer's
+// duration limit without either a success or a cancellation (i.e., because
+// another callback failed) in the meantime.
+//
+// Note that, if a given callback runs multiple potentially-retryable requests,
+// each successful request should be noted in the callback's FuncInfo.
+// See that struct's documentation for more details.
 //
 // IMPORTANT: This function should generally NOT be used within a transaction
 // callback. It may be used within a transaction callback if and only if:
@@ -35,18 +47,6 @@ func (r *Retryer) Run(
 	return r.runRetryLoop(ctx, logger, f)
 }
 
-type errgroupErr struct {
-	funcNum int
-	err     error
-}
-
-func (ege errgroupErr) Error() string {
-	return fmt.Sprintf("func %d failed: %v", ege.funcNum, ege.err)
-}
-func (ege errgroupErr) Unwrap() error {
-	return ege.err
-}
-
 // runRetryLoop contains the core logic for the retry loops.
 func (r *Retryer) runRetryLoop(
 	ctx context.Context,
@@ -82,8 +82,8 @@ func (r *Retryer) runRetryLoop(
 
 				if err != nil {
 					return errgroupErr{
-						funcNum: i,
-						err:     err,
+						funcNum:         i,
+						errFromCallback: err,
 					}
 				}
 
@@ -97,26 +97,25 @@ func (r *Retryer) runRetryLoop(
 			return nil
 		}
 
-		// Not a transient error? Fail immediately.
-		if !r.shouldRetryWithSleep(logger, sleepTime, err) {
-			return err
-		}
-
-		// Our error is transient. First we learn which function failed.
-		// We have to get this information from the error itself in order
-		// for it to be fully reliable.
+		// Let's get the actual error from the function.
 		groupErr := errgroupErr{}
 		if !errors.As(err, &groupErr) {
 			panic(fmt.Sprintf("Error should be a %T, not %T: %v", groupErr, err, err))
 		}
-		failedFuncInfo := funcinfos[groupErr.funcNum]
 
-		// If we've exhausted the allowed time then fail.
+		// Not a transient error? Fail immediately.
+		if !r.shouldRetryWithSleep(logger, sleepTime, groupErr.errFromCallback) {
+			return groupErr.errFromCallback
+		}
+
+		// Our error is transient. If we've exhausted the allowed time
+		// then fail.
+		failedFuncInfo := funcinfos[groupErr.funcNum]
 		if failedFuncInfo.GetDurationSoFar() > li.durationLimit {
 			return RetryDurationLimitExceededErr{
 				attempts: li.attemptNumber,
 				duration: failedFuncInfo.GetDurationSoFar(),
-				lastErr:  groupErr.err,
+				lastErr:  groupErr.errFromCallback,
 			}
 		}
 
diff --git a/internal/retry/retry_info.go b/internal/retry/retry_info.go
@@ -69,14 +69,12 @@ func (fi *FuncInfo) GetDurationSoFar() time.Duration {
 	return time.Since(fi.lastResetTime)
 }
 
-// IterationSuccess is used to tell the retry util to reset its measurement
+// NoteSuccess is used to tell the retry util to reset its measurement
 // of how long the closure has been running for. This is useful for long
 // running operations that might run successfully for a few days and then fail.
-// Essentially, calling this function tells the retry util not to include the
-// closure's run time as a part of the overall measurement of how long the
-// closure took including retries, since that measurement is used to determine
-// whether we want to retry the operation or not. (If the measurement is greater
-// than the retry time, we will not retry.)
-func (i *FuncInfo) IterationSuccess() {
+//
+// Call this after every successful command in a multi-command callback.
+// (It’s useless--but harmless--in a single-command callback.)
+func (i *FuncInfo) NoteSuccess() {
 	i.lastResetTime = time.Now()
 }
diff --git a/internal/retry/retryer_test.go b/internal/retry/retryer_test.go
@@ -124,7 +124,7 @@ func (suite *UnitTestSuite) TestRetryerDurationReset() {
 		// Artificially advance how much time was taken.
 		ri.lastResetTime = ri.lastResetTime.Add(-2 * ri.loopInfo.durationLimit)
 
-		ri.IterationSuccess()
+		ri.NoteSuccess()
 
 		successIterations++
 		if successIterations == 1 {
@@ -308,7 +308,7 @@ func (suite *UnitTestSuite) TestMulti_LongRunningSuccess() {
 		ctx,
 		logger,
 		func(ctx context.Context, fi *FuncInfo) error {
-			fi.IterationSuccess()
+			fi.NoteSuccess()
 
 			if time.Now().Before(succeedPastTime) {
 				time.Sleep(1 * time.Second)
diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go
@@ -188,7 +188,7 @@ func (verifier *Verifier) readAndHandleOneChangeEventBatch(
 		eventsRead++
 	}
 
-	ri.IterationSuccess()
+	ri.NoteSuccess()
 
 	if eventsRead == 0 {
 		return nil

Original file line number	Diff line number	Diff line change
`@@ -613,7 +613,7 @@ func getMidIDBounds(`
`613`	`613`
`614`	`614`	`// Append the copied bound to the other mid _id bounds.`
`615`	`615`	`midIDBounds = append(midIDBounds, bound)`
`616`		`- ri.IterationSuccess()`
	`616`	`+ ri.NoteSuccess()`
`617`	`617`	`}`
`618`	`618`
`619`	`619`	`return cursor.Err()`
Original file line number	Diff line number	Diff line change
`@@ -188,7 +188,7 @@ func (verifier *Verifier) readAndHandleOneChangeEventBatch(`
`188`	`188`	`eventsRead++`
`189`	`189`	`}`
`190`	`190`
`191`		`- ri.IterationSuccess()`
	`191`	`+ ri.NoteSuccess()`
`192`	`192`
`193`	`193`	`if eventsRead == 0 {`
`194`	`194`	`return nil`