@@ -68,7 +68,7 @@ public interface RetryStrategy {
6868 *
6969 * @throws TokenAcquisitionFailedException if a token cannot be acquired.
7070 */
71- RetryToken acquireInitialToken ();
71+ RetryToken acquireInitialToken (ApiOperation<?, ?> operation );
7272
7373 /**
7474 * Invoked before each subsequent (non-first) request attempt.
@@ -100,6 +100,13 @@ client. Be careful to ensure that access to that state is synchronized in order
100100to prevent race conditions.
101101:::
102102
103+ :::{admonition} TODO - Define ApiOperation
104+ :class: note
105+
106+ `ApiOperation` will be defined later in a separate document. At a minimum, it
107+ should contain the operation's ID.
108+ :::
109+
103110#### Using retry strategies
104111
105112An initial retry token should be acquired at the beginning of a request, before
@@ -129,12 +136,12 @@ The following is a simplified example of what it looks like to use the
129136 *
130137 * @return a successful result.
131138 */
132- public Result request(SerializedRequest serializedRequest) {
139+ public Result request(ApiOperation<?, ?> operation, SerializedRequest serializedRequest) {
133140 // First acquire the initial retry token. If a token cannot be acquired,
134141 // make only one attempt without retries.
135142 RetryToken retryToken;
136143 try {
137- retryToken = this . retryStrategy. acquireInitialToken();
144+ retryToken = this . retryStrategy. acquireInitialToken(operation );
138145 } catch (TokenAcquisitionFailedException e) {
139146 return send(serializedRequest);
140147 }
@@ -413,25 +420,34 @@ demonstrate some of the potential needs of a retry system.
413420### Example retry strategy
414421
415422The following is an example retry strategy that implements exponential backoff
416- with jitter alongside a token bucket. This strategy adds extra cost for timeout
417- errors since they may indicate a more degraded service.
423+ with jitter alongside a token bucket. This strategy has a reduced cost for
424+ throttling errors as they indicate that the service is actively managing
425+ retries.
418426
419427Aside from delay, the retry token also tracks the number of attempts that have
420- been made. This is necessary because this strategy imposes a maximum attempt
421- count, and also because the delay is calculated in part based on how many
422- attempts have been made.
428+ been made as well as whether the operation is a long-polling operation. The attempt
429+ count is necessary because this strategy imposes a maximum attempt count, and
430+ also because the delay is calculated in part based on how many attempts have
431+ been made.
432+
433+ For long-polling operations, the strategy will still retry (with backoff) even
434+ if the token bucket does not hold enough tokens to pay for the retry.
423435
424436``` java
425- public record AwsStandardRetryToken(int attempts, Duration delay) implements RetryToken {
437+ public record AwsStandardRetryToken(
438+ int attempts,
439+ Duration delay,
440+ boolean isLongPoll
441+ ) implements RetryToken {
426442}
427443```
428444
429445``` java
430446public final class AwsStandardRetryStrategy implements RetryStrategy {
431447 // These values are not prescriptive. They are static in this example for the
432448 // sake of simplicity, but making them configurable is ideal.
433- private static final int RETRY_COST = 5 ;
434- private static final int TIMEOUT_COST = 10 ;
449+ private static final int RETRY_COST = 14 ;
450+ private static final int THROTTLE_RETRY_COST = 5 ;
435451 private static final int SUCCESS_REFUND = 1 ;
436452
437453 private static final int MAX_ATTEMPTS = 5 ;
@@ -449,13 +465,14 @@ public final class AwsStandardRetryStrategy implements RetryStrategy {
449465 private final Object tokensLock = new Object ();
450466
451467 @Override
452- public RetryToken acquireInitialToken () {
468+ public RetryToken acquireInitialToken (ApiOperation<?, ?> operation ) {
453469 // This returns successfully even if the token bucket is empty. This is
454470 // because an initial attempt will always be performed anyway, and
455471 // returning successfully here will ensure that the retry strategy is
456472 // checked if that initial attempt fails. By that point, the token bucket
457473 // may no longer be empty.
458- return new AwsStandardRetryToken (0 , null );
474+ boolean isLongPoll = operation. schema(). hasTrait(TraitKey . get(LongPollTrait . class));
475+ return new AwsStandardRetryToken (0 , null , isLongPoll);
459476 }
460477
461478 @Override
@@ -479,16 +496,16 @@ public final class AwsStandardRetryStrategy implements RetryStrategy {
479496 case RetryInfo retryInfo when retryInfo. isRetrySafe() != RetrySafety . NO - > {
480497 // Attempt to consume tokens from the token bucket to "pay"
481498 // for the retry.
482- consumeTokens(retryInfo. isTimeout ());
483- yield backoff(standardToken, retryInfo. retryAfter());
499+ consumeTokens(retryInfo. isThrottle(), standardToken . isLongPoll ());
500+ yield backoff(standardToken, retryInfo. retryAfter(), retryInfo . isThrottle() );
484501 }
485502
486503 // If the exception does not have retry info, but does have more
487504 // general error info, that can also be used. This assumes that
488505 // a server error is likely retryable and that a client error
489506 // likely is not.
490507 case ErrorInfo errorInfo when errorInfo. fault() == ErrorFault . SERVER - > {
491- consumeTokens(false );
508+ consumeTokens(false , standardToken . isLongPoll() );
492509 yield backoff(standardToken);
493510 }
494511 default - > throw new TokenAcquisitionFailedException (" Exception not retryable." );
@@ -498,15 +515,24 @@ public final class AwsStandardRetryStrategy implements RetryStrategy {
498515 /**
499516 * Consumes tokens to "pay" for a retry.
500517 *
501- * @param isTimeout whether the retry is in response to a timeout error,
502- * which will require more tokens.
518+ * @param isThrottle whether the retry is in response to a throttling error,
519+ * which will require fewer tokens.
520+ * @param isLongPoll whether the operation is a long-polling operation. If
521+ * so, a retry will always be performed even if the bucket doesn't have
522+ * enough tokens.
503523 *
504524 * @throws TokenAcquisitionFailedException if there are not enough tokens
505525 * in the bucket to pay for the retry and the operation is not long-polling.
506526 */
507- private void consumeTokens (boolean isTimeout ) {
527+ private void consumeTokens (boolean isThrottle , boolean isLongPoll ) {
508528 synchronized (tokensLock) {
509- int cost = isTimeout ? TIMEOUT_COST : RETRY_COST ;
529+ int cost = isThrottle ? THROTTLE_RETRY_COST : RETRY_COST ;
530+
531+ // Long-polling operations will always back off. If the bucket doesn't have
532+ // enough tokens, it will just be emptied.
533+ if (isLongPoll) {
534+ cost = Math . min(cost, this . tokens);
535+ }
510536
511537 if (this . tokens < cost) {
512538 throw new TokenAcquisitionFailedException (" Token bucket exhausted." );
@@ -522,41 +548,51 @@ public final class AwsStandardRetryStrategy implements RetryStrategy {
522548 * @param token the previous token.
523549 */
524550 private AwsStandardRetryToken backoff (AwsStandardRetryToken token ) {
525- return new AwsStandardRetryToken (token. attempts + 1 , computeDelay(token. attempts));
551+ return new AwsStandardRetryToken (
552+ token. attempts + 1 , computeDelay(token. attempts, false ), token. isLongPoll);
526553 }
527554
528555 /**
529556 * Computes a backoff with exponential backoff and jitter, capped at 20 seconds.
530557 *
531558 * @param token the previous token.
559+ * @param isThrottle whether the triggering error was a throttle.
532560 * @param suggested the delay suggested by the service, which will serve as
533561 * the minimum delay.
534562 */
535- private AwsStandardRetryToken backoff (AwsStandardRetryToken token , Duration suggested ) {
563+ private AwsStandardRetryToken backoff (AwsStandardRetryToken token , Duration suggested , boolean isThrottle ) {
536564 // Compute the backoff as normal. If it is longer than the suggested
537565 // backoff from the service, use it. Otherwise, use the suggested
538566 // backoff.
539- Duration computedDelay = computeDelay(token. attempts);
540- Duration finalDelay = computedDelay. toMillis() < suggested. toMillis() ? suggested : computedDelay;
541- return new AwsStandardRetryToken (token. attempts + 1 , finalDelay);
567+ Duration finalDelay = computeDelay(token. attempts, isThrottle);
568+ if (suggested != null && finalDelay. toMillis() < suggested. toMillis()) {
569+ finalDelay = suggested;
570+ }
571+ return new AwsStandardRetryToken (token. attempts + 1 , finalDelay, token. isLongPoll);
542572 }
543573
544574 /**
545575 * Computes the delay with exponential backoff and jitter, capped at 20 seconds.
546576 *
547577 * @param attempts the number of attempts made so far.
578+ * @param isThrottle whether the triggering error was a throttle.
548579 * @return the computed delay duration.
549580 */
550- private Duration computeDelay (int attempts ) {
581+ private Duration computeDelay (int attempts , boolean isThrottle ) {
551582 // First compute the exponential backoff.
552583 double backoff = Math . pow(2 , attempts);
553584
585+ // Try to recover faster from non-throttling errors.
586+ if (! isThrottle) {
587+ backoff = backoff * 0.05 ;
588+ }
589+
554590 // Next, cap it at 20 seconds.
555591 backoff = Math . min(backoff, MAX_BACKOFF );
556592
557593 // Finally, add jitter and expand to milliseconds.
558594 double backoffMillis = Math . random() * backoff * 1000 ;
559- return Duration . ofMilliseconds ((long ) backoffMillis);
595+ return Duration . ofMillis ((long ) backoffMillis);
560596 }
561597
562598 @Override
0 commit comments