|
| 1 | +/* |
| 2 | + * Copyright 2023, gRPC Authors All rights reserved. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
/// A throttle used to rate-limit retries and hedging attempts.
///
/// gRPC prevents servers from being overloaded by retries and hedging by using a token-based
/// throttling mechanism at the transport level.
///
/// Each client transport maintains a throttle for the server it is connected to and gRPC records
/// successful and failed RPC attempts. Successful attempts increment the number of tokens
/// by ``tokenRatio`` and failed attempts decrement the available tokens by one. In the context
/// of throttling, a failed attempt is one where the server terminates the RPC with a status code
/// which is retryable or non fatal (as defined by ``RetryPolicy/retryableStatusCodes`` and
/// ``HedgingPolicy/nonFatalStatusCodes``) or when the client receives a pushback response from
/// the server.
///
/// See also [gRFC A6: client retries](https://github.com/grpc/proposal/blob/master/A6-client-retries.md).
public struct RetryThrottle: Sendable {
  // Note: only three figures after the decimal point from the original token ratio are used, so
  // all computation is done on a scaled number of tokens (tokens * 1000). This allows us to do
  // all computation in integer space.
  //
  // Invariant: the scaled number of available tokens always lies in the range
  // `0...scaledMaximumTokens`.

  /// The number of tokens available, multiplied by 1000.
  private let scaledTokensAvailable: LockedValueBox<Int>
  /// The token ratio, multiplied by 1000.
  private let scaledTokenRatio: Int
  /// The maximum number of tokens, multiplied by 1000.
  private let scaledMaximumTokens: Int
  /// The retry threshold, multiplied by 1000. If ``scaledTokensAvailable`` is above this then
  /// retries are permitted.
  private let scaledRetryThreshold: Int

  /// Returns the throttling token ratio.
  ///
  /// The number of tokens held by the throttle is incremented by this value for each successful
  /// response. In the context of throttling, a successful response is one which:
  /// - receives metadata from the server, or
  /// - is terminated with a non-retryable or fatal status code.
  ///
  /// If the response is a pushback response then it is not considered to be successful, even if
  /// either of the preceding conditions are met.
  public var tokenRatio: Double {
    Double(self.scaledTokenRatio) / 1000
  }

  /// The maximum number of tokens the throttle may hold.
  public var maximumTokens: Int {
    self.scaledMaximumTokens / 1000
  }

  /// The number of tokens the throttle currently has.
  ///
  /// If this value is less than or equal to the retry threshold (defined as `maximumTokens / 2`)
  /// then RPCs will not be retried and hedging will be disabled.
  public var tokens: Double {
    self.scaledTokensAvailable.withLockedValue {
      Double($0) / 1000
    }
  }

  /// Returns whether retries and hedging are permitted at this time.
  ///
  /// Retries are permitted only while the number of available tokens is strictly greater than
  /// the retry threshold (`maximumTokens / 2`).
  public var isRetryPermitted: Bool {
    self.scaledTokensAvailable.withLockedValue {
      $0 > self.scaledRetryThreshold
    }
  }

  /// Create a new throttle.
  ///
  /// The throttle starts out holding `maximumTokens` tokens.
  ///
  /// - Parameters:
  ///   - maximumTokens: The maximum number of tokens available. Must be in the range `1...1000`.
  ///   - tokenRatio: The number of tokens to increment the available tokens by for successful
  ///       responses. See the documentation on this type for a description of what counts as a
  ///       successful response. Note that only three decimal places are used from this value.
  /// - Precondition: `maximumTokens` must be in the range `1...1000`.
  /// - Precondition: `tokenRatio` must be finite and `>= 0.001`, and `tokenRatio * 1000` must be
  ///     representable as an `Int`.
  public init(maximumTokens: Int, tokenRatio: Double) {
    precondition(
      (1 ... 1000).contains(maximumTokens),
      "maximumTokens must be in the range 1...1000 (is \(maximumTokens))"
    )

    // Truncate to three decimal places. 'Int(exactly:)' returns nil for NaN, infinities and
    // values outside the range of 'Int', which lets us fail with a descriptive message rather
    // than trapping inside the floating-point-to-integer conversion.
    let truncatedRatio = (tokenRatio * 1000).rounded(.towardZero)
    guard let scaledTokenRatio = Int(exactly: truncatedRatio) else {
      preconditionFailure(
        "tokenRatio must be finite and tokenRatio * 1000 must fit in an Int (is \(tokenRatio))"
      )
    }
    precondition(scaledTokenRatio > 0, "tokenRatio must be >= 0.001 (is \(tokenRatio))")

    let scaledTokens = maximumTokens * 1000
    self.scaledMaximumTokens = scaledTokens
    self.scaledRetryThreshold = scaledTokens / 2
    self.scaledTokenRatio = scaledTokenRatio
    self.scaledTokensAvailable = LockedValueBox(scaledTokens)
  }

  /// Records a success, adding ``tokenRatio`` tokens to the throttle.
  ///
  /// The number of available tokens saturates at ``maximumTokens``.
  @usableFromInline
  func recordSuccess() {
    self.scaledTokensAvailable.withLockedValue { value in
      // Add at most the remaining headroom. Unlike adding first and clamping afterwards this
      // can't overflow, no matter how large the token ratio is, because 'value' never exceeds
      // 'scaledMaximumTokens'.
      value += min(self.scaledTokenRatio, self.scaledMaximumTokens - value)
    }
  }

  /// Records a failure, removing one token from the throttle.
  ///
  /// The number of available tokens never drops below zero.
  ///
  /// - Returns: Whether retries will now be throttled.
  @usableFromInline
  @discardableResult
  func recordFailure() -> Bool {
    self.scaledTokensAvailable.withLockedValue { value in
      // One token is 1000 scaled tokens. 'value' is never negative so this can't overflow.
      value = max(0, value - 1000)
      return value <= self.scaledRetryThreshold
    }
  }
}
0 commit comments