Skip to content

Commit e5209f9

Browse files
authored
[Inference API] Expand RateLimiter docs (#117156)
1 parent 21c4431 commit e5209f9

File tree

1 file changed

+12
-1
lines changed
  • x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/common

1 file changed

+12
-1
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/common/RateLimiter.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@
2828
*
2929
* By setting the accumulated tokens limit to a value greater than zero, it effectively allows bursts of traffic. If the accumulated
3030
* tokens limit is set to zero, it will force the acquiring thread to wait on each call.
31+
*
32+
* Example:
33+
* Time unit: Second
34+
* Tokens to produce per time unit: 10
35+
* Limit for tokens in bucket: 100
36+
*
37+
* Tokens in bucket after n seconds (n seconds -> tokens in bucket):
38+
* 1 sec -> 10 tokens, 2 sec -> 20 tokens, ..., 10 sec -> 100 tokens (bucket full), ..., 200 sec -> 100 tokens (no increase in tokens)
3139
*/
3240
public class RateLimiter {
3341

@@ -76,6 +84,7 @@ public final synchronized void setRate(double newAccumulatedTokensLimit, double
7684
throw new IllegalArgumentException(Strings.format("Tokens per time unit must be less than or equal to %s", Double.MAX_VALUE));
7785
}
7886

87+
// If the new token limit is smaller than what we've accumulated already we need to drop tokens to meet the new token limit
7988
accumulatedTokens = Math.min(accumulatedTokens, newAccumulatedTokensLimit);
8089

8190
accumulatedTokensLimit = newAccumulatedTokensLimit;
@@ -88,7 +97,8 @@ public final synchronized void setRate(double newAccumulatedTokensLimit, double
8897
}
8998

9099
/**
91-
* Causes the thread to wait until the tokens are available
100+
* Causes the thread to wait until the tokens are available.
101+
* This reserves tokens in advance leading to a reduction of accumulated tokens.
92102
* @param tokens the number of items of work that should be throttled, typically you'd pass a value of 1 here
93103
* @throws InterruptedException if the thread is interrupted while waiting for the tokens to become available
94104
*/
@@ -130,6 +140,7 @@ private static void validateTokenRequest(int tokens) {
130140

131141
/**
132142
* Returns the amount of time to wait for the tokens to become available.
143+
* This reserves tokens in advance leading to a reduction of accumulated tokens.
133144
* @param tokens the number of items of work that should be throttled, typically you'd pass a value of 1 here. Must be greater than 0.
134145
* @return the amount of time to wait
135146
*/

0 commit comments

Comments
 (0)