Skip to content

Commit bd5eb8f

Browse files
ViezeVingertjes, SOM-Michael, saoudrizwan
authored
feat: add retry decorator with rate limit handling (RooCodeInc#1605)
* fix: improve retry decorator with smart rate limit handling
  - Add handling of rate limit (429) errors
  - Implement retry timing based on response headers
  - Add exponential backoff when no headers present
  - Add a few unit tests

  Fixes RooCodeInc#713
* Create modern-knives-tan.md
* Improve readability in retry.ts

---------

Co-authored-by: Michael Overhorst <[email protected]>
Co-authored-by: Saoud Rizwan <[email protected]>
1 parent 0795b04 commit bd5eb8f

File tree

11 files changed

+299
-0
lines changed

11 files changed

+299
-0
lines changed

.changeset/modern-knives-tan.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"claude-dev": patch
3+
---
4+
5+
Add automatic retry for rate-limited requests

src/api/providers/anthropic.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { Anthropic } from "@anthropic-ai/sdk"
22
import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
3+
import { withRetry } from "../retry"
34
import { anthropicDefaultModelId, AnthropicModelId, anthropicModels, ApiHandlerOptions, ModelInfo } from "../../shared/api"
45
import { ApiHandler } from "../index"
56
import { ApiStream } from "../transform/stream"
@@ -16,6 +17,7 @@ export class AnthropicHandler implements ApiHandler {
1617
})
1718
}
1819

20+
@withRetry()
1921
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
2022
const model = this.getModel()
2123
let stream: AnthropicStream<Anthropic.Beta.PromptCaching.Messages.RawPromptCachingBetaMessageStreamEvent>

src/api/providers/deepseek.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { Anthropic } from "@anthropic-ai/sdk"
22
import OpenAI from "openai"
3+
import { withRetry } from "../retry"
34
import { ApiHandler } from "../"
45
import { ApiHandlerOptions, DeepSeekModelId, ModelInfo, deepSeekDefaultModelId, deepSeekModels } from "../../shared/api"
56
import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -18,6 +19,7 @@ export class DeepSeekHandler implements ApiHandler {
1819
})
1920
}
2021

22+
@withRetry()
2123
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
2224
const model = this.getModel()
2325

src/api/providers/gemini.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { Anthropic } from "@anthropic-ai/sdk"
22
import { GoogleGenerativeAI } from "@google/generative-ai"
3+
import { withRetry } from "../retry"
34
import { ApiHandler } from "../"
45
import { ApiHandlerOptions, geminiDefaultModelId, GeminiModelId, geminiModels, ModelInfo } from "../../shared/api"
56
import { convertAnthropicMessageToGemini } from "../transform/gemini-format"
@@ -17,6 +18,7 @@ export class GeminiHandler implements ApiHandler {
1718
this.client = new GoogleGenerativeAI(options.geminiApiKey)
1819
}
1920

21+
@withRetry()
2022
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
2123
const model = this.client.getGenerativeModel({
2224
model: this.getModel().id,

src/api/providers/mistral.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { Anthropic } from "@anthropic-ai/sdk"
22
import { Mistral } from "@mistralai/mistralai"
3+
import { withRetry } from "../retry"
34
import { ApiHandler } from "../"
45
import {
56
ApiHandlerOptions,
@@ -26,6 +27,7 @@ export class MistralHandler implements ApiHandler {
2627
})
2728
}
2829

30+
@withRetry()
2931
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
3032
const stream = await this.client.chat.stream({
3133
model: this.getModel().id,

src/api/providers/openai-native.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { Anthropic } from "@anthropic-ai/sdk"
22
import OpenAI from "openai"
3+
import { withRetry } from "../retry"
34
import { ApiHandler } from "../"
45
import {
56
ApiHandlerOptions,
@@ -22,6 +23,7 @@ export class OpenAiNativeHandler implements ApiHandler {
2223
})
2324
}
2425

26+
@withRetry()
2527
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
2628
switch (this.getModel().id) {
2729
case "o1":

src/api/providers/openai.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { Anthropic } from "@anthropic-ai/sdk"
22
import OpenAI, { AzureOpenAI } from "openai"
3+
import { withRetry } from "../retry"
34
import { ApiHandlerOptions, azureOpenAiDefaultApiVersion, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"
45
import { ApiHandler } from "../index"
56
import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -27,6 +28,7 @@ export class OpenAiHandler implements ApiHandler {
2728
}
2829
}
2930

31+
@withRetry()
3032
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
3133
const modelId = this.options.openAiModelId ?? ""
3234
const isDeepseekReasoner = modelId.includes("deepseek-reasoner")

src/api/providers/openrouter.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
22
import axios from "axios"
33
import delay from "delay"
44
import OpenAI from "openai"
5+
import { withRetry } from "../retry"
56
import { ApiHandler } from "../"
67
import { ApiHandlerOptions, ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo } from "../../shared/api"
78
import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -24,6 +25,7 @@ export class OpenRouterHandler implements ApiHandler {
2425
})
2526
}
2627

28+
@withRetry()
2729
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
2830
const model = this.getModel()
2931

src/api/providers/vertex.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { Anthropic } from "@anthropic-ai/sdk"
22
import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
3+
import { withRetry } from "../retry"
34
import { ApiHandler } from "../"
45
import { ApiHandlerOptions, ModelInfo, vertexDefaultModelId, VertexModelId, vertexModels } from "../../shared/api"
56
import { ApiStream } from "../transform/stream"
@@ -18,6 +19,7 @@ export class VertexHandler implements ApiHandler {
1819
})
1920
}
2021

22+
@withRetry()
2123
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
2224
const stream = await this.client.messages.create({
2325
model: this.getModel().id,

src/api/retry.test.ts

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
import { describe, it } from "mocha"
2+
import "should"
3+
import { withRetry } from "./retry"
4+
5+
describe("Retry Decorator", () => {
6+
describe("withRetry", () => {
7+
it("should not retry on success", async () => {
8+
let callCount = 0
9+
class TestClass {
10+
@withRetry()
11+
async *successMethod() {
12+
callCount++
13+
yield "success"
14+
}
15+
}
16+
17+
const test = new TestClass()
18+
const result = []
19+
for await (const value of test.successMethod()) {
20+
result.push(value)
21+
}
22+
23+
callCount.should.equal(1)
24+
result.should.deepEqual(["success"])
25+
})
26+
27+
it("should retry on rate limit (429) error", async () => {
28+
let callCount = 0
29+
class TestClass {
30+
@withRetry({ maxRetries: 2, baseDelay: 10, maxDelay: 100 })
31+
async *failMethod() {
32+
callCount++
33+
if (callCount === 1) {
34+
const error: any = new Error("Rate limit exceeded")
35+
error.status = 429
36+
throw error
37+
}
38+
yield "success after retry"
39+
}
40+
}
41+
42+
const test = new TestClass()
43+
const result = []
44+
for await (const value of test.failMethod()) {
45+
result.push(value)
46+
}
47+
48+
callCount.should.equal(2)
49+
result.should.deepEqual(["success after retry"])
50+
})
51+
52+
it("should not retry on non-rate-limit errors", async () => {
53+
let callCount = 0
54+
class TestClass {
55+
@withRetry()
56+
async *failMethod() {
57+
callCount++
58+
throw new Error("Regular error")
59+
}
60+
}
61+
62+
const test = new TestClass()
63+
try {
64+
for await (const _ of test.failMethod()) {
65+
// Should not reach here
66+
}
67+
throw new Error("Should have thrown")
68+
} catch (error: any) {
69+
error.message.should.equal("Regular error")
70+
callCount.should.equal(1)
71+
}
72+
})
73+
74+
it("should respect retry-after header with delta seconds", async () => {
75+
let callCount = 0
76+
const startTime = Date.now()
77+
class TestClass {
78+
@withRetry({ maxRetries: 2, baseDelay: 1000 }) // Use large baseDelay to ensure header takes precedence
79+
async *failMethod() {
80+
callCount++
81+
if (callCount === 1) {
82+
const error: any = new Error("Rate limit exceeded")
83+
error.status = 429
84+
error.headers = { "retry-after": "0.01" } // 10ms delay
85+
throw error
86+
}
87+
yield "success after retry"
88+
}
89+
}
90+
91+
const test = new TestClass()
92+
const result = []
93+
for await (const value of test.failMethod()) {
94+
result.push(value)
95+
}
96+
97+
const duration = Date.now() - startTime
98+
duration.should.be.approximately(10, 10) // Allow 10ms variance
99+
callCount.should.equal(2)
100+
result.should.deepEqual(["success after retry"])
101+
})
102+
103+
it("should respect retry-after header with Unix timestamp", async () => {
104+
let callCount = 0
105+
const startTime = Date.now()
106+
const retryTimestamp = Math.floor(Date.now() / 1000) + 0.01 // start of the current second + 10ms: at most 10ms ahead, and usually already in the past because of Math.floor
107+
108+
class TestClass {
109+
@withRetry({ maxRetries: 2, baseDelay: 1000 }) // Use large baseDelay to ensure header takes precedence
110+
async *failMethod() {
111+
callCount++
112+
if (callCount === 1) {
113+
const error: any = new Error("Rate limit exceeded")
114+
error.status = 429
115+
error.headers = { "retry-after": retryTimestamp.toString() }
116+
throw error
117+
}
118+
yield "success after retry"
119+
}
120+
}
121+
122+
const test = new TestClass()
123+
const result = []
124+
for await (const value of test.failMethod()) {
125+
result.push(value)
126+
}
127+
128+
const duration = Date.now() - startTime
129+
duration.should.be.approximately(10, 10) // Allow 10ms variance
130+
callCount.should.equal(2)
131+
result.should.deepEqual(["success after retry"])
132+
})
133+
134+
it("should use exponential backoff when no retry-after header", async () => {
135+
let callCount = 0
136+
const startTime = Date.now()
137+
class TestClass {
138+
@withRetry({ maxRetries: 2, baseDelay: 10, maxDelay: 100 })
139+
async *failMethod() {
140+
callCount++
141+
if (callCount === 1) {
142+
const error: any = new Error("Rate limit exceeded")
143+
error.status = 429
144+
throw error
145+
}
146+
yield "success after retry"
147+
}
148+
}
149+
150+
const test = new TestClass()
151+
const result = []
152+
for await (const value of test.failMethod()) {
153+
result.push(value)
154+
}
155+
156+
const duration = Date.now() - startTime
157+
// First retry should be after baseDelay (10ms)
158+
duration.should.be.approximately(10, 10)
159+
callCount.should.equal(2)
160+
result.should.deepEqual(["success after retry"])
161+
})
162+
163+
it("should respect maxDelay", async () => {
164+
let callCount = 0
165+
const startTime = Date.now()
166+
class TestClass {
167+
@withRetry({ maxRetries: 3, baseDelay: 50, maxDelay: 10 })
168+
async *failMethod() {
169+
callCount++
170+
if (callCount < 3) {
171+
const error: any = new Error("Rate limit exceeded")
172+
error.status = 429
173+
throw error
174+
}
175+
yield "success after retries"
176+
}
177+
}
178+
179+
const test = new TestClass()
180+
const result = []
181+
for await (const value of test.failMethod()) {
182+
result.push(value)
183+
}
184+
185+
const duration = Date.now() - startTime
186+
// Both retries should be capped at maxDelay (10ms each)
187+
duration.should.be.approximately(20, 20)
188+
callCount.should.equal(3)
189+
result.should.deepEqual(["success after retries"])
190+
})
191+
192+
it("should throw after maxRetries attempts", async () => {
193+
let callCount = 0
194+
class TestClass {
195+
@withRetry({ maxRetries: 2, baseDelay: 10 })
196+
async *failMethod() {
197+
callCount++
198+
const error: any = new Error("Rate limit exceeded")
199+
error.status = 429
200+
throw error
201+
}
202+
}
203+
204+
const test = new TestClass()
205+
try {
206+
for await (const _ of test.failMethod()) {
207+
// Should not reach here
208+
}
209+
throw new Error("Should have thrown")
210+
} catch (error: any) {
211+
error.message.should.equal("Rate limit exceeded")
212+
callCount.should.equal(2) // Initial attempt + 1 retry
213+
}
214+
})
215+
})
216+
})

0 commit comments

Comments
 (0)