feat: add retry decorator with rate limit handling (RooCodeInc#1605)

ViezeVingertjes · SOM-Michael · saoudrizwan · web-flow · commit bd5eb8fcaef6 · 2025-02-05T23:11:01.000-08:00
* fix: improve retry decorator with smart rate limit handling
  - Add handling of rate limit (429) errors
  - Implement retry timing based on response headers
  - Add exponential backoff when no headers are present
  - Add a few unit tests
  Fixes RooCodeInc#713
* Create modern-knives-tan.md
* Improve readability in retry.ts

---------

Co-authored-by: Michael Overhorst <m.overhorst@spotonmedics.nl>
Co-authored-by: Saoud Rizwan <7799382+saoudrizwan@users.noreply.github.com>
diff --git a/.changeset/modern-knives-tan.md b/.changeset/modern-knives-tan.md
@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+Add automatic retry for rate limited requests
diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts
@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
+import { withRetry } from "../retry"
 import { anthropicDefaultModelId, AnthropicModelId, anthropicModels, ApiHandlerOptions, ModelInfo } from "../../shared/api"
 import { ApiHandler } from "../index"
 import { ApiStream } from "../transform/stream"
@@ -16,6 +17,7 @@ export class AnthropicHandler implements ApiHandler {
 		})
 	}
 
+	@withRetry()
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.getModel()
 		let stream: AnthropicStream<Anthropic.Beta.PromptCaching.Messages.RawPromptCachingBetaMessageStreamEvent>
diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts
@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
+import { withRetry } from "../retry"
 import { ApiHandler } from "../"
 import { ApiHandlerOptions, DeepSeekModelId, ModelInfo, deepSeekDefaultModelId, deepSeekModels } from "../../shared/api"
 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -18,6 +19,7 @@ export class DeepSeekHandler implements ApiHandler {
 		})
 	}
 
+	@withRetry()
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.getModel()
 
diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts
@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { GoogleGenerativeAI } from "@google/generative-ai"
+import { withRetry } from "../retry"
 import { ApiHandler } from "../"
 import { ApiHandlerOptions, geminiDefaultModelId, GeminiModelId, geminiModels, ModelInfo } from "../../shared/api"
 import { convertAnthropicMessageToGemini } from "../transform/gemini-format"
@@ -17,6 +18,7 @@ export class GeminiHandler implements ApiHandler {
 		this.client = new GoogleGenerativeAI(options.geminiApiKey)
 	}
 
+	@withRetry()
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.client.getGenerativeModel({
 			model: this.getModel().id,
diff --git a/src/api/providers/mistral.ts b/src/api/providers/mistral.ts
@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { Mistral } from "@mistralai/mistralai"
+import { withRetry } from "../retry"
 import { ApiHandler } from "../"
 import {
 	ApiHandlerOptions,
@@ -26,6 +27,7 @@ export class MistralHandler implements ApiHandler {
 		})
 	}
 
+	@withRetry()
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const stream = await this.client.chat.stream({
 			model: this.getModel().id,
diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts
@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
+import { withRetry } from "../retry"
 import { ApiHandler } from "../"
 import {
 	ApiHandlerOptions,
@@ -22,6 +23,7 @@ export class OpenAiNativeHandler implements ApiHandler {
 		})
 	}
 
+	@withRetry()
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		switch (this.getModel().id) {
 			case "o1":
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI, { AzureOpenAI } from "openai"
+import { withRetry } from "../retry"
 import { ApiHandlerOptions, azureOpenAiDefaultApiVersion, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"
 import { ApiHandler } from "../index"
 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -27,6 +28,7 @@ export class OpenAiHandler implements ApiHandler {
 		}
 	}
 
+	@withRetry()
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const modelId = this.options.openAiModelId ?? ""
 		const isDeepseekReasoner = modelId.includes("deepseek-reasoner")
diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts
@@ -2,6 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import axios from "axios"
 import delay from "delay"
 import OpenAI from "openai"
+import { withRetry } from "../retry"
 import { ApiHandler } from "../"
 import { ApiHandlerOptions, ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo } from "../../shared/api"
 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -24,6 +25,7 @@ export class OpenRouterHandler implements ApiHandler {
 		})
 	}
 
+	@withRetry()
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.getModel()
 
diff --git a/src/api/providers/vertex.ts b/src/api/providers/vertex.ts
@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
+import { withRetry } from "../retry"
 import { ApiHandler } from "../"
 import { ApiHandlerOptions, ModelInfo, vertexDefaultModelId, VertexModelId, vertexModels } from "../../shared/api"
 import { ApiStream } from "../transform/stream"
@@ -18,6 +19,7 @@ export class VertexHandler implements ApiHandler {
 		})
 	}
 
+	@withRetry()
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const stream = await this.client.messages.create({
 			model: this.getModel().id,
diff --git a/src/api/retry.test.ts b/src/api/retry.test.ts
@@ -0,0 +1,216 @@
+import { describe, it } from "mocha"
+import "should"
+import { withRetry } from "./retry"
+
+describe("Retry Decorator", () => {
+	describe("withRetry", () => {
+		it("should not retry on success", async () => {
+			let callCount = 0
+			class TestClass {
+				@withRetry()
+				async *successMethod() {
+					callCount++
+					yield "success"
+				}
+			}
+
+			const test = new TestClass()
+			const result = []
+			for await (const value of test.successMethod()) {
+				result.push(value)
+			}
+
+			callCount.should.equal(1)
+			result.should.deepEqual(["success"])
+		})
+
+		it("should retry on rate limit (429) error", async () => {
+			let callCount = 0
+			class TestClass {
+				@withRetry({ maxRetries: 2, baseDelay: 10, maxDelay: 100 })
+				async *failMethod() {
+					callCount++
+					if (callCount === 1) {
+						const error: any = new Error("Rate limit exceeded")
+						error.status = 429
+						throw error
+					}
+					yield "success after retry"
+				}
+			}
+
+			const test = new TestClass()
+			const result = []
+			for await (const value of test.failMethod()) {
+				result.push(value)
+			}
+
+			callCount.should.equal(2)
+			result.should.deepEqual(["success after retry"])
+		})
+
+		it("should not retry on non-rate-limit errors", async () => {
+			let callCount = 0
+			class TestClass {
+				@withRetry()
+				async *failMethod() {
+					callCount++
+					throw new Error("Regular error")
+				}
+			}
+
+			const test = new TestClass()
+			try {
+				for await (const _ of test.failMethod()) {
+					// Should not reach here
+				}
+				throw new Error("Should have thrown")
+			} catch (error: any) {
+				error.message.should.equal("Regular error")
+				callCount.should.equal(1)
+			}
+		})
+
+		it("should respect retry-after header with delta seconds", async () => {
+			let callCount = 0
+			const startTime = Date.now()
+			class TestClass {
+				@withRetry({ maxRetries: 2, baseDelay: 1000 }) // Use large baseDelay to ensure header takes precedence
+				async *failMethod() {
+					callCount++
+					if (callCount === 1) {
+						const error: any = new Error("Rate limit exceeded")
+						error.status = 429
+						error.headers = { "retry-after": "0.01" } // 10ms delay
+						throw error
+					}
+					yield "success after retry"
+				}
+			}
+
+			const test = new TestClass()
+			const result = []
+			for await (const value of test.failMethod()) {
+				result.push(value)
+			}
+
+			const duration = Date.now() - startTime
+			duration.should.be.approximately(10, 10) // Allow 10ms variance
+			callCount.should.equal(2)
+			result.should.deepEqual(["success after retry"])
+		})
+
+		it("should respect retry-after header with Unix timestamp", async () => {
+			let callCount = 0
+			const startTime = Date.now()
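+			const retryTimestamp = Math.floor(Date.now() / 1000) + 0.01 // whole-second epoch time + 0.01s; because of the floor this is usually slightly in the PAST (up to ~1s), not 10ms in the future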
+
+			class TestClass {
+				@withRetry({ maxRetries: 2, baseDelay: 1000 }) // Use large baseDelay to ensure header takes precedence
+				async *failMethod() {
+					callCount++
+					if (callCount === 1) {
+						const error: any = new Error("Rate limit exceeded")
+						error.status = 429
+						error.headers = { "retry-after": retryTimestamp.toString() }
+						throw error
+					}
+					yield "success after retry"
+				}
+			}
+
+			const test = new TestClass()
+			const result = []
+			for await (const value of test.failMethod()) {
+				result.push(value)
+			}
+
+			const duration = Date.now() - startTime
+			duration.should.be.approximately(10, 10) // Allow 10ms variance
+			callCount.should.equal(2)
+			result.should.deepEqual(["success after retry"])
+		})
+
+		it("should use exponential backoff when no retry-after header", async () => {
+			let callCount = 0
+			const startTime = Date.now()
+			class TestClass {
+				@withRetry({ maxRetries: 2, baseDelay: 10, maxDelay: 100 })
+				async *failMethod() {
+					callCount++
+					if (callCount === 1) {
+						const error: any = new Error("Rate limit exceeded")
+						error.status = 429
+						throw error
+					}
+					yield "success after retry"
+				}
+			}
+
+			const test = new TestClass()
+			const result = []
+			for await (const value of test.failMethod()) {
+				result.push(value)
+			}
+
+			const duration = Date.now() - startTime
+			// First retry should be after baseDelay (10ms)
+			duration.should.be.approximately(10, 10)
+			callCount.should.equal(2)
+			result.should.deepEqual(["success after retry"])
+		})
+
+		it("should respect maxDelay", async () => {
+			let callCount = 0
+			const startTime = Date.now()
+			class TestClass {
+				@withRetry({ maxRetries: 3, baseDelay: 50, maxDelay: 10 })
+				async *failMethod() {
+					callCount++
+					if (callCount < 3) {
+						const error: any = new Error("Rate limit exceeded")
+						error.status = 429
+						throw error
+					}
+					yield "success after retries"
+				}
+			}
+
+			const test = new TestClass()
+			const result = []
+			for await (const value of test.failMethod()) {
+				result.push(value)
+			}
+
+			const duration = Date.now() - startTime
+			// Both retries should be capped at maxDelay (10ms each)
+			duration.should.be.approximately(20, 20)
+			callCount.should.equal(3)
+			result.should.deepEqual(["success after retries"])
+		})
+
+		it("should throw after maxRetries attempts", async () => {
+			let callCount = 0
+			class TestClass {
+				@withRetry({ maxRetries: 2, baseDelay: 10 })
+				async *failMethod() {
+					callCount++
+					const error: any = new Error("Rate limit exceeded")
+					error.status = 429
+					throw error
+				}
+			}
+
+			const test = new TestClass()
+			try {
+				for await (const _ of test.failMethod()) {
+					// Should not reach here
+				}
+				throw new Error("Should have thrown")
+			} catch (error: any) {
+				error.message.should.equal("Rate limit exceeded")
+				callCount.should.equal(2) // Initial attempt + 1 retry
+			}
+		})
+	})
+})
diff --git a/src/api/retry.ts b/src/api/retry.ts

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +"claude-dev": patch
 +---
++
 +Add automatic retry for rate limited requests
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,6 @@`
`1`	`1`	`import { Anthropic } from "@anthropic-ai/sdk"`
`2`	`2`	`import OpenAI, { AzureOpenAI } from "openai"`
	`3`	`+import { withRetry } from "../retry"`
`3`	`4`	`import { ApiHandlerOptions, azureOpenAiDefaultApiVersion, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"`
`4`	`5`	`import { ApiHandler } from "../index"`
`5`	`6`	`import { convertToOpenAiMessages } from "../transform/openai-format"`
`@@ -27,6 +28,7 @@ export class OpenAiHandler implements ApiHandler {`
`27`	`28`	`}`
`28`	`29`	`}`
`29`	`30`
	`31`	`+ @withRetry()`
`30`	`32`	`async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {`
`31`	`33`	`const modelId = this.options.openAiModelId ?? ""`
`32`	`34`	`const isDeepseekReasoner = modelId.includes("deepseek-reasoner")`