diff --git a/src/core/context/context-management/__tests__/context-error-handling.test.ts b/src/core/context/context-management/__tests__/context-error-handling.test.ts
new file mode 100644
index 0000000000..5d2321f0aa
--- /dev/null
+++ b/src/core/context/context-management/__tests__/context-error-handling.test.ts
@@ -0,0 +1,351 @@
+import { describe, it, expect, vi } from "vitest"
+import { APIError } from "openai"
+import { checkContextWindowExceededError } from "../context-error-handling"
+
+// Behavioural tests for checkContextWindowExceededError. Every case passes a
+// hand-built value (or an object created on APIError.prototype) and asserts the
+// boolean verdict; no SDK client or network call is involved.
+describe("checkContextWindowExceededError", () => {
+	describe("OpenAI errors", () => {
+		it("should detect OpenAI context window error with APIError instance", () => {
+			const error = Object.create(APIError.prototype)
+			Object.assign(error, {
+				status: 400,
+				code: "400",
+				message: "This model's maximum context length is 4096 tokens",
+				error: {
+					message: "This model's maximum context length is 4096 tokens",
+					type: "invalid_request_error",
+					param: null,
+					code: "context_length_exceeded",
+				},
+			})
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+
+		it("should detect OpenAI context window error when code holds the API error code", () => {
+			// Mirrors the nested body below: status carries the HTTP status while the
+			// top-level code repeats error.code instead of the status.
+			const error = Object.create(APIError.prototype)
+			Object.assign(error, {
+				status: 400,
+				code: "context_length_exceeded",
+				message: "This model's maximum context length is 4096 tokens",
+				error: {
+					message: "This model's maximum context length is 4096 tokens",
+					type: "invalid_request_error",
+					param: null,
+					code: "context_length_exceeded",
+				},
+			})
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+
+		it("should detect OpenAI LengthFinishReasonError", () => {
+			const error = {
+				name: "LengthFinishReasonError",
+				message: "The response was cut off due to length",
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+
+		it("should not detect non-context OpenAI errors", () => {
+			const error = Object.create(APIError.prototype)
+			Object.assign(error, {
+				status: 400,
+				code: "400",
+				message: "Invalid API key",
+				error: {
+					message: "Invalid API key",
+					type: "invalid_request_error",
+					param: null,
+					code: "invalid_api_key",
+				},
+			})
+
+			expect(checkContextWindowExceededError(error)).toBe(false)
+		})
+	})
+
+	describe("OpenRouter errors", () => {
+		it("should detect OpenRouter context window error with status 400", () => {
+			const error = {
+				status: 400,
+				message: "Request exceeds maximum context length of 8192 tokens",
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+
+		it("should detect OpenRouter error with nested error structure", () => {
+			const error = {
+				error: {
+					status: 400,
+					message: "Input tokens exceed model limit",
+				},
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+
+		it("should detect OpenRouter error with response status", () => {
+			const error = {
+				response: {
+					status: 400,
+				},
+				message: "Too many tokens in the request",
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+
+		it("should detect various context error patterns", () => {
+			const patterns = [
+				"context length exceeded",
+				"maximum context window",
+				"input tokens exceed limit",
+				"too many tokens",
+			]
+
+			patterns.forEach((pattern) => {
+				const error = {
+					status: 400,
+					message: pattern,
+				}
+				expect(checkContextWindowExceededError(error)).toBe(true)
+			})
+		})
+
+		it("should not detect non-context 400 errors", () => {
+			const error = {
+				status: 400,
+				message: "Invalid request format",
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(false)
+		})
+
+		it("should not detect errors with different status codes", () => {
+			const error = {
+				status: 500,
+				message: "context length exceeded",
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(false)
+		})
+	})
+
+	describe("Anthropic errors", () => {
+		it("should detect Anthropic context window error", () => {
+			const error = {
+				error: {
+					error: {
+						type: "invalid_request_error",
+						message: "prompt is too long: 150000 tokens > 100000 maximum",
+					},
+				},
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+
+		it("should detect Anthropic error with context_length_exceeded code", () => {
+			const error = {
+				error: {
+					error: {
+						type: "invalid_request_error",
+						code: "context_length_exceeded",
+						message: "The request exceeds the maximum context window",
+					},
+				},
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+
+		it("should detect various Anthropic context error patterns", () => {
+			const patterns = [
+				"prompt is too long",
+				"maximum 200000 tokens",
+				"context is too long",
+				"exceeds the context window",
+				"token limit exceeded",
+			]
+
+			patterns.forEach((pattern) => {
+				const error = {
+					error: {
+						error: {
+							type: "invalid_request_error",
+							message: pattern,
+						},
+					},
+				}
+				expect(checkContextWindowExceededError(error)).toBe(true)
+			})
+		})
+
+		it("should not detect non-context Anthropic errors", () => {
+			const error = {
+				error: {
+					error: {
+						type: "invalid_request_error",
+						message: "Invalid model specified",
+					},
+				},
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(false)
+		})
+
+		it("should not detect errors with different error types", () => {
+			const error = {
+				error: {
+					error: {
+						type: "authentication_error",
+						message: "prompt is too long",
+					},
+				},
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(false)
+		})
+	})
+
+	describe("Cerebras errors", () => {
+		it("should detect Cerebras context window error", () => {
+			const error = {
+				status: 400,
+				message: "Please reduce the length of the messages or completion",
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+
+		it("should detect Cerebras error with nested structure", () => {
+			const error = {
+				error: {
+					status: 400,
+					message: "Please reduce the length of the messages or completion",
+				},
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+
+		it("should not detect non-context Cerebras errors", () => {
+			const error = {
+				status: 400,
+				message: "Invalid request parameters",
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(false)
+		})
+	})
+
+	describe("Edge cases", () => {
+		it("should handle null input", () => {
+			expect(checkContextWindowExceededError(null)).toBe(false)
+		})
+
+		it("should handle undefined input", () => {
+			expect(checkContextWindowExceededError(undefined)).toBe(false)
+		})
+
+		it("should handle empty object", () => {
+			expect(checkContextWindowExceededError({})).toBe(false)
+		})
+
+		it("should handle string input", () => {
+			expect(checkContextWindowExceededError("error")).toBe(false)
+		})
+
+		it("should handle number input", () => {
+			expect(checkContextWindowExceededError(123)).toBe(false)
+		})
+
+		it("should handle array input", () => {
+			expect(checkContextWindowExceededError([])).toBe(false)
+		})
+
+		it("should handle errors with circular references", () => {
+			const error: any = { status: 400, message: "context length exceeded" }
+			error.self = error // Create circular reference
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+
+		it("should handle errors with deeply nested undefined values", () => {
+			const error = {
+				error: {
+					error: {
+						type: undefined,
+						message: undefined,
+					},
+				},
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(false)
+		})
+
+		it("should handle errors that throw during property access", () => {
+			const error = {
+				get status() {
+					throw new Error("Property access error")
+				},
+				message: "context length exceeded",
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(false)
+		})
+
+		it("should handle mixed provider error structures", () => {
+			// Error that could match multiple providers
+			const error = {
+				status: 400,
+				code: "400",
+				message: "context length exceeded",
+				error: {
+					error: {
+						type: "invalid_request_error",
+						message: "prompt is too long",
+					},
+				},
+			}
+
+			expect(checkContextWindowExceededError(error)).toBe(true)
+		})
+	})
+
+	describe("Multiple provider detection", () => {
+		it("should detect error if any provider check returns true", () => {
+			// This error should be detected by OpenRouter check
+			const error1 = {
+				status: 400,
+				message: "context window exceeded",
+			}
+			expect(checkContextWindowExceededError(error1)).toBe(true)
+
+			// This error should be detected by Anthropic check
+			const error2 = {
+				error: {
+					error: {
+						type: "invalid_request_error",
+						message: "prompt is too long",
+					},
+				},
+			}
+
expect(checkContextWindowExceededError(error2)).toBe(true)
+
+			// This error should be detected by Cerebras check
+			const error3 = {
+				status: 400,
+				message: "Please reduce the length of the messages or completion",
+			}
+			expect(checkContextWindowExceededError(error3)).toBe(true)
+		})
+	})
+})
diff --git a/src/core/context/context-management/context-error-handling.ts b/src/core/context/context-management/context-error-handling.ts
new file mode 100644
index 0000000000..006d7b1607
--- /dev/null
+++ b/src/core/context/context-management/context-error-handling.ts
@@ -0,0 +1,170 @@
+import { APIError } from "openai"
+
+/** View of an arbitrary thrown value once it is known to be a non-null object. */
+type UnknownRecord = Record<string, unknown>
+
+/** Message fragments accepted for SDK errors whose `code` is the literal 400. */
+const OPENAI_CONTEXT_ERROR_SUBSTRINGS = ["token", "context length"] as const
+
+/** Message patterns for OpenAI/OpenRouter-style 400 responses. */
+const OPENROUTER_CONTEXT_ERROR_PATTERNS = [
+	/\bcontext\s*(?:length|window)\b/i,
+	/\bmaximum\s*context\b/i,
+	/\b(?:input\s*)?tokens?\s*exceed/i,
+	/\btoo\s*many\s*tokens?\b/i,
+] as const
+
+/** Message patterns for Anthropic `invalid_request_error` payloads. */
+const ANTHROPIC_CONTEXT_ERROR_PATTERNS = [
+	/prompt is too long/i,
+	/maximum.*tokens/i,
+	/context.*too.*long/i,
+	/exceeds.*context/i,
+	/token.*limit/i,
+	/context_length_exceeded/i,
+	/max_tokens_to_sample/i,
+] as const
+
+/** Sentence matched for Cerebras 400 responses. */
+const CEREBRAS_CONTEXT_ERROR_MESSAGE = "Please reduce the length of the messages or completion"
+
+/**
+ * Reports whether a caught value looks like a "context window exceeded" failure
+ * in any of the supported shapes (OpenAI SDK, OpenRouter, Anthropic, Cerebras).
+ *
+ * Never throws: each provider check catches its own exceptions and answers `false`.
+ *
+ * @param error - Any value caught from an API call.
+ * @returns `true` as soon as one provider check recognises the value.
+ */
+export function checkContextWindowExceededError(error: unknown): boolean {
+	return (
+		checkIsOpenAIContextWindowError(error) ||
+		checkIsOpenRouterContextWindowError(error) ||
+		checkIsAnthropicContextWindowError(error) ||
+		checkIsCerebrasContextWindowError(error)
+	)
+}
+
+/** Returns `value` typed as a record when it is a non-null object, otherwise `undefined`. */
+function asRecord(value: unknown): UnknownRecord | undefined {
+	return value !== null && typeof value === "object" ? (value as UnknownRecord) : undefined
+}
+
+/**
+ * Reads the status and message the OpenRouter and Cerebras checks share.
+ *
+ * Status is the first defined of `status`, `code`, `error.status`, `response.status`;
+ * message is the first truthy of `message`, `error.message`, else "".
+ * Property getters may throw, so callers invoke this inside their own try block.
+ */
+function readStatusAndMessage(err: UnknownRecord): { status: unknown; message: string } {
+	return {
+		status: err.status ?? err.code ?? asRecord(err.error)?.status ?? asRecord(err.response)?.status,
+		message: String(err.message || asRecord(err.error)?.message || ""),
+	}
+}
+
+/** Matches a 400 status plus one of OPENROUTER_CONTEXT_ERROR_PATTERNS in the message. */
+function checkIsOpenRouterContextWindowError(error: unknown): boolean {
+	try {
+		const err = asRecord(error)
+		if (!err) {
+			return false
+		}
+
+		const { status, message } = readStatusAndMessage(err)
+		// String() lets a numeric 400 and the string "400" both pass.
+		return String(status) === "400" && OPENROUTER_CONTEXT_ERROR_PATTERNS.some((pattern) => pattern.test(message))
+	} catch {
+		// Deliberate best effort: an object that throws on access is simply "not a match".
+		return false
+	}
+}
+
+/**
+ * Matches errors raised through the OpenAI SDK.
+ *
+ * Accepted shapes:
+ * - any object whose `name` is "LengthFinishReasonError";
+ * - an `APIError` whose `code` is "context_length_exceeded";
+ * - an `APIError` whose `code` stringifies to "400" and whose message contains
+ *   one of OPENAI_CONTEXT_ERROR_SUBSTRINGS (the original rule, kept as-is);
+ * - an `APIError` with HTTP `status` 400 whose message contains "context length".
+ *
+ * Docs: https://platform.openai.com/docs/guides/error-codes/api-errors
+ */
+function checkIsOpenAIContextWindowError(error: unknown): boolean {
+	try {
+		const candidate = asRecord(error)
+		if (!candidate) {
+			return false
+		}
+
+		// Matched by name, so no instanceof check is needed for this case.
+		if (candidate.name === "LengthFinishReasonError") {
+			return true
+		}
+
+		if (!(error instanceof APIError)) {
+			return false
+		}
+
+		const code = error.code?.toString()
+		if (code === "context_length_exceeded") {
+			return true
+		}
+
+		const message = String(error.message ?? "")
+		if (code === "400") {
+			return OPENAI_CONTEXT_ERROR_SUBSTRINGS.some((fragment) => message.includes(fragment))
+		}
+
+		return error.status === 400 && message.includes("context length")
+	} catch {
+		// Deliberate best effort: an object that throws on access is simply "not a match".
+		return false
+	}
+}
+
+/**
+ * Matches Anthropic payloads of the form `{ error: { error: { type, code?, message } } }`.
+ *
+ * Only `invalid_request_error` payloads qualify. A `code` of "context_length_exceeded"
+ * is accepted on its own; otherwise the message must match one of
+ * ANTHROPIC_CONTEXT_ERROR_PATTERNS.
+ */
+function checkIsAnthropicContextWindowError(response: unknown): boolean {
+	try {
+		const details = asRecord(asRecord(asRecord(response)?.error)?.error)
+		if (!details || details.type !== "invalid_request_error") {
+			return false
+		}
+
+		if (details.code === "context_length_exceeded") {
+			return true
+		}
+
+		const message = String(details.message || "")
+		return ANTHROPIC_CONTEXT_ERROR_PATTERNS.some((pattern) => pattern.test(message))
+	} catch {
+		// Deliberate best effort: an object that throws on access is simply "not a match".
+		return false
+	}
+}
+
+/** Matches a 400 status plus the Cerebras "reduce the length" sentence in the message. */
+function checkIsCerebrasContextWindowError(response: unknown): boolean {
+	try {
+		const res = asRecord(response)
+		if (!res) {
+			return false
+		}
+
+		const { status, message } = readStatusAndMessage(res)
+		return String(status) === "400" && message.includes(CEREBRAS_CONTEXT_ERROR_MESSAGE)
+	} catch {
+		// Deliberate best effort: an object that throws on access is simply "not a match".
+		return false
+	}
+}
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index cff8d5aec3..c313de653a 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -88,6 +88,7 @@ import { MultiSearchReplaceDiffStrategy } from "../diff/strategies/multi-search-
 import { MultiFileSearchReplaceDiffStrategy } from "../diff/strategies/multi-file-search-replace"
 import { readApiMessages, saveApiMessages, readTaskMessages, saveTaskMessages, taskMetadata } from "../task-persistence"
 import { getEnvironmentDetails } from "../environment/getEnvironmentDetails"
+import { checkContextWindowExceededError } from "../context/context-management/context-error-handling"
 import {
 	type CheckpointDiffOptions,
 	type CheckpointRestoreOptions,
@@ -105,6 +106,8 @@ import { AutoApprovalHandler } from "./AutoApprovalHandler"
 
 const MAX_EXPONENTIAL_BACKOFF_SECONDS = 600 // 10 minutes
 const DEFAULT_USAGE_COLLECTION_TIMEOUT_MS = 5000 // 5 seconds
+const FORCED_CONTEXT_REDUCTION_PERCENT = 75 // Keep 75% of context (remove 25%) on context window errors
+const MAX_CONTEXT_WINDOW_RETRIES = 3 // Maximum retries for context window errors
 
 export type
TaskOptions = {
 	provider: ClineProvider
@@ -1387,7 +1390,7 @@ export class Task extends EventEmitter implements TaskLike {
 		if (this.bridgeService) {
 			this.bridgeService
 				.unsubscribeFromTask(this.taskId)
-				.catch((error) => console.error("Error unsubscribing from task bridge:", error))
+				.catch((error: unknown) => console.error("Error unsubscribing from task bridge:", error))
 			this.bridgeService = null
 		}
 
@@ -2230,6 +2233,90 @@ export class Task extends EventEmitter implements TaskLike {
 		})()
 	}
 
+	/**
+	 * Looks up the id of the API profile whose name equals `state.currentApiConfigName`.
+	 *
+	 * @param state - Provider state; may be undefined.
+	 * @returns The matching profile's id, or "default" when nothing matches.
+	 */
+	private getCurrentProfileId(state: any): string {
+		return (
+			state?.listApiConfigMeta?.find((profile: any) => profile.name === state?.currentApiConfigName)?.id ??
+			"default"
+		)
+	}
+
+	/**
+	 * Shrinks the stored API conversation after a request was rejected for
+	 * exceeding the context window.
+	 *
+	 * Runs `truncateConversationIfNeeded` with auto-condense enabled and the
+	 * condense percentage set to FORCED_CONTEXT_REDUCTION_PERCENT, overwrites the
+	 * history when a different message array comes back, and emits a
+	 * non-interactive "condense_context" message when a summary was produced.
+	 *
+	 * NOTE(review): whether that percentage means "keep 75% of the history" or
+	 * "condense once usage passes 75% of the window" is decided inside
+	 * `truncateConversationIfNeeded`, which is not visible here - confirm.
+	 */
+	private async handleContextWindowExceededError(): Promise<void> {
+		const state = await this.providerRef.deref()?.getState()
+		const { profileThresholds = {} } = state ?? {}
+
+		const { contextTokens } = this.getTokenUsage()
+		const modelInfo = this.api.getModel().info
+		const maxTokens = getModelMaxOutputTokens({
+			modelId: this.api.getModel().id,
+			model: modelInfo,
+			settings: this.apiConfiguration,
+		})
+		const contextWindow = modelInfo.contextWindow
+
+		// Profile id is forwarded to the truncation call below
+		const currentProfileId = this.getCurrentProfileId(state)
+
+		// Log the context window error for debugging
+		console.warn(
+			`[Task#${this.taskId}] Context window exceeded for model ${this.api.getModel().id}. ` +
+				`Current tokens: ${contextTokens}, Context window: ${contextWindow}. ` +
+				`Forcing truncation to ${FORCED_CONTEXT_REDUCTION_PERCENT}% of current context.`,
+		)
+
+		// Same truncation routine as the regular request path, with condensing forced on (see NOTE above)
+		const truncateResult = await truncateConversationIfNeeded({
+			messages: this.apiConversationHistory,
+			totalTokens: contextTokens || 0,
+			maxTokens,
+			contextWindow,
+			apiHandler: this.api,
+			autoCondenseContext: true,
+			autoCondenseContextPercent: FORCED_CONTEXT_REDUCTION_PERCENT,
+			systemPrompt: await this.getSystemPrompt(),
+			taskId: this.taskId,
+			profileThresholds,
+			currentProfileId,
+		})
+
+		if (truncateResult.messages !== this.apiConversationHistory) {
+			await this.overwriteApiConversationHistory(truncateResult.messages)
+		}
+
+		if (truncateResult.summary) {
+			const { summary, cost, prevContextTokens, newContextTokens = 0 } = truncateResult
+			const contextCondense: ContextCondense = { summary, cost, newContextTokens, prevContextTokens }
+			await this.say(
+				"condense_context",
+				undefined /* text */,
+				undefined /* images */,
+				false /* partial */,
+				undefined /* checkpoint */,
+				undefined /* progressStatus */,
+				{ isNonInteractive: true } /* options */,
+				contextCondense,
+			)
+		}
+	}
+
 	public async *attemptApiRequest(retryAttempt: number = 0): ApiStream {
 		const state = await this.providerRef.deref()?.getState()
 
@@ -2308,9 +2395,8 @@ export class Task extends EventEmitter implements TaskLike {
 
 			const contextWindow = modelInfo.contextWindow
 
-			const currentProfileId =
-				state?.listApiConfigMeta.find((profile) => profile.name === state?.currentApiConfigName)?.id ??
-				"default"
+			// Get the current profile ID using the helper method
+			const currentProfileId = this.getCurrentProfileId(state)
 
 			const truncateResult = await truncateConversationIfNeeded({
 				messages: this.apiConversationHistory,
@@ -2417,6 +2503,25 @@ export class Task extends EventEmitter implements TaskLike {
 			this.isWaitingForFirstChunk = false
 		} catch (error) {
 			this.isWaitingForFirstChunk = false
+			const isContextWindowExceededError = checkContextWindowExceededError(error)
+
+			// Context-window failures get an automatic truncate-and-retry, bounded by
+			// MAX_CONTEXT_WINDOW_RETRIES, before the generic failure handling below runs.
+			// NOTE(review): the bound is checked against `retryAttempt`, this method's only
+			// retry counter; if other retry paths also increment it, they eat into this
+			// budget - confirm that sharing is intended.
+			if (isContextWindowExceededError && retryAttempt < MAX_CONTEXT_WINDOW_RETRIES) {
+				console.warn(
+					`[Task#${this.taskId}] Context window exceeded for model ${this.api.getModel().id}. ` +
+						`Retry attempt ${retryAttempt + 1}/${MAX_CONTEXT_WINDOW_RETRIES}. ` +
+						`Attempting automatic truncation...`,
+				)
+				await this.handleContextWindowExceededError()
+				// Retry the request after handling the context window error
+				yield* this.attemptApiRequest(retryAttempt + 1)
+				return
+			}
+
 			// note that this api_req_failed ask is unique in that we only present this option if the api hasn't streamed any content yet (ie it fails on the first chunk due), as it would allow them to hit a retry button. However if the api failed mid-stream, it could be in any arbitrary state where some tools may have executed, so that error is handled differently and requires cancelling the task entirely.
 			if (autoApprovalEnabled && alwaysApproveResubmit) {
 				let errorMsg