diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index 8d67d3977f0d..5bb5a54ab50a 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -63,6 +63,9 @@ export const modelInfoSchema = z.object({ supportsReasoningBudget: z.boolean().optional(), // Capability flag to indicate whether the model supports temperature parameter supportsTemperature: z.boolean().optional(), + // When true, this model must be invoked using Responses background mode. + // Providers should auto-enable background:true, stream:true, and store:true. + backgroundMode: z.boolean().optional(), requiredReasoningBudget: z.boolean().optional(), supportsReasoningEffort: z.boolean().optional(), supportedParameters: z.array(modelParametersSchema).optional(), diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index 4153db0da4e0..0a4ed2da6c30 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -297,6 +297,15 @@ const openAiNativeSchema = apiModelIdProviderModelSchema.extend({ // OpenAI Responses API service tier for openai-native provider only. // UI should only expose this when the selected model supports flex/priority. openAiNativeServiceTier: serviceTierSchema.optional(), + // Enable OpenAI Responses background mode when using Responses API. + // Opt-in; defaults to false when omitted. + openAiNativeBackgroundMode: z.boolean().optional(), + // Background auto-resume/poll settings (no UI; plumbed via options) + openAiNativeBackgroundAutoResume: z.boolean().optional(), + openAiNativeBackgroundResumeMaxRetries: z.number().int().min(0).optional(), + openAiNativeBackgroundResumeBaseDelayMs: z.number().int().min(0).optional(), + openAiNativeBackgroundPollIntervalMs: z.number().int().min(0).optional(), + openAiNativeBackgroundPollMaxMinutes: z.number().int().min(1).optional(), }) const mistralSchema = apiModelIdProviderModelSchema.extend({ diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index a3eed1b57ce6..8abe00ccf840 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -37,6 +37,21 @@ export const openAiNativeModels = { { name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 }, ], }, + "gpt-5-pro-2025-10-06": { + maxTokens: 128000, + contextWindow: 400000, + supportsImages: true, + supportsPromptCache: false, + supportsReasoningEffort: false, // This is set to false to prevent the ui from displaying the reasoning effort selector + reasoningEffort: "high", // Pro model uses high reasoning effort by default and must be specified + inputPrice: 15.0, + outputPrice: 120.0, + description: + "GPT-5 Pro: A slow, reasoning-focused model for complex problems. Uses background mode with resilient streaming — requests may take some time and will automatically reconnect if they time out.", + supportsVerbosity: true, + supportsTemperature: false, + backgroundMode: true, + }, "gpt-5-mini-2025-08-07": { maxTokens: 128000, contextWindow: 400000, diff --git a/src/api/providers/__tests__/openai-native-usage.spec.ts b/src/api/providers/__tests__/openai-native-usage.spec.ts index 74806b26ab2e..acb41c37f9e6 100644 --- a/src/api/providers/__tests__/openai-native-usage.spec.ts +++ b/src/api/providers/__tests__/openai-native-usage.spec.ts @@ -344,6 +344,38 @@ describe("OpenAiNativeHandler - normalizeUsage", () => { }) }) + it("should produce identical usage chunk when background mode is enabled", () => { + const usage = { + input_tokens: 120, + output_tokens: 60, + cache_creation_input_tokens: 10, + cache_read_input_tokens: 30, + } + + const baselineHandler = new OpenAiNativeHandler({ + openAiNativeApiKey: "test-key", + apiModelId: "gpt-5-pro-2025-10-06", + }) + const backgroundHandler = new OpenAiNativeHandler({ + openAiNativeApiKey: "test-key", + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeBackgroundMode: true, + }) + + const baselineUsage = (baselineHandler as any).normalizeUsage(usage, baselineHandler.getModel()) + const backgroundUsage = (backgroundHandler as any).normalizeUsage(usage, backgroundHandler.getModel()) + + expect(baselineUsage).toMatchObject({ + type: "usage", + inputTokens: 120, + outputTokens: 60, + cacheWriteTokens: 10, + cacheReadTokens: 30, + totalCost: expect.any(Number), + }) + expect(backgroundUsage).toEqual(baselineUsage) + }) + describe("cost calculation", () => { it("should pass total input tokens to calculateApiCostOpenAI", () => { const usage = { diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index 618cdeac659b..f412d5e99f6b 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -3,6 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk" import { OpenAiNativeHandler } from "../openai-native" +import type { ApiHandlerCreateMessageMetadata } from "../../index" import { ApiHandlerOptions } from "../../../shared/api" // Mock OpenAI client - now everything uses Responses API @@ -1734,3 +1735,517 @@ describe("GPT-5 streaming event coverage (additional)", () => { }) }) }) + +describe("OpenAI Native background mode behavior", () => { + const systemPrompt = "System prompt" + const baseMessages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "hi" }] + const createMinimalIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { + type: "response.done", + response: { id: "resp_minimal", usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + }) + const createUsageIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "response.text.delta", delta: "Hello" } + yield { + type: "response.done", + response: { + id: "resp_usage", + usage: { input_tokens: 120, output_tokens: 60 }, + }, + } + }, + }) + + beforeEach(() => { + mockResponsesCreate.mockClear() + }) + + afterEach(() => { + if ((global as any).fetch) { + delete (global as any).fetch + } + }) + + const metadataStoreFalse: ApiHandlerCreateMessageMetadata = { taskId: "background-test", store: false } + + it("auto-enables background mode for gpt-5-pro when no override is specified", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + // openAiNativeBackgroundMode is undefined + }) + + mockResponsesCreate.mockResolvedValueOnce(createMinimalIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages, metadataStoreFalse)) { + chunks.push(chunk) + } + + expect(chunks).not.toHaveLength(0) + const requestBody = mockResponsesCreate.mock.calls[0][0] + expect(requestBody.background).toBe(true) + expect(requestBody.stream).toBe(true) + expect(requestBody.store).toBe(true) + }) + it("sends background:true, stream:true, and forces store:true for gpt-5-pro when background mode is enabled", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + mockResponsesCreate.mockResolvedValueOnce(createMinimalIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages, metadataStoreFalse)) { + chunks.push(chunk) + } + + expect(chunks).not.toHaveLength(0) + + const requestBody = mockResponsesCreate.mock.calls[0][0] + expect(requestBody.background).toBe(true) + expect(requestBody.stream).toBe(true) + expect(requestBody.store).toBe(true) + expect(requestBody.instructions).toBe(systemPrompt) + expect(requestBody.model).toBe("gpt-5-pro-2025-10-06") + expect(Array.isArray(requestBody.input)).toBe(true) + expect(requestBody.input.length).toBeGreaterThan(0) + + mockResponsesCreate.mockClear() + + const handlerWithOptionFalse = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: false, // metadata still enforces background mode + }) + + mockResponsesCreate.mockResolvedValueOnce(createMinimalIterable()) + + for await (const chunk of handlerWithOptionFalse.createMessage( + systemPrompt, + baseMessages, + metadataStoreFalse, + )) { + chunks.push(chunk) + } + + const requestBodyWithOptionFalse = mockResponsesCreate.mock.calls[0][0] + // Still enabled due to model.info.backgroundMode + expect(requestBodyWithOptionFalse.background).toBe(true) + expect(requestBodyWithOptionFalse.store).toBe(true) + expect(requestBodyWithOptionFalse.stream).toBe(true) + }) + + it("forces store:true and includes background:true when falling back to SSE", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + mockResponsesCreate.mockResolvedValueOnce({}) + + const encoder = new TextEncoder() + const sseStream = new ReadableStream({ + start(controller) { + controller.enqueue( + encoder.encode( + 'data: {"type":"response.done","response":{"id":"resp_1","usage":{"input_tokens":1,"output_tokens":1}}}\n\n', + ), + ) + controller.enqueue(encoder.encode("data: [DONE]\n\n")) + controller.close() + }, + }) + + const mockFetch = vitest.fn().mockResolvedValue( + new Response(sseStream, { + status: 200, + headers: { "Content-Type": "text/event-stream" }, + }), + ) + global.fetch = mockFetch as any + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages, metadataStoreFalse)) { + chunks.push(chunk) + } + + expect(mockFetch).toHaveBeenCalledTimes(1) + const requestInit = mockFetch.mock.calls[0][1] as RequestInit + expect(requestInit?.body).toBeDefined() + + const parsedBody = JSON.parse(requestInit?.body as string) + expect(parsedBody.background).toBe(true) + expect(parsedBody.store).toBe(true) + expect(parsedBody.stream).toBe(true) + expect(parsedBody.model).toBe("gpt-5-pro-2025-10-06") + }) + + it("emits identical usage chunk when background mode is enabled", async () => { + const collectUsageChunk = async (options: ApiHandlerOptions) => { + mockResponsesCreate.mockResolvedValueOnce(createUsageIterable()) + const handler = new OpenAiNativeHandler(options) + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages)) { + chunks.push(chunk) + } + const usageChunk = chunks.find((chunk) => chunk.type === "usage") + mockResponsesCreate.mockClear() + return usageChunk + } + + const baselineUsage = await collectUsageChunk({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + }) + + expect(baselineUsage).toBeDefined() + + const backgroundUsage = await collectUsageChunk({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + expect(backgroundUsage).toBeDefined() + expect(backgroundUsage).toEqual(baselineUsage) + }) + + it("emits background status chunks for Responses events (SDK path)", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + const createStatusIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "response.queued", response: { id: "resp_bg" } } + yield { type: "response.in_progress" } + yield { type: "response.text.delta", delta: "Hello" } + yield { + type: "response.done", + response: { id: "resp_bg", usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + }) + mockResponsesCreate.mockResolvedValueOnce(createStatusIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages)) { + chunks.push(chunk) + } + + const statusChunks = chunks.filter((c) => c.type === "status") + expect(statusChunks).toEqual([ + { type: "status", mode: "background", status: "queued", responseId: "resp_bg" }, + { type: "status", mode: "background", status: "in_progress" }, + { type: "status", mode: "background", status: "completed", responseId: "resp_bg" }, + ]) + }) + + it("emits background status chunks for Responses events (SSE fallback)", async () => { + // Force fallback by making SDK return non-iterable + mockResponsesCreate.mockResolvedValueOnce({}) + + const encoder = new TextEncoder() + const sseStream = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode('data: {"type":"response.queued","response":{"id":"resp_bg2"}}\n\n')) + controller.enqueue(encoder.encode('data: {"type":"response.in_progress"}\n\n')) + controller.enqueue(encoder.encode('data: {"type":"response.text.delta","delta":"Hi"}\n\n')) + controller.enqueue( + encoder.encode( + 'data: {"type":"response.done","response":{"id":"resp_bg2","usage":{"input_tokens":1,"output_tokens":1}}}\n\n', + ), + ) + controller.enqueue(encoder.encode("data: [DONE]\n\n")) + controller.close() + }, + }) + + const mockFetch = vitest.fn().mockResolvedValue( + new Response(sseStream, { + status: 200, + headers: { "Content-Type": "text/event-stream" }, + }), + ) + global.fetch = mockFetch as any + + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages)) { + chunks.push(chunk) + } + + const statusChunks = chunks.filter((c) => c.type === "status") + expect(statusChunks).toEqual([ + { type: "status", mode: "background", status: "queued", responseId: "resp_bg2" }, + { type: "status", mode: "background", status: "in_progress" }, + { type: "status", mode: "background", status: "completed", responseId: "resp_bg2" }, + ]) + + // Clean up fetch + delete (global as any).fetch + }) +}) + +describe("OpenAI Native streaming metadata tracking", () => { + beforeEach(() => { + mockResponsesCreate.mockClear() + }) + + it("tracks sequence_number from streaming events and exposes via getLastSequenceNumber", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + }) + + const createSequenceIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "response.text.delta", delta: "A", sequence_number: 1 } + yield { type: "response.reasoning.delta", delta: "B", sequence_number: 2 } + yield { + type: "response.done", + sequence_number: 3, + response: { id: "resp_123", usage: { input_tokens: 1, output_tokens: 2 } }, + } + }, + }) + + mockResponsesCreate.mockResolvedValueOnce(createSequenceIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("System", [{ role: "user", content: "hi" }])) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "text", text: "A" }) + expect(chunks).toContainEqual({ type: "reasoning", text: "B" }) + expect(handler.getLastSequenceNumber()).toBe(3) + expect(handler.getLastResponseId()).toBe("resp_123") + }) +}) + +// Added plumbing test for openAiNativeBackgroundMode +describe("OpenAI Native background mode setting (plumbing)", () => { + it("should surface openAiNativeBackgroundMode in handler options when provided", () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-4.1", + openAiNativeApiKey: "test-api-key", + openAiNativeBackgroundMode: true, + } as ApiHandlerOptions) + + // Access protected options via runtime cast to verify pass-through + expect((handler as any).options.openAiNativeBackgroundMode).toBe(true) + }) +}) + +describe("OpenAI Native background auto-resume and polling", () => { + const systemPrompt = "System prompt" + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "hello" }] + + beforeEach(() => { + mockResponsesCreate.mockClear() + if ((global as any).fetch) { + delete (global as any).fetch + } + }) + + it("resumes background stream on drop and emits no duplicate deltas", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + const dropIterable = { + async *[Symbol.asyncIterator]() { + yield { type: "response.queued", response: { id: "resp_resume" }, sequence_number: 0 } + yield { type: "response.in_progress", sequence_number: 1 } + yield { type: "response.text.delta", delta: "Hello", sequence_number: 2 } + throw new Error("network drop") + }, + } + mockResponsesCreate.mockResolvedValueOnce(dropIterable as any) + + const encoder = new TextEncoder() + const sseStream = new ReadableStream({ + start(controller) { + controller.enqueue( + encoder.encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"SHOULD_SKIP"},"sequence_number":2}\n\n', + ), + ) + controller.enqueue( + encoder.encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":" world"},"sequence_number":3}\n\n', + ), + ) + controller.enqueue( + encoder.encode( + 'data: {"type":"response.done","response":{"id":"resp_resume","usage":{"input_tokens":10,"output_tokens":5}},"sequence_number":4}\n\n', + ), + ) + controller.enqueue(encoder.encode("data: [DONE]\n\n")) + controller.close() + }, + }) + ;(global as any).fetch = vitest + .fn() + .mockResolvedValue( + new Response(sseStream, { status: 200, headers: { "Content-Type": "text/event-stream" } }), + ) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const c of stream) { + chunks.push(c) + } + + const statusChunks = chunks.filter((c) => c.type === "status") + const statusNames = statusChunks.map((s: any) => s.status) + const reconnectIdx = statusNames.indexOf("reconnecting") + const inProgIdx = statusNames.findIndex((s, i) => s === "in_progress" && i > reconnectIdx) + expect(reconnectIdx).toBeGreaterThanOrEqual(0) + expect(inProgIdx).toBeGreaterThan(reconnectIdx) + + const fullText = chunks + .filter((c) => c.type === "text") + .map((c: any) => c.text) + .join("") + expect(fullText).toBe("Hello world") + expect(fullText).not.toContain("SHOULD_SKIP") + + const usageChunks = chunks.filter((c) => c.type === "usage") + expect(usageChunks).toHaveLength(1) + }) + + it("falls back to polling after failed resume and yields final output/usage", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + openAiNativeBackgroundResumeMaxRetries: 1, + openAiNativeBackgroundResumeBaseDelayMs: 0, + openAiNativeBackgroundPollIntervalMs: 1, + openAiNativeBackgroundPollMaxMinutes: 1, + } as ApiHandlerOptions) + + const dropIterable = { + async *[Symbol.asyncIterator]() { + yield { type: "response.queued", response: { id: "resp_poll" }, sequence_number: 0 } + yield { type: "response.in_progress", sequence_number: 1 } + throw new Error("network drop") + }, + } + mockResponsesCreate.mockResolvedValueOnce(dropIterable as any) + + let pollStep = 0 + ;(global as any).fetch = vitest.fn().mockImplementation((url: string) => { + if (url.includes("?stream=true")) { + return Promise.resolve({ + ok: false, + status: 500, + text: async () => "resume failed", + } as any) + } + // polling path + const payloads = [ + { response: { id: "resp_poll", status: "queued" } }, + { response: { id: "resp_poll", status: "in_progress" } }, + { + response: { + id: "resp_poll", + status: "completed", + output: [{ type: "message", content: [{ type: "output_text", text: "Polled result" }] }], + usage: { input_tokens: 7, output_tokens: 3 }, + }, + }, + ] + const payload = payloads[Math.min(pollStep++, payloads.length - 1)] + return Promise.resolve( + new Response(JSON.stringify(payload), { status: 200, headers: { "Content-Type": "application/json" } }), + ) + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const c of stream) { + chunks.push(c) + } + + const statusNames = chunks.filter((c) => c.type === "status").map((s: any) => s.status) + const idxReconnect = statusNames.indexOf("reconnecting") + const idxPolling = statusNames.indexOf("polling") + const idxQueued = statusNames.indexOf("queued") + const idxInProgress = statusNames.indexOf("in_progress") + const idxCompleted = statusNames.indexOf("completed") + expect(idxReconnect).toBeGreaterThanOrEqual(0) + expect(idxPolling).toBeGreaterThan(idxReconnect) + + const idxQueuedAfterPolling = statusNames.findIndex((s, i) => s === "queued" && i > idxPolling) + const idxInProgressAfterQueued = statusNames.findIndex( + (s, i) => s === "in_progress" && i > idxQueuedAfterPolling, + ) + const idxCompletedAfterInProgress = statusNames.findIndex( + (s, i) => s === "completed" && i > idxInProgressAfterQueued, + ) + + expect(idxQueuedAfterPolling).toBeGreaterThan(idxPolling) + expect(idxInProgressAfterQueued).toBeGreaterThan(idxQueuedAfterPolling) + expect(idxCompletedAfterInProgress).toBeGreaterThan(idxInProgressAfterQueued) + + const finalText = chunks + .filter((c) => c.type === "text") + .map((c: any) => c.text) + .join("") + expect(finalText).toBe("Polled result") + + const usageChunks = chunks.filter((c) => c.type === "usage") + expect(usageChunks).toHaveLength(1) + expect(usageChunks[0]).toMatchObject({ type: "usage", inputTokens: 7, outputTokens: 3 }) + }) + + it("does not attempt resume when not in background mode", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-4.1", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: false, + }) + + const dropIterable = { + async *[Symbol.asyncIterator]() { + yield { type: "response.text.delta", delta: "Hi", sequence_number: 1 } + throw new Error("drop") + }, + } + mockResponsesCreate.mockResolvedValueOnce(dropIterable as any) + ;(global as any).fetch = vitest.fn().mockRejectedValue(new Error("SSE fallback failed")) + + const stream = handler.createMessage(systemPrompt, messages) + + const chunks: any[] = [] + await expect(async () => { + for await (const c of stream) { + chunks.push(c) + } + }).rejects.toThrow() + + const statuses = chunks.filter((c) => c.type === "status").map((s: any) => s.status) + expect(statuses).not.toContain("reconnecting") + expect(statuses).not.toContain("polling") + }) +}) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 8a205a06b453..47e10179614e 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -31,6 +31,17 @@ export type OpenAiNativeModel = ReturnType // Constants for model identification const GPT5_MODEL_PREFIX = "gpt-5" +// Marker for terminal background-mode failures so we don't attempt resume/poll fallbacks +function createTerminalBackgroundError(message: string): Error { + const err = new Error(message) + ;(err as any).isTerminalBackgroundError = true + err.name = "TerminalBackgroundError" + return err +} +function isTerminalBackgroundError(err: any): boolean { + return !!(err && (err as any).isTerminalBackgroundError) +} + export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI @@ -39,6 +50,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio private responseIdResolver: ((value: string | undefined) => void) | undefined // Resolved service tier from Responses API (actual tier used by OpenAI) private lastServiceTier: ServiceTier | undefined + private lastSequenceNumber: number | undefined + // Track whether current request is in background mode for status chunk annotation + private currentRequestIsBackground?: boolean + private resumeCutoffSequence?: number + // Per-request tracking to prevent stale resume attempts + private currentRequestResponseId?: string + private currentRequestSequenceNumber?: number // Event types handled by the shared event processor to avoid duplication private readonly coreHandledEventTypes = new Set([ @@ -245,6 +263,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio store?: boolean instructions?: string service_tier?: ServiceTier + background?: boolean } // Validate requested tier against model support; if not supported, omit. @@ -290,6 +309,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio body.text = { verbosity: (verbosity || "medium") as VerbosityLevel } } + // Enable background mode when either explicitly opted in or required by model metadata + if (this.options.openAiNativeBackgroundMode === true || model.info.backgroundMode === true) { + body.background = true + body.stream = true + body.store = true + } + return body } @@ -300,6 +326,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio systemPrompt?: string, messages?: Anthropic.Messages.MessageParam[], ): ApiStream { + // Annotate if this request uses background mode (used for status chunks) + this.currentRequestIsBackground = !!requestBody?.background + // Reset per-request tracking to prevent stale values from previous requests + this.currentRequestResponseId = undefined + this.currentRequestSequenceNumber = undefined + + const canAttemptResume = () => + this.currentRequestIsBackground && + (this.options.openAiNativeBackgroundAutoResume ?? true) && + !!this.currentRequestResponseId && + typeof this.currentRequestSequenceNumber === "number" + try { // Use the official SDK const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable @@ -310,12 +348,35 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ) } - for await (const event of stream) { - for await (const outChunk of this.processEvent(event, model)) { - yield outChunk + try { + for await (const event of stream) { + for await (const outChunk of this.processEvent(event, model)) { + yield outChunk + } + } + } catch (iterErr) { + // If terminal failure, propagate and do not attempt resume/poll + if (isTerminalBackgroundError(iterErr)) { + throw iterErr } + // Stream dropped mid-flight; attempt resume for background requests + if (canAttemptResume()) { + for await (const chunk of this.attemptResumeOrPoll( + this.currentRequestResponseId!, + this.currentRequestSequenceNumber!, + model, + )) { + yield chunk + } + return + } + throw iterErr } } catch (sdkErr: any) { + // Propagate terminal background failures without fallback + if (isTerminalBackgroundError(sdkErr)) { + throw sdkErr + } // Check if this is a 400 error about previous_response_id not found const errorMessage = sdkErr?.message || sdkErr?.error?.message || "" const is400Error = sdkErr?.status === 400 || sdkErr?.response?.status === 400 @@ -323,8 +384,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio errorMessage.includes("Previous response") || errorMessage.includes("not found") if (is400Error && requestBody.previous_response_id && isPreviousResponseError) { - // Log the error and retry without the previous_response_id - // Clear the stored lastResponseId to prevent using it again this.lastResponseId = undefined @@ -346,6 +405,59 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (typeof (retryStream as any)[Symbol.asyncIterator] !== "function") { // If SDK fails, fall back to SSE + try { + yield* this.makeGpt5ResponsesAPIRequest( + retryRequestBody, + model, + metadata, + systemPrompt, + messages, + ) + return + } catch (fallbackErr) { + if (canAttemptResume()) { + for await (const chunk of this.attemptResumeOrPoll( + this.currentRequestResponseId!, + this.currentRequestSequenceNumber!, + model, + )) { + yield chunk + } + return + } + throw fallbackErr + } + } + + try { + for await (const event of retryStream) { + for await (const outChunk of this.processEvent(event, model)) { + yield outChunk + } + } + return + } catch (iterErr) { + if (isTerminalBackgroundError(iterErr)) { + throw iterErr + } + if (canAttemptResume()) { + for await (const chunk of this.attemptResumeOrPoll( + this.currentRequestResponseId!, + this.currentRequestSequenceNumber!, + model, + )) { + yield chunk + } + return + } + throw iterErr + } + } catch (retryErr) { + if (isTerminalBackgroundError(retryErr)) { + throw retryErr + } + // If retry also fails, fall back to SSE + try { yield* this.makeGpt5ResponsesAPIRequest( retryRequestBody, model, @@ -354,23 +466,50 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio messages, ) return - } - - for await (const event of retryStream) { - for await (const outChunk of this.processEvent(event, model)) { - yield outChunk + } catch (fallbackErr) { + if (isTerminalBackgroundError(fallbackErr)) { + throw fallbackErr } + if (canAttemptResume()) { + for await (const chunk of this.attemptResumeOrPoll( + this.currentRequestResponseId!, + this.currentRequestSequenceNumber!, + model, + )) { + yield chunk + } + return + } + throw fallbackErr } - return - } catch (retryErr) { - // If retry also fails, fall back to SSE - yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata, systemPrompt, messages) - return } } // For other errors, fallback to manual SSE via fetch - yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages) + try { + yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages) + } catch (fallbackErr) { + // If SSE fallback fails mid-stream and we can resume, try that + if (isTerminalBackgroundError(fallbackErr)) { + throw fallbackErr + } + if (canAttemptResume()) { + for await (const chunk of this.attemptResumeOrPoll( + this.currentRequestResponseId!, + this.currentRequestSequenceNumber!, + model, + )) { + yield chunk + } + return + } + throw fallbackErr + } + } finally { + // Always clear background flag and per-request tracking at end of request lifecycle + this.currentRequestIsBackground = undefined + this.currentRequestResponseId = undefined + this.currentRequestSequenceNumber = undefined } } @@ -675,9 +814,27 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio try { const parsed = JSON.parse(data) + // Skip stale events when resuming a dropped background stream + if ( + typeof parsed?.sequence_number === "number" && + this.resumeCutoffSequence !== undefined && + parsed.sequence_number <= this.resumeCutoffSequence + ) { + continue + } + + // Record sequence number for cursor tracking + if (typeof parsed?.sequence_number === "number") { + this.lastSequenceNumber = parsed.sequence_number + // Also track for per-request resume capability + this.currentRequestSequenceNumber = parsed.sequence_number + } + // Store response ID for conversation continuity if (parsed.response?.id) { this.resolveResponseId(parsed.response.id) + // Also track for per-request resume capability + this.currentRequestResponseId = parsed.response.id } // Capture resolved service tier if present if (parsed.response?.service_tier) { @@ -944,9 +1101,20 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio else if (parsed.type === "response.error" || parsed.type === "error") { // Error event from the API if (parsed.error || parsed.message) { - throw new Error( - `Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`, - ) + const errMsg = `Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}` + // For background mode, treat as terminal to avoid futile resume attempts + if (this.currentRequestIsBackground) { + // Surface a failed status for UI lifecycle before terminating + yield { + type: "status", + mode: "background", + status: "failed", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } + throw createTerminalBackgroundError(errMsg) + } + // Non-background: propagate as a standard error + throw new Error(errMsg) } } // Handle incomplete event @@ -955,17 +1123,34 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Handle queued event else if (parsed.type === "response.queued") { - // Response is queued + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "queued", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } } // Handle in_progress event else if (parsed.type === "response.in_progress") { - // Response is being processed + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "in_progress", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } } // Handle failed event else if (parsed.type === "response.failed") { + // Emit failed status for UI lifecycle + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "failed", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } // Response failed if (parsed.error || parsed.message) { - throw new Error( + throw createTerminalBackgroundError( `Response failed: ${parsed.error?.message || parsed.message || "Unknown failure"}`, ) } @@ -979,6 +1164,16 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio this.lastServiceTier = parsed.response.service_tier as ServiceTier } + // Emit completed status for UI lifecycle + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "completed", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } + // Clear background marker on completion + this.currentRequestIsBackground = undefined + // Check if the done event contains the complete output (as a fallback) if ( !hasContent && @@ -1086,6 +1281,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // This can happen in certain edge cases and shouldn't break the flow } catch (error) { if (error instanceof Error) { + // Preserve terminal background errors so callers can avoid resume attempts + if ((error as any).isTerminalBackgroundError) { + throw error + } throw new Error(`Error processing response stream: ${error.message}`) } throw new Error("Unexpected error processing response stream") @@ -1094,6 +1293,235 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } } + /** + * Attempt to resume a dropped background stream; if resume fails, fall back to polling. + */ + private async *attemptResumeOrPoll(responseId: string, lastSeq: number, model: OpenAiNativeModel): ApiStream { + // Emit reconnecting status + yield { + type: "status", + mode: "background", + status: "reconnecting", + responseId, + } + + const apiKey = this.options.openAiNativeApiKey ?? "not-provided" + const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com" + const resumeMaxRetries = this.options.openAiNativeBackgroundResumeMaxRetries ?? 3 + const resumeBaseDelayMs = this.options.openAiNativeBackgroundResumeBaseDelayMs ?? 1000 + + // Try streaming resume with exponential backoff + for (let attempt = 0; attempt < resumeMaxRetries; attempt++) { + try { + const resumeUrl = `${baseUrl}/v1/responses/${responseId}?stream=true&starting_after=${lastSeq}` + const res = await fetch(resumeUrl, { + method: "GET", + headers: { + Authorization: `Bearer ${apiKey}`, + Accept: "text/event-stream", + }, + }) + + if (!res.ok) { + throw new Error(`Resume request failed (${res.status})`) + } + if (!res.body) { + throw new Error("Resume request failed (no body)") + } + + this.resumeCutoffSequence = lastSeq + + // Handshake accepted: immediately switch UI from reconnecting -> in_progress + yield { + type: "status", + mode: "background", + status: "in_progress", + responseId, + } + + try { + for await (const chunk of this.handleStreamResponse(res.body, model)) { + // Avoid double-emitting in_progress if the inner handler surfaces it + if (chunk.type === "status" && (chunk as any).status === "in_progress") { + continue + } + yield chunk + } + // Successful resume + this.resumeCutoffSequence = undefined + return + } catch (e) { + // Resume stream failed mid-flight; reset and throw to retry + this.resumeCutoffSequence = undefined + throw e + } + } catch (err: any) { + // If terminal error, don't keep retrying resume; fall back to polling immediately + const delay = resumeBaseDelayMs * Math.pow(2, attempt) + const msg = err instanceof Error ? err.message : String(err) + + if (isTerminalBackgroundError(err)) { + console.error(`[OpenAiNative][resume] terminal background error on attempt ${attempt + 1}: ${msg}`) + break + } + + // Otherwise retry with backoff (transient failure) + console.warn(`[OpenAiNative][resume] attempt ${attempt + 1} failed; retrying in ${delay}ms: ${msg}`) + if (delay > 0) { + await new Promise((r) => setTimeout(r, delay)) + } + } + } + + // Resume failed - begin polling fallback + yield { + type: "status", + mode: "background", + status: "polling", + responseId, + } + + const pollIntervalMs = this.options.openAiNativeBackgroundPollIntervalMs ?? 2000 + const pollMaxMinutes = this.options.openAiNativeBackgroundPollMaxMinutes ?? 20 + const deadline = Date.now() + pollMaxMinutes * 60_000 + + let lastEmittedStatus: "queued" | "in_progress" | "completed" | "failed" | "canceled" | undefined = undefined + + while (Date.now() <= deadline) { + try { + const pollRes = await fetch(`${baseUrl}/v1/responses/${responseId}`, { + method: "GET", + headers: { + Authorization: `Bearer ${apiKey}`, + }, + }) + + if (!pollRes.ok) { + // transient; wait and retry + await new Promise((r) => setTimeout(r, pollIntervalMs)) + continue + } + + let raw: any + try { + raw = await pollRes.json() + } catch { + await new Promise((r) => setTimeout(r, pollIntervalMs)) + continue + } + + const resp = raw?.response ?? raw + const status: string | undefined = resp?.status + const respId: string | undefined = resp?.id ?? responseId + + // Capture resolved service tier if present + if (resp?.service_tier) { + this.lastServiceTier = resp.service_tier as ServiceTier + } + + // Emit status transitions + if ( + status && + (status === "queued" || + status === "in_progress" || + status === "completed" || + status === "failed" || + status === "canceled") + ) { + if (status !== lastEmittedStatus) { + yield { + type: "status", + mode: "background", + status: status as any, + ...(respId ? { responseId: respId } : {}), + } + lastEmittedStatus = status as any + } + } + + if (status === "completed") { + // Synthesize final output + const output = resp?.output ?? raw?.output + if (Array.isArray(output)) { + for (const outputItem of output) { + if (outputItem.type === "text" && Array.isArray(outputItem.content)) { + for (const content of outputItem.content) { + if (content?.type === "text" && typeof content.text === "string") { + yield { type: "text", text: content.text } + } + } + } else if (outputItem.type === "message" && Array.isArray(outputItem.content)) { + for (const content of outputItem.content) { + if ( + (content?.type === "output_text" || content?.type === "text") && + typeof content.text === "string" + ) { + yield { type: "text", text: content.text } + } + } + } else if (outputItem.type === "reasoning" && Array.isArray(outputItem.summary)) { + for (const summary of outputItem.summary) { + if (summary?.type === "summary_text" && typeof summary.text === "string") { + yield { type: "reasoning", text: summary.text } + } + } + } + } + } + + // Synthesize usage + const usage = resp?.usage ?? raw?.usage + const usageData = this.normalizeUsage(usage, model) + if (usageData) { + yield usageData + } + + return + } + + if (status === "failed" || status === "canceled") { + const detail: string | undefined = resp?.error?.message ?? raw?.error?.message + const msg = detail ? `Response ${status}: ${detail}` : `Response ${status}: ${respId || responseId}` + throw createTerminalBackgroundError(msg) + } + } catch (err: any) { + // If we've already emitted a terminal status, propagate to consumer to stop polling. + if (lastEmittedStatus === "failed" || lastEmittedStatus === "canceled") { + throw err + } + + // Classify polling errors and log appropriately + const statusCode = err?.status ?? err?.response?.status + const msg = err instanceof Error ? err.message : String(err) + + // Permanent errors: stop polling + if (statusCode === 401 || statusCode === 403 || statusCode === 404) { + console.error(`[OpenAiNative][poll] permanent error (status ${statusCode}); stopping: ${msg}`) + throw createTerminalBackgroundError(`Polling failed with status ${statusCode}: ${msg}`) + } + + // Rate limit: transient, will retry + if (statusCode === 429) { + console.warn(`[OpenAiNative][poll] rate limited; will retry: ${msg}`) + } else { + // Other transient/network errors + console.warn( + `[OpenAiNative][poll] transient error; will retry${statusCode ? ` (status ${statusCode})` : ""}: ${msg}`, + ) + } + } + + // Stop polling immediately on terminal background statuses + if (lastEmittedStatus === "failed" || lastEmittedStatus === "canceled") { + throw new Error(`Background polling terminated with status=${lastEmittedStatus} for ${responseId}`) + } + + await new Promise((r) => setTimeout(r, pollIntervalMs)) + } + + throw new Error(`Background response polling timed out for ${responseId}`) + } + /** * Shared processor for Responses API events. */ @@ -1101,11 +1529,48 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Persist response id for conversation continuity when available if (event?.response?.id) { this.resolveResponseId(event.response.id) + // Also track for per-request resume capability + this.currentRequestResponseId = event.response.id } // Capture resolved service tier when available if (event?.response?.service_tier) { this.lastServiceTier = event.response.service_tier as ServiceTier } + // Record sequence number for cursor tracking + if (typeof event?.sequence_number === "number") { + this.lastSequenceNumber = event.sequence_number + // Also track for per-request resume capability + this.currentRequestSequenceNumber = event.sequence_number + } + + // Map lifecycle events to status chunks + const statusMap: Record = { + "response.queued": "queued", + "response.in_progress": "in_progress", + "response.completed": "completed", + "response.done": "completed", + "response.failed": "failed", + "response.canceled": "canceled", + } + const mappedStatus = statusMap[event?.type as string] + if (mappedStatus) { + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: mappedStatus, + ...(event?.response?.id ? { responseId: event.response.id } : {}), + } + // Clear background flag for terminal statuses + if (mappedStatus === "completed" || mappedStatus === "failed" || mappedStatus === "canceled") { + this.currentRequestIsBackground = undefined + } + // Throw terminal error to integrate with standard failure path (surfaced in UI) + if (mappedStatus === "failed" || mappedStatus === "canceled") { + const msg = (event as any)?.error?.message || (event as any)?.message || `Response ${mappedStatus}` + throw createTerminalBackgroundError(msg) + } + // Do not return; allow further handling (e.g., usage on done/completed) + } // Handle known streaming text deltas if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") { @@ -1259,6 +1724,14 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio return this.lastResponseId } + /** + * Gets the last sequence number observed from streaming events. + * @returns The sequence number, or undefined if not available yet + */ + getLastSequenceNumber(): number | undefined { + return this.lastSequenceNumber + } + /** * Sets the last response ID for conversation continuity. * Typically only used in tests or special flows. diff --git a/src/api/transform/stream.ts b/src/api/transform/stream.ts index 8484e6259580..8db360b389e4 100644 --- a/src/api/transform/stream.ts +++ b/src/api/transform/stream.ts @@ -5,6 +5,7 @@ export type ApiStreamChunk = | ApiStreamUsageChunk | ApiStreamReasoningChunk | ApiStreamGroundingChunk + | ApiStreamStatusChunk | ApiStreamError export interface ApiStreamError { @@ -43,3 +44,10 @@ export interface GroundingSource { url: string snippet?: string } + +export interface ApiStreamStatusChunk { + type: "status" + mode?: "background" + status: "queued" | "in_progress" | "completed" | "failed" | "canceled" | "reconnecting" | "polling" + responseId?: string +} diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 74cbd2a11005..ea6ace06ae23 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -1914,7 +1914,6 @@ export class Task extends EventEmitter implements TaskLike { // lastMessage.ts = Date.now() DO NOT update ts since it is used as a key for virtuoso list lastMessage.partial = false // instead of streaming partialMessage events, we do a save and post like normal to persist to disk - console.log("updating partial message", lastMessage) } // Update `api_req_started` to have cancelled and cost, so that @@ -1956,10 +1955,10 @@ export class Task extends EventEmitter implements TaskLike { let item = await iterator.next() while (!item.done) { const chunk = item.value - item = await iterator.next() if (!chunk) { // Sometimes chunk is undefined, no idea that can cause // it, but this workaround seems to fix it. + item = await iterator.next() continue } @@ -1994,6 +1993,24 @@ export class Task extends EventEmitter implements TaskLike { pendingGroundingSources.push(...chunk.sources) } break + case "status": { + try { + const apiReqMsg = this.clineMessages[lastApiReqIndex] + if (apiReqMsg && apiReqMsg.type === "say" && apiReqMsg.say === "api_req_started") { + ;(apiReqMsg as any).metadata = (apiReqMsg as any).metadata || {} + if (chunk.mode === "background") { + ;(apiReqMsg as any).metadata.background = true + } + ;(apiReqMsg as any).metadata.backgroundStatus = chunk.status + if (chunk.responseId) { + ;(apiReqMsg as any).metadata.responseId = chunk.responseId + } + // Update the specific message; avoid full-state refresh on every status chunk to reduce re-renders + await this.updateClineMessage(apiReqMsg) + } + } catch {} + break + } case "text": { assistantMessage += chunk.text @@ -2043,6 +2060,10 @@ export class Task extends EventEmitter implements TaskLike { "\n\n[Response interrupted by a tool use result. Only one tool may be used at a time and should be placed at the end of the message.]" break } + // Prefetch the next item after processing the current chunk. + // This ensures terminal status chunks (e.g., failed/canceled/completed) + // are not skipped when the provider throws on the following next(). + item = await iterator.next() } // Create a copy of current token values to avoid race conditions @@ -2367,12 +2388,31 @@ export class Task extends EventEmitter implements TaskLike { continue } else { // If there's no assistant_responses, that means we got no text - // or tool_use content blocks from API which we should assume is - // an error. - await this.say( - "error", - "Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output.", - ) + // or tool_use content blocks from API which we should assume is an error. + // Prefer any streaming failure details captured on the last api_req_started message. + let errorText = + "Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output." + try { + const lastApiReqStartedIdx = findLastIndex( + this.clineMessages, + (m) => m.type === "say" && m.say === "api_req_started", + ) + if (lastApiReqStartedIdx !== -1) { + const info = JSON.parse( + this.clineMessages[lastApiReqStartedIdx].text || "{}", + ) as ClineApiReqInfo + if ( + typeof info?.streamingFailedMessage === "string" && + info.streamingFailedMessage.trim().length > 0 + ) { + errorText = info.streamingFailedMessage + } + } + } catch { + // ignore parse issues and keep default message + } + + await this.say("error", errorText) await this.addToApiConversationHistory({ role: "assistant", diff --git a/src/shared/api.ts b/src/shared/api.ts index 8b18e7f50d8a..0efa467c5b1b 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -24,6 +24,20 @@ export type ApiHandlerOptions = Omit & { * When undefined, Ollama will use the model's default num_ctx from the Modelfile. */ ollamaNumCtx?: number + /** + * Opt-in for OpenAI Responses background mode when using apiProvider=openai-native. + * Defaults to false when omitted. + */ + openAiNativeBackgroundMode?: boolean + /** + * Auto-resume/poll configuration for OpenAI Responses background mode. + * These are plumbed-only (no UI). Defaults are resolved in the handler. + */ + openAiNativeBackgroundAutoResume?: boolean + openAiNativeBackgroundResumeMaxRetries?: number + openAiNativeBackgroundResumeBaseDelayMs?: number + openAiNativeBackgroundPollIntervalMs?: number + openAiNativeBackgroundPollMaxMinutes?: number } // RouterName diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index ed5257528fe1..77e3dc33c173 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -16,6 +16,7 @@ import { findMatchingResourceOrTemplate } from "@src/utils/mcp" import { vscode } from "@src/utils/vscode" import { removeLeadingNonAlphanumeric } from "@src/utils/removeLeadingNonAlphanumeric" import { getLanguageFromPath } from "@src/utils/getLanguageFromPath" +import { labelForBackgroundStatus } from "@src/utils/backgroundStatus" import { ToolUseBlock, ToolUseBlockHeader } from "../common/ToolUseBlock" import UpdateTodoListToolBlock from "./UpdateTodoListToolBlock" @@ -280,6 +281,21 @@ export const ChatRowContent = ({ /> ) + // Background mode UI label/icon handling + const meta: any = message.metadata + const isBackground = meta?.background === true + const bgStatus = meta?.backgroundStatus as + | "queued" + | "in_progress" + | "reconnecting" + | "polling" + | "completed" + | "failed" + | "canceled" + | undefined + const bgDone = + isBackground && (bgStatus === "completed" || bgStatus === "failed" || bgStatus === "canceled") + const label = isBackground ? labelForBackgroundStatus(bgStatus) : undefined return [ apiReqCancelReason !== null && apiReqCancelReason !== undefined ? ( apiReqCancelReason === "user_cancelled" ? ( @@ -287,6 +303,12 @@ export const ChatRowContent = ({ ) : ( getIconSpan("error", errorColor) ) + ) : bgDone ? ( + bgStatus === "completed" ? ( + getIconSpan("arrow-swap", normalColor) + ) : ( + getIconSpan("error", bgStatus === "canceled" ? cancelledColor : errorColor) + ) ) : cost !== null && cost !== undefined ? ( getIconSpan("arrow-swap", normalColor) ) : apiRequestFailedMessage ? ( @@ -295,7 +317,9 @@ export const ChatRowContent = ({ ), apiReqCancelReason !== null && apiReqCancelReason !== undefined ? ( - apiReqCancelReason === "user_cancelled" ? ( + isBackground && label ? ( + {label} + ) : apiReqCancelReason === "user_cancelled" ? ( {t("chat:apiRequest.cancelled")} @@ -304,6 +328,8 @@ export const ChatRowContent = ({ {t("chat:apiRequest.streamingFailed")} ) + ) : label ? ( + {label} ) : cost !== null && cost !== undefined ? ( {t("chat:apiRequest.title")} ) : apiRequestFailedMessage ? ( @@ -1030,8 +1056,14 @@ export const ChatRowContent = ({ ) case "api_req_started": // Determine if the API request is in progress + const bgMeta: any = message.metadata + const bgStatus = bgMeta?.background === true ? bgMeta?.backgroundStatus : undefined + const bgDone = bgStatus === "completed" || bgStatus === "failed" || bgStatus === "canceled" const isApiRequestInProgress = - apiReqCancelReason === undefined && apiRequestFailedMessage === undefined && cost === undefined + apiReqCancelReason === undefined && + apiRequestFailedMessage === undefined && + cost === undefined && + !bgDone return ( <> diff --git a/webview-ui/src/components/chat/ChatView.tsx b/webview-ui/src/components/chat/ChatView.tsx index b454c97bef96..332f1c2ffd92 100644 --- a/webview-ui/src/components/chat/ChatView.tsx +++ b/webview-ui/src/components/chat/ChatView.tsx @@ -531,27 +531,49 @@ const ChatViewComponent: React.ForwardRefRenderFunction message.say === "api_req_started", + ) + + // Extract background terminal state and cancel reason/cost if present + let bgDone = false + let cancelReason: string | null | undefined = undefined + let cost: any = undefined + + if (lastApiReqStarted && lastApiReqStarted.say === "api_req_started") { + const meta: any = (lastApiReqStarted as any).metadata + const bgStatus = meta?.background === true ? meta?.backgroundStatus : undefined + bgDone = bgStatus === "completed" || bgStatus === "failed" || bgStatus === "canceled" + + try { + if (lastApiReqStarted.text !== null && lastApiReqStarted.text !== undefined) { + const info = JSON.parse(lastApiReqStarted.text) + cost = info?.cost + cancelReason = info?.cancelReason + } + } catch { + // ignore malformed json + } + } + + // If background reached a terminal state or the provider recorded a cancel reason, + // treat UI as not streaming regardless of partial flags or missing cost. + if (bgDone || cancelReason != null) { + return false + } + // Partial assistant content means streaming unless overridden by the terminal checks above. + const isLastMessagePartial = modifiedMessages.at(-1)?.partial === true if (isLastMessagePartial) { return true - } else { - const lastApiReqStarted = findLast( - modifiedMessages, - (message: ClineMessage) => message.say === "api_req_started", - ) - - if ( - lastApiReqStarted && - lastApiReqStarted.text !== null && - lastApiReqStarted.text !== undefined && - lastApiReqStarted.say === "api_req_started" - ) { - const cost = JSON.parse(lastApiReqStarted.text).cost + } - if (cost === undefined) { - return true // API request has not finished yet. - } + // Otherwise, if the API request hasn't finished (no cost yet), consider it streaming. + if (lastApiReqStarted && lastApiReqStarted.say === "api_req_started") { + if (cost === undefined) { + return true } } diff --git a/webview-ui/src/utils/__tests__/backgroundStatus.spec.ts b/webview-ui/src/utils/__tests__/backgroundStatus.spec.ts new file mode 100644 index 000000000000..aac4c73b3e97 --- /dev/null +++ b/webview-ui/src/utils/__tests__/backgroundStatus.spec.ts @@ -0,0 +1,35 @@ +import { labelForBackgroundStatus } from "@src/utils/backgroundStatus" + +describe("labelForBackgroundStatus()", () => { + it("maps queued", () => { + expect(labelForBackgroundStatus("queued")).toBe("API Request: background mode (queued)…") + }) + + it("maps in_progress", () => { + expect(labelForBackgroundStatus("in_progress")).toBe("API Request: background mode (in progress)…") + }) + + it("maps reconnecting", () => { + expect(labelForBackgroundStatus("reconnecting")).toBe("API Request: background mode (reconnecting…)") + }) + + it("maps polling", () => { + expect(labelForBackgroundStatus("polling")).toBe("API Request: background mode (polling…)") + }) + + it("maps completed", () => { + expect(labelForBackgroundStatus("completed")).toBe("API Request: background mode (completed)") + }) + + it("maps failed", () => { + expect(labelForBackgroundStatus("failed")).toBe("API Request: background mode (failed)") + }) + + it("maps canceled", () => { + expect(labelForBackgroundStatus("canceled")).toBe("API Request: background mode (canceled)") + }) + + it("maps undefined to generic label", () => { + expect(labelForBackgroundStatus(undefined)).toBe("API Request: background mode") + }) +}) diff --git a/webview-ui/src/utils/backgroundStatus.ts b/webview-ui/src/utils/backgroundStatus.ts new file mode 100644 index 000000000000..ad56c2d6e2a0 --- /dev/null +++ b/webview-ui/src/utils/backgroundStatus.ts @@ -0,0 +1,29 @@ +export type BackgroundStatus = + | "queued" + | "in_progress" + | "completed" + | "failed" + | "canceled" + | "reconnecting" + | "polling" + +export function labelForBackgroundStatus(s?: BackgroundStatus): string { + switch (s) { + case "queued": + return "API Request: background mode (queued)…" + case "in_progress": + return "API Request: background mode (in progress)…" + case "reconnecting": + return "API Request: background mode (reconnecting…)" + case "polling": + return "API Request: background mode (polling…)" + case "completed": + return "API Request: background mode (completed)" + case "failed": + return "API Request: background mode (failed)" + case "canceled": + return "API Request: background mode (canceled)" + default: + return "API Request: background mode" + } +}