diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index 90b61ad879e..f3095b2869e 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -47,6 +47,8 @@ export const modelInfoSchema = z.object({ // Capability flag to indicate whether the model supports an output verbosity parameter supportsVerbosity: z.boolean().optional(), supportsReasoningBudget: z.boolean().optional(), + // Capability flag to indicate whether the model supports temperature parameter + supportsTemperature: z.boolean().optional(), requiredReasoningBudget: z.boolean().optional(), supportsReasoningEffort: z.boolean().optional(), supportedParameters: z.array(modelParametersSchema).optional(), diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index ff798249848..bdc383cf2b7 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -19,6 +19,7 @@ export const openAiNativeModels = { description: "GPT-5: The best model for coding and agentic tasks across domains", // supportsVerbosity is a new capability; ensure ModelInfo includes it supportsVerbosity: true, + supportsTemperature: false, }, "gpt-5-mini-2025-08-07": { maxTokens: 128000, @@ -32,6 +33,7 @@ export const openAiNativeModels = { cacheReadsPrice: 0.03, description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks", supportsVerbosity: true, + supportsTemperature: false, }, "gpt-5-nano-2025-08-07": { maxTokens: 128000, @@ -45,6 +47,7 @@ export const openAiNativeModels = { cacheReadsPrice: 0.01, description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5", supportsVerbosity: true, + supportsTemperature: false, }, "gpt-4.1": { maxTokens: 32_768, @@ -54,6 +57,7 @@ export const openAiNativeModels = { inputPrice: 2, outputPrice: 8, cacheReadsPrice: 0.5, + supportsTemperature: true, }, "gpt-4.1-mini": { maxTokens: 32_768, @@ -63,6 +67,7 @@ export const openAiNativeModels = { inputPrice: 0.4, outputPrice: 1.6, cacheReadsPrice: 0.1, + supportsTemperature: true, }, "gpt-4.1-nano": { maxTokens: 32_768, @@ -72,6 +77,7 @@ export const openAiNativeModels = { inputPrice: 0.1, outputPrice: 0.4, cacheReadsPrice: 0.025, + supportsTemperature: true, }, o3: { maxTokens: 100_000, @@ -83,6 +89,7 @@ export const openAiNativeModels = { cacheReadsPrice: 0.5, supportsReasoningEffort: true, reasoningEffort: "medium", + supportsTemperature: false, }, "o3-high": { maxTokens: 100_000, @@ -93,6 +100,7 @@ export const openAiNativeModels = { outputPrice: 8.0, cacheReadsPrice: 0.5, reasoningEffort: "high", + supportsTemperature: false, }, "o3-low": { maxTokens: 100_000, @@ -103,6 +111,7 @@ export const openAiNativeModels = { outputPrice: 8.0, cacheReadsPrice: 0.5, reasoningEffort: "low", + supportsTemperature: false, }, "o4-mini": { maxTokens: 100_000, @@ -114,6 +123,7 @@ export const openAiNativeModels = { cacheReadsPrice: 0.275, supportsReasoningEffort: true, reasoningEffort: "medium", + supportsTemperature: false, }, "o4-mini-high": { maxTokens: 100_000, @@ -124,6 +134,7 @@ export const openAiNativeModels = { outputPrice: 4.4, cacheReadsPrice: 0.275, reasoningEffort: "high", + supportsTemperature: false, }, "o4-mini-low": { maxTokens: 100_000, @@ -134,6 +145,7 @@ export const openAiNativeModels = { outputPrice: 4.4, cacheReadsPrice: 0.275, reasoningEffort: "low", + supportsTemperature: false, }, "o3-mini": { maxTokens: 100_000, @@ -145,6 +157,7 @@ export const openAiNativeModels = { cacheReadsPrice: 0.55, supportsReasoningEffort: 
true, reasoningEffort: "medium", + supportsTemperature: false, }, "o3-mini-high": { maxTokens: 100_000, @@ -155,6 +168,7 @@ export const openAiNativeModels = { outputPrice: 4.4, cacheReadsPrice: 0.55, reasoningEffort: "high", + supportsTemperature: false, }, "o3-mini-low": { maxTokens: 100_000, @@ -165,6 +179,7 @@ export const openAiNativeModels = { outputPrice: 4.4, cacheReadsPrice: 0.55, reasoningEffort: "low", + supportsTemperature: false, }, o1: { maxTokens: 100_000, @@ -174,6 +189,7 @@ export const openAiNativeModels = { inputPrice: 15, outputPrice: 60, cacheReadsPrice: 7.5, + supportsTemperature: false, }, "o1-preview": { maxTokens: 32_768, @@ -183,6 +199,7 @@ export const openAiNativeModels = { inputPrice: 15, outputPrice: 60, cacheReadsPrice: 7.5, + supportsTemperature: false, }, "o1-mini": { maxTokens: 65_536, @@ -192,6 +209,7 @@ export const openAiNativeModels = { inputPrice: 1.1, outputPrice: 4.4, cacheReadsPrice: 0.55, + supportsTemperature: false, }, "gpt-4o": { maxTokens: 16_384, @@ -201,6 +219,7 @@ export const openAiNativeModels = { inputPrice: 2.5, outputPrice: 10, cacheReadsPrice: 1.25, + supportsTemperature: true, }, "gpt-4o-mini": { maxTokens: 16_384, @@ -210,6 +229,7 @@ export const openAiNativeModels = { inputPrice: 0.15, outputPrice: 0.6, cacheReadsPrice: 0.075, + supportsTemperature: true, }, "codex-mini-latest": { maxTokens: 16_384, @@ -219,6 +239,7 @@ export const openAiNativeModels = { inputPrice: 1.5, outputPrice: 6, cacheReadsPrice: 0, + supportsTemperature: false, description: "Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks. Trained with reinforcement learning to generate human-style code, adhere to instructions, and iteratively run tests.", }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index cc7fd4a06ee..3502320789d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -656,8 +656,8 @@ importers: specifier: ^12.0.0 version: 12.0.0 openai: - specifier: ^5.0.0 - version: 5.5.1(ws@8.18.3)(zod@3.25.61) + specifier: ^5.12.2 + version: 5.12.2(ws@8.18.3)(zod@3.25.61) os-name: specifier: ^6.0.0 version: 6.1.0 @@ -7621,8 +7621,8 @@ packages: resolution: {integrity: sha512-cxN6aIDPz6rm8hbebcP7vrQNhvRcveZoJU72Y7vskh4oIm+BZwBECnx5nTmrlres1Qapvx27Qo1Auukpf8PKXw==} engines: {node: '>=18'} - openai@5.5.1: - resolution: {integrity: sha512-5i19097mGotHA1eFsM6Tjd/tJ8uo9sa5Ysv4Q6bKJ2vtN6rc0MzMrUefXnLXYAJcmMQrC1Efhj0AvfIkXrQamw==} + openai@5.12.2: + resolution: {integrity: sha512-xqzHHQch5Tws5PcKR2xsZGX9xtch+JQFz5zb14dGqlshmmDAFBFEWmeIpf7wVqWV+w7Emj7jRgkNJakyKE0tYQ==} hasBin: true peerDependencies: ws: ^8.18.0 @@ -17631,7 +17631,7 @@ snapshots: is-inside-container: 1.0.0 is-wsl: 3.1.0 - openai@5.5.1(ws@8.18.3)(zod@3.25.61): + openai@5.12.2(ws@8.18.3)(zod@3.25.61): optionalDependencies: ws: 8.18.3 zod: 3.25.61 diff --git a/src/api/index.ts b/src/api/index.ts index c29c230b063..f8df58c768d 100644 --- a/src/api/index.ts +++ b/src/api/index.ts @@ -52,6 +52,14 @@ export interface ApiHandlerCreateMessageMetadata { * Used to enforce "skip once" after a condense operation. */ suppressPreviousResponseId?: boolean + /** + * Controls whether the response should be stored for 30 days in OpenAI's Responses API. + * When true (default), responses are stored and can be referenced in future requests + * using the previous_response_id for efficient conversation continuity. + * Set to false to opt out of response storage for privacy or compliance reasons. 
+ * @default true + */ + store?: boolean } export interface ApiHandler { diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index 0acdb6202e3..e9e54049303 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -5,62 +5,15 @@ import { Anthropic } from "@anthropic-ai/sdk" import { OpenAiNativeHandler } from "../openai-native" import { ApiHandlerOptions } from "../../../shared/api" -// Mock OpenAI client -const mockCreate = vitest.fn() +// Mock OpenAI client - now everything uses Responses API +const mockResponsesCreate = vitest.fn() vitest.mock("openai", () => { return { __esModule: true, default: vitest.fn().mockImplementation(() => ({ - chat: { - completions: { - create: mockCreate.mockImplementation(async (options) => { - if (!options.stream) { - return { - id: "test-completion", - choices: [ - { - message: { role: "assistant", content: "Test response" }, - finish_reason: "stop", - index: 0, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - } - } - - return { - [Symbol.asyncIterator]: async function* () { - yield { - choices: [ - { - delta: { content: "Test response" }, - index: 0, - }, - ], - usage: null, - } - yield { - choices: [ - { - delta: {}, - index: 0, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - } - }, - } - }), - }, + responses: { + create: mockResponsesCreate, }, })), } @@ -83,7 +36,18 @@ describe("OpenAiNativeHandler", () => { openAiNativeApiKey: "test-api-key", } handler = new OpenAiNativeHandler(mockOptions) - mockCreate.mockClear() + mockResponsesCreate.mockClear() + // Clear fetch mock if it exists + if ((global as any).fetch) { + delete (global as any).fetch + } + }) + + afterEach(() => { + // Clean up fetch mock + if ((global as any).fetch) { + delete (global as any).fetch + } }) describe("constructor", () => { @@ -102,7 +66,33 @@ describe("OpenAiNativeHandler", () => { }) describe("createMessage", () => { - it("should handle streaming responses", async () => { + it("should handle streaming responses via Responses API", async () => { + // Mock fetch for Responses API fallback + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode('data: {"type":"response.text.delta","delta":"Test"}\n\n'), + ) + controller.enqueue( + new TextEncoder().encode('data: {"type":"response.text.delta","delta":" response"}\n\n'), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.done","response":{"usage":{"prompt_tokens":10,"completion_tokens":2}}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + // Mock SDK to fail so it falls back to fetch + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + const stream = handler.createMessage(systemPrompt, messages) const chunks: any[] = [] for await (const chunk of stream) { @@ -111,505 +101,38 @@ describe("OpenAiNativeHandler", () => { expect(chunks.length).toBeGreaterThan(0) const textChunks = chunks.filter((chunk) => chunk.type === "text") - expect(textChunks).toHaveLength(1) - expect(textChunks[0].text).toBe("Test response") + expect(textChunks).toHaveLength(2) + expect(textChunks[0].text).toBe("Test") + expect(textChunks[1].text).toBe(" response") }) it("should handle 
API errors", async () => { - mockCreate.mockRejectedValueOnce(new Error("API Error")) + // Mock fetch to return error + const mockFetch = vitest.fn().mockResolvedValue({ + ok: false, + status: 500, + text: async () => "Internal Server Error", + }) + global.fetch = mockFetch as any + + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + const stream = handler.createMessage(systemPrompt, messages) await expect(async () => { for await (const _chunk of stream) { // Should not reach here } - }).rejects.toThrow("API Error") - }) - - it("should handle missing content in response for o1 model", async () => { - // Use o1 model which supports developer role - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "o1", - }) - - mockCreate.mockResolvedValueOnce({ - [Symbol.asyncIterator]: async function* () { - yield { - choices: [ - { - delta: { content: null }, - index: 0, - }, - ], - usage: { - prompt_tokens: 0, - completion_tokens: 0, - total_tokens: 0, - }, - } - }, - }) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Verify essential fields directly - expect(results.length).toBe(1) - expect(results[0].type).toBe("usage") - // Use type assertion to avoid TypeScript errors - const usageResult = results[0] as any - expect(usageResult.inputTokens).toBe(0) - expect(usageResult.outputTokens).toBe(0) - // When no cache tokens are present, they should be undefined - expect(usageResult.cacheWriteTokens).toBeUndefined() - expect(usageResult.cacheReadTokens).toBeUndefined() - - // Verify developer role is used for system prompt with o1 model - expect(mockCreate).toHaveBeenCalledWith({ - model: "o1", - messages: [ - { role: "developer", content: "Formatting re-enabled\n" + systemPrompt }, - { role: "user", content: "Hello!" }, - ], - stream: true, - stream_options: { include_usage: true }, - }) - }) - - it("should handle o3-mini model family correctly", async () => { - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "o3-mini", - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - expect(mockCreate).toHaveBeenCalledWith({ - model: "o3-mini", - messages: [ - { role: "developer", content: "Formatting re-enabled\n" + systemPrompt }, - { role: "user", content: "Hello!" }, - ], - stream: true, - stream_options: { include_usage: true }, - reasoning_effort: "medium", - }) - }) - }) - - describe("streaming models", () => { - beforeEach(() => { - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-4.1", - }) - }) - - it("should handle streaming response", async () => { - const mockStream = [ - { choices: [{ delta: { content: "Hello" } }], usage: null }, - { choices: [{ delta: { content: " there" } }], usage: null }, - { choices: [{ delta: { content: "!" 
} }], usage: { prompt_tokens: 10, completion_tokens: 5 } }, - ] - - mockCreate.mockResolvedValueOnce( - (async function* () { - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Verify text responses individually - expect(results.length).toBe(4) - expect(results[0]).toMatchObject({ type: "text", text: "Hello" }) - expect(results[1]).toMatchObject({ type: "text", text: " there" }) - expect(results[2]).toMatchObject({ type: "text", text: "!" }) - - // Check usage data fields but use toBeCloseTo for floating point comparison - expect(results[3].type).toBe("usage") - // Use type assertion to avoid TypeScript errors - expect((results[3] as any).inputTokens).toBe(10) - expect((results[3] as any).outputTokens).toBe(5) - expect((results[3] as any).totalCost).toBeCloseTo(0.00006, 6) - - expect(mockCreate).toHaveBeenCalledWith({ - model: "gpt-4.1", - temperature: 0, - messages: [ - { role: "system", content: systemPrompt }, - { role: "user", content: "Hello!" }, - ], - stream: true, - stream_options: { include_usage: true }, - }) - }) - - it("should not include verbosity parameter for models that don't support it", async () => { - // Test with gpt-4.1 which does NOT support verbosity - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-4.1", - verbosity: "high", // Set verbosity but it should be ignored - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify that verbosity is NOT included in the request - const callArgs = mockCreate.mock.calls[0][0] - expect(callArgs).not.toHaveProperty("verbosity") - expect(callArgs.model).toBe("gpt-4.1") - expect(callArgs.temperature).toBe(0) - expect(callArgs.stream).toBe(true) - }) - - it("should not include verbosity for gpt-4o models", async () => { - // Test with gpt-4o which does NOT support verbosity - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-4o", - verbosity: "medium", // Set verbosity but it should be ignored - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify that verbosity is NOT included in the request - const callArgs = mockCreate.mock.calls[0][0] - expect(callArgs).not.toHaveProperty("verbosity") - expect(callArgs.model).toBe("gpt-4o") - }) - - it("should not include verbosity for gpt-4.1-mini models", async () => { - // Test with gpt-4.1-mini which does NOT support verbosity - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-4.1-mini", - verbosity: "low", // Set verbosity but it should be ignored - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify that verbosity is NOT included in the request - const callArgs = mockCreate.mock.calls[0][0] - expect(callArgs).not.toHaveProperty("verbosity") - expect(callArgs.model).toBe("gpt-4.1-mini") - }) - - it("should handle empty delta content", async () => { - const mockStream = [ - { choices: [{ delta: {} }], usage: null }, - { choices: [{ delta: { content: null } }], usage: null }, - { choices: [{ delta: { content: "Hello" } }], usage: { prompt_tokens: 10, completion_tokens: 5 } }, - ] - - 
mockCreate.mockResolvedValueOnce( - (async function* () { - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Verify responses individually - expect(results.length).toBe(2) - expect(results[0]).toMatchObject({ type: "text", text: "Hello" }) - - // Check usage data fields but use toBeCloseTo for floating point comparison - expect(results[1].type).toBe("usage") - // Use type assertion to avoid TypeScript errors - expect((results[1] as any).inputTokens).toBe(10) - expect((results[1] as any).outputTokens).toBe(5) - expect((results[1] as any).totalCost).toBeCloseTo(0.00006, 6) - }) - - it("should handle cache tokens in streaming response", async () => { - const mockStream = [ - { choices: [{ delta: { content: "Hello" } }], usage: null }, - { choices: [{ delta: { content: " cached" } }], usage: null }, - { - choices: [{ delta: { content: " response" } }], - usage: { - prompt_tokens: 100, - completion_tokens: 10, - prompt_tokens_details: { - cached_tokens: 80, - audio_tokens: 0, - }, - completion_tokens_details: { - reasoning_tokens: 0, - audio_tokens: 0, - accepted_prediction_tokens: 0, - rejected_prediction_tokens: 0, - }, - }, - }, - ] - - mockCreate.mockResolvedValueOnce( - (async function* () { - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Verify text responses - expect(results.length).toBe(4) - expect(results[0]).toMatchObject({ type: "text", text: "Hello" }) - expect(results[1]).toMatchObject({ type: "text", text: " cached" }) - expect(results[2]).toMatchObject({ type: "text", text: " response" }) - - // Check usage data includes cache tokens - expect(results[3].type).toBe("usage") - const usageChunk = results[3] as any - expect(usageChunk.inputTokens).toBe(100) // Total input tokens (includes cached) - expect(usageChunk.outputTokens).toBe(10) - expect(usageChunk.cacheReadTokens).toBe(80) // Cached tokens from prompt_tokens_details - expect(usageChunk.cacheWriteTokens).toBeUndefined() // No cache write tokens in standard response - - // Verify cost calculation takes cache into account - // GPT-4.1 pricing: input $2/1M, output $8/1M, cache read $0.5/1M - // OpenAI's prompt_tokens includes cached tokens, so we need to calculate: - // - Non-cached input tokens: 100 - 80 = 20 - // - Cost for non-cached input: (20 / 1_000_000) * 2.0 - // - Cost for cached input: (80 / 1_000_000) * 0.5 - // - Cost for output: (10 / 1_000_000) * 8.0 - const nonCachedInputTokens = 100 - 80 - const expectedNonCachedInputCost = (nonCachedInputTokens / 1_000_000) * 2.0 - const expectedCacheReadCost = (80 / 1_000_000) * 0.5 - const expectedOutputCost = (10 / 1_000_000) * 8.0 - const expectedTotalCost = expectedNonCachedInputCost + expectedCacheReadCost + expectedOutputCost - expect(usageChunk.totalCost).toBeCloseTo(expectedTotalCost, 10) - }) - - it("should handle cache write tokens if present", async () => { - const mockStream = [ - { choices: [{ delta: { content: "Test" } }], usage: null }, - { - choices: [{ delta: {} }], - usage: { - prompt_tokens: 150, - completion_tokens: 5, - prompt_tokens_details: { - cached_tokens: 50, - }, - cache_creation_input_tokens: 30, // Cache write tokens - }, - }, - ] - - mockCreate.mockResolvedValueOnce( - (async function* () 
{ - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Check usage data includes both cache read and write tokens - const usageChunk = results.find((r) => r.type === "usage") as any - expect(usageChunk).toBeDefined() - expect(usageChunk.inputTokens).toBe(150) - expect(usageChunk.outputTokens).toBe(5) - expect(usageChunk.cacheReadTokens).toBe(50) - expect(usageChunk.cacheWriteTokens).toBe(30) + }).rejects.toThrow("OpenAI service error") }) }) describe("completePrompt", () => { - it("should complete prompt successfully with gpt-4.1 model", async () => { - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("Test response") - expect(mockCreate).toHaveBeenCalledWith({ - model: "gpt-4.1", - messages: [{ role: "user", content: "Test prompt" }], - temperature: 0, - }) - }) - - it("should complete prompt successfully with o1 model", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o1", - openAiNativeApiKey: "test-api-key", - }) - - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("Test response") - expect(mockCreate).toHaveBeenCalledWith({ - model: "o1", - messages: [{ role: "user", content: "Test prompt" }], - }) - }) - - it("should complete prompt successfully with o1-preview model", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o1-preview", - openAiNativeApiKey: "test-api-key", - }) - - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("Test response") - expect(mockCreate).toHaveBeenCalledWith({ - model: "o1-preview", - messages: [{ role: "user", content: "Test prompt" }], - }) - }) - - it("should complete prompt successfully with o1-mini model", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o1-mini", - openAiNativeApiKey: "test-api-key", - }) - - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("Test response") - expect(mockCreate).toHaveBeenCalledWith({ - model: "o1-mini", - messages: [{ role: "user", content: "Test prompt" }], - }) - }) - - it("should complete prompt successfully with o3-mini model", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o3-mini", - openAiNativeApiKey: "test-api-key", - }) - - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("Test response") - expect(mockCreate).toHaveBeenCalledWith({ - model: "o3-mini", - messages: [{ role: "user", content: "Test prompt" }], - reasoning_effort: "medium", - }) - }) - - it("should handle API errors", async () => { - mockCreate.mockRejectedValueOnce(new Error("API Error")) + it("should throw error for all models since Responses API doesn't support non-streaming", async () => { await expect(handler.completePrompt("Test prompt")).rejects.toThrow( - "OpenAI Native completion error: API Error", + "completePrompt is not supported. 
Use createMessage (Responses API) instead.", ) }) - - it("should handle empty response", async () => { - mockCreate.mockResolvedValueOnce({ - choices: [{ message: { content: "" } }], - }) - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("") - }) - }) - - describe("temperature parameter handling", () => { - it("should include temperature for models that support it", async () => { - // Test with gpt-4.1 which supports temperature - handler = new OpenAiNativeHandler({ - apiModelId: "gpt-4.1", - openAiNativeApiKey: "test-api-key", - }) - - await handler.completePrompt("Test prompt") - expect(mockCreate).toHaveBeenCalledWith({ - model: "gpt-4.1", - messages: [{ role: "user", content: "Test prompt" }], - temperature: 0, - }) - }) - - it("should strip temperature for o1 family models", async () => { - const o1Models = ["o1", "o1-preview", "o1-mini"] - - for (const modelId of o1Models) { - handler = new OpenAiNativeHandler({ - apiModelId: modelId, - openAiNativeApiKey: "test-api-key", - }) - - mockCreate.mockClear() - await handler.completePrompt("Test prompt") - - const callArgs = mockCreate.mock.calls[0][0] - // Temperature should be undefined for o1 models - expect(callArgs.temperature).toBeUndefined() - expect(callArgs.model).toBe(modelId) - } - }) - - it("should strip temperature for o3-mini model", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o3-mini", - openAiNativeApiKey: "test-api-key", - }) - - await handler.completePrompt("Test prompt") - - const callArgs = mockCreate.mock.calls[0][0] - // Temperature should be undefined for o3-mini models - expect(callArgs.temperature).toBeUndefined() - expect(callArgs.model).toBe("o3-mini") - expect(callArgs.reasoning_effort).toBe("medium") - }) - - it("should strip temperature in streaming mode for unsupported models", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o1", - openAiNativeApiKey: "test-api-key", - }) - - const stream = handler.createMessage(systemPrompt, messages) - // Consume the stream - for await (const _chunk of stream) { - // Just consume the stream - } - - const callArgs = mockCreate.mock.calls[0][0] - expect(callArgs).not.toHaveProperty("temperature") - expect(callArgs.model).toBe("o1") - expect(callArgs.stream).toBe(true) - }) }) describe("getModel", () => { @@ -666,6 +189,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail so it uses fetch + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -691,22 +217,31 @@ describe("OpenAiNativeHandler", () => { }), ) const body1 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body1).toContain('"model":"gpt-5-2025-08-07"') - expect(body1).toContain('"input":"Developer: You are a helpful assistant.\\n\\nUser: Hello!"') - expect(body1).toContain('"effort":"medium"') - expect(body1).toContain('"summary":"auto"') - expect(body1).toContain('"verbosity":"medium"') - expect(body1).toContain('"temperature":1') - expect(body1).toContain('"max_output_tokens"') + const parsedBody = JSON.parse(body1) + expect(parsedBody.model).toBe("gpt-5-2025-08-07") + // Now using structured format with content arrays + expect(parsedBody.input).toEqual([ + { + role: "developer", + content: [{ type: "input_text", text: "You are a helpful assistant." }], + }, + { + role: "user", + content: [{ type: "input_text", text: "Hello!" 
}], + }, + ]) + expect(parsedBody.reasoning?.effort).toBe("medium") + expect(parsedBody.reasoning?.summary).toBe("auto") + expect(parsedBody.text?.verbosity).toBe("medium") + // GPT-5 models don't include temperature + expect(parsedBody.temperature).toBeUndefined() + expect(parsedBody.max_output_tokens).toBeDefined() // Verify the streamed content const textChunks = chunks.filter((c) => c.type === "text") expect(textChunks).toHaveLength(2) expect(textChunks[0].text).toBe("Hello") expect(textChunks[1].text).toBe(" world") - - // Clean up - delete (global as any).fetch }) it("should handle GPT-5-mini model with Responses API", async () => { @@ -727,6 +262,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-mini-2025-08-07", @@ -745,9 +283,6 @@ describe("OpenAiNativeHandler", () => { body: expect.stringContaining('"model":"gpt-5-mini-2025-08-07"'), }), ) - - // Clean up - delete (global as any).fetch }) it("should handle GPT-5-nano model with Responses API", async () => { @@ -768,6 +303,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-nano-2025-08-07", @@ -786,9 +324,6 @@ describe("OpenAiNativeHandler", () => { body: expect.stringContaining('"model":"gpt-5-nano-2025-08-07"'), }), ) - - // Clean up - delete (global as any).fetch }) it("should support verbosity control for GPT-5", async () => { @@ -809,6 +344,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -829,9 +367,6 @@ describe("OpenAiNativeHandler", () => { body: expect.stringContaining('"verbosity":"low"'), }), ) - - // Clean up - delete (global as any).fetch }) it("should support minimal reasoning effort for GPT-5", async () => { @@ -852,6 +387,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -871,9 +409,6 @@ describe("OpenAiNativeHandler", () => { body: expect.stringContaining('"effort":"minimal"'), }), ) - - // Clean up - delete (global as any).fetch }) it("should support low reasoning effort for GPT-5", async () => { @@ -894,6 +429,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -914,15 +452,14 @@ describe("OpenAiNativeHandler", () => { }), ) const body2 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body2).toContain('"model":"gpt-5-2025-08-07"') - expect(body2).toContain('"effort":"low"') - expect(body2).toContain('"summary":"auto"') - expect(body2).toContain('"verbosity":"medium"') - expect(body2).toContain('"temperature":1') - expect(body2).toContain('"max_output_tokens"') - - // Clean up - delete (global as any).fetch + const parsedBody = JSON.parse(body2) + 
expect(parsedBody.model).toBe("gpt-5-2025-08-07") + expect(parsedBody.reasoning?.effort).toBe("low") + expect(parsedBody.reasoning?.summary).toBe("auto") + expect(parsedBody.text?.verbosity).toBe("medium") + // GPT-5 models don't include temperature + expect(parsedBody.temperature).toBeUndefined() + expect(parsedBody.max_output_tokens).toBeDefined() }) it("should support both verbosity and reasoning effort together for GPT-5", async () => { @@ -943,6 +480,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -964,15 +504,14 @@ describe("OpenAiNativeHandler", () => { }), ) const body3 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body3).toContain('"model":"gpt-5-2025-08-07"') - expect(body3).toContain('"effort":"minimal"') - expect(body3).toContain('"summary":"auto"') - expect(body3).toContain('"verbosity":"high"') - expect(body3).toContain('"temperature":1') - expect(body3).toContain('"max_output_tokens"') - - // Clean up - delete (global as any).fetch + const parsedBody = JSON.parse(body3) + expect(parsedBody.model).toBe("gpt-5-2025-08-07") + expect(parsedBody.reasoning?.effort).toBe("minimal") + expect(parsedBody.reasoning?.summary).toBe("auto") + expect(parsedBody.text?.verbosity).toBe("high") + // GPT-5 models don't include temperature + expect(parsedBody.temperature).toBeUndefined() + expect(parsedBody.max_output_tokens).toBeDefined() }) it("should handle actual GPT-5 Responses API format", async () => { @@ -1019,6 +558,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1056,9 +598,6 @@ describe("OpenAiNativeHandler", () => { const expectedOutputCost = (20 / 1_000_000) * 10.0 const expectedTotalCost = expectedInputCost + expectedOutputCost expect(usageChunks[0].totalCost).toBeCloseTo(expectedTotalCost, 10) - - // Clean up - delete (global as any).fetch }) it("should handle Responses API with no content gracefully", async () => { @@ -1075,6 +614,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1092,9 +634,6 @@ describe("OpenAiNativeHandler", () => { const contentChunks = chunks.filter((c) => c.type === "text" || c.type === "reasoning") expect(contentChunks).toHaveLength(0) - - // Clean up - delete (global as any).fetch }) it("should support previous_response_id for conversation continuity", async () => { @@ -1126,6 +665,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1155,13 +697,10 @@ describe("OpenAiNativeHandler", () => { // Verify second request includes the provided previous_response_id let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) expect(secondCallBody.previous_response_id).toBe("resp_456") - - // Clean up - delete (global as any).fetch }) it("should handle unhandled stream events gracefully", async () => { - 
// Mock fetch for the fallback SSE path (which is what gets used when SDK fails) + // Mock fetch for the fallback SSE path const mockFetch = vitest.fn().mockResolvedValue({ ok: true, body: new ReadableStream({ @@ -1183,21 +722,14 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any - // Also mock the SDK to throw an error so it falls back to fetch - const mockClient = { - responses: { - create: vitest.fn().mockRejectedValue(new Error("SDK not available")), - }, - } + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", }) - // Replace the client with our mock - ;(handler as any).client = mockClient - const stream = handler.createMessage(systemPrompt, messages) const chunks: any[] = [] const errors: any[] = [] @@ -1210,20 +742,10 @@ describe("OpenAiNativeHandler", () => { errors.push(error) } - // Log for debugging - if (chunks.length === 0 && errors.length === 0) { - console.log("No chunks and no errors received") - } - if (errors.length > 0) { - console.log("Errors:", errors) - } - expect(errors.length).toBe(0) const textChunks = chunks.filter((c) => c.type === "text") expect(textChunks.length).toBeGreaterThan(0) expect(textChunks[0].text).toBe("Hello") - - delete (global as any).fetch }) it("should use stored response ID when metadata doesn't provide one", async () => { @@ -1262,6 +784,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1282,9 +807,6 @@ describe("OpenAiNativeHandler", () => { // Verify second request uses the stored response ID from first request let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) expect(secondCallBody.previous_response_id).toBe("resp_789") - - // Clean up - delete (global as any).fetch }) it("should only send latest message when using previous_response_id", async () => { @@ -1328,6 +850,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1345,11 +870,26 @@ describe("OpenAiNativeHandler", () => { // consume stream } - // Verify first request sends full conversation + // Verify first request sends full conversation in structured format let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(firstCallBody.input).toContain("Hello") - expect(firstCallBody.input).toContain("Hi there!") - expect(firstCallBody.input).toContain("How are you?") + expect(firstCallBody.input).toEqual([ + { + role: "developer", + content: [{ type: "input_text", text: systemPrompt }], + }, + { + role: "user", + content: [{ type: "input_text", text: "Hello" }], + }, + { + role: "assistant", + content: [{ type: "output_text", text: "Hi there!" }], + }, + { + role: "user", + content: [{ type: "input_text", text: "How are you?" 
}], + }, + ]) expect(firstCallBody.previous_response_id).toBeUndefined() // Second request with previous_response_id - should only send latest message @@ -1369,40 +909,49 @@ describe("OpenAiNativeHandler", () => { // consume stream } - // Verify second request only sends the latest user message + // Verify second request only sends the latest user message in structured format let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.input).toBe("User: What's the weather?") - expect(secondCallBody.input).not.toContain("Hello") - expect(secondCallBody.input).not.toContain("Hi there!") - expect(secondCallBody.input).not.toContain("How are you?") + expect(secondCallBody.input).toEqual([ + { + role: "user", + content: [{ type: "input_text", text: "What's the weather?" }], + }, + ]) expect(secondCallBody.previous_response_id).toBe("resp_001") - - // Clean up - delete (global as any).fetch }) - it("should correctly prepare GPT-5 input with conversation continuity", () => { + it("should correctly prepare structured input", () => { const gpt5Handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", }) + // Test with metadata that has previousResponseId // @ts-expect-error - private method - const { formattedInput, previousResponseId } = gpt5Handler.prepareGpt5Input(systemPrompt, messages, { - taskId: "task1", - previousResponseId: "resp_123", - }) + const { formattedInput, previousResponseId } = gpt5Handler.prepareResponsesApiInput( + systemPrompt, + messages, + { + taskId: "task1", + previousResponseId: "resp_123", + }, + ) expect(previousResponseId).toBe("resp_123") - expect(formattedInput).toBe("User: Hello!") + expect(formattedInput).toEqual([ + { + role: "user", + content: [{ type: "input_text", text: "Hello!" 
}], + }, + ]) }) it("should provide helpful error messages for different error codes", async () => { const testCases = [ - { status: 400, expectedMessage: "Invalid request to GPT-5 API" }, + { status: 400, expectedMessage: "Invalid request to Responses API" }, { status: 401, expectedMessage: "Authentication failed" }, { status: 403, expectedMessage: "Access denied" }, - { status: 404, expectedMessage: "GPT-5 API endpoint not found" }, + { status: 404, expectedMessage: "Responses API endpoint not found" }, { status: 429, expectedMessage: "Rate limit exceeded" }, { status: 500, expectedMessage: "OpenAI service error" }, ] @@ -1417,6 +966,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1429,17 +981,22 @@ describe("OpenAiNativeHandler", () => { // Should throw before yielding anything } }).rejects.toThrow(expectedMessage) - } - // Clean up - delete (global as any).fetch + // Clean up + delete (global as any).fetch + } }) }) }) -// Added tests for GPT-5 streaming event coverage per PR_review_gpt5_final.md - +// Additional tests for GPT-5 streaming event coverage describe("GPT-5 streaming event coverage (additional)", () => { + afterEach(() => { + if ((global as any).fetch) { + delete (global as any).fetch + } + }) + it("should handle reasoning delta events for GPT-5", async () => { const mockFetch = vitest.fn().mockResolvedValue({ ok: true, @@ -1458,8 +1015,10 @@ describe("GPT-5 streaming event coverage (additional)", () => { }, }), }) - // @ts-ignore - global.fetch = mockFetch + global.fetch = mockFetch as any + + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) const handler = new OpenAiNativeHandler({ apiModelId: "gpt-5-2025-08-07", @@ -1482,9 +1041,6 @@ describe("GPT-5 streaming event coverage (additional)", () => { expect(reasoningChunks[0].text).toBe("Thinking about the problem...") expect(textChunks).toHaveLength(1) expect(textChunks[0].text).toBe("The answer is...") - - // @ts-ignore - delete global.fetch }) it("should handle refusal delta events for GPT-5 and prefix output", async () => { @@ -1502,8 +1058,10 @@ describe("GPT-5 streaming event coverage (additional)", () => { }, }), }) - // @ts-ignore - global.fetch = mockFetch + global.fetch = mockFetch as any + + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) const handler = new OpenAiNativeHandler({ apiModelId: "gpt-5-2025-08-07", @@ -1522,9 +1080,6 @@ describe("GPT-5 streaming event coverage (additional)", () => { const textChunks = chunks.filter((c) => c.type === "text") expect(textChunks).toHaveLength(1) expect(textChunks[0].text).toBe("[Refusal] I cannot comply with this request.") - - // @ts-ignore - delete global.fetch }) it("should ignore malformed JSON lines in SSE stream", async () => { @@ -1552,8 +1107,10 @@ describe("GPT-5 streaming event coverage (additional)", () => { }, }), }) - // @ts-ignore - global.fetch = mockFetch + global.fetch = mockFetch as any + + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) const handler = new OpenAiNativeHandler({ apiModelId: "gpt-5-2025-08-07", @@ -1572,9 +1129,6 @@ describe("GPT-5 streaming event coverage (additional)", () => { // It should not throw and still capture the valid texts around the malformed line const textChunks = chunks.filter((c) 
=> c.type === "text") expect(textChunks.map((c: any) => c.text)).toEqual(["Before", "After"]) - - // @ts-ignore - delete global.fetch }) describe("Codex Mini Model", () => { @@ -1619,6 +1173,9 @@ describe("GPT-5 streaming event coverage (additional)", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "codex-mini-latest", @@ -1671,12 +1228,18 @@ describe("GPT-5 streaming event coverage (additional)", () => { const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body) expect(requestBody).toMatchObject({ model: "codex-mini-latest", - input: "Developer: You are a helpful coding assistant.\n\nUser: Write a hello world function", + input: [ + { + role: "developer", + content: [{ type: "input_text", text: "You are a helpful coding assistant." }], + }, + { + role: "user", + content: [{ type: "input_text", text: "Write a hello world function" }], + }, + ], stream: true, }) - - // Clean up - delete (global as any).fetch }) it("should handle codex-mini-latest non-streaming completion", async () => { @@ -1687,7 +1250,7 @@ describe("GPT-5 streaming event coverage (additional)", () => { // Codex Mini now uses the same Responses API as GPT-5, which doesn't support non-streaming await expect(handler.completePrompt("Write a hello world function in Python")).rejects.toThrow( - "completePrompt is not supported for codex-mini-latest. Use createMessage (Responses API) instead.", + "completePrompt is not supported. Use createMessage (Responses API) instead.", ) }) @@ -1701,6 +1264,9 @@ describe("GPT-5 streaming event coverage (additional)", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "codex-mini-latest", @@ -1717,9 +1283,6 @@ describe("GPT-5 streaming event coverage (additional)", () => { // consume stream } }).rejects.toThrow("Rate limit exceeded") - - // Clean up - delete (global as any).fetch }) it("should handle codex-mini-latest with multiple user messages", async () => { @@ -1741,6 +1304,9 @@ describe("GPT-5 streaming event coverage (additional)", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "codex-mini-latest", @@ -1759,15 +1325,26 @@ describe("GPT-5 streaming event coverage (additional)", () => { chunks.push(chunk) } - // Verify the request body includes full conversation like GPT-5 + // Verify the request body includes full conversation in structured format const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(requestBody.input).toContain("Developer: You are a helpful assistant") - expect(requestBody.input).toContain("User: First question") - expect(requestBody.input).toContain("Assistant: First answer") - expect(requestBody.input).toContain("User: Second question") - - // Clean up - delete (global as any).fetch + expect(requestBody.input).toEqual([ + { + role: "developer", + content: [{ type: "input_text", text: "You are a helpful assistant." 
}], + }, + { + role: "user", + content: [{ type: "input_text", text: "First question" }], + }, + { + role: "assistant", + content: [{ type: "output_text", text: "First answer" }], + }, + { + role: "user", + content: [{ type: "input_text", text: "Second question" }], + }, + ]) }) it("should handle codex-mini-latest stream error events", async () => { @@ -1793,6 +1370,9 @@ describe("GPT-5 streaming event coverage (additional)", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "codex-mini-latest", @@ -1810,9 +1390,6 @@ describe("GPT-5 streaming event coverage (additional)", () => { chunks.push(chunk) } }).rejects.toThrow("Responses API error: Model overloaded") - - // Clean up - delete (global as any).fetch }) }) }) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 2ba85669631..3923d63a192 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -28,6 +28,9 @@ export type OpenAiNativeModel = ReturnType // GPT-5 specific types +// Constants for model identification +const GPT5_MODEL_PREFIX = "gpt-5" + export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI @@ -35,8 +38,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio private responseIdPromise: Promise | undefined private responseIdResolver: ((value: string | undefined) => void) | undefined - // Event types handled by the shared GPT-5 event processor to avoid duplication - private readonly gpt5CoreHandledTypes = new Set([ + // Event types handled by the shared event processor to avoid duplication + private readonly coreHandledEventTypes = new Set([ "response.text.delta", "response.output_text.delta", "response.reasoning.delta", @@ -60,7 +63,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey }) } - private normalizeGpt5Usage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined { + private normalizeUsage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined { if (!usage) return undefined const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 
0 @@ -103,114 +106,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { const model = this.getModel() - let id: "o3-mini" | "o3" | "o4-mini" | undefined - - if (model.id.startsWith("o3-mini")) { - id = "o3-mini" - } else if (model.id.startsWith("o3")) { - id = "o3" - } else if (model.id.startsWith("o4-mini")) { - id = "o4-mini" - } - - if (id) { - yield* this.handleReasonerMessage(model, id, systemPrompt, messages) - } else if (model.id.startsWith("o1")) { - yield* this.handleO1FamilyMessage(model, systemPrompt, messages) - } else if (this.isResponsesApiModel(model.id)) { - // Both GPT-5 and Codex Mini use the v1/responses endpoint - yield* this.handleResponsesApiMessage(model, systemPrompt, messages, metadata) - } else { - yield* this.handleDefaultModelMessage(model, systemPrompt, messages) - } - } - - private async *handleO1FamilyMessage( - model: OpenAiNativeModel, - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - ): ApiStream { - // o1 supports developer prompt with formatting - // o1-preview and o1-mini only support user messages - const isOriginalO1 = model.id === "o1" - const { reasoning } = this.getModel() - - const response = await this.client.chat.completions.create({ - model: model.id, - messages: [ - { - role: isOriginalO1 ? "developer" : "user", - content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt, - }, - ...convertToOpenAiMessages(messages), - ], - stream: true, - stream_options: { include_usage: true }, - ...(reasoning && reasoning), - }) - yield* this.handleStreamResponse(response, model) - } - - private async *handleReasonerMessage( - model: OpenAiNativeModel, - family: "o3-mini" | "o3" | "o4-mini", - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - ): ApiStream { - const { reasoning } = this.getModel() - - const stream = await this.client.chat.completions.create({ - model: family, - messages: [ - { - role: "developer", - content: `Formatting re-enabled\n${systemPrompt}`, - }, - ...convertToOpenAiMessages(messages), - ], - stream: true, - stream_options: { include_usage: true }, - ...(reasoning && reasoning), - }) - - yield* this.handleStreamResponse(stream, model) - } - - private async *handleDefaultModelMessage( - model: OpenAiNativeModel, - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - ): ApiStream { - const { reasoning, verbosity } = this.getModel() - - // Prepare the request parameters - const params: any = { - model: model.id, - temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE, - messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], - stream: true, - stream_options: { include_usage: true }, - ...(reasoning && reasoning), - } - - // Add verbosity only if the model supports it - if (verbosity && model.info.supportsVerbosity) { - params.verbosity = verbosity - } - - const stream = await this.client.chat.completions.create(params) - - if (typeof (stream as any)[Symbol.asyncIterator] !== "function") { - throw new Error( - "OpenAI SDK did not return an AsyncIterable for streaming response. 
Please check SDK version and usage.", - ) - } - - yield* this.handleStreamResponse( - stream as unknown as AsyncIterable, - model, - ) + // Use Responses API for ALL models + yield* this.handleResponsesApiMessage(model, systemPrompt, messages, metadata) } private async *handleResponsesApiMessage( @@ -219,20 +117,24 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - // Prefer the official SDK Responses API with streaming; fall back to fetch-based SSE if needed. - const { verbosity } = this.getModel() - - // Both GPT-5 and Codex Mini use the same v1/responses endpoint format + // Use Responses API for ALL models + const { verbosity, reasoning } = this.getModel() - // Resolve reasoning effort (supports "minimal" for GPT‑5) - const reasoningEffort = this.getGpt5ReasoningEffort(model) + // Resolve reasoning effort for models that support it + const reasoningEffort = this.getReasoningEffort(model) // Wait for any pending response ID from a previous request to be available // This handles the race condition with fast nano model responses let effectivePreviousResponseId = metadata?.previousResponseId - // Only allow fallback to pending/last response id when not explicitly suppressed - if (!metadata?.suppressPreviousResponseId) { + // Check if we should suppress previous response ID (e.g., after condense or message edit) + if (metadata?.suppressPreviousResponseId) { + // Clear the stored lastResponseId to prevent it from being used in future requests + this.lastResponseId = undefined + effectivePreviousResponseId = undefined + } else { + // Only try to get fallback response IDs if not suppressing + // If we have a pending response ID promise, wait for it to resolve if (!effectivePreviousResponseId && this.responseIdPromise) { try { @@ -250,20 +152,44 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Fall back to the last known response ID if still not available - if (!effectivePreviousResponseId) { + if (!effectivePreviousResponseId && this.lastResponseId) { effectivePreviousResponseId = this.lastResponseId } } // Format input and capture continuity id - const { formattedInput, previousResponseId } = this.prepareGpt5Input(systemPrompt, messages, metadata) - const requestPreviousResponseId = effectivePreviousResponseId ?? 
previousResponseId + const { formattedInput, previousResponseId } = this.prepareResponsesApiInput(systemPrompt, messages, metadata) + const requestPreviousResponseId = effectivePreviousResponseId || previousResponseId // Create a new promise for this request's response ID this.responseIdPromise = new Promise((resolve) => { this.responseIdResolver = resolve }) + // Build request body + const requestBody = this.buildRequestBody( + model, + formattedInput, + requestPreviousResponseId, + systemPrompt, + verbosity, + reasoningEffort, + metadata, + ) + + // Make the request + yield* this.executeRequest(requestBody, model, metadata) + } + + private buildRequestBody( + model: OpenAiNativeModel, + formattedInput: any, + requestPreviousResponseId: string | undefined, + systemPrompt: string, + verbosity: any, + reasoningEffort: ReasoningEffortWithMinimal | undefined, + metadata?: ApiHandlerCreateMessageMetadata, + ): any { // Build a request body (also used for fallback) // Ensure we explicitly pass max_output_tokens for GPT‑5 based on Roo's reserved model response calculation // so requests do not default to very large limits (e.g., 120k). @@ -276,12 +202,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio temperature?: number max_output_tokens?: number previous_response_id?: string + store?: boolean + instructions?: string } - const requestBody: Gpt5RequestBody = { + return { model: model.id, input: formattedInput, stream: true, + store: metadata?.store !== false, // Default to true unless explicitly set to false + // Always include instructions (system prompt) when using previous_response_id + // This ensures the system prompt stays up-to-date even if it changes (e.g., mode switch) + ...(requestPreviousResponseId && { instructions: systemPrompt }), ...(reasoningEffort && { reasoning: { effort: reasoningEffort, @@ -289,13 +221,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio }, }), text: { verbosity: (verbosity || "medium") as VerbosityLevel }, - temperature: this.options.modelTemperature ?? GPT5_DEFAULT_TEMPERATURE, - // Explicitly include the calculated max output tokens for GPT‑5. + // Only include temperature if the model supports it + ...(model.info.supportsTemperature !== false && { + temperature: + this.options.modelTemperature ?? + (model.id.startsWith(GPT5_MODEL_PREFIX) + ? GPT5_DEFAULT_TEMPERATURE + : OPENAI_NATIVE_DEFAULT_TEMPERATURE), + }), + // Explicitly include the calculated max output tokens. // Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams). ...(model.maxTokens ? 
{ max_output_tokens: model.maxTokens } : {}), ...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }), } + } + private async *executeRequest( + requestBody: any, + model: OpenAiNativeModel, + metadata?: ApiHandlerCreateMessageMetadata, + ): ApiStream { try { // Use the official SDK const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable @@ -307,7 +252,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } for await (const event of stream) { - for await (const outChunk of this.processGpt5Event(event, model)) { + for await (const outChunk of this.processEvent(event, model)) { yield outChunk } } @@ -321,7 +266,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (is400Error && requestBody.previous_response_id && isPreviousResponseError) { // Log the error and retry without the previous_response_id console.warn( - `[GPT-5] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, + `[Responses API] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, ) // Remove the problematic previous_response_id and retry @@ -344,7 +289,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } for await (const event of retryStream) { - for await (const outChunk of this.processGpt5Event(event, model)) { + for await (const outChunk of this.processEvent(event, model)) { yield outChunk } } @@ -361,52 +306,88 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } } - private formatInputForResponsesAPI(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string { - // Format the conversation for the Responses API input field - // Use Developer role format for GPT-5 (aligning with o1/o3 Developer role usage per GPT-5 Responses guidance) - // This ensures consistent instruction handling across reasoning models - let formattedInput = `Developer: ${systemPrompt}\n\n` + private formatFullConversation(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): any { + // Format the entire conversation history for the Responses API using structured format + // This supports both text and images + const formattedMessages: any[] = [] + + // Add system prompt as developer message + formattedMessages.push({ + role: "developer", + content: [{ type: "input_text", text: systemPrompt }], + }) + // Process each message for (const message of messages) { - const role = message.role === "user" ? "User" : "Assistant" + const role = message.role === "user" ? 
"user" : "assistant" + const content: any[] = [] - // Handle text content if (typeof message.content === "string") { - formattedInput += `${role}: ${message.content}\n\n` + // For user messages, use input_text; for assistant messages, use output_text + if (role === "user") { + content.push({ type: "input_text", text: message.content }) + } else { + content.push({ type: "output_text", text: message.content }) + } } else if (Array.isArray(message.content)) { - // Handle content blocks - const textContent = message.content - .filter((block) => block.type === "text") - .map((block) => (block as any).text) - .join("\n") - if (textContent) { - formattedInput += `${role}: ${textContent}\n\n` + // For array content with potential images, format properly + for (const block of message.content) { + if (block.type === "text") { + // For user messages, use input_text; for assistant messages, use output_text + if (role === "user") { + content.push({ type: "input_text", text: (block as any).text }) + } else { + content.push({ type: "output_text", text: (block as any).text }) + } + } else if (block.type === "image") { + const image = block as Anthropic.Messages.ImageBlockParam + // Format image with proper data URL - images are always input_image + const imageUrl = `data:${image.source.media_type};base64,${image.source.data}` + content.push({ type: "input_image", image_url: imageUrl }) + } } } + + if (content.length > 0) { + formattedMessages.push({ role, content }) + } } - return formattedInput.trim() + return formattedMessages } - private formatSingleMessageForResponsesAPI(message: Anthropic.Messages.MessageParam): string { + private formatSingleStructuredMessage(message: Anthropic.Messages.MessageParam): any { // Format a single message for the Responses API when using previous_response_id - const role = message.role === "user" ? "User" : "Assistant" + // When using previous_response_id, we only send the latest user message + const role = message.role === "user" ? 
"user" : "assistant" - // Handle text content if (typeof message.content === "string") { - return `${role}: ${message.content}` + // For simple string content, return structured format with proper type + return { + role, + content: [{ type: "input_text", text: message.content }], + } } else if (Array.isArray(message.content)) { - // Handle content blocks - const textContent = message.content - .filter((block) => block.type === "text") - .map((block) => (block as any).text) - .join("\n") - if (textContent) { - return `${role}: ${textContent}` + // Extract text and image content from blocks + const content: any[] = [] + + for (const block of message.content) { + if (block.type === "text") { + // User messages use input_text + content.push({ type: "input_text", text: (block as any).text }) + } else if (block.type === "image") { + const image = block as Anthropic.Messages.ImageBlockParam + const imageUrl = `data:${image.source.media_type};base64,${image.source.data}` + content.push({ type: "input_image", image_url: imageUrl }) + } + } + + if (content.length > 0) { + return { role, content } } } - return "" + return null } private async *makeGpt5ResponsesAPIRequest( @@ -457,7 +438,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) { // Log the error and retry without the previous_response_id console.warn( - `[GPT-5 SSE] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, + `[Responses API] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, ) // Remove the problematic previous_response_id and retry @@ -482,32 +463,32 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (!retryResponse.ok) { // If retry also fails, throw the original error - throw new Error(`GPT-5 API retry failed (${retryResponse.status})`) + throw new Error(`Responses API retry failed (${retryResponse.status})`) } if (!retryResponse.body) { - throw new Error("GPT-5 Responses API error: No response body from retry request") + throw new Error("Responses API error: No response body from retry request") } // Handle the successful retry response - yield* this.handleGpt5StreamResponse(retryResponse.body, model) + yield* this.handleStreamResponse(retryResponse.body, model) return } // Provide user-friendly error messages based on status code switch (response.status) { case 400: - errorMessage = "Invalid request to GPT-5 API. Please check your input parameters." + errorMessage = "Invalid request to Responses API. Please check your input parameters." break case 401: errorMessage = "Authentication failed. Please check your OpenAI API key." break case 403: - errorMessage = "Access denied. Your API key may not have access to GPT-5 models." + errorMessage = "Access denied. Your API key may not have access to this endpoint." break case 404: errorMessage = - "GPT-5 API endpoint not found. The model may not be available yet or requires a different configuration." + "Responses API endpoint not found. The endpoint may not be available yet or requires a different configuration." break case 429: errorMessage = "Rate limit exceeded. Please try again later." @@ -518,7 +499,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio errorMessage = "OpenAI service error. Please try again later." 
break default: - errorMessage = `GPT-5 API error (${response.status})` + errorMessage = `Responses API error (${response.status})` } // Append details if available @@ -530,73 +511,74 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } if (!response.body) { - throw new Error("GPT-5 Responses API error: No response body") + throw new Error("Responses API error: No response body") } // Handle streaming response - yield* this.handleGpt5StreamResponse(response.body, model) + yield* this.handleStreamResponse(response.body, model) } catch (error) { if (error instanceof Error) { // Re-throw with the original error message if it's already formatted - if (error.message.includes("GPT-5")) { + if (error.message.includes("Responses API")) { throw error } // Otherwise, wrap it with context - throw new Error(`Failed to connect to GPT-5 API: ${error.message}`) + throw new Error(`Failed to connect to Responses API: ${error.message}`) } // Handle non-Error objects - throw new Error(`Unexpected error connecting to GPT-5 API`) + throw new Error(`Unexpected error connecting to Responses API`) } } /** - * Prepares the input and conversation continuity parameters for a GPT-5 API call. + * Prepares the input and conversation continuity parameters for a Responses API call. + * Decides whether to send full conversation or just the latest message based on previousResponseId. * * - If a `previousResponseId` is available (either from metadata or the handler's state), * it formats only the most recent user message for the input and returns the response ID * to maintain conversation context. * - Otherwise, it formats the entire conversation history (system prompt + messages) for the input. * - * @returns An object containing the formatted input string and the previous response ID (if used). + * @returns An object containing the formatted input and the previous response ID (if used). */ - private prepareGpt5Input( + private prepareResponsesApiInput( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, - ): { formattedInput: string; previousResponseId?: string } { - // Respect explicit suppression signal for continuity (e.g. immediately after condense) - const isFirstMessage = messages.length === 1 && messages[0].role === "user" - const allowFallback = !metadata?.suppressPreviousResponseId + ): { formattedInput: any; previousResponseId?: string } { + // Note: suppressPreviousResponseId is handled in handleResponsesApiMessage + // This method now only handles formatting based on whether we have a previous response ID - const previousResponseId = - metadata?.previousResponseId ?? (allowFallback && !isFirstMessage ? this.lastResponseId : undefined) + // Check for previous response ID from metadata or fallback to lastResponseId + const isFirstMessage = messages.length === 1 && messages[0].role === "user" + const previousResponseId = metadata?.previousResponseId ?? (!isFirstMessage ? this.lastResponseId : undefined) if (previousResponseId) { + // When using previous_response_id, only send the latest user message const lastUserMessage = [...messages].reverse().find((msg) => msg.role === "user") - const formattedInput = lastUserMessage ? 
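To make the two branches described above concrete (ids and text are illustrative): on a first turn the full structured history is sent with no continuity id, while on a follow-up turn only the newest user message is sent together with previous_response_id. A follow-up user message containing text plus an image block would be formatted roughly as:

    // First turn:  { formattedInput: [developer, user, assistant, ...] }
    // Follow-up:   { formattedInput: [latestUserMessage], previousResponseId: "resp_123" }
    const latestUserMessage = {
        role: "user",
        content: [
            { type: "input_text", text: "Here is the failing screenshot." },
            { type: "input_image", image_url: "data:image/png;base64,iVBORw0KGgo..." },
        ],
    }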
this.formatSingleMessageForResponsesAPI(lastUserMessage) : "" - return { formattedInput, previousResponseId } + if (lastUserMessage) { + const formattedMessage = this.formatSingleStructuredMessage(lastUserMessage) + // formatSingleStructuredMessage now always returns an object with role and content + if (formattedMessage) { + return { formattedInput: [formattedMessage], previousResponseId } + } + } + return { formattedInput: [], previousResponseId } } else { - const formattedInput = this.formatInputForResponsesAPI(systemPrompt, messages) + // Format full conversation history (returns an array of structured messages) + const formattedInput = this.formatFullConversation(systemPrompt, messages) return { formattedInput } } } /** - * Handles the streaming response from the GPT-5 Responses API. + * Handles the streaming response from the Responses API. * * This function iterates through the Server-Sent Events (SSE) stream, parses each event, * and yields structured data chunks (`ApiStream`). It handles a wide variety of event types, * including text deltas, reasoning, usage data, and various status/tool events. - * - * The following event types are intentionally ignored as they are not currently consumed - * by the client application: - * - Audio events (`response.audio.*`) - * - Most tool call events (e.g., `response.function_call_arguments.*`, `response.mcp_call.*`, etc.) - * as the client does not yet support rendering these tool interactions. - * - Status events (`response.created`, `response.in_progress`, etc.) as they are informational - * and do not affect the final output. */ - private async *handleGpt5StreamResponse(body: ReadableStream, model: OpenAiNativeModel): ApiStream { + private async *handleStreamResponse(body: ReadableStream, model: OpenAiNativeModel): ApiStream { const reader = body.getReader() const decoder = new TextDecoder() let buffer = "" @@ -629,8 +611,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Delegate standard event types to the shared processor to avoid duplication - if (parsed?.type && this.gpt5CoreHandledTypes.has(parsed.type)) { - for await (const outChunk of this.processGpt5Event(parsed, model)) { + if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) { + for await (const outChunk of this.processEvent(parsed, model)) { // Track whether we've emitted any content so fallback handling can decide appropriately if (outChunk.type === "text" || outChunk.type === "reasoning") { hasContent = true @@ -670,7 +652,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Check for usage in the complete response if (parsed.response.usage) { - const usageData = this.normalizeGpt5Usage(parsed.response.usage, model) + const usageData = this.normalizeUsage(parsed.response.usage, model) if (usageData) { yield usageData } @@ -910,7 +892,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Response failed if (parsed.error || parsed.message) { throw new Error( - `GPT-5 response failed: ${parsed.error?.message || parsed.message || "Unknown failure"}`, + `Response failed: ${parsed.error?.message || parsed.message || "Unknown failure"}`, ) } } else if (parsed.type === "response.completed" || parsed.type === "response.done") { @@ -990,7 +972,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } } else if (parsed.usage) { // Handle usage if it arrives in a separate, non-completed event - const usageData = 
this.normalizeGpt5Usage(parsed.usage, model) + const usageData = this.normalizeUsage(parsed.usage, model) if (usageData) { yield usageData } @@ -1026,19 +1008,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // This can happen in certain edge cases and shouldn't break the flow } catch (error) { if (error instanceof Error) { - throw new Error(`Error processing GPT-5 response stream: ${error.message}`) + throw new Error(`Error processing response stream: ${error.message}`) } - throw new Error("Unexpected error processing GPT-5 response stream") + throw new Error("Unexpected error processing response stream") } finally { reader.releaseLock() } } /** - * Shared processor for GPT‑5 Responses API events. - * Used by both the official SDK streaming path and (optionally) by the SSE fallback. + * Shared processor for Responses API events. */ - private async *processGpt5Event(event: any, model: OpenAiNativeModel): ApiStream { + private async *processEvent(event: any, model: OpenAiNativeModel): ApiStream { // Persist response id for conversation continuity when available if (event?.response?.id) { this.resolveResponseId(event.response.id) @@ -1096,7 +1077,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Completion events that may carry usage if (event?.type === "response.done" || event?.type === "response.completed") { const usage = event?.response?.usage || event?.usage || undefined - const usageData = this.normalizeGpt5Usage(usage, model) + const usageData = this.normalizeUsage(usage, model) if (usageData) { yield usageData } @@ -1110,87 +1091,30 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } if (event?.usage) { - const usageData = this.normalizeGpt5Usage(event.usage, model) + const usageData = this.normalizeUsage(event.usage, model) if (usageData) { yield usageData } } } - private getGpt5ReasoningEffort(model: OpenAiNativeModel): ReasoningEffortWithMinimal | undefined { + private getReasoningEffort(model: OpenAiNativeModel): ReasoningEffortWithMinimal | undefined { const { reasoning, info } = model // Check if reasoning effort is configured if (reasoning && "reasoning_effort" in reasoning) { const effort = reasoning.reasoning_effort as string - // Support all effort levels including "minimal" for GPT-5 + // Support all effort levels if (effort === "minimal" || effort === "low" || effort === "medium" || effort === "high") { return effort as ReasoningEffortWithMinimal } } - // Centralize default: use the model's default from types if available; otherwise undefined + // Use the model's default from types if available return info.reasoningEffort as ReasoningEffortWithMinimal | undefined } - private isGpt5Model(modelId: string): boolean { - return modelId.startsWith("gpt-5") - } - - private isResponsesApiModel(modelId: string): boolean { - // Both GPT-5 and Codex Mini use the v1/responses endpoint - return modelId.startsWith("gpt-5") || modelId === "codex-mini-latest" - } - - private async *handleStreamResponse( - stream: AsyncIterable, - model: OpenAiNativeModel, - ): ApiStream { - for await (const chunk of stream) { - const delta = chunk.choices[0]?.delta - - if (delta?.content) { - yield { - type: "text", - text: delta.content, - } - } - - if (chunk.usage) { - yield* this.yieldUsage(model.info, chunk.usage) - } - } - } - - private async *yieldUsage(info: ModelInfo, usage: OpenAI.Completions.CompletionUsage | undefined): ApiStream { - const inputTokens = usage?.prompt_tokens || 0 - 
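For orientation, a representative completion event of the shape processEvent consumes might look like the following. Values are invented, and the field names inside usage are an assumption based on the public Responses API (input_tokens/output_tokens), since normalizeUsage is defined elsewhere:

    const exampleCompletedEvent = {
        type: "response.completed",
        response: {
            id: "resp_456",                                   // persisted via resolveResponseId() for the next turn
            usage: { input_tokens: 1200, output_tokens: 85 }, // forwarded to normalizeUsage()
        },
    }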
const outputTokens = usage?.completion_tokens || 0 - - // Extract cache tokens from prompt_tokens_details - // According to OpenAI API, cached_tokens represents tokens read from cache - const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || undefined - - // Cache write tokens are not typically reported in the standard streaming response - // They would be in cache_creation_input_tokens if available - const cacheWriteTokens = (usage as any)?.cache_creation_input_tokens || undefined - - const totalCost = calculateApiCostOpenAI( - info, - inputTokens, - outputTokens, - cacheWriteTokens || 0, - cacheReadTokens || 0, - ) - - yield { - type: "usage", - inputTokens: inputTokens, - outputTokens: outputTokens, - cacheWriteTokens: cacheWriteTokens, - cacheReadTokens: cacheReadTokens, - totalCost: totalCost, - } - } + // Removed isResponsesApiModel method as ALL models now use the Responses API override getModel() { const modelId = this.options.apiModelId @@ -1205,18 +1129,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio modelId: id, model: info, settings: this.options, - defaultTemperature: this.isGpt5Model(id) ? GPT5_DEFAULT_TEMPERATURE : OPENAI_NATIVE_DEFAULT_TEMPERATURE, + defaultTemperature: id.startsWith(GPT5_MODEL_PREFIX) + ? GPT5_DEFAULT_TEMPERATURE + : OPENAI_NATIVE_DEFAULT_TEMPERATURE, }) - // For models using the Responses API (GPT-5 and Codex Mini), ensure we support reasoning effort - if (this.isResponsesApiModel(id)) { - const effort = - (this.options.reasoningEffort as ReasoningEffortWithMinimal | undefined) ?? - (info.reasoningEffort as ReasoningEffortWithMinimal | undefined) + // For models using the Responses API, ensure we support reasoning effort + const effort = + (this.options.reasoningEffort as ReasoningEffortWithMinimal | undefined) ?? + (info.reasoningEffort as ReasoningEffortWithMinimal | undefined) - if (effort) { - ;(params.reasoning as any) = { reasoning_effort: effort } - } + if (effort) { + ;(params.reasoning as any) = { reasoning_effort: effort } } // The o3 models are named like "o3-mini-[reasoning-effort]", which are @@ -1225,7 +1149,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } /** - * Gets the last GPT-5 response ID captured from the Responses API stream. + * Gets the last response ID captured from the Responses API stream. * Used for maintaining conversation continuity across requests. * @returns The response ID, or undefined if not available yet */ @@ -1234,46 +1158,16 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } /** - * Sets the last GPT-5 response ID for conversation continuity. + * Sets the last response ID for conversation continuity. * Typically only used in tests or special flows. - * @param responseId The GPT-5 response ID to store + * @param responseId The response ID to store */ setResponseId(responseId: string): void { this.lastResponseId = responseId } async completePrompt(prompt: string): Promise { - try { - const { id, temperature, reasoning, verbosity } = this.getModel() - const isResponsesApi = this.isResponsesApiModel(id) - - if (isResponsesApi) { - // Models that use the Responses API (GPT-5 and Codex Mini) don't support non-streaming completion - throw new Error(`completePrompt is not supported for ${id}. 
Use createMessage (Responses API) instead.`) - } - - const params: any = { - model: id, - messages: [{ role: "user", content: prompt }], - } - - // Add temperature if supported - if (temperature !== undefined) { - params.temperature = temperature - } - - // Add reasoning parameters for models that support them - if (reasoning) { - Object.assign(params, reasoning) - } - - const response = await this.client.chat.completions.create(params) - return response.choices[0]?.message.content || "" - } catch (error) { - if (error instanceof Error) { - throw new Error(`OpenAI Native completion error: ${error.message}`) - } - throw error - } + // ALL models now use the Responses API which doesn't support non-streaming completion + throw new Error(`completePrompt is not supported. Use createMessage (Responses API) instead.`) } } diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 2103dacb274..f7cdb98b43f 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -99,6 +99,7 @@ import { getMessagesSinceLastSummary, summarizeConversation } from "../condense" import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning" import { restoreTodoListForTask } from "../tools/updateTodoListTool" import { AutoApprovalHandler } from "./AutoApprovalHandler" +import { Gpt5Metadata, ClineMessageWithMetadata } from "./types" const MAX_EXPONENTIAL_BACKOFF_SECONDS = 600 // 10 minutes @@ -711,9 +712,8 @@ export class Task extends EventEmitter implements TaskLike { this.emit(RooCodeEventName.TaskIdle, this.taskId) } - console.log(`[Task#${this.taskId}] pWaitFor askResponse(${type}) -> blocking`) + // Wait for askResponse to be set await pWaitFor(() => this.askResponse !== undefined || this.lastMessageTs !== askTs, { interval: 100 }) - console.log(`[Task#${this.taskId}] pWaitFor askResponse(${type}) -> unblocked (${this.askResponse})`) if (this.lastMessageTs !== askTs) { // Could happen if we send multiple asks in a row i.e. 
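Since completePrompt now always throws, a caller that needs a one-shot completion would stream through createMessage instead. A minimal sketch, assuming the handler is already constructed and the ApiHandler signature used elsewhere in this codebase:

    let text = ""
    for await (const chunk of handler.createMessage(systemPrompt, messages, metadata)) {
        if (chunk.type === "text") text += chunk.text
    }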
with @@ -837,6 +837,10 @@ export class Task extends EventEmitter implements TaskLike { return } await this.overwriteApiConversationHistory(messages) + + // Set flag to skip previous_response_id on the next API call after manual condense + this.skipPrevResponseIdOnce = true + const contextCondense: ContextCondense = { summary, cost, newContextTokens, prevContextTokens } await this.say( "condense_context", @@ -1008,7 +1012,7 @@ export class Task extends EventEmitter implements TaskLike { let imageBlocks: Anthropic.ImageBlockParam[] = formatResponse.imageBlocks(images) - console.log(`[subtasks] task ${this.taskId}.${this.instanceId} starting`) + // Task starting await this.initiateTaskLoop([ { @@ -1044,6 +1048,8 @@ export class Task extends EventEmitter implements TaskLike { } private async resumeTaskFromHistory() { + // Resuming task from history + if (this.enableTaskBridge) { try { this.bridgeService = this.bridgeService || UnifiedBridgeService.getInstance() @@ -1060,6 +1066,17 @@ export class Task extends EventEmitter implements TaskLike { const modifiedClineMessages = await this.getSavedClineMessages() + // Check for any stored GPT-5 response IDs in the message history + const gpt5Messages = modifiedClineMessages.filter( + (m) => + m.type === "say" && + m.say === "text" && + (m as ClineMessageWithMetadata).metadata?.gpt5?.previous_response_id, + ) + if (gpt5Messages.length > 0) { + const lastGpt5Message = gpt5Messages[gpt5Messages.length - 1] as ClineMessage & ClineMessageWithMetadata + } + // Remove any resume messages that may have been added before const lastRelevantMessageIndex = findLastIndex( modifiedClineMessages, @@ -1289,13 +1306,13 @@ export class Task extends EventEmitter implements TaskLike { await this.overwriteApiConversationHistory(modifiedApiConversationHistory) - console.log(`[subtasks] task ${this.taskId}.${this.instanceId} resuming from history item`) + // Task resuming from history item await this.initiateTaskLoop(newUserContent) } public dispose(): void { - console.log(`[Task] disposing task ${this.taskId}.${this.instanceId}`) + // Disposing task // Stop waiting for child task completion. if (this.pauseInterval) { @@ -1358,7 +1375,7 @@ export class Task extends EventEmitter implements TaskLike { } public async abortTask(isAbandoned = false) { - console.log(`[subtasks] aborting task ${this.taskId}.${this.instanceId}`) + // Aborting task // Will stop any autonomously running promises. 
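Putting the continuity pieces together, the metadata passed to createMessage over a few turns around a condense looks roughly like this (mode, taskId, and id values are illustrative; the field names are the ones used above):

    const normalTurnMetadata = { mode: "code", taskId: "task-1", previousResponseId: "resp_123" }
    // After a condense, skipPrevResponseIdOnce is set, so the next call drops the id and
    // explicitly suppresses the fallback, forcing the full (condensed) conversation to be resent:
    const postCondenseMetadata = { mode: "code", taskId: "task-1", suppressPreviousResponseId: true }
    // The id persisted on the last assistant message can later be read back with the new types
    // instead of `as any`:
    //   (msg as ClineMessage & ClineMessageWithMetadata).metadata?.gpt5?.previous_response_id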
if (isAbandoned) { @@ -1598,7 +1615,7 @@ export class Task extends EventEmitter implements TaskLike { // lastMessage.ts = Date.now() DO NOT update ts since it is used as a key for virtuoso list lastMessage.partial = false // instead of streaming partialMessage events, we do a save and post like normal to persist to disk - console.log("updating partial message", lastMessage) + // updating partial message // await this.saveClineMessages() } @@ -1699,7 +1716,7 @@ export class Task extends EventEmitter implements TaskLike { } if (this.abort) { - console.log(`aborting stream, this.abandoned = ${this.abandoned}`) + // Aborting stream if (!this.abandoned) { // Only need to gracefully abort if this instance @@ -2126,23 +2143,27 @@ export class Task extends EventEmitter implements TaskLike { this.clineMessages, (m) => m.type === "say" && - (m as any).say === "text" && - (m as any).metadata?.gpt5?.previous_response_id, + m.say === "text" && + !!(m as ClineMessageWithMetadata).metadata?.gpt5?.previous_response_id, ) if (idx !== -1) { // Use the previous_response_id from the last assistant message for this request - previousResponseId = ((this.clineMessages[idx] as any).metadata.gpt5.previous_response_id || - undefined) as string | undefined + const message = this.clineMessages[idx] as ClineMessage & ClineMessageWithMetadata + previousResponseId = message.metadata?.gpt5?.previous_response_id } + } else if (this.skipPrevResponseIdOnce) { + // Skipping previous_response_id due to recent condense operation - will send full conversation context } - } catch { + } catch (error) { + console.error(`[Task#${this.taskId}] Error retrieving GPT-5 response ID:`, error) // non-fatal } const metadata: ApiHandlerCreateMessageMetadata = { mode: mode, taskId: this.taskId, - ...(previousResponseId ? { previousResponseId } : {}), + // Only include previousResponseId if we're NOT suppressing it + ...(previousResponseId && !this.skipPrevResponseIdOnce ? { previousResponseId } : {}), // If a condense just occurred, explicitly suppress continuity fallback for the next call ...(this.skipPrevResponseIdOnce ? { suppressPreviousResponseId: true } : {}), } @@ -2307,19 +2328,23 @@ export class Task extends EventEmitter implements TaskLike { const lastResponseId: string | undefined = (this.api as any)?.getLastResponseId?.() const idx = findLastIndex( this.clineMessages, - (m) => m.type === "say" && (m as any).say === "text" && m.partial !== true, + (m) => m.type === "say" && m.say === "text" && m.partial !== true, ) if (idx !== -1) { - const msg = this.clineMessages[idx] as any - msg.metadata = msg.metadata ?? {} - msg.metadata.gpt5 = { + const msg = this.clineMessages[idx] as ClineMessage & ClineMessageWithMetadata + if (!msg.metadata) { + msg.metadata = {} + } + const gpt5Metadata: Gpt5Metadata = { ...(msg.metadata.gpt5 ?? {}), previous_response_id: lastResponseId, instructions: this.lastUsedInstructions, reasoning_summary: (reasoningMessage ?? 
"").trim() || undefined, } + msg.metadata.gpt5 = gpt5Metadata } - } catch { + } catch (error) { + console.error(`[Task#${this.taskId}] Error persisting GPT-5 metadata:`, error) // Non-fatal error in metadata persistence } } diff --git a/src/core/task/types.ts b/src/core/task/types.ts new file mode 100644 index 00000000000..607be51aab3 --- /dev/null +++ b/src/core/task/types.ts @@ -0,0 +1,37 @@ +/** + * Type definitions for Task-related metadata + */ + +/** + * GPT-5 specific metadata stored with assistant messages + * for maintaining conversation continuity across requests + */ +export interface Gpt5Metadata { + /** + * The response ID from the previous GPT-5 API response + * Used to maintain conversation continuity in subsequent requests + */ + previous_response_id?: string + + /** + * The system instructions/prompt used for this response + * Stored to track what instructions were active when the response was generated + */ + instructions?: string + + /** + * The reasoning summary from GPT-5's reasoning process + * Contains the model's internal reasoning if reasoning mode was enabled + */ + reasoning_summary?: string +} + +/** + * Extended ClineMessage type with GPT-5 metadata + */ +export interface ClineMessageWithMetadata { + metadata?: { + gpt5?: Gpt5Metadata + [key: string]: any + } +} diff --git a/src/package.json b/src/package.json index 9d694b2bbb3..dcd3a842f28 100644 --- a/src/package.json +++ b/src/package.json @@ -458,7 +458,7 @@ "monaco-vscode-textmate-theme-converter": "^0.1.7", "node-cache": "^5.1.2", "node-ipc": "^12.0.0", - "openai": "^5.0.0", + "openai": "^5.12.2", "os-name": "^6.0.0", "p-limit": "^6.2.0", "p-wait-for": "^5.0.2",