describe("GPT-5 Azure Support", () => {
	// Remember whatever fetch was installed before this suite so each test can
	// replace it and the previous value is put back afterwards. Restoring (rather
	// than deleting) keeps a fetch that existed before the suite available to
	// tests that run later.
	const originalFetch = global.fetch

	afterEach(() => {
		global.fetch = originalFetch
	})

	/**
	 * Builds a fetch mock whose response body streams the given JSON payloads as
	 * server-sent events (`data: <payload>\n\n`), one per `read()` call, and then
	 * reports `done: true`.
	 */
	const createSseFetchMock = (payloads: string[]) =>
		vitest.fn().mockResolvedValue({
			ok: true,
			body: {
				getReader: () => {
					const read = vitest.fn()
					for (const payload of payloads) {
						read.mockResolvedValueOnce({
							done: false,
							value: new TextEncoder().encode(`data: ${payload}\n\n`),
						})
					}
					read.mockResolvedValueOnce({ done: true })
					return { read, releaseLock: vitest.fn() }
				},
			},
		})

	/** Drains a stream into an array so assertions can inspect every chunk. */
	const collectChunks = async (stream: AsyncIterable<unknown>): Promise<unknown[]> => {
		const chunks: unknown[] = []
		for await (const chunk of stream) {
			chunks.push(chunk)
		}
		return chunks
	}

	it("should use responses API for GPT-5 models on Azure", async () => {
		const mockFetch = createSseFetchMock([
			'{"type":"response.text.delta","delta":"Hello"}',
			'{"type":"response.done","response":{"usage":{"input_tokens":10,"output_tokens":5}}}',
		])
		global.fetch = mockFetch

		const gpt5Handler = new OpenAiHandler({
			...mockOptions,
			openAiModelId: "gpt-5",
			openAiUseAzure: true,
			openAiBaseUrl: "https://test-resource.openai.azure.com/openai/responses",
			azureApiVersion: "2025-04-01-preview",
			reasoningEffort: "high",
			modelTemperature: 1,
		})

		const systemPrompt = "You are a helpful assistant."
		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" }]

		const chunks = await collectChunks(gpt5Handler.createMessage(systemPrompt, messages))

		// Verify fetch was called with correct URL and body
		expect(mockFetch).toHaveBeenCalledWith(
			"https://test-resource.openai.azure.com/openai/responses",
			expect.objectContaining({
				method: "POST",
				headers: expect.objectContaining({
					"Content-Type": "application/json",
					"api-key": "test-api-key",
					Accept: "text/event-stream",
				}),
				body: expect.stringContaining('"input":"Developer: You are a helpful assistant.\\n\\nUser: Hello!"'),
			}),
		)

		// Verify the request body contains GPT-5 specific parameters
		const requestInit = mockFetch.mock.calls[0][1] as { body: string }
		const requestBody = JSON.parse(requestInit.body)
		expect(requestBody.model).toBe("gpt-5")
		expect(requestBody.input).toContain("Developer: You are a helpful assistant")
		expect(requestBody.input).toContain("User: Hello!")
		expect(requestBody.reasoning?.effort).toBe("high")
		expect(requestBody.temperature).toBe(1)
		expect(requestBody.stream).toBe(true)

		// Verify response chunks
		expect(chunks).toHaveLength(2)
		expect(chunks[0]).toEqual({ type: "text", text: "Hello" })
		expect(chunks[1]).toMatchObject({
			type: "usage",
			inputTokens: 10,
			outputTokens: 5,
		})
	})

	it("should handle GPT-5 models with minimal reasoning effort", async () => {
		const mockFetch = createSseFetchMock(['{"type":"response.text.delta","delta":"Test"}'])
		global.fetch = mockFetch

		const gpt5Handler = new OpenAiHandler({
			...mockOptions,
			openAiModelId: "gpt-5-mini",
			openAiUseAzure: true,
			openAiBaseUrl: "https://test-resource.openai.azure.com/openai/responses",
			reasoningEffort: "minimal",
		})

		const systemPrompt = "Test"
		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test" }]

		await collectChunks(gpt5Handler.createMessage(systemPrompt, messages))

		// Verify minimal reasoning effort is set
		const requestInit = mockFetch.mock.calls[0][1] as { body: string }
		const requestBody = JSON.parse(requestInit.body)
		expect(requestBody.reasoning?.effort).toBe("minimal")
	})

	it("should not use responses API for GPT-5 models when not on Azure", async () => {
		// Install a spy instead of removing fetch: the assertion below then proves
		// that fetch was never invoked, rather than that it merely does not exist.
		const fetchSpy = vitest.fn()
		global.fetch = fetchSpy

		// Drop calls recorded by earlier tests so the assertion on mockCreate can
		// only be satisfied by this test (mockClear keeps the implementation).
		mockCreate.mockClear()

		const gpt5Handler = new OpenAiHandler({
			...mockOptions,
			openAiModelId: "gpt-5",
			openAiUseAzure: false, // Not using Azure
			openAiBaseUrl: "https://api.openai.com/v1",
		})

		// This should use the regular chat completions API
		const systemPrompt = "You are a helpful assistant."
		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" }]

		await collectChunks(gpt5Handler.createMessage(systemPrompt, messages))

		// Should call the OpenAI client's chat.completions.create, not fetch
		expect(mockCreate).toHaveBeenCalled()
		expect(fetchSpy).not.toHaveBeenCalled()
	})
})
/**
 * Checks if the model ID is a GPT-5 model.
 *
 * The prefix comparison is case-insensitive, so IDs such as "GPT-5-mini" match as well.
 */
private isGpt5Model(modelId: string): boolean {
	return modelId.toLowerCase().startsWith("gpt-5")
}

/**
 * Handles GPT-5 models using the Azure responses API format.
 *
 * Sends a streaming POST request to `<origin>/openai/responses` and yields the
 * chunks produced by `handleGpt5StreamResponse`.
 *
 * @param modelId - Model identifier sent as `model` in the request body.
 * @param systemPrompt - System prompt, rendered as the leading "Developer:" section of the input.
 * @param messages - Conversation history; only its text content is forwarded.
 * @param metadata - Accepted for signature parity with the caller; currently unused here.
 * @throws Error when the HTTP response is not ok, when the response has no body,
 *   or when the stream handler reports a failure.
 */
private async *handleGpt5ResponsesAPI(
	modelId: string,
	systemPrompt: string,
	messages: Anthropic.Messages.MessageParam[],
	metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
	const apiKey = this.options.openAiApiKey ?? "not-provided"
	const baseUrl = this.options.openAiBaseUrl ?? ""

	// Keep only scheme + host of the configured URL and append the fixed responses path.
	// Azure URLs typically look like: https://<resource>.openai.azure.com/openai/responses?api-version=...
	// NOTE(review): any path on openAiBaseUrl is discarded and azureApiVersion is not
	// appended as an `api-version` query parameter - confirm the endpoint accepts that.
	const originMatch = baseUrl.match(/^(https?:\/\/[^\/]+)(\/.*)?$/)
	const azureBaseUrl = originMatch ? originMatch[1] : baseUrl
	const responsesUrl = `${azureBaseUrl}/openai/responses`

	const formattedInput = this.formatInputForResponsesAPI(systemPrompt, messages)

	const { info: modelInfo, reasoning, verbosity } = this.getModel()
	const reasoningEffort = this.getGpt5ReasoningEffort(reasoning)

	const requestBody: Record<string, unknown> = {
		model: modelId,
		input: formattedInput,
		stream: true,
		temperature: this.options.modelTemperature ?? GPT5_DEFAULT_TEMPERATURE,
	}

	if (reasoningEffort) {
		// The reasoning summary is requested unless it has been explicitly disabled.
		requestBody.reasoning =
			this.options.enableGpt5ReasoningSummary !== false
				? { effort: reasoningEffort, summary: "auto" }
				: { effort: reasoningEffort }
	}

	if (verbosity) {
		requestBody.text = { verbosity }
	}

	// Prefer the user-configured limit over the model default, matching how the
	// chat-completions path in this class resolves its max-token value.
	const maxOutputTokens = this.options.modelMaxTokens || modelInfo.maxTokens
	if (maxOutputTokens) {
		requestBody.max_output_tokens = maxOutputTokens
	}

	try {
		const response = await fetch(responsesUrl, {
			method: "POST",
			headers: {
				"Content-Type": "application/json",
				"api-key": apiKey,
				Accept: "text/event-stream",
			},
			body: JSON.stringify(requestBody),
		})

		if (!response.ok) {
			const errorText = await response.text()
			let errorMessage = `GPT-5 API request failed (${response.status})`

			try {
				const errorJson = JSON.parse(errorText)
				if (errorJson.error?.message) {
					errorMessage += `: ${errorJson.error.message}`
				}
			} catch {
				// The error body was not usable JSON; surface the raw text instead.
				errorMessage += `: ${errorText}`
			}

			throw new Error(errorMessage)
		}

		if (!response.body) {
			throw new Error("GPT-5 Responses API error: No response body")
		}

		yield* this.handleGpt5StreamResponse(response.body, modelInfo)
	} catch (error) {
		// Error instances pass through untouched; anything else is normalised to an Error.
		if (error instanceof Error) {
			throw error
		}
		throw new Error("Unexpected error connecting to GPT-5 API")
	}
}

/**
 * Formats the conversation for the GPT-5 responses API input field.
 *
 * The result is a single string: a "Developer: <systemPrompt>" section followed by
 * one "User: ..." / "Assistant: ..." section per message, separated by blank lines.
 * For array content only `text` blocks are used (joined with "\n"); other block
 * types are omitted, and a message with no text blocks produces no section.
 */
private formatInputForResponsesAPI(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string {
	const sections: string[] = [`Developer: ${systemPrompt}`]

	for (const message of messages) {
		const role = message.role === "user" ? "User" : "Assistant"

		if (typeof message.content === "string") {
			sections.push(`${role}: ${message.content}`)
			continue
		}

		if (Array.isArray(message.content)) {
			const textParts: string[] = []
			for (const block of message.content) {
				if (block.type === "text") {
					textParts.push(block.text)
				}
			}
			const textContent = textParts.join("\n")
			if (textContent) {
				sections.push(`${role}: ${textContent}`)
			}
		}
	}

	return sections.join("\n\n").trim()
}

/**
 * Gets the GPT-5 reasoning effort from model configuration.
 *
 * A valid `reasoning_effort` on the supplied reasoning object wins; otherwise the
 * `reasoningEffort` option is used.
 *
 * @returns "minimal" | "low" | "medium" | "high", or undefined when neither source
 *   holds one of those values.
 */
private getGpt5ReasoningEffort(reasoning: any): ReasoningEffortWithMinimal | undefined {
	const isSupportedEffort = (value: unknown): value is ReasoningEffortWithMinimal =>
		value === "minimal" || value === "low" || value === "medium" || value === "high"

	// The `in` operator throws on primitives, so make sure we really have an object first.
	if (reasoning !== null && typeof reasoning === "object" && "reasoning_effort" in reasoning) {
		const modelEffort: unknown = reasoning.reasoning_effort
		if (isSupportedEffort(modelEffort)) {
			return modelEffort
		}
	}

	const optionEffort: unknown = this.options.reasoningEffort
	return isSupportedEffort(optionEffort) ? optionEffort : undefined
}

/**
 * Handles the streaming response from the GPT-5 Responses API.
 *
 * Reads the body as newline-delimited server-sent events and yields one chunk per
 * recognised event (see `processGpt5SseLine`). Text that is still buffered when
 * the stream ends without a trailing newline is processed as a final line.
 *
 * @throws Error prefixed with "Error processing GPT-5 response stream: " when reading
 *   fails or when the stream carries an error event.
 */
private async *handleGpt5StreamResponse(body: ReadableStream<Uint8Array>, modelInfo: ModelInfo): ApiStream {
	const reader = body.getReader()
	const decoder = new TextDecoder()
	let buffer = ""

	try {
		while (true) {
			const { done, value } = await reader.read()
			if (done) break

			buffer += decoder.decode(value, { stream: true })
			const lines = buffer.split("\n")
			// The last element is an incomplete line (or ""); keep it for the next read.
			buffer = lines.pop() ?? ""

			for (const line of lines) {
				yield* this.processGpt5SseLine(line, modelInfo)
			}
		}

		// Flush bytes the decoder is still holding and handle a final unterminated line.
		buffer += decoder.decode()
		if (buffer) {
			yield* this.processGpt5SseLine(buffer, modelInfo)
		}
	} catch (error) {
		if (error instanceof Error) {
			throw new Error(`Error processing GPT-5 response stream: ${error.message}`)
		}
		throw new Error("Unexpected error processing GPT-5 response stream")
	} finally {
		reader.releaseLock()
	}
}

/**
 * Converts a single SSE line into zero or more stream chunks.
 *
 * Lines that are not `data:` lines, the `[DONE]` sentinel, and payloads that are
 * not JSON objects are ignored. Only JSON parsing is guarded by a try/catch, so
 * an error event reported by the API propagates to the caller instead of being
 * discarded together with malformed payloads.
 *
 * @throws Error ("GPT-5 API error: ...") for `response.error` / `error` events.
 */
private async *processGpt5SseLine(line: string, modelInfo: ModelInfo): ApiStream {
	if (!line.startsWith("data:")) {
		return
	}

	const payload = line.slice("data:".length).trim()
	if (payload === "" || payload === "[DONE]") {
		return
	}

	let event: any
	try {
		event = JSON.parse(payload)
	} catch {
		// Malformed or partial JSON is treated as non-critical SSE noise.
		return
	}
	if (event === null || typeof event !== "object") {
		return
	}

	switch (event.type) {
		case "response.text.delta":
		case "response.output_text.delta":
			if (event.delta) {
				yield { type: "text", text: event.delta }
			}
			return

		case "response.reasoning.delta":
		case "response.reasoning_text.delta":
		case "response.reasoning_summary.delta":
		case "response.reasoning_summary_text.delta":
			if (event.delta) {
				yield { type: "reasoning", text: event.delta }
			}
			return

		case "response.refusal.delta":
			if (event.delta) {
				yield { type: "text", text: `[Refusal] ${event.delta}` }
			}
			return

		case "response.output_item.added": {
			const item = event.item
			if (item?.text) {
				if (item.type === "text") {
					yield { type: "text", text: item.text }
				} else if (item.type === "reasoning") {
					yield { type: "reasoning", text: item.text }
				}
			}
			return
		}

		case "response.done":
		case "response.completed": {
			// Only usage is taken from completion events; their text was already streamed as deltas.
			const usage = event.response?.usage || event.usage
			if (usage) {
				yield this.processUsageMetrics(usage, modelInfo)
			}
			return
		}

		case "response.error":
		case "error":
			throw new Error(`GPT-5 API error: ${event.error?.message || event.message || "Unknown error"}`)
	}

	// Any other event that carries a full `response.output` array is treated as a
	// complete response snapshot.
	// NOTE(review): only items/parts whose type is "text" are read here - confirm
	// against the item and part type names the service actually emits.
	if (Array.isArray(event.response?.output)) {
		for (const outputItem of event.response.output) {
			if (outputItem.type !== "text" || !outputItem.content) {
				continue
			}
			for (const content of outputItem.content) {
				if (content.type === "text" && content.text) {
					yield { type: "text", text: content.text }
				}
			}
		}

		if (event.response.usage) {
			yield this.processUsageMetrics(event.response.usage, modelInfo)
		}
	}
}