Skip to content

Commit 2022e28

Browse files
committed
openai-native: gate text.verbosity by supportsVerbosity, remove DEBUG_RESPONSES_API, fix Gpt5RequestBody input type, add tests for non-verbosity models
1 parent c4c0b17 commit 2022e28

File tree

2 files changed

+90
-16
lines changed

src/api/providers/__tests__/openai-native.spec.ts

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,5 +1375,86 @@ describe("GPT-5 streaming event coverage (additional)", () => {
13751375
}
13761376
}).rejects.toThrow("Responses API error: Model overloaded")
13771377
})
1378+
1379+
// New tests: ensure text.verbosity is omitted for models without supportsVerbosity
1380+
describe("Verbosity gating for non-GPT-5 models", () => {
1381+
it("should omit text.verbosity for gpt-4.1", async () => {
1382+
const mockFetch = vitest.fn().mockResolvedValue({
1383+
ok: true,
1384+
body: new ReadableStream({
1385+
start(controller) {
1386+
controller.enqueue(
1387+
new TextEncoder().encode('data: {"type":"response.done","response":{}}\n\n'),
1388+
)
1389+
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
1390+
controller.close()
1391+
},
1392+
}),
1393+
})
1394+
;(global as any).fetch = mockFetch as any
1395+
1396+
// Force SDK path to fail so we use fetch fallback
1397+
mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
1398+
1399+
const handler = new OpenAiNativeHandler({
1400+
apiModelId: "gpt-4.1",
1401+
openAiNativeApiKey: "test-api-key",
1402+
verbosity: "high",
1403+
})
1404+
1405+
const systemPrompt = "You are a helpful assistant."
1406+
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" }]
1407+
const stream = handler.createMessage(systemPrompt, messages)
1408+
1409+
for await (const _ of stream) {
1410+
// drain
1411+
}
1412+
1413+
const bodyStr = (mockFetch.mock.calls[0][1] as any).body as string
1414+
const parsedBody = JSON.parse(bodyStr)
1415+
expect(parsedBody.model).toBe("gpt-4.1")
1416+
expect(parsedBody.text).toBeUndefined()
1417+
expect(bodyStr).not.toContain('"verbosity"')
1418+
})
1419+
1420+
it("should omit text.verbosity for gpt-4o", async () => {
1421+
const mockFetch = vitest.fn().mockResolvedValue({
1422+
ok: true,
1423+
body: new ReadableStream({
1424+
start(controller) {
1425+
controller.enqueue(
1426+
new TextEncoder().encode('data: {"type":"response.done","response":{}}\n\n'),
1427+
)
1428+
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
1429+
controller.close()
1430+
},
1431+
}),
1432+
})
1433+
;(global as any).fetch = mockFetch as any
1434+
1435+
// Force SDK path to fail so we use fetch fallback
1436+
mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
1437+
1438+
const handler = new OpenAiNativeHandler({
1439+
apiModelId: "gpt-4o",
1440+
openAiNativeApiKey: "test-api-key",
1441+
verbosity: "low",
1442+
})
1443+
1444+
const systemPrompt = "You are a helpful assistant."
1445+
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" }]
1446+
const stream = handler.createMessage(systemPrompt, messages)
1447+
1448+
for await (const _ of stream) {
1449+
// drain
1450+
}
1451+
1452+
const bodyStr = (mockFetch.mock.calls[0][1] as any).body as string
1453+
const parsedBody = JSON.parse(bodyStr)
1454+
expect(parsedBody.model).toBe("gpt-4o")
1455+
expect(parsedBody.text).toBeUndefined()
1456+
expect(bodyStr).not.toContain('"verbosity"')
1457+
})
1458+
})
13781459
})
13791460
})

src/api/providers/openai-native.ts

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@ export type OpenAiNativeModel = ReturnType<OpenAiNativeHandler["getModel"]>
3131
// Constants for model identification
3232
const GPT5_MODEL_PREFIX = "gpt-5"
3333

34-
// Debug flag for logging (can be controlled via environment variable or config)
35-
const DEBUG_RESPONSES_API = process.env.DEBUG_RESPONSES_API === "true"
36-
3734
export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
3835
protected options: ApiHandlerOptions
3936
private client: OpenAI
@@ -198,7 +195,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
198195
// so requests do not default to very large limits (e.g., 120k).
199196
interface Gpt5RequestBody {
200197
model: string
201-
input: string
198+
input: Array<{ role: "user" | "assistant"; content: any[] }>
202199
stream: boolean
203200
reasoning?: { effort: ReasoningEffortWithMinimal; summary?: "auto" }
204201
text?: { verbosity: VerbosityLevel }
@@ -209,7 +206,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
209206
instructions?: string
210207
}
211208

212-
return {
209+
const body: Gpt5RequestBody = {
213210
model: model.id,
214211
input: formattedInput,
215212
stream: true,
@@ -224,7 +221,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
224221
...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}),
225222
},
226223
}),
227-
text: { verbosity: (verbosity || "medium") as VerbosityLevel },
228224
// Only include temperature if the model supports it
229225
...(model.info.supportsTemperature !== false && {
230226
temperature:
@@ -238,6 +234,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
238234
...(model.maxTokens ? { max_output_tokens: model.maxTokens } : {}),
239235
...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }),
240236
}
237+
238+
// Include text.verbosity only when the model explicitly supports it
239+
if (model.info.supportsVerbosity === true) {
240+
body.text = { verbosity: (verbosity || "medium") as VerbosityLevel }
241+
}
242+
243+
return body
241244
}
242245

243246
private async *executeRequest(
@@ -269,11 +272,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
269272

270273
if (is400Error && requestBody.previous_response_id && isPreviousResponseError) {
271274
// Log the error and retry without the previous_response_id
272-
if (DEBUG_RESPONSES_API) {
273-
console.debug(
274-
`[Responses API] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`,
275-
)
276-
}
277275

278276
// Remove the problematic previous_response_id and retry
279277
const retryRequestBody = { ...requestBody }
@@ -440,11 +438,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
440438

441439
if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) {
442440
// Log the error and retry without the previous_response_id
443-
if (DEBUG_RESPONSES_API) {
444-
console.debug(
445-
`[Responses API] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`,
446-
)
447-
}
448441

449442
// Remove the problematic previous_response_id and retry
450443
const retryRequestBody = { ...requestBody }

0 commit comments

Comments (0)