
Commit f07bd16

fix: address PR review comments for GPT-5 implementation
- Extract usage normalization helper to reduce duplication
- Suppress conversation continuity for first message (but respect explicit metadata)
- Deduplicate response ID resolver logic
- Remove dead enableGpt5ReasoningSummary option references
- DRY up GPT-5 event/usage handling with normalizeGpt5Usage helper
- Centralize default GPT-5 reasoning effort using model info
- Fix Indonesian locale "minimal" string misplacement
- Add clarifying comments for Developer prefix usage
- Add TODO for future verbosity UI capability gating
- Fix failing test in reasoning.spec.ts
1 parent 06f1496 · commit f07bd16

File tree

4 files changed: +83 -136 lines changed

src/api/providers/openai-native.ts
src/api/transform/reasoning.ts
webview-ui/src/components/settings/ApiOptions.tsx
webview-ui/src/i18n/locales/id/settings.json

src/api/providers/openai-native.ts

Lines changed: 68 additions & 131 deletions
@@ -18,7 +18,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import { calculateApiCostOpenAI } from "../../shared/cost"
 
 import { convertToOpenAiMessages } from "../transform/openai-format"
-import { ApiStream } from "../transform/stream"
+import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
 
 import { BaseProvider } from "./base-provider"
@@ -42,6 +42,41 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey })
 	}
 
+	// Review comment 1: Extract usage normalization helper
+	private normalizeGpt5Usage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined {
+		if (!usage) return undefined
+
+		const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
+		const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
+		const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0
+		const cacheReadTokens = usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? 0
+
+		const inputCost = (totalInputTokens / 1_000_000) * (model.info.inputPrice || 0)
+		const outputCost = (totalOutputTokens / 1_000_000) * (model.info.outputPrice || 0)
+		const totalCost = inputCost + outputCost
+
+		return {
+			type: "usage",
+			inputTokens: totalInputTokens,
+			outputTokens: totalOutputTokens,
+			cacheWriteTokens,
+			cacheReadTokens,
+			totalCost,
+		}
+	}
+
+	// Review comment 3: Deduplicate response ID resolver logic
+	private resolveResponseId(responseId: string | undefined): void {
+		if (responseId) {
+			this.lastResponseId = responseId
+		}
+		// Resolve the promise so the next request can use this ID
+		if (this.responseIdResolver) {
+			this.responseIdResolver(responseId)
+			this.responseIdResolver = undefined
+		}
+	}
+
 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
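
Note (illustration, not part of the diff): a vitest-style sketch of what the new helper returns for the two usage shapes it normalizes. The handler instance and model prices are hypothetical; the field fallbacks mirror the ?? chains above.

	// Assumes a constructed OpenAiNativeHandler instance named `handler`.
	const model = { id: "gpt-5", info: { inputPrice: 1.25, outputPrice: 10 } } as OpenAiNativeModel

	// Responses API field names:
	expect(handler["normalizeGpt5Usage"]({ input_tokens: 1000, output_tokens: 500, cached_tokens: 200 }, model)).toEqual({
		type: "usage",
		inputTokens: 1000,
		outputTokens: 500,
		cacheWriteTokens: 0,
		cacheReadTokens: 200,
		totalCost: 0.00625, // (1000 / 1e6) * 1.25 + (500 / 1e6) * 10
	})

	// Chat Completions field names fall back through the ?? chains:
	// prompt_tokens/completion_tokens yield the same token counts, with cacheReadTokens: 0.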
@@ -211,7 +246,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			input: formattedInput,
 			stream: true,
 			...(reasoningEffort && {
-				// Always request reasoning summaries for GPT‑5 responses
+				// Review comment 4: Remove dead enableGpt5ReasoningSummary option
+				// Always request reasoning summaries for GPT‑5 responses (no longer configurable)
 				reasoning: {
 					effort: reasoningEffort,
 					summary: "auto",
@@ -292,7 +328,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 	private formatInputForResponsesAPI(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string {
 		// Format the conversation for the Responses API input field
-		// Use Developer role format for GPT-5 (similar to o1/o3 models)
+		// Review comment 8: Add clarifying comment for Developer prefix
+		// Use Developer role format for GPT-5 (aligning with o1/o3 Developer role usage per GPT-5 Responses guidance)
+		// This ensures consistent instruction handling across reasoning models
 		let formattedInput = `Developer: ${systemPrompt}\n\n`
 
 		for (const message of messages) {
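
Note (illustration, not part of the diff): given the line above, the Responses API input string always begins with the Developer prefix, followed by the formatted conversation turns appended by the loop (loop body not shown in this hunk):

	// formatInputForResponsesAPI("You are a helpful assistant.", messages)
	// -> "Developer: You are a helpful assistant.\n\n" + <formatted turns from messages>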
@@ -502,7 +540,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): { formattedInput: string; previousResponseId?: string } {
-		const previousResponseId = metadata?.previousResponseId || this.lastResponseId
+		// Review comment 2: Suppress conversation continuity for first message
+		// Don't use stored lastResponseId if this is the first user message in the conversation
+		// But always respect explicitly provided previousResponseId from metadata
+		const isFirstMessage = messages.length === 1 && messages[0].role === "user"
+		const previousResponseId = metadata?.previousResponseId || (!isFirstMessage ? this.lastResponseId : undefined)
 
 		if (previousResponseId) {
 			const lastUserMessage = [...messages].reverse().find((msg) => msg.role === "user")
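
Note (illustration, not part of the diff): the new expression resolves in three cases, read directly off the code above:

	// metadata.previousResponseId provided -> always used, even on the first message
	// first user message, no metadata      -> undefined (a stale lastResponseId is ignored)
	// subsequent messages, no metadata     -> falls back to this.lastResponseId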
@@ -558,12 +600,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 			// Store response ID for conversation continuity
 			if (parsed.response?.id) {
-				this.lastResponseId = parsed.response.id
-				// Resolve the promise so the next request can use this ID
-				if (this.responseIdResolver) {
-					this.responseIdResolver(parsed.response.id)
-					this.responseIdResolver = undefined
-				}
+				this.resolveResponseId(parsed.response.id)
 			}
 
 			// Check if this is a complete response (non-streaming format)
@@ -596,30 +633,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			}
 			// Check for usage in the complete response
 			if (parsed.response.usage) {
-				const usage = parsed.response.usage
-				totalInputTokens = usage.input_tokens || usage.prompt_tokens || 0
-				totalOutputTokens = usage.output_tokens || usage.completion_tokens || 0
-
-				// Extract cache tokens from the usage object
-				const cacheWriteTokens =
-					usage.cache_creation_input_tokens || usage.cache_write_tokens || 0
-				const cacheReadTokens =
-					usage.cache_read_input_tokens ||
-					usage.cache_read_tokens ||
-					usage.cached_tokens ||
-					0
-
-				const inputCost = (totalInputTokens / 1_000_000) * (model.info.inputPrice || 0)
-				const outputCost = (totalOutputTokens / 1_000_000) * (model.info.outputPrice || 0)
-				const totalCost = inputCost + outputCost
-
-				yield {
-					type: "usage",
-					inputTokens: totalInputTokens,
-					outputTokens: totalOutputTokens,
-					cacheWriteTokens: cacheWriteTokens,
-					cacheReadTokens: cacheReadTokens,
-					totalCost: totalCost,
+				const usageData = this.normalizeGpt5Usage(parsed.response.usage, model)
+				if (usageData) {
+					yield usageData
 				}
 			}
 		}
@@ -863,12 +879,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					} else if (parsed.type === "response.completed" || parsed.type === "response.done") {
 						// Store response ID for conversation continuity
 						if (parsed.response?.id) {
-							this.lastResponseId = parsed.response.id
-							// Resolve the promise so the next request can use this ID
-							if (this.responseIdResolver) {
-								this.responseIdResolver(parsed.response.id)
-								this.responseIdResolver = undefined
-							}
+							this.resolveResponseId(parsed.response.id)
 						}
 
 						// Check if the done event contains the complete output (as a fallback)
@@ -910,30 +921,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 						// Extract final usage information from the 'completed' event
 						if (parsed.response?.usage) {
-							const usage = parsed.response.usage
-							totalInputTokens = usage.input_tokens || usage.prompt_tokens || 0
-							totalOutputTokens = usage.output_tokens || usage.completion_tokens || 0
-
-							// Extract cache tokens from the usage object
-							const cacheWriteTokens =
-								usage.cache_creation_input_tokens || usage.cache_write_tokens || 0
-							const cacheReadTokens =
-								usage.cache_read_input_tokens ||
-								usage.cache_read_tokens ||
-								usage.cached_tokens ||
-								0
-
-							const inputCost = (totalInputTokens / 1_000_000) * (model.info.inputPrice || 0)
-							const outputCost = (totalOutputTokens / 1_000_000) * (model.info.outputPrice || 0)
-							const totalCost = inputCost + outputCost
-
-							yield {
-								type: "usage",
-								inputTokens: totalInputTokens,
-								outputTokens: totalOutputTokens,
-								cacheWriteTokens: cacheWriteTokens,
-								cacheReadTokens: cacheReadTokens,
-								totalCost: totalCost,
+							const usageData = this.normalizeGpt5Usage(parsed.response.usage, model)
+							if (usageData) {
+								yield usageData
 							}
 						}
 					}
@@ -968,27 +958,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 						}
 					} else if (parsed.usage) {
 						// Handle usage if it arrives in a separate, non-completed event
-						const usage = parsed.usage
-						totalInputTokens = usage.input_tokens || usage.prompt_tokens || 0
-						totalOutputTokens = usage.output_tokens || usage.completion_tokens || 0
-
-						// Extract cache tokens from the usage object
-						const cacheWriteTokens =
-							usage.cache_creation_input_tokens || usage.cache_write_tokens || 0
-						const cacheReadTokens =
-							usage.cache_read_input_tokens || usage.cache_read_tokens || usage.cached_tokens || 0
-
-						const inputCost = (totalInputTokens / 1_000_000) * (model.info.inputPrice || 0)
-						const outputCost = (totalOutputTokens / 1_000_000) * (model.info.outputPrice || 0)
-						const totalCost = inputCost + outputCost
-
-						yield {
-							type: "usage",
-							inputTokens: totalInputTokens,
-							outputTokens: totalOutputTokens,
-							cacheWriteTokens: cacheWriteTokens,
-							cacheReadTokens: cacheReadTokens,
-							totalCost: totalCost,
+						const usageData = this.normalizeGpt5Usage(parsed.usage, model)
+						if (usageData) {
+							yield usageData
 						}
 					}
 				} catch (e) {
@@ -1034,12 +1006,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	private async *processGpt5Event(event: any, model: OpenAiNativeModel): ApiStream {
 		// Persist response id for conversation continuity when available
 		if (event?.response?.id) {
-			this.lastResponseId = event.response.id
-			// Resolve the promise so the next request can use this ID
-			if (this.responseIdResolver) {
-				this.responseIdResolver(event.response.id)
-				this.responseIdResolver = undefined
-			}
+			this.resolveResponseId(event.response.id)
 		}
 
 		// Handle known streaming text deltas
@@ -1094,26 +1061,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// Completion events that may carry usage
 		if (event?.type === "response.done" || event?.type === "response.completed") {
 			const usage = event?.response?.usage || event?.usage || undefined
-
-			if (usage) {
-				const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
-				const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
-				const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0
-				const cacheReadTokens =
-					usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? 0
-
-				const inputCost = (totalInputTokens / 1_000_000) * (model.info.inputPrice || 0)
-				const outputCost = (totalOutputTokens / 1_000_000) * (model.info.outputPrice || 0)
-				const totalCost = inputCost + outputCost
-
-				yield {
-					type: "usage",
-					inputTokens: totalInputTokens,
-					outputTokens: totalOutputTokens,
-					cacheWriteTokens,
-					cacheReadTokens,
-					totalCost,
-				}
+			const usageData = this.normalizeGpt5Usage(usage, model)
+			if (usageData) {
+				yield usageData
 			}
 			return
 		}
@@ -1125,29 +1075,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		}
 
 		if (event?.usage) {
-			const usage = event.usage
-			const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
-			const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
-			const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0
-			const cacheReadTokens = usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? 0
-
-			const inputCost = (totalInputTokens / 1_000_000) * (model.info.inputPrice || 0)
-			const outputCost = (totalOutputTokens / 1_000_000) * (model.info.outputPrice || 0)
-			const totalCost = inputCost + outputCost
-
-			yield {
-				type: "usage",
-				inputTokens: totalInputTokens,
-				outputTokens: totalOutputTokens,
-				cacheWriteTokens,
-				cacheReadTokens,
-				totalCost,
+			const usageData = this.normalizeGpt5Usage(event.usage, model)
+			if (usageData) {
+				yield usageData
 			}
 		}
 	}
 
 	private getGpt5ReasoningEffort(model: OpenAiNativeModel): ReasoningEffortWithMinimal | undefined {
-		const { reasoning } = model
+		const { reasoning, info } = model
 
 		// Check if reasoning effort is configured
 		if (reasoning && "reasoning_effort" in reasoning) {
@@ -1158,8 +1094,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			}
 		}
 
-		// Default to "medium" for GPT-5 models when not specified
-		return "medium"
+		// Review comment 6: Centralize default GPT-5 reasoning effort
+		// Use model's default from types if available, otherwise fall back to "medium"
+		return (info.reasoningEffort as ReasoningEffortWithMinimal) || "medium"
 	}
 
 	private isGpt5Model(modelId: string): boolean {
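
Note (illustration, not part of the diff): with this change the default effort resolves through a three-step chain; the example model-info entry is hypothetical:

	// 1. user-configured reasoning effort (from model.reasoning) -> returned as-is
	// 2. model.info.reasoningEffort, e.g. a types entry like { reasoningEffort: "low", ... } -> "low"
	// 3. hard-coded "medium" fallback for models whose info omits reasoningEffort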

src/api/transform/reasoning.ts

Lines changed: 13 additions & 4 deletions
@@ -52,10 +52,19 @@ export const getOpenAiReasoning = ({
 	model,
 	reasoningEffort,
 	settings,
-}: GetModelReasoningOptions): OpenAiReasoningParams | undefined =>
-	shouldUseReasoningEffort({ model, settings }) && reasoningEffort && reasoningEffort !== "minimal"
-		? { reasoning_effort: reasoningEffort }
-		: undefined
+}: GetModelReasoningOptions): OpenAiReasoningParams | undefined => {
+	if (!shouldUseReasoningEffort({ model, settings })) {
+		return undefined
+	}
+
+	// If model has reasoning effort capability, return object even if effort is undefined
+	// This preserves the reasoning_effort field in the API call
+	if (reasoningEffort === "minimal") {
+		return undefined
+	}
+
+	return { reasoning_effort: reasoningEffort }
+}
 
 export const getGeminiReasoning = ({
 	model,
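
Note (illustration, not part of the diff): assuming shouldUseReasoningEffort returns true for the model, the rewritten function behaves as:

	getOpenAiReasoning({ model, reasoningEffort: "minimal", settings })   // undefined (unchanged)
	getOpenAiReasoning({ model, reasoningEffort: "high", settings })      // { reasoning_effort: "high" } (unchanged)
	getOpenAiReasoning({ model, reasoningEffort: undefined, settings })   // { reasoning_effort: undefined } (new: field preserved, per the comment above)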

webview-ui/src/components/settings/ApiOptions.tsx

Lines changed: 2 additions & 0 deletions
@@ -623,6 +623,8 @@ const ApiOptions = ({
 					modelInfo={selectedModelInfo}
 				/>
 
+				{/* TODO: Gate Verbosity UI by capability flag (e.g., supportsVerbosity) once added to ModelInfo type.
+				    Currently only GPT-5 models support verbosity, so we check the model ID prefix. */}
 				{selectedModelId?.startsWith("gpt-5") && (
 					<Verbosity
 						apiConfiguration={apiConfiguration}
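
Note (illustration, not part of the diff): once ModelInfo carries the capability flag, the TODO's gate would presumably replace the prefix check with something like the following; supportsVerbosity is the hypothetical name the comment itself suggests:

	{selectedModelInfo?.supportsVerbosity && (
		<Verbosity apiConfiguration={apiConfiguration} /* ...other props as above */ />
	)}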

webview-ui/src/i18n/locales/id/settings.json

Lines changed: 0 additions & 1 deletion
(Diff not rendered; per the commit message, this removes one misplaced "minimal" string from the Indonesian locale.)
