Skip to content

Commit 06f1496

Browse files
committed
fix: handle GPT-5 response ID race condition with nano model
- Add promise-based synchronization for response ID persistence.
- Wait for the pending response ID from the previous request before using it.
- Resolve the promise when the response ID is received or cleared.
- Add a 100ms timeout to avoid blocking too long on ID resolution.
- Properly clean up the resolver on errors to prevent memory leaks.

This fixes the race condition where fast nano model responses could cause the next request to be initiated before the response ID was fully persisted.
1 parent ccb089c commit 06f1496

File tree

1 file changed

+56
-10
lines changed

1 file changed

+56
-10
lines changed

src/api/providers/openai-native.ts

Lines changed: 56 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
3232
protected options: ApiHandlerOptions
3333
private client: OpenAI
3434
private lastResponseId: string | undefined
35+
private responseIdPromise: Promise<string | undefined> | undefined
36+
private responseIdResolver: ((value: string | undefined) => void) | undefined
3537

3638
constructor(options: ApiHandlerOptions) {
3739
super()
@@ -167,23 +169,40 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
167169
// Resolve reasoning effort (supports "minimal" for GPT‑5)
168170
const reasoningEffort = this.getGpt5ReasoningEffort(model)
169171

170-
// Prepare inputs and potential conversation continuity.
171-
// If metadata.previousResponseId is not provided, attempt to read the last persisted assistant turn's gpt5.previous_response_id
172-
// via a metadata hook (taskId should be set for all Task-driven requests).
172+
// Wait for any pending response ID from a previous request to be available
173+
// This handles the race condition with fast nano model responses
173174
let effectivePreviousResponseId = metadata?.previousResponseId
174-
try {
175-
if (!effectivePreviousResponseId && metadata?.taskId) {
176-
// Defer to Task layer via metadata hook if present (the Task will propagate this in future refactor).
177-
// For now, keep behavior unchanged if not available.
175+
176+
// If we have a pending response ID promise, wait for it to resolve
177+
if (!effectivePreviousResponseId && this.responseIdPromise) {
178+
try {
179+
const resolvedId = await Promise.race([
180+
this.responseIdPromise,
181+
// Timeout after 100ms to avoid blocking too long
182+
new Promise<undefined>((resolve) => setTimeout(() => resolve(undefined), 100)),
183+
])
184+
if (resolvedId) {
185+
effectivePreviousResponseId = resolvedId
186+
}
187+
} catch {
188+
// Non-fatal if promise fails
178189
}
179-
} catch {
180-
// Non-fatal if lookup fails
181190
}
182191

183-
// Format input and capture continuity id (falls back to this.lastResponseId when metadata doesn't provide one)
192+
// Fall back to the last known response ID if still not available
193+
if (!effectivePreviousResponseId) {
194+
effectivePreviousResponseId = this.lastResponseId
195+
}
196+
197+
// Format input and capture continuity id
184198
const { formattedInput, previousResponseId } = this.prepareGpt5Input(systemPrompt, messages, metadata)
185199
const requestPreviousResponseId = effectivePreviousResponseId ?? previousResponseId
186200

201+
// Create a new promise for this request's response ID
202+
this.responseIdPromise = new Promise<string | undefined>((resolve) => {
203+
this.responseIdResolver = resolve
204+
})
205+
187206
// Build a request body (also used for fallback)
188207
// Ensure we explicitly pass max_output_tokens for GPT‑5 based on Roo's reserved model response calculation
189208
// so requests do not default to very large limits (e.g., 120k).
@@ -375,6 +394,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
375394

376395
// Clear the stored lastResponseId to prevent using it again
377396
this.lastResponseId = undefined
397+
// Resolve the promise with undefined to unblock any waiting requests
398+
const sdkResolver = this.responseIdResolver
399+
if (sdkResolver) {
400+
sdkResolver(undefined)
401+
this.responseIdResolver = undefined
402+
}
403+
// Resolve the promise with undefined to unblock any waiting requests
404+
const resolver = this.responseIdResolver
405+
if (resolver) {
406+
resolver(undefined)
407+
this.responseIdResolver = undefined
408+
}
378409

379410
// Retry the request without the previous_response_id
380411
const retryResponse = await fetch(url, {
@@ -528,6 +559,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
528559
// Store response ID for conversation continuity
529560
if (parsed.response?.id) {
530561
this.lastResponseId = parsed.response.id
562+
// Resolve the promise so the next request can use this ID
563+
if (this.responseIdResolver) {
564+
this.responseIdResolver(parsed.response.id)
565+
this.responseIdResolver = undefined
566+
}
531567
}
532568

533569
// Check if this is a complete response (non-streaming format)
@@ -828,6 +864,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
828864
// Store response ID for conversation continuity
829865
if (parsed.response?.id) {
830866
this.lastResponseId = parsed.response.id
867+
// Resolve the promise so the next request can use this ID
868+
if (this.responseIdResolver) {
869+
this.responseIdResolver(parsed.response.id)
870+
this.responseIdResolver = undefined
871+
}
831872
}
832873

833874
// Check if the done event contains the complete output (as a fallback)
@@ -994,6 +1035,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
9941035
// Persist response id for conversation continuity when available
9951036
if (event?.response?.id) {
9961037
this.lastResponseId = event.response.id
1038+
// Resolve the promise so the next request can use this ID
1039+
if (this.responseIdResolver) {
1040+
this.responseIdResolver(event.response.id)
1041+
this.responseIdResolver = undefined
1042+
}
9971043
}
9981044

9991045
// Handle known streaming text deltas

0 commit comments

Comments
 (0)