@@ -52,9 +52,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	constructor(options: ApiHandlerOptions) {
 		super()
 		this.options = options
-		// Default to including reasoning.summary: "auto" for GPT‑5 unless explicitly disabled
-		if (this.options.enableGpt5ReasoningSummary === undefined) {
-			this.options.enableGpt5ReasoningSummary = true
+		// Default to including reasoning.summary: "auto" for models that support Responses API
+		// reasoning summaries unless explicitly disabled.
+		if (this.options.enableResponsesReasoningSummary === undefined) {
+			this.options.enableResponsesReasoningSummary = true
 		}
 		const apiKey = this.options.openAiNativeApiKey ?? "not-provided"
 		this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey })
@@ -176,10 +177,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		reasoningEffort: ReasoningEffortExtended | undefined,
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): any {
-		// Build a request body
-		// Ensure we explicitly pass max_output_tokens for GPT‑5 based on Roo's reserved model response calculation
+		// Build a request body for the OpenAI Responses API.
+		// Ensure we explicitly pass max_output_tokens based on Roo's reserved model response calculation
 		// so requests do not default to very large limits (e.g., 120k).
-		interface Gpt5RequestBody {
+		interface ResponsesRequestBody {
 			model: string
 			input: Array<{ role: "user" | "assistant"; content: any[] } | { type: string; content: string }>
 			stream: boolean
@@ -191,13 +192,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			instructions?: string
 			service_tier?: ServiceTier
 			include?: string[]
+			/** Prompt cache retention policy: "in_memory" (default) or "24h" for extended caching */
+			prompt_cache_retention?: "in_memory" | "24h"
 		}
 
 		// Validate requested tier against model support; if not supported, omit.
 		const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
 		const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
 
-		const body: Gpt5RequestBody = {
+		// Decide whether to enable extended prompt cache retention for this request
+		const promptCacheRetention = this.getPromptCacheRetention(model)
+
+		const body: ResponsesRequestBody = {
 			model: model.id,
 			input: formattedInput,
 			stream: true,
@@ -213,7 +219,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 				? {
 						reasoning: {
 							...(reasoningEffort ? { effort: reasoningEffort } : {}),
-							...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}),
+							...(this.options.enableResponsesReasoningSummary ? { summary: "auto" as const } : {}),
 						},
 					}
 				: {}),
@@ -229,6 +235,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 				(requestedTier === "default" || allowedTierNames.has(requestedTier)) && {
 					service_tier: requestedTier,
 				}),
+			// Enable extended prompt cache retention for models that support it.
+			// This uses the OpenAI Responses API `prompt_cache_retention` parameter.
+			...(promptCacheRetention ? { prompt_cache_retention: promptCacheRetention } : {}),
 		}
 
 		// Include text.verbosity only when the model explicitly supports it
@@ -263,7 +272,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			}
 		} catch (sdkErr: any) {
 			// For errors, fallback to manual SSE via fetch
-			yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages)
+			yield* this.makeResponsesApiRequest(requestBody, model, metadata, systemPrompt, messages)
 		}
 	}
 
@@ -322,7 +331,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		return formattedMessages
 	}
 
-	private async *makeGpt5ResponsesAPIRequest(
+	private async *makeResponsesApiRequest(
 		requestBody: any,
 		model: OpenAiNativeModel,
 		metadata?: ApiHandlerCreateMessageMetadata,
@@ -347,7 +356,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		if (!response.ok) {
 			const errorText = await response.text()
 
-			let errorMessage = `GPT-5 API request failed (${response.status})`
+			let errorMessage = `OpenAI Responses API request failed (${response.status})`
 			let errorDetails = ""
 
 			// Try to parse error as JSON for better error messages
@@ -803,7 +812,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					}
 				}
 
-				// Usage for done/completed is already handled by processGpt5Event in SDK path.
+				// Usage for done/completed is already handled by processEvent in the SDK path.
 				// For SSE path, usage often arrives separately; avoid double-emitting here.
 			}
 			// These are structural or status events, we can just log them at a lower level or ignore.
@@ -977,6 +986,23 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		return selected && selected !== "disable" ? (selected as any) : undefined
 	}
 
+	/**
+	 * Returns the appropriate prompt cache retention policy for the given model, if any.
+	 *
+	 * The policy is driven by ModelInfo.promptCacheRetention so that model-specific details
+	 * live in the shared types layer rather than this provider. When set to "24h" and the
+	 * model supports prompt caching, extended prompt cache retention is requested.
+	 */
+	private getPromptCacheRetention(model: OpenAiNativeModel): "24h" | undefined {
+		if (!model.info.supportsPromptCache) return undefined
+
+		if (model.info.promptCacheRetention === "24h") {
+			return "24h"
+		}
+
+		return undefined
+	}
+
 	/**
 	 * Returns a shallow-cloned ModelInfo with pricing overridden for the given tier, if available.
 	 * If no tier or no overrides exist, the original ModelInfo is returned.
@@ -1083,7 +1109,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		if (reasoningEffort) {
 			requestBody.reasoning = {
 				effort: reasoningEffort,
-				...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}),
+				...(this.options.enableResponsesReasoningSummary ? { summary: "auto" as const } : {}),
 			}
 		}
 
@@ -1102,6 +1128,12 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			requestBody.text = { verbosity: (verbosity || "medium") as VerbosityLevel }
 		}
 
+		// Enable extended prompt cache retention for eligible models
+		const promptCacheRetention = this.getPromptCacheRetention(model)
+		if (promptCacheRetention) {
+			requestBody.prompt_cache_retention = promptCacheRetention
+		}
+
 		// Make the non-streaming request
 		const response = await (this.client as any).responses.create(requestBody)
 
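For context, a minimal standalone sketch (not part of the diff) of how the new promptCacheRetention model field is expected to flow into the Responses API request body. Only the field names supportsPromptCache, promptCacheRetention, and the prompt_cache_retention request parameter come from the changes above; the simplified ModelInfo shape, the free-standing helper, and the example model id are assumptions made purely for illustration.

// Sketch only: simplified ModelInfo shape, not the real shared-types definition.
type PromptCacheRetention = "in_memory" | "24h"

interface MinimalModelInfo {
	supportsPromptCache?: boolean
	promptCacheRetention?: PromptCacheRetention
}

function getPromptCacheRetention(info: MinimalModelInfo): "24h" | undefined {
	// Mirrors the provider method above: extended retention is only requested
	// when the model supports prompt caching and opts into the "24h" policy.
	if (!info.supportsPromptCache) return undefined
	return info.promptCacheRetention === "24h" ? "24h" : undefined
}

// Example: a model entry that opts into extended retention.
const info: MinimalModelInfo = { supportsPromptCache: true, promptCacheRetention: "24h" }
const retention = getPromptCacheRetention(info)

const body = {
	model: "example-model-id", // placeholder, not a real model id
	stream: true,
	// Spread the parameter only when a policy was selected, as in the diff.
	...(retention ? { prompt_cache_retention: retention } : {}),
}
// body.prompt_cache_retention === "24h"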