Skip to content

Commit 301731a

Browse files
committed
feat: add supportsTemperature and Responses API flags
Extend ModelInfo schema with supportsTemperature and usesResponsesApi capabilities to control request param inclusion and API selection. Refactor OpenAiNativeHandler to generically handle Responses API models instead of hardcoded families, normalizing IDs and gating temperature, verbosity, and max token params via getModelParams. Update GlamaHandler, LiteLLMHandler, UnboundHandler, and XAIHandler to use getModelParams for capability-aware temperature/max token handling. Enhance tests to cover Responses API flows, conversation continuity, and temperature stripping for unsupported models, replacing SSE mocks with SDK responses.create where applicable.
1 parent 5e07bc4 commit 301731a

File tree

12 files changed

+719
-829
lines changed

12 files changed

+719
-829
lines changed

packages/types/src/model.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ export const modelInfoSchema = z.object({
4646
supportsPromptCache: z.boolean(),
4747
// Capability flag to indicate whether the model supports an output verbosity parameter
4848
supportsVerbosity: z.boolean().optional(),
49+
// Indicates whether the model accepts a temperature parameter
50+
supportsTemperature: z.boolean().optional(),
51+
// Indicates that this model should be called via the Responses API instead of Chat Completions
52+
usesResponsesApi: z.boolean().optional(),
4953
supportsReasoningBudget: z.boolean().optional(),
5054
requiredReasoningBudget: z.boolean().optional(),
5155
supportsReasoningEffort: z.boolean().optional(),

packages/types/src/providers/openai.ts

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ export const openAiNativeModels = {
1919
description: "GPT-5: The best model for coding and agentic tasks across domains",
2020
// supportsVerbosity is a new capability; ensure ModelInfo includes it
2121
supportsVerbosity: true,
22+
usesResponsesApi: true,
23+
// Q: Why do we not send the temperature for GPT-5?
24+
// A: Because OpenAI does not support temperature over the API for GPT-5.
25+
supportsTemperature: false,
2226
},
2327
"gpt-5-mini-2025-08-07": {
2428
maxTokens: 128000,
@@ -32,6 +36,8 @@ export const openAiNativeModels = {
3236
cacheReadsPrice: 0.03,
3337
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
3438
supportsVerbosity: true,
39+
usesResponsesApi: true,
40+
supportsTemperature: false,
3541
},
3642
"gpt-5-nano-2025-08-07": {
3743
maxTokens: 128000,
@@ -45,6 +51,8 @@ export const openAiNativeModels = {
4551
cacheReadsPrice: 0.01,
4652
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
4753
supportsVerbosity: true,
54+
usesResponsesApi: true,
55+
supportsTemperature: false,
4856
},
4957
"gpt-4.1": {
5058
maxTokens: 32_768,
@@ -83,6 +91,8 @@ export const openAiNativeModels = {
8391
cacheReadsPrice: 0.5,
8492
supportsReasoningEffort: true,
8593
reasoningEffort: "medium",
94+
usesResponsesApi: true,
95+
supportsTemperature: false,
8696
},
8797
"o3-high": {
8898
maxTokens: 100_000,
@@ -93,6 +103,8 @@ export const openAiNativeModels = {
93103
outputPrice: 8.0,
94104
cacheReadsPrice: 0.5,
95105
reasoningEffort: "high",
106+
usesResponsesApi: true,
107+
supportsTemperature: false,
96108
},
97109
"o3-low": {
98110
maxTokens: 100_000,
@@ -103,6 +115,8 @@ export const openAiNativeModels = {
103115
outputPrice: 8.0,
104116
cacheReadsPrice: 0.5,
105117
reasoningEffort: "low",
118+
usesResponsesApi: true,
119+
supportsTemperature: false,
106120
},
107121
"o4-mini": {
108122
maxTokens: 100_000,
@@ -114,6 +128,8 @@ export const openAiNativeModels = {
114128
cacheReadsPrice: 0.275,
115129
supportsReasoningEffort: true,
116130
reasoningEffort: "medium",
131+
usesResponsesApi: true,
132+
supportsTemperature: false,
117133
},
118134
"o4-mini-high": {
119135
maxTokens: 100_000,
@@ -124,6 +140,8 @@ export const openAiNativeModels = {
124140
outputPrice: 4.4,
125141
cacheReadsPrice: 0.275,
126142
reasoningEffort: "high",
143+
usesResponsesApi: true,
144+
supportsTemperature: false,
127145
},
128146
"o4-mini-low": {
129147
maxTokens: 100_000,
@@ -134,6 +152,8 @@ export const openAiNativeModels = {
134152
outputPrice: 4.4,
135153
cacheReadsPrice: 0.275,
136154
reasoningEffort: "low",
155+
usesResponsesApi: true,
156+
supportsTemperature: false,
137157
},
138158
"o3-mini": {
139159
maxTokens: 100_000,
@@ -145,6 +165,8 @@ export const openAiNativeModels = {
145165
cacheReadsPrice: 0.55,
146166
supportsReasoningEffort: true,
147167
reasoningEffort: "medium",
168+
usesResponsesApi: true,
169+
supportsTemperature: false,
148170
},
149171
"o3-mini-high": {
150172
maxTokens: 100_000,
@@ -155,6 +177,8 @@ export const openAiNativeModels = {
155177
outputPrice: 4.4,
156178
cacheReadsPrice: 0.55,
157179
reasoningEffort: "high",
180+
usesResponsesApi: true,
181+
supportsTemperature: false,
158182
},
159183
"o3-mini-low": {
160184
maxTokens: 100_000,
@@ -165,6 +189,8 @@ export const openAiNativeModels = {
165189
outputPrice: 4.4,
166190
cacheReadsPrice: 0.55,
167191
reasoningEffort: "low",
192+
usesResponsesApi: true,
193+
supportsTemperature: false,
168194
},
169195
o1: {
170196
maxTokens: 100_000,
@@ -174,6 +200,8 @@ export const openAiNativeModels = {
174200
inputPrice: 15,
175201
outputPrice: 60,
176202
cacheReadsPrice: 7.5,
203+
usesResponsesApi: true,
204+
supportsTemperature: false,
177205
},
178206
"o1-preview": {
179207
maxTokens: 32_768,
@@ -183,6 +211,8 @@ export const openAiNativeModels = {
183211
inputPrice: 15,
184212
outputPrice: 60,
185213
cacheReadsPrice: 7.5,
214+
usesResponsesApi: true,
215+
supportsTemperature: false,
186216
},
187217
"o1-mini": {
188218
maxTokens: 65_536,
@@ -192,6 +222,8 @@ export const openAiNativeModels = {
192222
inputPrice: 1.1,
193223
outputPrice: 4.4,
194224
cacheReadsPrice: 0.55,
225+
usesResponsesApi: true,
226+
supportsTemperature: false,
195227
},
196228
"gpt-4.5-preview": {
197229
maxTokens: 16_384,
@@ -228,6 +260,7 @@ export const openAiNativeModels = {
228260
inputPrice: 1.5,
229261
outputPrice: 6,
230262
cacheReadsPrice: 0,
263+
usesResponsesApi: true,
231264
description:
232265
"Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks. Trained with reinforcement learning to generate human-style code, adhere to instructions, and iteratively run tests.",
233266
},
@@ -247,6 +280,5 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
247280
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
248281

249282
export const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0
250-
export const GPT5_DEFAULT_TEMPERATURE = 1.0
251283

252284
export const OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions"

0 commit comments

Comments (0)