fix & refactor: add supportsTemperature and Responses API flags #6969

Closed
4 changes: 4 additions & 0 deletions packages/types/src/model.ts
@@ -46,6 +46,10 @@ export const modelInfoSchema = z.object({
supportsPromptCache: z.boolean(),
// Capability flag to indicate whether the model supports an output verbosity parameter
supportsVerbosity: z.boolean().optional(),
// Indicates whether the model accepts a temperature parameter
supportsTemperature: z.boolean().optional(),
// Indicates that this model should be called via the Responses API instead of Chat Completions
usesResponsesApi: z.boolean().optional(),
supportsReasoningBudget: z.boolean().optional(),
requiredReasoningBudget: z.boolean().optional(),
supportsReasoningEffort: z.boolean().optional(),
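Both flags are optional, so existing model definitions continue to validate unchanged. As a point of reference, a minimal sketch of how a provider handler could consume supportsTemperature (the reduced type, the helper name, and the omit-on-false behaviour are illustrative assumptions, not code from this PR):

// Reduced slice of ModelInfo carrying only the fields used below (illustrative).
type ModelCapabilities = {
	supportsTemperature?: boolean
	usesResponsesApi?: boolean
}

// Attach a temperature only when the model accepts one. An explicit `false`
// (GPT-5 and the o-series entries below) means the parameter is omitted entirely;
// leaving the flag unset keeps the previous behaviour of passing the caller's value through.
function buildSamplingParams(info: ModelCapabilities, temperature?: number): { temperature?: number } {
	if (info.supportsTemperature === false || temperature === undefined) {
		return {}
	}
	return { temperature }
}

Treating only an explicit false as "does not accept temperature" keeps the flag backward-compatible for models that never set it.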
34 changes: 33 additions & 1 deletion packages/types/src/providers/openai.ts
@@ -19,6 +19,10 @@ export const openAiNativeModels = {
description: "GPT-5: The best model for coding and agentic tasks across domains",
// supportsVerbosity is a new capability; ensure ModelInfo includes it
supportsVerbosity: true,
usesResponsesApi: true,
// Q: Why do we not send the temperature for GPT-5?
// A: Because OpenAI does not support temperature over the API for GPT-5.
supportsTemperature: false,
},
"gpt-5-mini-2025-08-07": {
maxTokens: 128000,
@@ -32,6 +36,8 @@ export const openAiNativeModels = {
cacheReadsPrice: 0.03,
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
supportsVerbosity: true,
usesResponsesApi: true,
supportsTemperature: false,
},
"gpt-5-nano-2025-08-07": {
maxTokens: 128000,
@@ -45,6 +51,8 @@ export const openAiNativeModels = {
cacheReadsPrice: 0.01,
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
supportsVerbosity: true,
usesResponsesApi: true,
supportsTemperature: false,
},
"gpt-4.1": {
maxTokens: 32_768,
@@ -83,6 +91,8 @@ export const openAiNativeModels = {
cacheReadsPrice: 0.5,
supportsReasoningEffort: true,
reasoningEffort: "medium",
usesResponsesApi: true,
supportsTemperature: false,
},
"o3-high": {
maxTokens: 100_000,
@@ -93,6 +103,8 @@ export const openAiNativeModels = {
outputPrice: 8.0,
cacheReadsPrice: 0.5,
reasoningEffort: "high",
usesResponsesApi: true,
supportsTemperature: false,
},
"o3-low": {
maxTokens: 100_000,
@@ -103,6 +115,8 @@ export const openAiNativeModels = {
outputPrice: 8.0,
cacheReadsPrice: 0.5,
reasoningEffort: "low",
usesResponsesApi: true,
supportsTemperature: false,
},
"o4-mini": {
maxTokens: 100_000,
@@ -114,6 +128,8 @@ export const openAiNativeModels = {
cacheReadsPrice: 0.275,
supportsReasoningEffort: true,
reasoningEffort: "medium",
usesResponsesApi: true,
supportsTemperature: false,
},
"o4-mini-high": {
maxTokens: 100_000,
@@ -124,6 +140,8 @@ export const openAiNativeModels = {
outputPrice: 4.4,
cacheReadsPrice: 0.275,
reasoningEffort: "high",
usesResponsesApi: true,
supportsTemperature: false,
},
"o4-mini-low": {
maxTokens: 100_000,
@@ -134,6 +152,8 @@ export const openAiNativeModels = {
outputPrice: 4.4,
cacheReadsPrice: 0.275,
reasoningEffort: "low",
usesResponsesApi: true,
supportsTemperature: false,
},
"o3-mini": {
maxTokens: 100_000,
@@ -145,6 +165,8 @@ export const openAiNativeModels = {
cacheReadsPrice: 0.55,
supportsReasoningEffort: true,
reasoningEffort: "medium",
usesResponsesApi: true,
supportsTemperature: false,
},
"o3-mini-high": {
maxTokens: 100_000,
@@ -155,6 +177,8 @@ export const openAiNativeModels = {
outputPrice: 4.4,
cacheReadsPrice: 0.55,
reasoningEffort: "high",
usesResponsesApi: true,
supportsTemperature: false,
},
"o3-mini-low": {
maxTokens: 100_000,
@@ -165,6 +189,8 @@ export const openAiNativeModels = {
outputPrice: 4.4,
cacheReadsPrice: 0.55,
reasoningEffort: "low",
usesResponsesApi: true,
supportsTemperature: false,
},
o1: {
maxTokens: 100_000,
@@ -174,6 +200,8 @@ export const openAiNativeModels = {
inputPrice: 15,
outputPrice: 60,
cacheReadsPrice: 7.5,
usesResponsesApi: true,
supportsTemperature: false,
},
"o1-preview": {
maxTokens: 32_768,
@@ -183,6 +211,8 @@ export const openAiNativeModels = {
inputPrice: 15,
outputPrice: 60,
cacheReadsPrice: 7.5,
usesResponsesApi: true,
supportsTemperature: false,
},
"o1-mini": {
maxTokens: 65_536,
@@ -192,6 +222,8 @@ export const openAiNativeModels = {
inputPrice: 1.1,
outputPrice: 4.4,
cacheReadsPrice: 0.55,
usesResponsesApi: true,
supportsTemperature: false,
},
"gpt-4.5-preview": {
maxTokens: 16_384,
@@ -228,6 +260,7 @@ export const openAiNativeModels = {
inputPrice: 1.5,
outputPrice: 6,
cacheReadsPrice: 0,
usesResponsesApi: true,
description:
"Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks. Trained with reinforcement learning to generate human-style code, adhere to instructions, and iteratively run tests.",
},
@@ -247,6 +280,5 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"

export const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0
export const GPT5_DEFAULT_TEMPERATURE = 1.0

export const OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions"
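The usesResponsesApi flag plays the same role for endpoint selection. A hedged sketch of that routing, assuming an OpenAI-style client that can reach both endpoints (the helper name is illustrative; the paths are OpenAI's public Chat Completions and Responses endpoints):

// Derive the endpoint from the capability flag instead of hard-coding
// model-ID checks at every call site.
function resolveEndpoint(info: { usesResponsesApi?: boolean }): string {
	// Models flagged in this PR (the GPT-5 family, o-series, and codex-mini-latest)
	// go to the Responses API; everything else stays on Chat Completions.
	return info.usesResponsesApi ? "/v1/responses" : "/v1/chat/completions"
}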