diff --git a/docs/my-website/docs/providers/azure/azure.md b/docs/my-website/docs/providers/azure/azure.md index 1feec52b3ec6..f290fc3774a1 100644 --- a/docs/my-website/docs/providers/azure/azure.md +++ b/docs/my-website/docs/providers/azure/azure.md @@ -369,16 +369,20 @@ model_list: -## GPT-5 Models +## GPT-5 Reasoning Models | Property | Details | |-------|-------| -| Description | Azure OpenAI GPT-5 models | +| Description | Azure OpenAI GPT-5 reasoning models | | Provider Route on LiteLLM | `azure/gpt5_series/` or `azure/gpt-5-deployment-name` | -LiteLLM supports using Azure GPT-5 models in one of the two ways: +LiteLLM supports using Azure GPT-5 reasoning models in one of two ways: 1. Explicit Routing: `model = azure/gpt5_series/`. In this scenario the model onboarded to litellm follows the format `model=azure/gpt5_series/`. -2. Inferred Routing (If the azure deployment name contains `gpt-5` in the name): `model = azure/gpt-5-mini`. In this scenario the model onboarded to litellm follows the format `model=azure/gpt-5-mini`. +2. Inferred Routing (if the Azure deployment name contains `gpt-5` but not `gpt-5-chat`): `model = azure/gpt-5-mini`. In this scenario the model onboarded to litellm follows the format `model=azure/gpt-5-mini`. + +:::note +For GPT-5 chat models (e.g., `gpt-5-chat`), do not use the `gpt5_series/` prefix, as these are not reasoning models. +::: #### Explicit Routing Use `azure/gpt5_series/` for explicit GPT-5 model routing. 
diff --git a/litellm/__init__.py b/litellm/__init__.py index fbc3c7950818..b331618c0ff0 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1202,8 +1202,8 @@ def add_known_models(): from .llms.openai.chat.gpt_transformation import ( OpenAIGPTConfig, ) -from .llms.openai.chat.gpt_5_transformation import ( - OpenAIGPT5Config, +from .llms.openai.chat.gpt_5_reasoning_transformation import ( + OpenAIGPT5ReasoningConfig, ) from .llms.openai.transcriptions.whisper_transformation import ( OpenAIWhisperAudioTranscriptionConfig, @@ -1218,7 +1218,7 @@ def add_known_models(): ) openAIGPTAudioConfig = OpenAIGPTAudioConfig() -openAIGPT5Config = OpenAIGPT5Config() +openAIGPT5ReasoningConfig = OpenAIGPT5ReasoningConfig() from .llms.nvidia_nim.chat.transformation import NvidiaNimConfig from .llms.nvidia_nim.embed import NvidiaNimEmbeddingConfig @@ -1256,7 +1256,7 @@ def add_known_models(): from .llms.heroku.chat.transformation import HerokuChatConfig from .llms.cometapi.chat.transformation import CometAPIConfig from .llms.azure.chat.gpt_transformation import AzureOpenAIConfig -from .llms.azure.chat.gpt_5_transformation import AzureOpenAIGPT5Config +from .llms.azure.chat.gpt_5_reasoning_transformation import AzureOpenAIGPT5ReasoningConfig from .llms.azure.completion.transformation import AzureOpenAITextConfig from .llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig from .llms.llamafile.chat.transformation import LlamafileChatConfig diff --git a/litellm/litellm_core_utils/get_supported_openai_params.py b/litellm/litellm_core_utils/get_supported_openai_params.py index 06e650f938dc..4c6870dd223c 100644 --- a/litellm/litellm_core_utils/get_supported_openai_params.py +++ b/litellm/litellm_core_utils/get_supported_openai_params.py @@ -121,8 +121,8 @@ def get_supported_openai_params( # noqa: PLR0915 return litellm.AzureOpenAIO1Config().get_supported_openai_params( model=model ) - elif litellm.AzureOpenAIGPT5Config.is_model_gpt_5_model(model=model): - return 
litellm.AzureOpenAIGPT5Config().get_supported_openai_params( + elif litellm.AzureOpenAIGPT5ReasoningConfig.is_model_gpt_5_reasoning_model(model=model): + return litellm.AzureOpenAIGPT5ReasoningConfig().get_supported_openai_params( model=model ) else: diff --git a/litellm/llms/azure/azure.py b/litellm/llms/azure/azure.py index 7c5b693b453e..42110b2993db 100644 --- a/litellm/llms/azure/azure.py +++ b/litellm/llms/azure/azure.py @@ -230,8 +230,8 @@ def completion( # noqa: PLR0915 ) data = {"model": None, "messages": messages, **optional_params} - elif litellm.AzureOpenAIGPT5Config.is_model_gpt_5_model(model=model): - data = litellm.AzureOpenAIGPT5Config().transform_request( + elif litellm.AzureOpenAIGPT5ReasoningConfig.is_model_gpt_5_reasoning_model(model=model): + data = litellm.AzureOpenAIGPT5ReasoningConfig().transform_request( model=model, messages=messages, optional_params=optional_params, diff --git a/litellm/llms/azure/chat/gpt_5_transformation.py b/litellm/llms/azure/chat/gpt_5_reasoning_transformation.py similarity index 71% rename from litellm/llms/azure/chat/gpt_5_transformation.py rename to litellm/llms/azure/chat/gpt_5_reasoning_transformation.py index d563a2889ca6..a4920f27149a 100644 --- a/litellm/llms/azure/chat/gpt_5_transformation.py +++ b/litellm/llms/azure/chat/gpt_5_reasoning_transformation.py @@ -2,28 +2,28 @@ from typing import List -from litellm.llms.openai.chat.gpt_5_transformation import OpenAIGPT5Config +from litellm.llms.openai.chat.gpt_5_reasoning_transformation import OpenAIGPT5ReasoningConfig from litellm.types.llms.openai import AllMessageValues from .gpt_transformation import AzureOpenAIConfig -class AzureOpenAIGPT5Config(AzureOpenAIConfig, OpenAIGPT5Config): - """Azure specific handling for gpt-5 models.""" +class AzureOpenAIGPT5ReasoningConfig(AzureOpenAIConfig, OpenAIGPT5ReasoningConfig): + """Azure specific handling for gpt-5 reasoning models.""" GPT5_SERIES_ROUTE = "gpt5_series/" @classmethod - def is_model_gpt_5_model(cls, model: 
str) -> bool: + def is_model_gpt_5_reasoning_model(cls, model: str) -> bool: """Check if the Azure model string refers to a gpt-5 variant. Accepts both explicit gpt-5 model names and the ``gpt5_series/`` prefix used for manual routing. """ - return "gpt-5" in model or "gpt5_series" in model + return ("gpt-5" in model and "gpt-5-chat" not in model) or "gpt5_series" in model def get_supported_openai_params(self, model: str) -> List[str]: - return OpenAIGPT5Config.get_supported_openai_params(self, model=model) + return OpenAIGPT5ReasoningConfig.get_supported_openai_params(self, model=model) def map_openai_params( self, @@ -33,7 +33,7 @@ def map_openai_params( drop_params: bool, api_version: str = "", ) -> dict: - return OpenAIGPT5Config.map_openai_params( + return OpenAIGPT5ReasoningConfig.map_openai_params( self, non_default_params=non_default_params, optional_params=optional_params, diff --git a/litellm/llms/openai/chat/gpt_5_transformation.py b/litellm/llms/openai/chat/gpt_5_reasoning_transformation.py similarity index 90% rename from litellm/llms/openai/chat/gpt_5_transformation.py rename to litellm/llms/openai/chat/gpt_5_reasoning_transformation.py index fa357c1bd229..3ccc61dee5b3 100644 --- a/litellm/llms/openai/chat/gpt_5_transformation.py +++ b/litellm/llms/openai/chat/gpt_5_reasoning_transformation.py @@ -7,10 +7,10 @@ from .gpt_transformation import OpenAIGPTConfig -class OpenAIGPT5Config(OpenAIGPTConfig): - """Configuration for gpt-5 models including GPT-5-Codex variants. +class OpenAIGPT5ReasoningConfig(OpenAIGPTConfig): + """Configuration for gpt-5 reasoning models including GPT-5-Codex variants. - Handles OpenAI API quirks for the gpt-5 series like: + Handles OpenAI API quirks for the gpt-5 reasoning series like: - Mapping ``max_tokens`` -> ``max_completion_tokens``. - Dropping unsupported ``temperature`` values when requested. 
@@ -18,8 +18,8 @@ class OpenAIGPT5Config(OpenAIGPTConfig): """ @classmethod - def is_model_gpt_5_model(cls, model: str) -> bool: - return "gpt-5" in model + def is_model_gpt_5_reasoning_model(cls, model: str) -> bool: + return "gpt-5" in model and "gpt-5-chat" not in model @classmethod def is_model_gpt_5_codex_model(cls, model: str) -> bool: diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index 3347e5332425..d3d2ab8fa327 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -51,7 +51,7 @@ from ...types.llms.openai import * from ..base import BaseLLM -from .chat.gpt_5_transformation import OpenAIGPT5Config +from .chat.gpt_5_reasoning_transformation import OpenAIGPT5ReasoningConfig from .chat.o_series_transformation import OpenAIOSeriesConfig from .common_utils import ( BaseOpenAILLM, @@ -60,7 +60,7 @@ ) openaiOSeriesConfig = OpenAIOSeriesConfig() -openAIGPT5Config = OpenAIGPT5Config() +openAIGPT5ReasoningConfig = OpenAIGPT5ReasoningConfig() class MistralEmbeddingConfig: @@ -189,8 +189,8 @@ def get_supported_openai_params(self, model: str) -> list: """ if openaiOSeriesConfig.is_model_o_series_model(model=model): return openaiOSeriesConfig.get_supported_openai_params(model=model) - elif openAIGPT5Config.is_model_gpt_5_model(model=model): - return openAIGPT5Config.get_supported_openai_params(model=model) + elif openAIGPT5ReasoningConfig.is_model_gpt_5_reasoning_model(model=model): + return openAIGPT5ReasoningConfig.get_supported_openai_params(model=model) elif litellm.openAIGPTAudioConfig.is_model_gpt_audio_model(model=model): return litellm.openAIGPTAudioConfig.get_supported_openai_params(model=model) else: @@ -225,8 +225,8 @@ def map_openai_params( model=model, drop_params=drop_params, ) - elif openAIGPT5Config.is_model_gpt_5_model(model=model): - return openAIGPT5Config.map_openai_params( + elif openAIGPT5ReasoningConfig.is_model_gpt_5_reasoning_model(model=model): + return 
openAIGPT5ReasoningConfig.map_openai_params( non_default_params=non_default_params, optional_params=optional_params, model=model, diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 5b862b98a820..1b249ca22f21 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1997,7 +1997,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": false, + "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-5-chat-latest": { @@ -2029,7 +2029,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": false, + "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-5-codex": { diff --git a/litellm/utils.py b/litellm/utils.py index 3c6c3ac86e4f..5d95f663d74c 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4001,8 +4001,8 @@ def _check_valid_arg(supported_params: List[str]): else False ), ) - elif litellm.AzureOpenAIGPT5Config.is_model_gpt_5_model(model=model): - optional_params = litellm.AzureOpenAIGPT5Config().map_openai_params( + elif litellm.AzureOpenAIGPT5ReasoningConfig.is_model_gpt_5_reasoning_model(model=model): + optional_params = litellm.AzureOpenAIGPT5ReasoningConfig().map_openai_params( non_default_params=non_default_params, optional_params=optional_params, model=model, @@ -6899,9 +6899,9 @@ def get_provider_chat_config( # noqa: PLR0915 return litellm.openaiOSeriesConfig elif ( provider == LlmProviders.OPENAI - and litellm.OpenAIGPT5Config.is_model_gpt_5_model(model=model) + and litellm.OpenAIGPT5ReasoningConfig.is_model_gpt_5_reasoning_model(model=model) ): - return litellm.OpenAIGPT5Config() + return litellm.OpenAIGPT5ReasoningConfig() elif litellm.LlmProviders.DEEPSEEK == provider: return litellm.DeepSeekChatConfig() elif litellm.LlmProviders.GROQ == 
provider: @@ -7012,8 +7012,8 @@ def get_provider_chat_config( # noqa: PLR0915 elif litellm.LlmProviders.AZURE == provider: if litellm.AzureOpenAIO1Config().is_o_series_model(model=model): return litellm.AzureOpenAIO1Config() - if litellm.AzureOpenAIGPT5Config.is_model_gpt_5_model(model=model): - return litellm.AzureOpenAIGPT5Config() + if litellm.AzureOpenAIGPT5ReasoningConfig.is_model_gpt_5_reasoning_model(model=model): + return litellm.AzureOpenAIGPT5ReasoningConfig() return litellm.AzureOpenAIConfig() elif litellm.LlmProviders.AZURE_AI == provider: return litellm.AzureAIStudioConfig() diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 5b862b98a820..1b249ca22f21 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1997,7 +1997,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": false, + "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-5-chat-latest": { @@ -2029,7 +2029,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": false, + "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-5-codex": { diff --git a/tests/test_litellm/llms/azure/chat/test_azure_gpt5_transformation.py b/tests/test_litellm/llms/azure/chat/test_azure_gpt5_reasoning_transformation.py similarity index 82% rename from tests/test_litellm/llms/azure/chat/test_azure_gpt5_transformation.py rename to tests/test_litellm/llms/azure/chat/test_azure_gpt5_reasoning_transformation.py index 2ef2020b09a2..77bee5f49b56 100644 --- a/tests/test_litellm/llms/azure/chat/test_azure_gpt5_transformation.py +++ b/tests/test_litellm/llms/azure/chat/test_azure_gpt5_reasoning_transformation.py @@ -1,22 +1,21 @@ import pytest import litellm -from litellm.llms.azure.chat.gpt_5_transformation import AzureOpenAIGPT5Config +from 
litellm.llms.azure.chat.gpt_5_reasoning_transformation import AzureOpenAIGPT5ReasoningConfig @pytest.fixture() -def config() -> AzureOpenAIGPT5Config: - return AzureOpenAIGPT5Config() +def config() -> AzureOpenAIGPT5ReasoningConfig: + return AzureOpenAIGPT5ReasoningConfig() -def test_azure_gpt5_supports_reasoning_effort(config: AzureOpenAIGPT5Config): +def test_azure_gpt5_supports_reasoning_effort(config: AzureOpenAIGPT5ReasoningConfig): assert "reasoning_effort" in config.get_supported_openai_params(model="gpt-5") assert "reasoning_effort" in config.get_supported_openai_params( model="gpt5_series/my-deployment" ) - -def test_azure_gpt5_maps_max_tokens(config: AzureOpenAIGPT5Config): +def test_azure_gpt5_maps_max_tokens(config: AzureOpenAIGPT5ReasoningConfig): params = config.map_openai_params( non_default_params={"max_tokens": 5}, optional_params={}, @@ -28,7 +27,7 @@ def test_azure_gpt5_maps_max_tokens(config: AzureOpenAIGPT5Config): assert "max_tokens" not in params -def test_azure_gpt5_temperature_error(config: AzureOpenAIGPT5Config): +def test_azure_gpt5_temperature_error(config: AzureOpenAIGPT5ReasoningConfig): with pytest.raises(litellm.utils.UnsupportedParamsError): config.map_openai_params( non_default_params={"temperature": 0.2}, @@ -38,8 +37,7 @@ def test_azure_gpt5_temperature_error(config: AzureOpenAIGPT5Config): api_version="2024-05-01-preview", ) - -def test_azure_gpt5_series_transform_request(config: AzureOpenAIGPT5Config): +def test_azure_gpt5_series_transform_request(config: AzureOpenAIGPT5ReasoningConfig): request = config.transform_request( model="gpt5_series/gpt-5", messages=[], @@ -51,13 +49,13 @@ def test_azure_gpt5_series_transform_request(config: AzureOpenAIGPT5Config): # GPT-5-Codex specific tests for Azure -def test_azure_gpt5_codex_model_detection(config: AzureOpenAIGPT5Config): +def test_azure_gpt5_codex_model_detection(config: AzureOpenAIGPT5ReasoningConfig): """Test that Azure GPT-5-Codex models are correctly detected.""" - assert 
config.is_model_gpt_5_model("gpt-5-codex") - assert config.is_model_gpt_5_model("gpt5_series/gpt-5-codex") + assert config.is_model_gpt_5_reasoning_model("gpt-5-codex") + assert config.is_model_gpt_5_reasoning_model("gpt5_series/gpt-5-codex") -def test_azure_gpt5_codex_supports_reasoning_effort(config: AzureOpenAIGPT5Config): +def test_azure_gpt5_codex_supports_reasoning_effort(config: AzureOpenAIGPT5ReasoningConfig): """Test that Azure GPT-5-Codex supports reasoning_effort parameter.""" assert "reasoning_effort" in config.get_supported_openai_params(model="gpt-5-codex") assert "reasoning_effort" in config.get_supported_openai_params( @@ -65,7 +63,7 @@ def test_azure_gpt5_codex_supports_reasoning_effort(config: AzureOpenAIGPT5Confi ) -def test_azure_gpt5_codex_maps_max_tokens(config: AzureOpenAIGPT5Config): +def test_azure_gpt5_codex_maps_max_tokens(config: AzureOpenAIGPT5ReasoningConfig): """Test that Azure GPT-5-Codex correctly maps max_tokens to max_completion_tokens.""" params = config.map_openai_params( non_default_params={"max_tokens": 150}, @@ -78,7 +76,7 @@ def test_azure_gpt5_codex_maps_max_tokens(config: AzureOpenAIGPT5Config): assert "max_tokens" not in params -def test_azure_gpt5_codex_temperature_error(config: AzureOpenAIGPT5Config): +def test_azure_gpt5_codex_temperature_error(config: AzureOpenAIGPT5ReasoningConfig): """Test that Azure GPT-5-Codex raises error for unsupported temperature.""" with pytest.raises(litellm.utils.UnsupportedParamsError): config.map_openai_params( @@ -90,7 +88,7 @@ def test_azure_gpt5_codex_temperature_error(config: AzureOpenAIGPT5Config): ) -def test_azure_gpt5_codex_series_transform_request(config: AzureOpenAIGPT5Config): +def test_azure_gpt5_codex_series_transform_request(config: AzureOpenAIGPT5ReasoningConfig): """Test that Azure GPT-5-Codex series routing works correctly.""" request = config.transform_request( model="gpt5_series/gpt-5-codex", diff --git a/tests/test_litellm/llms/openai/test_gpt5_transformation.py 
b/tests/test_litellm/llms/openai/test_gpt5_reasoning_transformation.py similarity index 85% rename from tests/test_litellm/llms/openai/test_gpt5_transformation.py rename to tests/test_litellm/llms/openai/test_gpt5_reasoning_transformation.py index 876eb8b29f98..a114ef15e4a7 100644 --- a/tests/test_litellm/llms/openai/test_gpt5_transformation.py +++ b/tests/test_litellm/llms/openai/test_gpt5_reasoning_transformation.py @@ -2,7 +2,7 @@ import litellm from litellm.llms.openai.openai import OpenAIConfig -from litellm.llms.openai.chat.gpt_5_transformation import OpenAIGPT5Config +from litellm.llms.openai.chat.gpt_5_reasoning_transformation import OpenAIGPT5ReasoningConfig @pytest.fixture() @@ -11,8 +11,8 @@ def config() -> OpenAIConfig: @pytest.fixture() -def gpt5_config() -> OpenAIGPT5Config: - return OpenAIGPT5Config() +def gpt5_config() -> OpenAIGPT5ReasoningConfig: + return OpenAIGPT5ReasoningConfig() def test_gpt5_supports_reasoning_effort(config: OpenAIConfig): @@ -20,6 +20,10 @@ def test_gpt5_supports_reasoning_effort(config: OpenAIConfig): assert "reasoning_effort" in config.get_supported_openai_params(model="gpt-5-mini") +def test_gpt5_chat_does_not_support_reasoning_effort(config: OpenAIConfig): + assert "reasoning_effort" not in config.get_supported_openai_params(model="gpt-5-chat-latest") + + def test_gpt5_maps_max_tokens(config: OpenAIConfig): params = config.map_openai_params( non_default_params={"max_tokens": 10}, @@ -50,6 +54,15 @@ def test_gpt5_temperature_error(config: OpenAIConfig): drop_params=False, ) +def test_gpt5_chat_supports_temperature(config: OpenAIConfig): + # temperature is supported for chat models + params = config.map_openai_params( + non_default_params={"temperature": 0.3}, + optional_params={}, + model="gpt-5-chat-latest", + drop_params=False, + ) + assert params["temperature"] == 0.3 def test_gpt5_unsupported_params_drop(config: OpenAIConfig): assert "top_p" not in config.get_supported_openai_params(model="gpt-5") @@ -63,9 +76,9 @@ 
def test_gpt5_unsupported_params_drop(config: OpenAIConfig): # GPT-5-Codex specific tests -def test_gpt5_codex_model_detection(gpt5_config: OpenAIGPT5Config): +def test_gpt5_codex_model_detection(gpt5_config: OpenAIGPT5ReasoningConfig): """Test that GPT-5-Codex models are correctly detected as GPT-5 models.""" - assert gpt5_config.is_model_gpt_5_model("gpt-5-codex") + assert gpt5_config.is_model_gpt_5_reasoning_model("gpt-5-codex") assert gpt5_config.is_model_gpt_5_codex_model("gpt-5-codex") # Regular GPT-5 models should not be detected as codex @@ -141,7 +154,7 @@ def test_gpt5_codex_unsupported_params_drop(config: OpenAIConfig): assert param not in config.get_supported_openai_params(model="gpt-5-codex") -def test_gpt5_codex_supports_tool_choice(gpt5_config: OpenAIGPT5Config): +def test_gpt5_codex_supports_tool_choice(gpt5_config: OpenAIGPT5ReasoningConfig): """Test that GPT-5-Codex supports tool_choice parameter.""" supported_params = gpt5_config.get_supported_openai_params(model="gpt-5-codex") assert "tool_choice" in supported_params