
Commit 7ff93cd

Revert "Merge pull request #12865 from aholmberg/holmberg/default-reasoning"
This reverts commit 68f4847, reversing changes made to df39320.
1 parent 3cb8e23 commit 7ff93cd

File tree: 4 files changed, +80 -243 lines changed

litellm/completion_extras/litellm_responses_transformation/transformation.py
Lines changed: 4 additions & 4 deletions

@@ -157,8 +157,8 @@ def transform_request(
                 responses_api_request["metadata"] = value
             elif key in ("previous_response_id"):
                 responses_api_request["previous_response_id"] = value
-
-        responses_api_request["reasoning"] = self._map_reasoning_effort(optional_params.get("reasoning_effort"))
+            elif key == "reasoning_effort":
+                responses_api_request["reasoning"] = self._map_reasoning_effort(value)
 
         # Get stream parameter from litellm_params if not in optional_params
         stream = optional_params.get("stream") or litellm_params.get("stream", False)
@@ -452,7 +452,7 @@ def _convert_tools_to_responses_format(
             responses_tools.append(tool)
         return cast(List["ALL_RESPONSES_API_TOOL_PARAMS"], responses_tools)
 
-    def _map_reasoning_effort(self, reasoning_effort: Optional[str]) -> Reasoning:
+    def _map_reasoning_effort(self, reasoning_effort: str) -> Optional[Reasoning]:
         if reasoning_effort == "high":
             return Reasoning(effort="high", summary="detailed")
         elif reasoning_effort == "medium":
@@ -462,7 +462,7 @@ def _map_reasoning_effort(self, reasoning_effort: Optional[str]) -> Reasoning:
             return Reasoning(effort="low", summary="auto")
         elif reasoning_effort == "minimal":
             return Reasoning(effort="minimal", summary="auto")
-        return Reasoning(summary="auto")
+        return None
 
     def _map_responses_status_to_finish_reason(self, status: Optional[str]) -> str:
        """Map responses API status to chat completion finish_reason"""

litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
Lines changed: 1 addition & 14 deletions

@@ -420,10 +420,8 @@ def apply_response_schema_transformation(self, value: dict, optional_params: dic
 
     @staticmethod
     def _map_reasoning_effort_to_thinking_budget(
-        reasoning_effort: Optional[str],
+        reasoning_effort: str,
     ) -> GeminiThinkingConfig:
-        if not reasoning_effort:
-            return { "includeThoughts": True }
         if reasoning_effort == "low":
             return {
                 "thinkingBudget": DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
@@ -617,17 +615,6 @@ def map_openai_params(  # noqa: PLR0915
             optional_params = self._add_tools_to_optional_params(
                 optional_params, [_tools]
             )
-
-        ######################################################################################
-        # If the model supports reasoning and `thinkingConfig` is not set as yet
-        # we should set it to includeThoughts
-        ######################################################################################
-        if supports_reasoning(model) and "thinkingConfig" not in optional_params:
-            optional_params["thinkingConfig"] = (
-                VertexGeminiConfig._map_reasoning_effort_to_thinking_budget(
-                    non_default_params.get("reasoning_effort")
-                )
-            )
         if litellm.vertex_ai_safety_settings is not None:
             optional_params["safety_settings"] = litellm.vertex_ai_safety_settings
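Net effect on the Vertex/Gemini path: `map_openai_params` no longer injects a default `thinkingConfig` for reasoning-capable models, and `_map_reasoning_effort_to_thinking_budget` again requires a concrete effort level. A minimal sketch of the surviving shape, with an assumed budget value (litellm defines the real constant):

```python
from typing import TypedDict

# Assumed value for illustration only; litellm defines the real constant.
DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET = 1024


class GeminiThinkingConfig(TypedDict, total=False):
    thinkingBudget: int
    includeThoughts: bool


def map_reasoning_effort_to_thinking_budget(
    reasoning_effort: str,
) -> GeminiThinkingConfig:
    # Post-revert: a concrete effort level is required; there is no
    # implicit {"includeThoughts": True} fallback for reasoning models.
    if reasoning_effort == "low":
        return {
            "thinkingBudget": DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
            "includeThoughts": True,
        }
    # "medium"/"high" map to larger budgets in the real implementation.
    raise ValueError(f"unmapped reasoning_effort: {reasoning_effort}")


print(map_reasoning_effort_to_thinking_budget("low"))
# {'thinkingBudget': 1024, 'includeThoughts': True}
```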

tests/test_litellm/completion_extras/litellm_responses_transformation/test_completion_extras_litellm_responses_transformation_transformation.py
Lines changed: 73 additions & 148 deletions

@@ -10,152 +10,77 @@
 import pytest
 
 sys.path.insert(
-    0, os.path.abspath("../../../../..")
-)  # Adds the parent directory to the system path
+    0, os.path.abspath("../../..")
+)  # Adds the parent directory to the system-path
 import litellm
-from litellm.completion_extras.litellm_responses_transformation.transformation import (
-    LiteLLMResponsesTransformationHandler,
-    OpenAiResponsesToChatCompletionStreamIterator,
-)
-from litellm.types.llms.openai import Reasoning
-from litellm.types.utils import Delta, ModelResponseStream, StreamingChoices
-
-
-class TestLiteLLMResponsesTransformation:
-    def setup_method(self):
-        self.handler = LiteLLMResponsesTransformationHandler()
-        self.model = "responses-api-model"
-        self.logging_obj = MagicMock()
-
-    def test_transform_request_reasoning_effort(self):
-        """
-        Test that reasoning_effort is mapped to reasoning parameter correctly.
-        """
-        # Case 1: reasoning_effort = "high"
-        optional_params_high = {"reasoning_effort": "high"}
-        result_high = self.handler.transform_request(
-            model=self.model,
-            messages=[],
-            optional_params=optional_params_high,
-            litellm_params={},
-            headers={},
-            litellm_logging_obj=self.logging_obj,
-        )
-        assert "reasoning" in result_high
-        assert result_high["reasoning"] == Reasoning(effort="high", summary="detailed")
-
-        # Case 2: reasoning_effort = "medium"
-        optional_params_medium = {"reasoning_effort": "medium"}
-        result_medium = self.handler.transform_request(
-            model=self.model,
-            messages=[],
-            optional_params=optional_params_medium,
-            litellm_params={},
-            headers={},
-            litellm_logging_obj=self.logging_obj,
-        )
-        assert "reasoning" in result_medium
-        assert result_medium["reasoning"] == Reasoning(effort="medium", summary="auto")
-
-        # Case 3: reasoning_effort = "low"
-        optional_params_low = {"reasoning_effort": "low"}
-        result_low = self.handler.transform_request(
-            model=self.model,
-            messages=[],
-            optional_params=optional_params_low,
-            litellm_params={},
-            headers={},
-            litellm_logging_obj=self.logging_obj,
-        )
-        assert "reasoning" in result_low
-        assert result_low["reasoning"] == Reasoning(effort="low", summary="auto")
-
-        # Case 4: no reasoning_effort
-        optional_params_none = {}
-        result_none = self.handler.transform_request(
-            model=self.model,
-            messages=[],
-            optional_params=optional_params_none,
-            litellm_params={},
-            headers={},
-            litellm_logging_obj=self.logging_obj,
-        )
-        assert "reasoning" in result_none
-        assert result_none["reasoning"] == Reasoning(summary="auto")
-
-        # Case 5: reasoning_effort = None
-        optional_params_explicit_none = {"reasoning_effort": None}
-        result_explicit_none = self.handler.transform_request(
-            model=self.model,
-            messages=[],
-            optional_params=optional_params_explicit_none,
-            litellm_params={},
-            headers={},
-            litellm_logging_obj=self.logging_obj,
-        )
-        assert "reasoning" in result_explicit_none
-        assert result_explicit_none["reasoning"] == Reasoning(summary="auto")
-
-    def test_convert_chat_completion_messages_to_responses_api_image_input(self):
-        """
-        Test that chat completion messages with image inputs are converted correctly.
-        """
-        user_content = "What's in this image?"
-        user_image = "https://w7.pngwing.com/pngs/666/274/png-transparent-image-pictures-icon-photo-thumbnail.png"
-
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": user_content,
-                    },
-                    {
-                        "type": "image_url",
-                        "image_url": {"url": user_image},
-                    },
-                ],
-            },
-        ]
-
-        response, _ = self.handler.convert_chat_completion_messages_to_responses_api(messages)
-
-        response_str = json.dumps(response)
-
-        assert user_content in response_str
-        assert user_image in response_str
-
-        print("response: ", response)
-        assert response[0]["content"][1]["image_url"] == user_image
-
-    def test_openai_responses_chunk_parser_reasoning_summary(self):
-        """
-        Test that OpenAI responses chunk parser handles reasoning summary correctly.
-        """
-        iterator = OpenAiResponsesToChatCompletionStreamIterator(
-            streaming_response=None, sync_stream=True
-        )
-
-        chunk = {
-            "delta": "**Compar",
-            "item_id": "rs_686d544208748198b6912e27b7c299c00e24bd875d35bade",
-            "output_index": 0,
-            "sequence_number": 4,
-            "summary_index": 0,
-            "type": "response.reasoning_summary_text.delta",
-        }
-
-        result = iterator.chunk_parser(chunk)
-
-        assert isinstance(result, ModelResponseStream)
-        assert len(result.choices) == 1
-        choice = result.choices[0]
-        assert isinstance(choice, StreamingChoices)
-        assert choice.index == 0
-        delta = choice.delta
-        assert isinstance(delta, Delta)
-        assert delta.content is None
-        assert delta.reasoning_content == "**Compar"
-        assert delta.tool_calls is None
-        assert delta.function_call is None
+
+
+def test_convert_chat_completion_messages_to_responses_api_image_input():
+    from litellm.completion_extras.litellm_responses_transformation.transformation import (
+        LiteLLMResponsesTransformationHandler,
+    )
+
+    handler = LiteLLMResponsesTransformationHandler()
+
+    user_content = "What's in this image?"
+    user_image = "https://w7.pngwing.com/pngs/666/274/png-transparent-image-pictures-icon-photo-thumbnail.png"
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": user_content,
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {"url": user_image},
+                },
+            ],
+        },
+    ]
+
+    response, _ = handler.convert_chat_completion_messages_to_responses_api(messages)
+
+    response_str = json.dumps(response)
+
+    assert user_content in response_str
+    assert user_image in response_str
+
+    print("response: ", response)
+    assert response[0]["content"][1]["image_url"] == user_image
+
+
+def test_openai_responses_chunk_parser_reasoning_summary():
+    from litellm.completion_extras.litellm_responses_transformation.transformation import (
+        OpenAiResponsesToChatCompletionStreamIterator,
+    )
+    from litellm.types.utils import Delta, ModelResponseStream, StreamingChoices
+
+    iterator = OpenAiResponsesToChatCompletionStreamIterator(
+        streaming_response=None, sync_stream=True
+    )
+
+    chunk = {
+        "delta": "**Compar",
+        "item_id": "rs_686d544208748198b6912e27b7c299c00e24bd875d35bade",
+        "output_index": 0,
+        "sequence_number": 4,
+        "summary_index": 0,
+        "type": "response.reasoning_summary_text.delta",
+    }
+
+    result = iterator.chunk_parser(chunk)
+
+    assert isinstance(result, ModelResponseStream)
+    assert len(result.choices) == 1
+    choice = result.choices[0]
+    assert isinstance(choice, StreamingChoices)
+    assert choice.index == 0
+    delta = choice.delta
+    assert isinstance(delta, Delta)
+    assert delta.content is None
+    assert delta.reasoning_content == "**Compar"
+    assert delta.tool_calls is None
+    assert delta.function_call is None
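The surviving streaming test pins down how a Responses API `response.reasoning_summary_text.delta` event surfaces in a chat-completion-style chunk. As a rough illustration of that mapping (a simplified stand-in, not litellm's actual parser):

```python
from typing import Any, Dict, Optional


def route_reasoning_summary_delta(chunk: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    # Simplified stand-in for the chunk parser: reasoning-summary text
    # deltas land in `reasoning_content`, while `content` stays None.
    if chunk.get("type") == "response.reasoning_summary_text.delta":
        return {"content": None, "reasoning_content": chunk["delta"]}
    return None  # other event types are handled elsewhere


delta = route_reasoning_summary_delta(
    {"type": "response.reasoning_summary_text.delta", "delta": "**Compar"}
)
assert delta == {"content": None, "reasoning_content": "**Compar"}
```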

tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
Lines changed: 2 additions & 77 deletions

@@ -442,82 +442,6 @@ def test_vertex_ai_map_thinking_param_with_budget_tokens_0():
     }
 
 
-def test_vertex_ai_reasoning_effort_mapping():
-    """
-    Test that reasoning_effort is mapped to thinkingConfig correctly for models that support it.
-    - A default thinking config is applied if reasoning_effort is not specified.
-    - reasoning_effort correctly maps to thinkingConfig.
-    - No thinkingConfig is applied for models that do not support reasoning.
-    - reasoning_effort is prioritized over thinking param.
-    """
-    v = VertexGeminiConfig()
-    optional_params = {}
-
-    # Case 1: Model supports reasoning, no reasoning_effort provided
-    # Should apply default thinkingConfig
-    with patch(
-        "litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini.supports_reasoning",
-        return_value=True,
-    ):
-        result_params = v.map_openai_params(
-            non_default_params={},
-            optional_params=deepcopy(optional_params),
-            model="gemini-2.5-pro",
-            drop_params=False,
-        )
-    assert "thinkingConfig" in result_params
-    assert result_params["thinkingConfig"] == {"includeThoughts": True}
-
-    # Case 2: Model supports reasoning, reasoning_effort is 'low'
-    # Should apply thinkingConfig with budget
-    with patch(
-        "litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini.supports_reasoning",
-        return_value=True,
-    ):
-        result_params_with_effort = v.map_openai_params(
-            non_default_params={"reasoning_effort": "low"},
-            optional_params=deepcopy(optional_params),
-            model="gemini-2.5-pro",
-            drop_params=False,
-        )
-    assert "thinkingConfig" in result_params_with_effort
-    assert result_params_with_effort["thinkingConfig"]["includeThoughts"] is True
-    assert "thinkingBudget" in result_params_with_effort["thinkingConfig"]
-
-    # Case 3: Model does not support reasoning
-    # Should not apply thinkingConfig
-    with patch(
-        "litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini.supports_reasoning",
-        return_value=False,
-    ):
-        result_params_no_support = v.map_openai_params(
-            non_default_params={},
-            optional_params=deepcopy(optional_params),
-            model="gemini-pro",
-            drop_params=False,
-        )
-    assert "thinkingConfig" not in result_params_no_support
-
-    # Case 4: Model supports reasoning, but reasoning_effort is set, should be prioritized over thinking
-    with patch(
-        "litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini.supports_reasoning",
-        return_value=True,
-    ):
-        result_params_with_effort = v.map_openai_params(
-            non_default_params={
-                "reasoning_effort": "low",
-                "thinking": {"type": "enabled", "budget_tokens": 1000},
-            },
-            optional_params=deepcopy(optional_params),
-            model="gemini-2.5-pro",
-            drop_params=False,
-        )
-    assert "thinkingConfig" in result_params_with_effort
-    assert result_params_with_effort["thinkingConfig"]["includeThoughts"] is True
-    assert "thinkingBudget" in result_params_with_effort["thinkingConfig"]
-    assert result_params_with_effort["thinkingConfig"]["thinkingBudget"] != 1000
-
-
 def test_vertex_ai_map_tools():
     v = VertexGeminiConfig()
     tools = v._map_function(value=[{"code_execution": {}}])
@@ -1109,10 +1033,11 @@ def test_vertex_ai_code_line_length():
     This is a meta-test to ensure the code change meets the 40-character requirement.
     """
     import inspect
+
     from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
         VertexGeminiConfig,
     )
-
+
     # Get the source code of the _transform_parts method
     source_lines = inspect.getsource(VertexGeminiConfig._transform_parts).split('\n')
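With the default-injection block gone, a natural regression check (hypothetical, not part of this commit) is that a reasoning-capable model with no `reasoning_effort` comes back without any `thinkingConfig`. A sketch reusing the patch target and call signature from the removed test, and assuming no other parameter in the call maps to `thinkingConfig`:

```python
# Hypothetical post-revert regression check, not part of this commit.
from unittest.mock import patch

from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
    VertexGeminiConfig,
)


def test_no_default_thinking_config_after_revert():
    v = VertexGeminiConfig()
    with patch(
        "litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini.supports_reasoning",
        return_value=True,
    ):
        result = v.map_openai_params(
            non_default_params={},
            optional_params={},
            model="gemini-2.5-pro",
            drop_params=False,
        )
    # Post-revert: no implicit {"includeThoughts": True} is injected.
    assert "thinkingConfig" not in result
```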
