
Commit 81d5498

fix(genai, vertexai): thought signature handling (#1354)
Improves documentation with updated docstrings, expands test coverage, and fixes thought signature indexing so each signature is attributed to the correct tool call. Also removes the now-unnecessary check for "gemini" in request model names when filtering retry parameters.
1 parent c0ce7c3 commit 81d5498

File tree

8 files changed: +716 additions, -36 deletions

libs/genai/langchain_google_genai/_common.py

Lines changed: 26 additions & 5 deletions
```diff
@@ -80,6 +80,9 @@ class _BaseGoogleGenerativeAI(BaseModel):
     If unset, will use the model's default value, which varies by model.
 
     See [docs](https://ai.google.dev/gemini-api/docs/models) for model-specific limits.
+
+    To constrain the number of thinking tokens to use when generating a response, see
+    the `thinking_budget` parameter.
     """
 
     n: int = 1
```
```diff
@@ -157,20 +160,38 @@ class _BaseGoogleGenerativeAI(BaseModel):
     )
     """A list of modalities of the response"""
 
-    thinking_budget: int | None = Field(
+    media_resolution: MediaResolution | None = Field(
         default=None,
     )
-    """Indicates the thinking budget in tokens."""
+    """Media resolution for the input media."""
 
-    media_resolution: MediaResolution | None = Field(
+    thinking_budget: int | None = Field(
         default=None,
     )
-    """Media resolution for the input media."""
+    """Indicates the thinking budget in tokens.
+
+    Used to disable thinking for supported models (when set to `0`) or to constrain
+    the number of tokens used for thinking.
+
+    Dynamic thinking (allowing the model to decide how many tokens to use) is
+    enabled when set to `-1`.
+
+    More information, including per-model limits, can be found in the
+    [Gemini API docs](https://ai.google.dev/gemini-api/docs/thinking#set-budget).
+    """
 
     include_thoughts: bool | None = Field(
         default=None,
     )
-    """Indicates whether to include thoughts in the response."""
+    """Indicates whether to include thoughts in the response.
+
+    !!! note
+
+        This parameter is only applicable for models that support thinking.
+
+        This does not disable thinking; to disable thinking, set `thinking_budget` to
+        `0` for supported models. See the `thinking_budget` parameter for more details.
+    """
 
     safety_settings: dict[HarmCategory, HarmBlockThreshold] | None = None
     """Default safety settings to use for all generations.
```

libs/genai/langchain_google_genai/chat_models.py

Lines changed: 30 additions & 15 deletions
```diff
@@ -222,13 +222,9 @@ def _chat_with_retry(**kwargs: Any) -> Any:
         except Exception:
             raise
 
-    params = (
-        {k: v for k, v in kwargs.items() if k in _allowed_params_prediction_service}
-        if (request := kwargs.get("request"))
-        and hasattr(request, "model")
-        and "gemini" in request.model
-        else kwargs
-    )
+    params = {
+        k: v for k, v in kwargs.items() if k in _allowed_params_prediction_service
+    }
     return _chat_with_retry(**params)
 
 
```
```diff
@@ -271,13 +267,9 @@ async def _achat_with_retry(**kwargs: Any) -> Any:
         except Exception:
             raise
 
-    params = (
-        {k: v for k, v in kwargs.items() if k in _allowed_params_prediction_service}
-        if (request := kwargs.get("request"))
-        and hasattr(request, "model")
-        and "gemini" in request.model
-        else kwargs
-    )
+    params = {
+        k: v for k, v in kwargs.items() if k in _allowed_params_prediction_service
+    }
     return await _achat_with_retry(**params)
 
 
```
```diff
@@ -654,7 +646,10 @@ def _parse_chat_history(
             if sig_str and isinstance(sig_str, str):
                 # Decode base64-encoded signature back to bytes
                 sig_bytes = base64.b64decode(sig_str)
-                function_call_sigs[idx] = sig_bytes
+                if "index" in item:
+                    function_call_sigs[item["index"]] = sig_bytes
+                else:
+                    function_call_sigs[idx] = sig_bytes
 
         for tool_call_idx, tool_call in enumerate(message.tool_calls):
             function_call = FunctionCall(
```
```diff
@@ -911,6 +906,7 @@ def _parse_response_candidate(
                 sig_block = {
                     "type": "function_call_signature",
                     "signature": thought_sig,
+                    "index": len(tool_calls) - 1,
                 }
                 function_call_signatures.append(sig_block)
 
```
````diff
@@ -1651,6 +1647,25 @@ class Joke(BaseModel):
         success rates and mitigation strategies like prompt...
         ```
 
+    Thinking:
+        For thinking models, you have the option to adjust the number of internal
+        thinking tokens used (`thinking_budget`) or to disable thinking altogether.
+        Note that not all models allow disabling thinking.
+
+        See the [Gemini API docs](https://ai.google.dev/gemini-api/docs/thinking) for
+        more details on thinking models.
+
+        To see a thinking model's thoughts, set `include_thoughts=True` to have the
+        model's reasoning summaries included in the response.
+
+        ```python
+        llm = ChatGoogleGenerativeAI(
+            model="gemini-2.5-flash",
+            include_thoughts=True,
+        )
+        ai_msg = llm.invoke("How many 'r's are in the word 'strawberry'?")
+        ```
+
     Token usage:
         ```python
         ai_msg = llm.invoke(messages)
````
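The docstring's new example stops at `invoke`. As a hedged follow-on, one way to pull the reasoning summaries out of the structured content; the `"reasoning"` block type is an assumption about the v1 output format, not something this diff confirms, so adjust it to whatever your installed version emits:

```python
ai_msg = llm.invoke("How many 'r's are in the word 'strawberry'?")

# With include_thoughts=True the content is a list of typed blocks.
# The "reasoning" type below is assumed, not confirmed by this diff.
if isinstance(ai_msg.content, list):
    thoughts = [
        block
        for block in ai_msg.content
        if isinstance(block, dict) and block.get("type") == "reasoning"
    ]
    for block in thoughts:
        print(block)
```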

libs/genai/tests/integration_tests/test_chat_models.py

Lines changed: 77 additions & 1 deletion
```diff
@@ -4,6 +4,7 @@
 import json
 from collections.abc import Generator, Sequence
 from typing import Literal, cast
+from unittest.mock import patch
 
 import pytest
 from langchain_core.messages import (
```
```diff
@@ -459,7 +460,82 @@ def analyze_weather(location: str, date: str) -> dict:
 
     # Test we can pass the result back in (with signature)
     next_message = {"role": "user", "content": "Thanks!"}
-    _ = llm_with_tools.invoke([input_message, result, next_message])
+    follow_up_result = llm_with_tools.invoke([input_message, result, next_message])
+
+    # Verify the follow-up call succeeded and returned a valid response
+    assert isinstance(follow_up_result, AIMessage)
+    assert follow_up_result.content is not None
+
+    # If there were signatures in the original response, verify they were properly
+    # handled in the follow-up (no errors should occur)
+    if signature_blocks:
+        # The fact that we got a successful response means signatures were converted
+        # correctly
+        # Additional verification that response metadata is preserved
+        assert "model_provider" in follow_up_result.response_metadata
+        assert (
+            follow_up_result.response_metadata["model_provider"] == "google_genai"
+        )
+
+
+@pytest.mark.flaky(retries=3, delay=1)
+def test_thought_signature_round_trip() -> None:
+    """Test thought signatures are properly preserved in round-trip conversations."""
+
+    @tool
+    def simple_tool(query: str) -> str:
+        """A simple tool for testing."""
+        return f"Response to: {query}"
+
+    llm = ChatGoogleGenerativeAI(
+        model=_THINKING_MODEL, include_thoughts=True, output_version="v1"
+    )
+    llm_with_tools = llm.bind_tools([simple_tool])
+
+    # First call with function calling to generate signatures
+    first_message = {
+        "role": "user",
+        "content": "Use the tool to help answer: What is 2+2?",
+    }
+
+    # Patch the conversion function to verify it's called with signatures
+    with patch(
+        "langchain_google_genai.chat_models._convert_from_v1_to_generativelanguage_v1beta"
+    ) as mock_convert:
+        # Set up the mock to call the real function but also track calls
+        from langchain_google_genai._compat import (
+            _convert_from_v1_to_generativelanguage_v1beta as real_convert,
+        )
+
+        mock_convert.side_effect = real_convert
+
+        first_result = llm_with_tools.invoke([first_message])
+
+        # Verify we got a response with structured content (contains signatures)
+        assert isinstance(first_result, AIMessage)
+        assert isinstance(first_result.content, list)
+
+        # Second call - this should trigger signature conversion
+        second_message = {"role": "user", "content": "Thanks!"}
+        second_result = llm_with_tools.invoke(
+            [first_message, first_result, second_message]
+        )
+
+        # Verify the conversion function was called when processing the first_result
+        # (it should be called once for the first_result message)
+        assert mock_convert.call_count >= 1
+
+        # Find the call that processed our AI message with signatures
+        ai_message_calls = [
+            call
+            for call in mock_convert.call_args_list
+            if call[0][1] == "google_genai"  # model_provider argument
+        ]
+        assert len(ai_message_calls) >= 1
+
+        # Verify the second call succeeded (signatures were properly converted)
+        assert isinstance(second_result, AIMessage)
+        assert second_result.content is not None
 
 
 def test_chat_google_genai_invoke_thinking_disabled() -> None:
```
