
Commit b6f6dc5

feat(vertex_ai.py): support parsing thinking content into gemini format
allows function calls with thought signatures to be sent back to gemini

Closes #13842
1 parent 51c73dc commit b6f6dc5
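Context for the change (a hedged sketch, not part of the diff): Gemini 2.5 models attach opaque thought signatures to their responses, and multi-turn function calling expects those signatures to be echoed back on the matching parts. After this commit, an assistant message carrying thinking_blocks is converted so the signature rides along. The shapes below are illustrative, not an exact wire trace:

    # OpenAI-format assistant turn, as litellm represents it (values hypothetical):
    assistant_turn = {
        "role": "assistant",
        "thinking_blocks": [
            {"thinking": "...", "signature": "opaque-sig-from-gemini"},
        ],
    }

    # Gemini-format part produced for the next request:
    gemini_part = {"text": "...", "thoughtSignature": "opaque-sig-from-gemini"}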

4 files changed: 248 additions, 9 deletions

litellm/llms/vertex_ai/gemini/transformation.py

Lines changed: 93 additions & 2 deletions
@@ -105,6 +105,64 @@ def _process_gemini_image(image_url: str, format: Optional[str] = None) -> PartType:
         raise e


+def _snake_to_camel(snake_str: str) -> str:
+    """Convert snake_case to camelCase"""
+    components = snake_str.split("_")
+    return components[0] + "".join(x.capitalize() for x in components[1:])
+
+
+def _camel_to_snake(camel_str: str) -> str:
+    """Convert camelCase to snake_case"""
+    import re
+
+    return re.sub(r"(?<!^)(?=[A-Z])", "_", camel_str).lower()
+
+
+def _get_equivalent_key(key: str, available_keys: set) -> Optional[str]:
+    """
+    Get the equivalent key from available keys, checking both camelCase and snake_case variants
+    """
+    if key in available_keys:
+        return key
+
+    # Try camelCase version
+    camel_key = _snake_to_camel(key)
+    if camel_key in available_keys:
+        return camel_key
+
+    # Try snake_case version
+    snake_key = _camel_to_snake(key)
+    if snake_key in available_keys:
+        return snake_key
+
+    return None
+
+
+def check_if_part_exists_in_parts(
+    parts: List[PartType], part: PartType, excluded_keys: List[str] = []
+) -> bool:
+    """
+    Check if a part exists in a list of parts
+    Handles both camelCase and snake_case key variations (e.g., function_call vs functionCall)
+    """
+    keys_to_compare = set(part.keys()) - set(excluded_keys)
+    for p in parts:
+        p_keys = set(p.keys())
+        # Check if all keys in part have equivalent values in p
+        match_found = True
+        for key in keys_to_compare:
+            equivalent_key = _get_equivalent_key(key, p_keys)
+            if equivalent_key is None or p.get(equivalent_key, None) != part.get(
+                key, None
+            ):
+                match_found = False
+                break
+
+        if match_found:
+            return True
+    return False
+
+
 def _gemini_convert_messages_with_history(  # noqa: PLR0915
     messages: List[AllMessageValues],
 ) -> List[ContentType]:
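For reference, the new helpers behave as follows (a quick sketch, using only functions added in this diff):

    from litellm.llms.vertex_ai.gemini.transformation import (
        _camel_to_snake,
        _get_equivalent_key,
        _snake_to_camel,
    )

    assert _snake_to_camel("function_call") == "functionCall"
    assert _camel_to_snake("thoughtSignature") == "thought_signature"

    # _get_equivalent_key resolves a key against whichever casing a part uses:
    assert _get_equivalent_key("function_call", {"functionCall", "text"}) == "functionCall"
    assert _get_equivalent_key("thoughtSignature", {"thought_signature"}) == "thought_signature"
    assert _get_equivalent_key("missing", {"text"}) is None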
@@ -236,10 +294,33 @@ def _gemini_convert_messages_with_history(  # noqa: PLR0915
             assistant_msg = ChatCompletionAssistantMessage(**msg_dict)  # type: ignore
             _message_content = assistant_msg.get("content", None)
             reasoning_content = assistant_msg.get("reasoning_content", None)
+            thinking_blocks = assistant_msg.get("thinking_blocks")
             if reasoning_content is not None:
                 assistant_content.append(
                     PartType(thought=True, text=reasoning_content)
                 )
+            if thinking_blocks is not None:
+                for block in thinking_blocks:
+                    block_thinking_str = block.get("thinking")
+                    block_signature = block.get("signature")
+                    if (
+                        block_thinking_str is not None
+                        and block_signature is not None
+                    ):
+                        try:
+                            assistant_content.append(
+                                PartType(
+                                    thoughtSignature=block_signature,
+                                    **json.loads(block_thinking_str),
+                                )
+                            )
+                        except Exception:
+                            assistant_content.append(
+                                PartType(
+                                    thoughtSignature=block_signature,
+                                    text=block_thinking_str,
+                                )
+                            )
             if _message_content is not None and isinstance(_message_content, list):
                 _parts = []
                 for element in _message_content:
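The new thinking_blocks branch has two paths, sketched below with hypothetical values: if the block's thinking string parses as JSON, its keys (e.g. a functionCall) are spread into the part next to the signature; otherwise json.loads raises and the fallback keeps the string as plain text.

    import json

    # Path 1: "thinking" holds a JSON-encoded part
    block = {
        "thinking": json.dumps(
            {"functionCall": {"name": "get_current_weather", "args": {"location": "Tokyo"}}}
        ),
        "signature": "sig-123",  # hypothetical signature value
    }
    # -> PartType(thoughtSignature="sig-123", functionCall={...})

    # Path 2: "thinking" is plain prose, so json.loads fails
    block = {"thinking": "Let me check the weather first.", "signature": "sig-456"}
    # -> PartType(thoughtSignature="sig-456", text="Let me check the weather first.")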
@@ -262,9 +343,17 @@ def _gemini_convert_messages_with_history(  # noqa: PLR0915
                 assistant_msg.get("tool_calls", []) is not None
                 or assistant_msg.get("function_call") is not None
             ):  # support assistant tool invoke conversion
-                assistant_content.extend(
-                    convert_to_gemini_tool_call_invoke(assistant_msg)
+                gemini_tool_call_parts = convert_to_gemini_tool_call_invoke(
+                    assistant_msg
                 )
+                ## check if gemini_tool_call already exists in assistant_content
+                for gemini_tool_call_part in gemini_tool_call_parts:
+                    if not check_if_part_exists_in_parts(
+                        assistant_content,
+                        gemini_tool_call_part,
+                        excluded_keys=["thoughtSignature"],
+                    ):
+                        assistant_content.append(gemini_tool_call_part)
                 last_message_with_tool_calls = assistant_msg

             msg_i += 1
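The excluded_keys=["thoughtSignature"] argument is what makes the dedup work: the thinking-block loop above may already have appended the signed copy of a function call, and convert_to_gemini_tool_call_invoke then yields the same call without a signature. A minimal sketch with hypothetical values:

    assistant_content = [
        {
            "functionCall": {"name": "get_current_weather", "args": {"location": "Paris"}},
            "thoughtSignature": "sig-123",  # appended earlier from a thinking block
        }
    ]
    # Unsigned duplicate produced by convert_to_gemini_tool_call_invoke:
    duplicate = {"functionCall": {"name": "get_current_weather", "args": {"location": "Paris"}}}

    # True, because the signature is ignored in the comparison,
    # so the unsigned copy is skipped and the signed part is kept:
    assert check_if_part_exists_in_parts(
        assistant_content, duplicate, excluded_keys=["thoughtSignature"]
    )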
@@ -476,6 +565,7 @@ async def async_transform_request_body(
         optional_params=optional_params,
     )

+
 def _default_user_message_when_system_message_passed() -> ChatCompletionUserMessage:
     """
     Returns a default user message when a "system" message is passed in gemini fails.
@@ -484,6 +574,7 @@ def _default_user_message_when_system_message_passed() -> ChatCompletionUserMessage:
     """
     return ChatCompletionUserMessage(content=".", role="user")

+
 def _transform_system_message(
     supports_system_message: bool, messages: List[AllMessageValues]
 ) -> Tuple[Optional[SystemInstructions], List[AllMessageValues]]:

litellm/types/llms/vertex_ai.py

Lines changed: 1 addition & 0 deletions
@@ -41,6 +41,7 @@ class PartType(TypedDict, total=False):
     function_call: FunctionCall
     function_response: FunctionResponse
     thought: bool
+    thoughtSignature: str


 class HttpxFunctionCall(TypedDict):
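Because PartType is declared with total=False, every key stays optional and a signed part type-checks directly; a minimal sketch:

    from litellm.types.llms.vertex_ai import PartType

    signed_part = PartType(
        thoughtSignature="opaque-signature",  # hypothetical value
        text="reasoning that should round-trip back to Gemini",
    )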

tests/llm_translation/test_gemini.py

Lines changed: 79 additions & 7 deletions
@@ -661,10 +661,33 @@ def test_system_message_with_no_user_message():
     assert response.choices[0].message.content is not None


+def get_current_weather(location, unit="fahrenheit"):
+    """Get the current weather in a given location"""
+    if "tokyo" in location.lower():
+        return json.dumps({"location": "Tokyo", "temperature": "10", "unit": "celsius"})
+    elif "san francisco" in location.lower():
+        return json.dumps(
+            {"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}
+        )
+    elif "paris" in location.lower():
+        return json.dumps({"location": "Paris", "temperature": "22", "unit": "celsius"})
+    else:
+        return json.dumps({"location": location, "temperature": "unknown"})
+
+
 def test_gemini_with_thinking():
     from litellm import completion

     litellm._turn_on_debug()
+    litellm.modify_params = True
+    model = "gemini/gemini-2.5-flash"
+    messages = [
+        {
+            "role": "user",
+            "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
+        }
+    ]
+
     tools = [
         {
             "type": "function",
@@ -676,20 +699,69 @@ def test_gemini_with_thinking():
                     "properties": {
                         "location": {
                             "type": "string",
-                            "description": "The city and state, e.g. San Francisco, CA",
+                            "description": "The city and state",
+                        },
+                        "unit": {
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
                         },
-                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                     },
                     "required": ["location"],
                 },
             },
         }
     ]
-    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
-
-    result = completion(
-        model="gemini/gemini-2.5-flash",
+    response = litellm.completion(
+        model=model,
         messages=messages,
         tools=tools,
+        tool_choice="auto",  # auto is default, but we'll be explicit
+        reasoning_effort="low",
     )
-    print(f"result: {result}")
+    print("Response\n", response)
+    response_message = response.choices[0].message
+    tool_calls = response_message.tool_calls
+
+    print("Expecting there to be 3 tool calls")
+    assert len(tool_calls) > 0  # this has to call the function for SF, Tokyo and paris
+
+    # Step 2: check if the model wanted to call a function
+    print(f"tool_calls: {tool_calls}")
+    if tool_calls:
+        # Step 3: call the function
+        # Note: the JSON response may not always be valid; be sure to handle errors
+        available_functions = {
+            "get_current_weather": get_current_weather,
+        }  # only one function in this example, but you can have multiple
+        messages.append(response_message)  # extend conversation with assistant's reply
+        print("Response message\n", response_message)
+        # Step 4: send the info for each function call and function response to the model
+        for tool_call in tool_calls:
+            function_name = tool_call.function.name
+            if function_name not in available_functions:
+                # the model called a function that does not exist in available_functions - don't try calling anything
+                return
+            function_to_call = available_functions[function_name]
+            function_args = json.loads(tool_call.function.arguments)
+            function_response = function_to_call(
+                location=function_args.get("location"),
+                unit=function_args.get("unit"),
+            )
+            messages.append(
+                {
+                    "tool_call_id": tool_call.id,
+                    "role": "tool",
+                    "name": function_name,
+                    "content": function_response,
+                }
+            )  # extend conversation with function response
+        print(f"messages: {messages}")
+        second_response = litellm.completion(
+            model=model,
+            messages=messages,
+            seed=22,
+            reasoning_effort="low",
+            tools=tools,
+            drop_params=True,
+        )  # get a new response from the model where it can see the function response
+        print("second response\n", second_response)
Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
+from litellm.llms.vertex_ai.gemini.transformation import check_if_part_exists_in_parts
+
+
+def test_check_if_part_exists_in_parts():
+    parts = [
+        {"text": "Hello", "thought": True},
+        {"text": "World", "thought": False},
+    ]
+    part = {"text": "Hello", "thought": True}
+    new_part = {"text": "Hello World", "thought": True}
+    assert check_if_part_exists_in_parts(parts, part)
+    assert not check_if_part_exists_in_parts(parts, new_part, ["thought"])
+    assert check_if_part_exists_in_parts(parts, new_part, ["text"])
+
+
+def test_check_if_part_exists_in_parts_camel_case_snake_case():
+    """Test that function handles both camelCase and snake_case key variations"""
+    # Test snake_case to camelCase matching
+    parts_with_snake_case = [
+        {
+            "function_call": {
+                "name": "get_current_weather",
+                "args": {"location": "San Francisco, CA"},
+            }
+        },
+        {"text": "Some other content"},
+    ]
+
+    part_with_camel_case = {
+        "functionCall": {
+            "name": "get_current_weather",
+            "args": {"location": "San Francisco, CA"},
+        }
+    }
+
+    # Should find match between function_call and functionCall
+    assert check_if_part_exists_in_parts(parts_with_snake_case, part_with_camel_case)
+
+    # Test camelCase to snake_case matching
+    parts_with_camel_case = [
+        {"functionCall": {"name": "calculate_sum", "args": {"a": 1, "b": 2}}}
+    ]
+
+    part_with_snake_case = {
+        "function_call": {"name": "calculate_sum", "args": {"a": 1, "b": 2}}
+    }
+
+    # Should find match between functionCall and function_call
+    assert check_if_part_exists_in_parts(parts_with_camel_case, part_with_snake_case)
+
+    # Test no match when values differ
+    part_with_different_values = {
+        "function_call": {"name": "different_function", "args": {"x": 5}}
+    }
+
+    assert not check_if_part_exists_in_parts(
+        parts_with_snake_case, part_with_different_values
+    )
+
+    # Test multiple keys with mixed casing
+    parts_mixed = [
+        {
+            "function_call": {"name": "test"},
+            "thoughtSignature": "reasoning",
+            "text": "content",
+        }
+    ]
+
+    part_mixed_casing = {
+        "functionCall": {"name": "test"},
+        "thought_signature": "reasoning",
+        "text": "content",
+    }
+
+    assert check_if_part_exists_in_parts(parts_mixed, part_mixed_casing)
