fix: map LiteLLM thought parts to reasoning_content

GWeale · copybara-github · commit 6c67b6c0f414 · 2026-01-08T10:10:13.000-08:00
- Treat Part(thought=True) as reasoning_content when building assistant messages.
- Add unit tests for thought-only and thought+text cases.

Close #4069

Co-authored-by: George Weale <gweale@google.com>
PiperOrigin-RevId: 853790274
diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py
@@ -482,13 +482,15 @@ async def _content_to_message_param(
 
   # Handle user or assistant messages
   role = _to_litellm_role(content.role)
-  message_content = await _get_content(content.parts, provider=provider) or None
 
   if role == "user":
+    user_parts = [part for part in content.parts if not part.thought]
+    message_content = await _get_content(user_parts, provider=provider) or None
     return ChatCompletionUserMessage(role="user", content=message_content)
   else:  # assistant/model
     tool_calls = []
-    content_present = False
+    content_parts: list[types.Part] = []
+    reasoning_parts: list[types.Part] = []
     for part in content.parts:
       if part.function_call:
         tool_calls.append(
@@ -501,10 +503,16 @@ async def _content_to_message_param(
                 ),
             )
         )
-      elif part.text or part.inline_data:
-        content_present = True
+      elif part.thought:
+        reasoning_parts.append(part)
+      else:
+        content_parts.append(part)
 
-    final_content = message_content if content_present else None
+    final_content = (
+        await _get_content(content_parts, provider=provider)
+        if content_parts
+        else None
+    )
     if final_content and isinstance(final_content, list):
       # when the content is a single text object, we can use it directly.
       # this is needed for ollama_chat provider which fails if content is a list
@@ -514,10 +522,24 @@ async def _content_to_message_param(
           else final_content
       )
 
+    reasoning_texts = []
+    for part in reasoning_parts:
+      if part.text:
+        reasoning_texts.append(part.text)
+      elif (
+          part.inline_data
+          and part.inline_data.data
+          and part.inline_data.mime_type
+          and part.inline_data.mime_type.startswith("text/")
+      ):
+        reasoning_texts.append(_decode_inline_text_data(part.inline_data.data))
+
+    reasoning_content = _NEW_LINE.join(text for text in reasoning_texts if text)
     return ChatCompletionAssistantMessage(
         role=role,
         content=final_content,
         tool_calls=tool_calls or None,
+        reasoning_content=reasoning_content or None,
     )
 
 
@@ -587,8 +609,8 @@ async def _get_content(
 ) -> OpenAIMessageContent:
   """Converts a list of parts to litellm content.
 
-  Thought parts represent internal model reasoning and are always dropped so
-  they are not replayed back to the model in subsequent turns.
+  Thought parts are not filtered here and convert like regular parts; callers
+  must drop them beforehand if they should not appear in the content.
 
   Args:
     parts: The parts to convert.
@@ -598,9 +620,9 @@ async def _get_content(
     The litellm content.
   """
 
-  parts_without_thought = [part for part in parts if not part.thought]
-  if len(parts_without_thought) == 1:
-    part = parts_without_thought[0]
+  parts_list = list(parts)
+  if len(parts_list) == 1:
+    part = parts_list[0]
     if part.text:
       return part.text
     if (
@@ -612,10 +634,7 @@ async def _get_content(
       return _decode_inline_text_data(part.inline_data.data)
 
   content_objects = []
-  for part in parts_without_thought:
-    # Skip thought parts to prevent reasoning from being replayed in subsequent
-    # turns. Thought parts are internal model reasoning and should not be sent
-    # back to the model.
+  for part in parts_list:
     if part.text:
       content_objects.append({
           "type": "text",
diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py
@@ -1860,6 +1860,59 @@ async def test_content_to_message_param_assistant_message():
   assert message["content"] == "Test response"
 
 
+@pytest.mark.asyncio
+async def test_content_to_message_param_user_filters_thought_parts():
+  thought_part = types.Part.from_text(text="internal reasoning")
+  thought_part.thought = True
+  content_part = types.Part.from_text(text="visible content")
+  content = types.Content(role="user", parts=[thought_part, content_part])
+
+  message = await _content_to_message_param(content)
+
+  assert message["role"] == "user"
+  assert message["content"] == "visible content"
+
+
+@pytest.mark.asyncio
+async def test_content_to_message_param_assistant_thought_message():
+  part = types.Part.from_text(text="internal reasoning")
+  part.thought = True
+  content = types.Content(role="assistant", parts=[part])
+
+  message = await _content_to_message_param(content)
+
+  assert message["role"] == "assistant"
+  assert message["content"] is None
+  assert message["reasoning_content"] == "internal reasoning"
+
+
+@pytest.mark.asyncio
+async def test_content_to_message_param_model_thought_message():
+  part = types.Part.from_text(text="internal reasoning")
+  part.thought = True
+  content = types.Content(role="model", parts=[part])
+
+  message = await _content_to_message_param(content)
+
+  assert message["role"] == "assistant"
+  assert message["content"] is None
+  assert message["reasoning_content"] == "internal reasoning"
+
+
+@pytest.mark.asyncio
+async def test_content_to_message_param_assistant_thought_and_content_message():
+  thought_part = types.Part.from_text(text="internal reasoning")
+  thought_part.thought = True
+  content_part = types.Part.from_text(text="visible content")
+  content = types.Content(role="assistant", parts=[thought_part, content_part])
+
+  message = await _content_to_message_param(content)
+
+  assert message["role"] == "assistant"
+  assert message["content"] == "visible content"
+  assert message["reasoning_content"] == "internal reasoning"
+
+
 @pytest.mark.asyncio
 async def test_content_to_message_param_function_call():
   content = types.Content(
@@ -2087,42 +2140,35 @@ def test_split_message_content_prefers_existing_structured_calls():
 
 
 @pytest.mark.asyncio
-async def test_get_content_filters_thought_parts():
-  """Test that thought parts are filtered from content.
-
-  Thought parts contain model reasoning that should not be sent back to
-  the model in subsequent turns. This test verifies that _get_content
-  skips parts with thought=True.
+async def test_get_content_does_not_filter_thought_parts():
+  """Test that _get_content does not drop thought parts.
 
-  See: https://github.com/google/adk-python/issues/3948
+  Thought filtering is handled by the caller (e.g., _content_to_message_param)
+  to avoid duplicating logic across helpers.
   """
-  # Create a thought part (reasoning) and a regular text part
   thought_part = types.Part(text="Internal reasoning...", thought=True)
   regular_part = types.Part.from_text(text="Visible response")
-  parts = [thought_part, regular_part]
 
-  content = await _get_content(parts)
+  content = await _get_content([thought_part, regular_part])
 
-  # The thought part should be filtered out, leaving only the regular text
-  assert content == "Visible response"
+  assert content == [
+      {"type": "text", "text": "Internal reasoning..."},
+      {"type": "text", "text": "Visible response"},
+  ]
 
 
 @pytest.mark.asyncio
-async def test_get_content_filters_all_thought_parts():
-  """Test that all thought parts are filtered when only thoughts present.
-
-  When all parts are thought parts, _get_content should return an empty list.
-
-  See: https://github.com/google/adk-python/issues/3948
-  """
+async def test_get_content_all_thought_parts():
+  """Test that thought parts convert like regular text parts."""
   thought_part1 = types.Part(text="First reasoning...", thought=True)
   thought_part2 = types.Part(text="Second reasoning...", thought=True)
-  parts = [thought_part1, thought_part2]
 
-  content = await _get_content(parts)
+  content = await _get_content([thought_part1, thought_part2])
 
-  # All thought parts should be filtered out
-  assert content == []
+  assert content == [
+      {"type": "text", "text": "First reasoning..."},
+      {"type": "text", "text": "Second reasoning..."},
+  ]
 
 
 @pytest.mark.asyncio