Python: Require tool_call_id parameter for string-based tool messages in ChatHistory (#12753)

moonbox3 · web-flow · commit 5332c2e3306a · 2025-07-23T04:07:20.000Z
### Motivation and Context

The `add_tool_message` method of `ChatHistory` accepted string content without requiring a `tool_call_id` parameter. This design was flawed from an API design perspective. It created tool messages that:

1. Violated chat completion protocols: most LLM APIs (OpenAI, etc.) require tool messages to have a `tool_call_id` that references a previous function call
2. Broke conversation flow: tool messages should always be responses to specific tool calls, maintaining the call-response relationship
3. Created malformed message sequences: messages without proper tool call IDs would fail when sent to language models

### Description

This PR modifies the `add_tool_message` string overload to require a `tool_call_id` parameter, which ensures proper tool call protocol compliance.

1. The string overload now validates that `tool_call_id` is provided and raises a clear error if it is missing
2. Tool messages now create `FunctionResultContent` objects with both `id` and `call_id` set to the provided `tool_call_id`
3. Added support for an optional `function_name` parameter for better bookkeeping
4. Provides informative error messages explaining why `tool_call_id` is required

- Closes #12744

Before:

```python
# This would create an invalid tool message without tool_call_id
# when the `.to_dict()` method is called on `ChatMessageContent`
chat_history.add_tool_message("Function result")  # Creates a broken message and loses the pairing with the corresponding `FunctionCallContent`.
```

After:

```python
# Now requires tool_call_id for proper protocol compliance
chat_history.add_tool_message("Function result")  # raises a clear error due to the missing `tool_call_id`
chat_history.add_tool_message("Function result", tool_call_id="call_123")  # works correctly

# Optional function name for better bookkeeping
chat_history.add_tool_message("Function result", tool_call_id="call_123", function_name="get_weather")  # works as well
```

### Contribution Checklist

- [X] The code builds clean without any errors or warnings
- [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [X] All unit tests pass, and I have added new tests where possible
- [X] I didn't break anyone 😄
diff --git a/.github/workflows/python-test-coverage.yml b/.github/workflows/python-test-coverage.yml
@@ -34,7 +34,7 @@ jobs:
           cache-suffix: ${{ runner.os }}-${{ env.UV_PYTHON }}
           cache-dependency-glob: "**/uv.lock"
       - name: Install the project
-        run: uv sync --all-extras --dev
+        run: uv sync --all-extras --dev -U --prerelease=if-necessary-or-explicit
       - name: Test with pytest
         run: uv run --frozen pytest -q --junitxml=pytest.xml --cov=semantic_kernel --cov-report=term-missing:skip-covered --cov-report=xml:python-coverage.xml ./tests/unit
       - name: Upload coverage report
diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -37,7 +37,7 @@ dependencies = [
     "numpy >= 1.25.0; python_version < '3.12'",
     "numpy >= 1.26.0; python_version >= '3.12'",
     # openai connector
-    "openai >= 1.67",
+    "openai >= 1.91.1",
     # openapi and swagger
     "openapi_core >= 0.18,<0.20",
     "websockets >= 13, < 16",
diff --git a/python/semantic_kernel/contents/chat_history.py b/python/semantic_kernel/contents/chat_history.py
@@ -12,6 +12,7 @@
 
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.const import CHAT_HISTORY_TAG, CHAT_MESSAGE_CONTENT_TAG
+from semantic_kernel.contents.function_result_content import FunctionResultContent
 from semantic_kernel.contents.kernel_content import KernelContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.exceptions import ContentInitializationError, ContentSerializationError
@@ -154,15 +155,29 @@ def add_tool_message(self, content: str | list[KernelContent], **kwargs: Any) ->
         """Add a tool message to the chat history.
 
         Args:
-            content: The content of the tool message, can be a string or a
-            list of KernelContent instances that are turned into a single ChatMessageContent.
-            **kwargs: Additional keyword arguments.
+            content: The content of the tool message. If a string, tool_call_id must be provided
+                as a keyword argument. If a list of KernelContent instances, they should contain
+                properly configured FunctionResultContent objects.
+            **kwargs: Additional keyword arguments. For string content, tool_call_id is required.
+                Optionally one may provide function_name to specify the tool function name. The
+                function_name is only used for bookkeeping purposes as part of ChatHistory and is
+                not included in the call to the model.
         """
         raise NotImplementedError
 
     @add_tool_message.register
     def _(self, content: str, **kwargs: Any) -> None:
-        """Add a tool message to the chat history."""
+        """Add a tool message to the chat history.
+
+        Args:
+            content: The result content of the tool call.
+            **kwargs: Additional keyword arguments. 'tool_call_id' is required when using string content.
+        """
+        if "tool_call_id" not in kwargs:
+            raise ContentInitializationError(
+                "tool_call_id is required when adding a tool message with string content. "
+                "Tool messages must reference the specific tool call they respond to."
+            )
         self.add_message(message=self._prepare_for_add(role=AuthorRole.TOOL, content=content, **kwargs))
 
     @add_tool_message.register(list)
@@ -203,9 +218,21 @@ def _prepare_for_add(
     ) -> dict[str, str]:
         """Prepare a message to be added to the history."""
         kwargs["role"] = role
-        if content:
+
+        if role == AuthorRole.TOOL and content and not items:
+            tool_call_id = kwargs.pop("tool_call_id", None)
+            function_name = kwargs.pop("function_name", "unknown")
+            function_result_content = FunctionResultContent(
+                function_name=function_name,
+                result=content,
+                id=tool_call_id,  # Set both id and call_id for compatibility
+                call_id=tool_call_id,
+                **kwargs,
+            )
+            kwargs["items"] = [function_result_content]
+        elif content:
             kwargs["content"] = content
-        if items:
+        elif items:
             kwargs["items"] = items
         return kwargs
 
diff --git a/python/tests/unit/agents/openai_responses/test_openai_responses_thread_actions.py b/python/tests/unit/agents/openai_responses/test_openai_responses_thread_actions.py
@@ -11,7 +11,7 @@
 from openai.types.responses.response_output_message import ResponseOutputMessage
 from openai.types.responses.response_output_text import ResponseOutputText
 from openai.types.responses.response_stream_event import ResponseStreamEvent
-from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent
+from openai.types.responses.response_text_delta_event import Logprob, ResponseTextDeltaEvent
 
 from semantic_kernel.agents.open_ai.openai_responses_agent import OpenAIResponsesAgent
 from semantic_kernel.agents.open_ai.responses_agent_thread_actions import ResponsesAgentThreadActions
@@ -250,6 +250,7 @@ async def __anext__(self):
         delta="Test partial content",
         content_index=0,
         item_id="fake-item-id",
+        logprobs=[Logprob(token="test_token", logprob=0.3)],
         output_index=0,
         type="response.output_text.delta",
         sequence_number=0,
diff --git a/python/tests/unit/contents/test_chat_history.py b/python/tests/unit/contents/test_chat_history.py
@@ -108,12 +108,30 @@ def test_add_assistant_message_list(chat_history: ChatHistory):
     assert chat_history.messages[-1].role == AuthorRole.ASSISTANT
 
 
+def test_add_tool_message_raises_without_tool_call_id(chat_history: ChatHistory):
+    content = "Tool message"
+    with pytest.raises(ContentInitializationError):
+        chat_history.add_tool_message(content)
+
+
 def test_add_tool_message(chat_history: ChatHistory):
     content = "Tool message"
-    chat_history.add_tool_message(content)
-    assert chat_history.messages[-1].content == content
+    chat_history.add_tool_message(content, tool_call_id="call_123")
+
+
+def test_add_tool_message_to_dict_succeeds(chat_history: ChatHistory):
+    content = "Tool message"
+    chat_history.add_tool_message(content, tool_call_id="call_123", function_name="test_function")
     assert chat_history.messages[-1].role == AuthorRole.TOOL
 
+    msg = chat_history.messages[-1]
+    assert isinstance(msg.items[0], FunctionResultContent)
+    assert msg.items[0].function_name == "test_function"
+    result = msg.to_dict()
+    assert result["content"] == content
+    assert result["role"] == AuthorRole.TOOL
+    assert result["tool_call_id"] == "call_123"
+
 
 def test_add_tool_message_list(chat_history: ChatHistory):
     content = [FunctionResultContent(id="test", result="Tool message")]
diff --git a/python/uv.lock b/python/uv.lock