Skip to content

Commit 722eead

Browse files
committed
fix(llma): tool calls in streaming OpenAI Chat Completions
1 parent 02889ff commit 722eead

File tree

4 files changed

+193
-18
lines changed

4 files changed

+193
-18
lines changed

posthog/ai/openai/openai.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@
1212
from posthog.ai.utils import (
1313
call_llm_and_track_usage,
1414
extract_available_tool_calls,
15+
with_privacy_mode,
1516
)
1617
from posthog.ai.openai.openai_converter import (
1718
extract_openai_usage_from_chunk,
1819
extract_openai_content_from_chunk,
20+
extract_openai_tool_calls_from_chunk,
21+
accumulate_openai_tool_calls,
1922
)
2023
from posthog.client import Client as PostHogClient
2124
from posthog import setup
@@ -310,6 +313,7 @@ def _create_streaming(
310313
start_time = time.time()
311314
usage_stats: Dict[str, int] = {}
312315
accumulated_content = []
316+
accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
313317
if "stream_options" not in kwargs:
314318
kwargs["stream_options"] = {}
315319
kwargs["stream_options"]["include_usage"] = True
@@ -318,6 +322,7 @@ def _create_streaming(
318322
def generator():
319323
nonlocal usage_stats
320324
nonlocal accumulated_content # noqa: F824
325+
nonlocal accumulated_tool_calls
321326

322327
try:
323328
for chunk in response:
@@ -333,11 +338,26 @@ def generator():
333338
if content is not None:
334339
accumulated_content.append(content)
335340

341+
# Extract and accumulate tool calls from chunk
342+
chunk_tool_calls = extract_openai_tool_calls_from_chunk(chunk)
343+
if chunk_tool_calls:
344+
accumulate_openai_tool_calls(
345+
accumulated_tool_calls, chunk_tool_calls
346+
)
347+
336348
yield chunk
337349

338350
finally:
339351
end_time = time.time()
340352
latency = end_time - start_time
353+
354+
# Convert accumulated tool calls dict to list
355+
tool_calls_list = (
356+
list(accumulated_tool_calls.values())
357+
if accumulated_tool_calls
358+
else None
359+
)
360+
341361
self._capture_streaming_event(
342362
posthog_distinct_id,
343363
posthog_trace_id,
@@ -348,6 +368,7 @@ def generator():
348368
usage_stats,
349369
latency,
350370
accumulated_content,
371+
tool_calls_list,
351372
extract_available_tool_calls("openai", kwargs),
352373
)
353374

@@ -364,6 +385,7 @@ def _capture_streaming_event(
364385
usage_stats: Dict[str, int],
365386
latency: float,
366387
output: Any,
388+
tool_calls: Optional[List[Dict[str, Any]]] = None,
367389
available_tool_calls: Optional[List[Dict[str, Any]]] = None,
368390
):
369391
from posthog.ai.types import StreamingEventData
@@ -381,7 +403,7 @@ def _capture_streaming_event(
381403
base_url=str(self._client.base_url),
382404
kwargs=kwargs,
383405
formatted_input=format_openai_streaming_input(kwargs, "chat"),
384-
formatted_output=format_openai_streaming_output(output, "chat"),
406+
formatted_output=format_openai_streaming_output(output, "chat", tool_calls),
385407
usage_stats=standardize_openai_usage(usage_stats, "chat"),
386408
latency=latency,
387409
distinct_id=posthog_distinct_id,

posthog/ai/openai/openai_async.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
from posthog.ai.openai.openai_converter import (
2020
extract_openai_usage_from_chunk,
2121
extract_openai_content_from_chunk,
22+
extract_openai_tool_calls_from_chunk,
23+
accumulate_openai_tool_calls,
2224
format_openai_streaming_output,
2325
)
2426
from posthog.client import Client as PostHogClient
@@ -332,6 +334,7 @@ async def _create_streaming(
332334
start_time = time.time()
333335
usage_stats: Dict[str, int] = {}
334336
accumulated_content = []
337+
accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
335338

336339
if "stream_options" not in kwargs:
337340
kwargs["stream_options"] = {}
@@ -341,6 +344,7 @@ async def _create_streaming(
341344
async def async_generator():
342345
nonlocal usage_stats
343346
nonlocal accumulated_content # noqa: F824
347+
nonlocal accumulated_tool_calls
344348

345349
try:
346350
async for chunk in response:
@@ -354,11 +358,26 @@ async def async_generator():
354358
if content is not None:
355359
accumulated_content.append(content)
356360

361+
# Extract and accumulate tool calls from chunk
362+
chunk_tool_calls = extract_openai_tool_calls_from_chunk(chunk)
363+
if chunk_tool_calls:
364+
accumulate_openai_tool_calls(
365+
accumulated_tool_calls, chunk_tool_calls
366+
)
367+
357368
yield chunk
358369

359370
finally:
360371
end_time = time.time()
361372
latency = end_time - start_time
373+
374+
# Convert accumulated tool calls dict to list
375+
tool_calls_list = (
376+
list(accumulated_tool_calls.values())
377+
if accumulated_tool_calls
378+
else None
379+
)
380+
362381
await self._capture_streaming_event(
363382
posthog_distinct_id,
364383
posthog_trace_id,
@@ -369,6 +388,7 @@ async def async_generator():
369388
usage_stats,
370389
latency,
371390
accumulated_content,
391+
tool_calls_list,
372392
extract_available_tool_calls("openai", kwargs),
373393
)
374394

@@ -385,6 +405,7 @@ async def _capture_streaming_event(
385405
usage_stats: Dict[str, int],
386406
latency: float,
387407
output: Any,
408+
tool_calls: Optional[List[Dict[str, Any]]] = None,
388409
available_tool_calls: Optional[List[Dict[str, Any]]] = None,
389410
):
390411
if posthog_trace_id is None:
@@ -400,7 +421,7 @@ async def _capture_streaming_event(
400421
"$ai_output_choices": with_privacy_mode(
401422
self._client._ph_client,
402423
posthog_privacy_mode,
403-
format_openai_streaming_output(output, "chat"),
424+
format_openai_streaming_output(output, "chat", tool_calls),
404425
),
405426
"$ai_http_status": 200,
406427
"$ai_input_tokens": usage_stats.get("prompt_tokens", 0),

posthog/ai/openai/openai_converter.py

Lines changed: 126 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -358,33 +358,147 @@ def extract_openai_content_from_chunk(
358358
return None
359359

360360

361+
def extract_openai_tool_calls_from_chunk(chunk: Any) -> Optional[List[Dict[str, Any]]]:
    """
    Extract tool call deltas from an OpenAI streaming chunk.

    Only the first choice is inspected (streaming responses carry a single
    choice unless ``n > 1`` is requested — assumption to confirm against the
    content-extraction helper, which does the same).

    Args:
        chunk: Streaming chunk from the OpenAI API.

    Returns:
        List of tool call delta dicts if the chunk carries any, None otherwise.
        Each dict always has an "index" key; "id", "type" and "function"
        (with optional "name"/"arguments") are included only when the delta
        provides non-empty values for them.
    """
    choices = getattr(chunk, "choices", None)
    if not choices:
        return None

    # Use getattr rather than direct attribute access: the trailing
    # usage-only chunk (and other provider-specific chunks) may carry a
    # choice without a delta, which previously raised AttributeError.
    delta = getattr(choices[0], "delta", None)
    tool_call_deltas = getattr(delta, "tool_calls", None) if delta else None
    if not tool_call_deltas:
        return None

    tool_calls: List[Dict[str, Any]] = []
    for tool_call in tool_call_deltas:
        tc_dict: Dict[str, Any] = {
            "index": getattr(tool_call, "index", None),
        }

        if getattr(tool_call, "id", None):
            tc_dict["id"] = tool_call.id

        if getattr(tool_call, "type", None):
            tc_dict["type"] = tool_call.type

        function = getattr(tool_call, "function", None)
        if function:
            tc_dict["function"] = {}
            if getattr(function, "name", None):
                tc_dict["function"]["name"] = function.name
            if getattr(function, "arguments", None):
                tc_dict["function"]["arguments"] = function.arguments

        tool_calls.append(tc_dict)

    return tool_calls
405+
406+
407+
def accumulate_openai_tool_calls(
    accumulated_tool_calls: Dict[int, Dict[str, Any]],
    chunk_tool_calls: List[Dict[str, Any]],
) -> None:
    """
    Merge tool call deltas from one streaming chunk into the running state.

    OpenAI streams tool calls piecewise: the first delta for a given index
    carries id, type, function.name and the opening slice of
    function.arguments; later deltas for the same index append further
    argument fragments.

    Args:
        accumulated_tool_calls: Running state, keyed by tool call index.
        chunk_tool_calls: Tool call deltas extracted from the current chunk.
    """
    for delta in chunk_tool_calls:
        index = delta.get("index")
        if index is None:
            # A delta without an index cannot be matched to a tool call.
            continue

        # First sighting of this index: seed an empty skeleton entry.
        entry = accumulated_tool_calls.setdefault(
            index,
            {
                "id": "",
                "type": "function",
                "function": {"name": "", "arguments": ""},
            },
        )

        # Scalar fields: a non-empty value in the delta replaces the old one.
        for field in ("id", "type"):
            if delta.get(field):
                entry[field] = delta[field]

        if "function" in delta:
            func_delta = delta["function"]
            if func_delta.get("name"):
                entry["function"]["name"] = func_delta["name"]
            if func_delta.get("arguments"):
                # Argument JSON arrives in fragments; concatenate in order.
                entry["function"]["arguments"] += func_delta["arguments"]
454+
455+
361456
def format_openai_streaming_output(
362-
accumulated_content: Any, provider_type: str = "chat"
457+
accumulated_content: Any,
458+
provider_type: str = "chat",
459+
tool_calls: Optional[List[Dict[str, Any]]] = None,
363460
) -> List[FormattedMessage]:
364461
"""
365462
Format the final output from OpenAI streaming.
366463
367464
Args:
368465
accumulated_content: Accumulated content from streaming (string for chat, list for responses)
369466
provider_type: Either "chat" or "responses" to handle different API formats
467+
tool_calls: Optional list of accumulated tool calls
370468
371469
Returns:
372470
List of formatted messages
373471
"""
374472

375473
if provider_type == "chat":
376-
# Chat API: accumulated_content is a string
377-
if isinstance(accumulated_content, str):
378-
return [
379-
{
380-
"role": "assistant",
381-
"content": [{"type": "text", "text": accumulated_content}],
382-
}
383-
]
384-
# If it's a list of strings, join them
474+
content_items: List[FormattedContentItem] = []
475+
476+
# Add text content if present
477+
if isinstance(accumulated_content, str) and accumulated_content:
478+
content_items.append({"type": "text", "text": accumulated_content})
385479
elif isinstance(accumulated_content, list):
386-
text = "".join(str(item) for item in accumulated_content)
387-
return [{"role": "assistant", "content": [{"type": "text", "text": text}]}]
480+
# If it's a list of strings, join them
481+
text = "".join(str(item) for item in accumulated_content if item)
482+
if text:
483+
content_items.append({"type": "text", "text": text})
484+
485+
# Add tool calls if present
486+
if tool_calls:
487+
for tool_call in tool_calls:
488+
if "function" in tool_call:
489+
function_call: FormattedFunctionCall = {
490+
"type": "function",
491+
"id": tool_call.get("id", ""),
492+
"function": tool_call["function"],
493+
}
494+
content_items.append(function_call)
495+
496+
# Return formatted message with content
497+
if content_items:
498+
return [{"role": "assistant", "content": content_items}]
499+
else:
500+
# Empty response
501+
return [{"role": "assistant", "content": []}]
388502

389503
elif provider_type == "responses":
390504
# Responses API: accumulated_content is a list of output items

posthog/test/ai/openai/test_openai.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -890,11 +890,29 @@ def test_streaming_with_tool_calls(mock_client):
890890
assert defined_tool["function"]["description"] == "Get weather"
891891
assert defined_tool["function"]["parameters"] == {}
892892

893-
# Check that the content was also accumulated
894-
assert props["$ai_output_choices"][0]["content"][0]["type"] == "text"
893+
# Check that both text content and tool calls were accumulated
894+
output_content = props["$ai_output_choices"][0]["content"]
895+
896+
# Find text content and tool call in the output
897+
text_content = None
898+
tool_call_content = None
899+
for item in output_content:
900+
if item["type"] == "text":
901+
text_content = item
902+
elif item["type"] == "function":
903+
tool_call_content = item
904+
905+
# Verify text content
906+
assert text_content is not None
907+
assert text_content["text"] == "The weather in San Francisco is 15°C."
908+
909+
# Verify tool call was captured
910+
assert tool_call_content is not None
911+
assert tool_call_content["id"] == "call_abc123"
912+
assert tool_call_content["function"]["name"] == "get_weather"
895913
assert (
896-
props["$ai_output_choices"][0]["content"][0]["text"]
897-
== "The weather in San Francisco is 15°C."
914+
tool_call_content["function"]["arguments"]
915+
== '{"location": "San Francisco", "unit": "celsius"}'
898916
)
899917

900918
# Check token usage

0 commit comments

Comments
 (0)