Stream the tool calls as a code message

chriscarrollsmith · chriscarrollsmith · commit fa1eed5b0078 · 2025-02-10T16:53:20.000-05:00
diff --git a/routers/chat.py b/routers/chat.py
@@ -1,19 +1,23 @@
 import logging
 import time
-from typing import Any
+from typing import Any, AsyncGenerator
 from fastapi.templating import Jinja2Templates
 from fastapi import APIRouter, Form, Depends, Request
 from fastapi.responses import StreamingResponse, HTMLResponse
 from openai import AsyncOpenAI
 from openai.resources.beta.threads.runs.runs import AsyncAssistantStreamManager
-from openai.types.beta.assistant_stream_event import ThreadMessageCreated, ThreadMessageDelta, ThreadRunCompleted, ThreadRunRequiresAction
+from openai.types.beta.assistant_stream_event import (
+    ThreadMessageCreated, ThreadMessageDelta, ThreadRunCompleted,
+    ThreadRunRequiresAction, ThreadRunStepCreated, ThreadRunStepDelta
+)
+from openai.types.beta.threads.run import RequiredAction
 from fastapi.responses import StreamingResponse
 from fastapi import APIRouter, Depends, Form, HTTPException
 from pydantic import BaseModel
 import json
 
-# Import our get_weather method
 from utils.weather import get_weather
+from utils.sse import sse_format
 
 logger: logging.Logger = logging.getLogger("uvicorn.error")
 logger.setLevel(logging.DEBUG)
@@ -95,41 +99,113 @@ async def stream_response(
     assistant_id: str,
     thread_id: str,
     client: AsyncOpenAI = Depends(lambda: AsyncOpenAI())
-) -> StreamingResponse:   
-    
-    # Create a generator to stream the response from the assistant
-    async def event_generator():
-        step_counter: int = 0
-        stream_manager: AsyncAssistantStreamManager = client.beta.threads.runs.stream(
-            assistant_id=assistant_id,
-            thread_id=thread_id
-        )
+) -> StreamingResponse:
+    """
+    Streams the assistant response via Server-Sent Events (SSE). If the assistant requires
+    a tool call, we capture that action, invoke the tool, and then re-run the stream
+    until completion. This is done in a DRY way by extracting the streaming logic 
+    into a helper function.
+    """
+
+    async def handle_assistant_stream(
+        templates: Jinja2Templates,
+        logger: logging.Logger,
+        stream_manager: AsyncAssistantStreamManager,
+        start_step_count: int = 0
+    ) -> AsyncGenerator:
+        """
+        Async generator to yield SSE events.
+        We yield a final 'metadata' dictionary event once we're done.
+        """
+        step_counter: int = start_step_count
+        required_action: RequiredAction | None = None
+        run_requires_action_event: ThreadRunRequiresAction | None = None
 
         async with stream_manager as event_handler:
             async for event in event_handler:
                 logger.info(f"{event}")
-                
+
                 if isinstance(event, ThreadMessageCreated):
                     step_counter += 1
 
-                    yield (
-                        f"event: messageCreated\n"
-                        f"data: {templates.get_template("components/assistant-step.html").render(
-                            step_type=f"assistantMessage",
+                    yield sse_format(
+                        "messageCreated",
+                        templates.get_template("components/assistant-step.html").render(
+                            step_type="assistantMessage",
                             stream_name=f"textDelta{step_counter}"
-                        ).replace("\n", "")}\n\n"
+                        )
                     )
-                    time.sleep(0.25) # Give the client time to render the message
+                    time.sleep(0.25)  # Give the client time to render the message
 
                 if isinstance(event, ThreadMessageDelta):
                     logger.info(f"Sending delta with name textDelta{step_counter}")
-                    yield (
-                        f"event: textDelta{step_counter}\n"
-                        f"data: {event.data.delta.content[0].text.value}\n\n"
+                    yield sse_format(
+                        f"textDelta{step_counter}",
+                        event.data.delta.content[0].text.value
+                    )
+
+                if isinstance(event, ThreadRunStepCreated) and event.data.type == "tool_calls":
+                    yield sse_format(
+                        f"toolCallCreated",
+                        templates.get_template('components/assistant-step.html').render(
+                            step_type='toolCall',
+                            stream_name=f'toolDelta{step_counter}'
+                        )
                     )
 
+                if isinstance(event, ThreadRunStepDelta) and event.data.delta.step_details.type == "tool_calls":
+                    if event.data.delta.step_details.tool_calls[0].function.name:
+                        yield sse_format(
+                            f"toolDelta{step_counter}",
+                            event.data.delta.step_details.tool_calls[0].function.name + "<br>"
+                        )
+                    elif event.data.delta.step_details.tool_calls[0].function.arguments:
+                        yield sse_format(
+                            f"toolDelta{step_counter}",
+                            event.data.delta.step_details.tool_calls[0].function.arguments
+                        )
+
+                # If the assistant run requires an action (a tool call), break and handle it
                 if isinstance(event, ThreadRunRequiresAction):
                     required_action = event.data.required_action
+                    run_requires_action_event = event
+                    if required_action.submit_tool_outputs:
+                        break
+
+                if isinstance(event, ThreadRunCompleted):
+                    yield sse_format("endStream", "DONE")
+
+        # At the end (or break) of this async generator, we yield a final "metadata" object
+        yield {
+            "type": "metadata",
+            "required_action": required_action,
+            "step_counter": step_counter,
+            "run_requires_action_event": run_requires_action_event
+        }
+
+    async def event_generator():
+        """
+        Main generator for SSE events. We call our helper function to handle the assistant
+        stream, and if the assistant requests a tool call, we do it and then re-run the stream.
+        """
+        step_counter = 0
+        # First run of the assistant stream
+        initial_manager = client.beta.threads.runs.stream(
+            assistant_id=assistant_id,
+            thread_id=thread_id
+        )
+
+        # We'll re-run the loop if needed for tool calls
+        stream_manager = initial_manager
+        while True:  
+            async for event in handle_assistant_stream(templates, logger, stream_manager, step_counter):
+                # Detect the special "metadata" event at the end of the generator
+                if isinstance(event, dict) and event.get("type") == "metadata":
+                    required_action: RequiredAction | None = event["required_action"]
+                    step_counter: int = event["step_counter"]
+                    run_requires_action_event: ThreadRunRequiresAction | None = event["run_requires_action_event"]
+
+                    # If the assistant still needs a tool call, do it and then re-stream
                     if required_action and required_action.submit_tool_outputs:
                         for tool_call in required_action.submit_tool_outputs.tool_calls:
                             yield (
@@ -154,14 +230,22 @@ async def event_generator():
                                     "tool_outputs": weather_output,
                                     "runId": event.data.id,
                                 }
-                                await post_tool_outputs(client, data_for_tool, thread_id)
-
-                if isinstance(event, ThreadRunCompleted):
-                    yield "event: endStream\ndata: DONE\n\n"
-
-            # Send a done event when the stream is complete
-            yield "event: endStream\ndata: DONE\n\n"
-    
+                        
+                        # Afterwards, create a fresh stream_manager for the next iteration
+                        new_stream_manager: AsyncAssistantStreamManager = await post_tool_outputs(
+                            client,
+                            data_for_tool,
+                            thread_id
+                        )
+                        stream_manager = new_stream_manager
+                        # proceed to rerun the loop
+                        break
+                    else:
+                        # No more tool calls needed; we're done streaming
+                        return
+                else:
+                    # Normal SSE events: yield them to the client
+                    yield event
 
     return StreamingResponse(
         event_generator(),
diff --git a/templates/components/assistant-step.html b/templates/components/assistant-step.html
@@ -1,3 +1,2 @@
 <!-- assistant-step.html -->
-<div class="{{ step_type }}" sse-swap="{{ stream_name }}">
-</div>
+<div class="{{ step_type }}" sse-swap="{{ stream_name }}"></div>
diff --git a/utils/sse.py b/utils/sse.py
@@ -0,0 +1,20 @@
+def sse_format(event: str, data: str, retry: int | None = None) -> str:
+    """
+    Format a Server-Sent Event (SSE) message.
+
+    Args:
+        event: The name/type of the event.
+        data: The payload; each line becomes its own "data:" field.
+        retry: Optional reconnection time in milliseconds.
+
+    Returns:
+        The message text, terminated by a blank line.
+    """
+    fields = [f"event: {event}"]
+    if retry is not None:
+        fields.append(f"retry: {retry}")
+    # An empty payload still gets one "data:" field: per the SSE spec,
+    # clients discard an event whose data buffer is empty.
+    fields.extend(f"data: {line}" for line in data.splitlines() or [""])
+    # The trailing blank line marks the end of the message.
+    return "\n".join(fields) + "\n\n"