Skip to content

Commit 5aa46bf

Browse files
committed
clean up tool calling flow & show in chat
1 parent d504b34 commit 5aa46bf

File tree

6 files changed

+151
-112
lines changed

6 files changed

+151
-112
lines changed
packages/jupyter-ai/jupyter_ai/litellm_utils/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
from .toolcall_list import ToolCallList
2-
from .toolcall_types import *
1+
from .toolcall_list import *
2+
from .streaming_utils import *
packages/jupyter-ai/jupyter_ai/litellm_utils/streaming_utils.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from pydantic import BaseModel
2+
from .toolcall_list import ToolCallList
3+
4+
class StreamResult(BaseModel):
5+
id: str
6+
"""
7+
ID of the new message.
8+
"""
9+
10+
tool_calls: ToolCallList
11+
"""
12+
Tool calls requested by the LLM in its streamed response.
13+
"""

packages/jupyter-ai/jupyter_ai/litellm_utils/toolcall_list.py

Lines changed: 65 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,61 @@
11
from litellm.utils import ChatCompletionDeltaToolCall, Function
22
import json
3+
from pydantic import BaseModel
4+
from typing import Any
35

4-
from .toolcall_types import ResolvedToolCall, ResolvedFunction
6+
class ResolvedFunction(BaseModel):
7+
"""
8+
A type-safe, parsed representation of `litellm.utils.Function`.
9+
"""
10+
11+
name: str
12+
"""
13+
Name of the tool function to be called.
14+
15+
TODO: Check if this attribute is defined for non-function tools, e.g. tools
16+
provided by a MCP server. The docstring on `litellm.utils.Function` implies
17+
that `name` may be `None`.
18+
"""
19+
20+
arguments: dict[str, Any]
21+
"""
22+
Arguments to the tool function, as a dictionary.
23+
"""
24+
25+
26+
class ResolvedToolCall(BaseModel):
27+
"""
28+
A type-safe, parsed representation of
29+
`litellm.utils.ChatCompletionDeltaToolCall`.
30+
"""
31+
32+
id: str | None
33+
"""
34+
The ID of the tool call. This should always be provided by LiteLLM, this
35+
type is left optional as we do not use this attribute.
36+
"""
37+
38+
type: str
39+
"""
40+
The 'type' of tool call. Usually 'function'.
541
6-
class ToolCallList():
42+
TODO: Make this a union of string literals to ensure we are handling every
43+
potential type of tool call.
44+
"""
45+
46+
function: ResolvedFunction
47+
"""
48+
The resolved function. See `ResolvedFunction` for more info.
49+
"""
50+
51+
index: int
52+
"""
53+
The index of this tool call.
54+
55+
This is usually 0 unless the LLM supports parallel tool calling.
56+
"""
57+
58+
class ToolCallList(BaseModel):
759
"""
860
A helper object that defines a custom `__iadd__()` method which accepts a
961
`tool_call_deltas: list[ChatCompletionDeltaToolCall]` argument. This class
@@ -27,14 +79,7 @@ class ToolCallList():
2779
```
2880
"""
2981

30-
_aggregate: list[ChatCompletionDeltaToolCall]
31-
32-
def __init__(self):
33-
self.size = None
34-
35-
# Initialize `_aggregate`
36-
self._aggregate = []
37-
82+
_aggregate: list[ChatCompletionDeltaToolCall] = []
3883

3984
def __iadd__(self, other: list[ChatCompletionDeltaToolCall] | None) -> 'ToolCallList':
4085
"""
@@ -116,6 +161,13 @@ def resolve(self) -> list[ResolvedToolCall]:
116161
resolved_toolcalls.append(resolved_toolcall)
117162

118163
return resolved_toolcalls
119-
120-
121-
164+
165+
def to_json(self) -> list[dict[str, Any]]:
166+
"""
167+
Returns the list of tool calls as a Python dictionary that can be
168+
JSON-serialized.
169+
"""
170+
return [
171+
model.model_dump() for model in self._aggregate
172+
]
173+

packages/jupyter-ai/jupyter_ai/litellm_utils/toolcall_types.py

Lines changed: 0 additions & 57 deletions
This file was deleted.

packages/jupyter-ai/jupyter_ai/personas/base_persona.py

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from dataclasses import asdict
66
from logging import Logger
77
from time import time
8-
from typing import TYPE_CHECKING, Any, Optional, Tuple
8+
from typing import TYPE_CHECKING, Any, Optional
99

1010
from jupyter_ai.config_manager import ConfigManager
1111
from jupyterlab_chat.models import Message, NewMessage, User
@@ -17,7 +17,7 @@
1717
from traitlets.config import LoggingConfigurable
1818

1919
from .persona_awareness import PersonaAwareness
20-
from ..litellm_utils import ToolCallList, ResolvedToolCall
20+
from ..litellm_utils import ToolCallList, StreamResult, ResolvedToolCall
2121

2222
# Import toolkits
2323
from jupyter_ai_tools.toolkits.file_system import toolkit as fs_toolkit
@@ -247,7 +247,7 @@ def as_user_dict(self) -> dict[str, Any]:
247247

248248
async def stream_message(
249249
self, reply_stream: "AsyncIterator[ModelResponseStream | str]"
250-
) -> Tuple[ResolvedToolCall, ToolCallList]:
250+
) -> StreamResult:
251251
"""
252252
Takes an async iterator, dubbed the 'reply stream', and streams it to a
253253
new message by this persona in the YChat. The async iterator may yield
@@ -263,12 +263,21 @@ async def stream_message(
263263
"""
264264
stream_id: Optional[str] = None
265265
stream_interrupted = False
266+
tool_calls = ToolCallList()
266267
try:
267268
self.awareness.set_local_state_field("isWriting", True)
268-
toolcall_list = ToolCallList()
269-
resolved_toolcalls: list[ResolvedToolCall] = []
270269

271270
async for chunk in reply_stream:
271+
# Start the stream with an empty message on the initial reply.
272+
# Bind the new message ID to `stream_id`.
273+
if not stream_id:
274+
stream_id = self.ychat.add_message(
275+
NewMessage(body="", sender=self.id)
276+
)
277+
self.message_interrupted[stream_id] = asyncio.Event()
278+
self.awareness.set_local_state_field("isWriting", stream_id)
279+
assert stream_id
280+
272281
# Compute `content_delta` and `tool_calls_delta` based on the
273282
# type of object yielded by `reply_stream`.
274283
if isinstance(chunk, ModelResponseStream):
@@ -307,16 +316,6 @@ async def stream_message(
307316

308317
# Append `content_delta` to the existing message.
309318
if content_delta:
310-
# Start the stream with an empty message on the initial reply.
311-
# Bind the new message ID to `stream_id`.
312-
if not stream_id:
313-
stream_id = self.ychat.add_message(
314-
NewMessage(body="", sender=self.id)
315-
)
316-
self.message_interrupted[stream_id] = asyncio.Event()
317-
self.awareness.set_local_state_field("isWriting", stream_id)
318-
assert stream_id
319-
320319
self.ychat.update_message(
321320
Message(
322321
id=stream_id,
@@ -328,10 +327,8 @@ async def stream_message(
328327
append=True,
329328
)
330329
if toolcalls_delta:
331-
toolcall_list += toolcalls_delta
330+
tool_calls += toolcalls_delta
332331

333-
# After the reply stream is complete, resolve the list of tool calls.
334-
resolved_toolcalls = toolcall_list.resolve()
335332
except Exception as e:
336333
self.log.error(
337334
f"Persona '{self.name}' encountered an exception printed below when attempting to stream output."
@@ -358,12 +355,17 @@ async def stream_message(
358355
)
359356
return None
360357

361-
# Otherwise return the resolved list.
358+
# TODO: determine where this should live
359+
resolved_toolcalls = tool_calls.resolve()
362360
if len(resolved_toolcalls):
363361
count = len(resolved_toolcalls)
364362
names = sorted([tc.function.name for tc in resolved_toolcalls])
365363
self.log.info(f"AI response triggered {count} tool calls: {names}")
366-
return resolved_toolcalls, toolcall_list
364+
365+
return StreamResult(
366+
id=stream_id,
367+
tool_calls=tool_calls
368+
)
367369

368370

369371
def send_message(self, body: str) -> None:
@@ -552,7 +554,9 @@ async def run_tools(self, tools: list[ResolvedToolCall]) -> list[dict]:
552554
tool_defn = DEFAULT_TOOLKITS[toolkit_name].get_tool_unsafe(tool_name)
553555

554556
# Run tool and store its output
555-
output = await tool_defn.callable(**tool_call.function.arguments)
557+
output = tool_defn.callable(**tool_call.function.arguments)
558+
if asyncio.iscoroutine(output):
559+
output = await output
556560

557561
# Store the tool output in a dictionary accepted by LiteLLM
558562
output_dict = {

packages/jupyter-ai/jupyter_ai/personas/jupyternaut/jupyternaut.py

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from typing import Any, Optional
2+
import time
3+
import json
24

35
from jupyterlab_chat.models import Message
46
from litellm import acompletion
@@ -9,7 +11,6 @@
911
JUPYTERNAUT_SYSTEM_PROMPT_TEMPLATE,
1012
JupyternautSystemPromptArgs,
1113
)
12-
from ...litellm_utils import ResolvedToolCall
1314

1415

1516
class JupyternautPersona(BasePersona):
@@ -39,34 +40,60 @@ async def process_message(self, message: Message) -> None:
3940

4041
model_id = self.config_manager.chat_model
4142

42-
# `True` on the first LLM invocation, `False` on all invocations after.
43-
initial_invocation = True
44-
# List of tool calls requested by the LLM in the previous invocation.
45-
tool_calls: list[ResolvedToolCall] = []
46-
tool_call_list = None
43+
# `True` before the first LLM response is sent, `False` afterwards.
44+
initial_response = True
4745
# List of tool call outputs computed in the previous invocation.
4846
tool_call_outputs: list[dict] = []
4947

50-
# Loop until the AI is complete running all its tools.
51-
while initial_invocation or len(tool_call_outputs):
52-
messages = self.get_context_as_messages(model_id, message)
53-
54-
# TODO: Find a better way to track tool calls
55-
if not initial_invocation and tool_calls:
56-
self.log.error(messages[-1])
57-
messages[-1]['tool_calls'] = tool_call_list._aggregate
58-
messages.extend(tool_call_outputs)
48+
# Initialize list of messages, including history and context
49+
messages: list[dict] = self.get_context_as_messages(model_id, message)
5950

60-
self.log.error(messages)
51+
# Loop until the AI is complete running all its tools.
52+
while initial_response or len(tool_call_outputs):
53+
# Stream message to the chat
6154
response_aiter = await acompletion(
6255
model=model_id,
6356
messages=messages,
6457
tools=self.get_tools(model_id),
6558
stream=True,
6659
)
67-
tool_calls, tool_call_list = await self.stream_message(response_aiter)
68-
initial_invocation = False
69-
tool_call_outputs = await self.run_tools(tool_calls)
60+
result = await self.stream_message(response_aiter)
61+
initial_response = False
62+
63+
# Append new reply to `messages`
64+
reply = self.ychat.get_message(result.id)
65+
tool_calls_json = result.tool_calls.to_json()
66+
messages.append({
67+
"role": "assistant",
68+
"content": reply.body,
69+
"tool_calls": tool_calls_json
70+
})
71+
72+
# Show tool call requests to YChat (not synced with `messages`)
73+
if len(tool_calls_json):
74+
self.ychat.update_message(Message(
75+
id=result.id,
76+
body=f"\n\n```\n{json.dumps(tool_calls_json, indent=2)}\n```\n",
77+
sender=self.id,
78+
time=time.time(),
79+
raw_time=False
80+
), append=True)
81+
82+
# Run tools and append outputs to `messages`
83+
tool_call_outputs = await self.run_tools(result.tool_calls.resolve())
84+
messages.extend(tool_call_outputs)
85+
86+
# Add tool call outputs to YChat (not synced with `messages`)
87+
if tool_call_outputs:
88+
self.ychat.update_message(Message(
89+
id=result.id,
90+
body=f"\n\n```\n{json.dumps(tool_call_outputs, indent=2)}\n```\n",
91+
sender=self.id,
92+
time=time.time(),
93+
raw_time=False
94+
), append=True)
95+
96+
7097

7198
def get_context_as_messages(
7299
self, model_id: str, message: Message

0 commit comments

Comments (0)