Skip to content

Commit de80f8c

Browse files
authored
Python: Include streaming code output for OpenAI Assistants (microsoft#9080)
### Motivation and Context In the recent release of OpenAI Assistant streaming responses, there was a gap related to not exposing code interpreter output, when available. <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> ### Description This PR closes that gap, and adds the ability to yield messages that contain code input/output. A new concept sample was added to show this. - Unit test coverage also added. <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone 😄
1 parent 28976b0 commit de80f8c

File tree

5 files changed

+225
-29
lines changed

5 files changed

+225
-29
lines changed
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Copyright (c) Microsoft. All rights reserved.
2+
import asyncio
3+
import os
4+
5+
from semantic_kernel.agents.open_ai import OpenAIAssistantAgent
6+
from semantic_kernel.agents.open_ai.azure_assistant_agent import AzureAssistantAgent
7+
from semantic_kernel.contents.chat_message_content import ChatMessageContent
8+
from semantic_kernel.contents.utils.author_role import AuthorRole
9+
from semantic_kernel.kernel import Kernel
10+
11+
#####################################################################
12+
# The following sample demonstrates how to create an OpenAI #
13+
# assistant using either Azure OpenAI or OpenAI and leverage the #
14+
# assistant's ability to stream the response and have the code #
15+
# interpreter work with uploaded files #
16+
#####################################################################
17+
18+
# Name and instructions used for the assistant created in main().
AGENT_NAME = "FileManipulation"
AGENT_INSTRUCTIONS = "Find answers to the user's questions in the provided file."
20+
21+
22+
# A helper method to invoke the agent with the user input
23+
async def invoke_streaming_agent(agent: OpenAIAssistantAgent, thread_id: str, input: str) -> None:
    """Invoke the streaming agent with the user input and print the streamed response.

    Args:
        agent: The assistant agent to invoke.
        thread_id: The id of the thread the message is added to.
        input: The user input to send to the agent.
    """
    await agent.add_chat_message(thread_id=thread_id, message=ChatMessageContent(role=AuthorRole.USER, content=input))

    print(f"# {AuthorRole.USER}: '{input}'")

    first_chunk = True
    async for content in agent.invoke_stream(thread_id=thread_id):
        if content.role != AuthorRole.TOOL:
            # Stream assistant text inline, printing the role header only once.
            if first_chunk:
                print(f"# {content.role}: ", end="", flush=True)
                first_chunk = False
            print(content.content, end="", flush=True)
        elif content.metadata.get("code"):
            # TOOL-role chunks flagged with "code" carry code interpreter input.
            # (The original re-checked `content.role == AuthorRole.TOOL` here,
            # which is always true in this elif branch.)
            print("")
            print(f"# {content.role} (code):\n\n{content.content}")
    print()
40+
41+
42+
async def main():
    """Run the streaming code-interpreter sample end to end."""
    # Create the instance of the Kernel
    kernel = Kernel()

    # Define a service_id for the sample
    service_id = "agent"

    # Get the path to the sales.csv file used by the code interpreter
    csv_file_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
        "resources",
        "agent_assistant_file_manipulation",
        "sales.csv",
    )

    # Create the assistant agent with the code interpreter enabled and the CSV uploaded
    agent = await AzureAssistantAgent.create(
        kernel=kernel,
        service_id=service_id,
        name=AGENT_NAME,
        instructions=AGENT_INSTRUCTIONS,
        enable_code_interpreter=True,
        code_interpreter_filenames=[csv_file_path],
    )

    # Create a thread for the conversation
    thread_id = await agent.create_thread()

    try:
        await invoke_streaming_agent(agent, thread_id=thread_id, input="Which segment had the most sales?")
        await invoke_streaming_agent(
            agent, thread_id=thread_id, input="List the top 5 countries that generated the most profit."
        )
        await invoke_streaming_agent(
            agent,
            thread_id=thread_id,
            input="Create a tab delimited file report of profit by each country per month.",
        )
    finally:
        # Clean up remote resources: uploaded files, the thread, then the assistant.
        # Use a plain loop (not a side-effect list comprehension) for the deletes.
        if agent is not None:
            for file_id in agent.code_interpreter_file_ids:
                await agent.delete_file(file_id)
            await agent.delete_thread(thread_id)
            await agent.delete()
85+
86+
87+
# Run the sample only when executed as a script (not on import).
if __name__ == "__main__":
    asyncio.run(main())

python/semantic_kernel/agents/open_ai/assistant_content_generation.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from openai.types.beta.threads.image_file_content_block import ImageFileContentBlock
99
from openai.types.beta.threads.image_file_delta_block import ImageFileDeltaBlock
1010
from openai.types.beta.threads.message_delta_event import MessageDeltaEvent
11+
from openai.types.beta.threads.runs.code_interpreter_tool_call import CodeInterpreter
1112
from openai.types.beta.threads.text_content_block import TextContentBlock
1213
from openai.types.beta.threads.text_delta_block import TextDeltaBlock
1314

@@ -32,6 +33,7 @@
3233
from openai.types.beta.threads.annotation import Annotation
3334
from openai.types.beta.threads.runs import RunStep
3435
from openai.types.beta.threads.runs.tool_call import ToolCall
36+
from openai.types.beta.threads.runs.tool_calls_step_details import ToolCallsStepDetails
3537

3638

3739
###################################################################
@@ -258,6 +260,56 @@ def generate_code_interpreter_content(agent_name: str, code: str) -> "ChatMessag
258260
)
259261

260262

263+
@experimental_function
def generate_streaming_tools_content(
    agent_name: str, step_details: "ToolCallsStepDetails"
) -> "StreamingChatMessageContent | None":
    """Generate streaming content for code interpreter tool calls in a run step.

    Args:
        agent_name: The agent name.
        step_details: The current step details.

    Returns:
        StreamingChatMessageContent | None: The streaming chat message content,
        or None when the step contains no code interpreter input or image output.
    """
    items: list[StreamingTextContent | StreamingFileReferenceContent] = []

    metadata: dict[str, bool] = {}
    for index, tool in enumerate(step_details.tool_calls):
        if tool.type != "code_interpreter":
            continue
        if tool.code_interpreter.input:
            items.append(
                StreamingTextContent(
                    choice_index=index,
                    text=tool.code_interpreter.input,
                )
            )
            # Flag the message so callers can tell code content apart from text.
            metadata["code"] = True
        for output in tool.code_interpreter.outputs:
            # Outputs are a union of image and log entries; only image outputs
            # carry a file id. (The original asserted isinstance(output,
            # CodeInterpreter), which is the tool-call payload type, not an
            # output type, and would raise on any real output.)
            if getattr(output, "type", None) == "image" and output.image.file_id:
                items.append(
                    StreamingFileReferenceContent(
                        file_id=output.image.file_id,
                    )
                )

    return (
        StreamingChatMessageContent(
            role=AuthorRole.TOOL,
            name=agent_name,
            items=items,  # type: ignore
            choice_index=0,
            metadata=metadata if metadata else None,
        )
        if items
        else None
    )
311+
312+
261313
@experimental_function
262314
def generate_annotation_content(annotation: "Annotation") -> AnnotationContent:
263315
"""Generate annotation content."""

python/semantic_kernel/agents/open_ai/open_ai_assistant_base.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
generate_function_result_content,
2626
generate_message_content,
2727
generate_streaming_message_content,
28+
generate_streaming_tools_content,
2829
get_function_call_contents,
2930
get_message_contents,
3031
)
@@ -920,6 +921,10 @@ async def _invoke_internal_stream(
920921
message_id = event.data.step_details.message_creation.message_id
921922
if message_id not in active_messages:
922923
active_messages[message_id] = event.data
924+
elif hasattr(event.data.step_details, "tool_calls"):
925+
tool_content = generate_streaming_tools_content(self.name, event.data.step_details)
926+
if tool_content:
927+
yield tool_content
923928
elif event.event == "thread.run.requires_action":
924929
run = event.data
925930
function_action_result = await self._handle_streaming_requires_action(run, function_steps)

python/tests/unit/agents/test_open_ai_assistant_base.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,34 @@ def mock_thread_run_step_completed():
517517
)
518518

519519

520+
def mock_thread_run_step_completed_with_code():
    """Build a completed run-step event whose details contain a code interpreter call."""
    return ThreadRunStepCompleted(
        data=RunStep(
            id="step_id_2",
            # The step details below are tool calls, so the step type must be
            # "tool_calls" ("message_creation" contradicted the details payload).
            type="tool_calls",
            completed_at=int(datetime.now(timezone.utc).timestamp()),
            created_at=int((datetime.now(timezone.utc) - timedelta(minutes=2)).timestamp()),
            step_details=ToolCallsStepDetails(
                type="tool_calls",
                tool_calls=[
                    CodeInterpreterToolCall(
                        id="tool_call_id",
                        code_interpreter=CodeInterpreter(input="test code", outputs=[]),
                        type="code_interpreter",
                    )
                ],
            ),
            assistant_id="assistant_id",
            object="thread.run.step",
            run_id="run_id",
            status="completed",
            thread_id="thread_id",
            usage=Usage(completion_tokens=10, prompt_tokens=5, total_tokens=15),
        ),
        event="thread.run.step.completed",
    )
546+
547+
520548
def mock_run_with_last_error():
521549
return ThreadRunFailed(
522550
data=Run(
@@ -1161,6 +1189,31 @@ async def test_invoke_stream(
11611189
assert len(messages) > 0
11621190

11631191

1192+
@pytest.mark.asyncio
async def test_invoke_stream_code_output(
    azure_openai_assistant_agent,
    mock_assistant,
    azure_openai_unit_test_env,
):
    """Streaming a tool-call run step yields content flagged with metadata["code"]."""
    events = [mock_thread_run_step_completed_with_code()]

    with patch.object(azure_openai_assistant_agent, "client", spec=AsyncAzureOpenAI) as mock_client:
        mock_client.beta = MagicMock()
        mock_client.beta.threads = MagicMock()
        mock_client.beta.assistants = MagicMock()
        mock_client.beta.assistants.create = AsyncMock(return_value=mock_assistant)

        mock_client.beta.threads.runs = MagicMock()
        mock_client.beta.threads.runs.stream = MagicMock(return_value=MockStream(events))

        azure_openai_assistant_agent.assistant = await azure_openai_assistant_agent.create_assistant()

        messages = []
        received = []
        async for content in azure_openai_assistant_agent.invoke_stream("thread_id", messages=messages):
            assert content is not None
            assert content.metadata.get("code") is True
            received.append(content)

        # Guard against a vacuously passing test: the per-chunk assertions above
        # never run if the stream yields nothing, so require at least one chunk.
        assert received
1215+
1216+
11641217
@pytest.mark.asyncio
11651218
async def test_invoke_stream_requires_action(
11661219
azure_openai_assistant_agent, mock_assistant, mock_thread_messages, azure_openai_unit_test_env

0 commit comments

Comments
 (0)