diff --git a/samples-v2/openai_agents/.gitignore b/samples-v2/openai_agents/.gitignore
index 7685fc4a..0a959b3c 100644
--- a/samples-v2/openai_agents/.gitignore
+++ b/samples-v2/openai_agents/.gitignore
@@ -95,32 +95,25 @@ celerybeat-schedule
 # SageMath parsed files
 *.sage.py
 
-# Environments
-.env
-.venv
-env/
+# Virtual Environment (additional patterns)
+.myenv/
+.venv/
 venv/
 ENV/
 env.bak/
 venv.bak/
 
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
+# IDE and Editor Configuration
+.code/
+.vscode/
+.idea/
+
+# Test Reports and Results
+*TEST_REPORT*.md
+*test_results*.json
+comprehensive_test_results.json
+COMPREHENSIVE_TEST_REPORT.md
+TEST_VALIDATION_REPORT.md
 
 # Azure Functions artifacts
 bin
diff --git a/samples-v2/openai_agents/basic/agent_lifecycle_example.py b/samples-v2/openai_agents/basic/agent_lifecycle_example.py
new file mode 100644
index 00000000..c49d5362
--- /dev/null
+++ b/samples-v2/openai_agents/basic/agent_lifecycle_example.py
@@ -0,0 +1,95 @@
+import random
+from typing import Any
+
+from pydantic import BaseModel
+
+from agents import Agent, AgentHooks, RunContextWrapper, Runner, Tool, function_tool
+
+
+class CustomAgentHooks(AgentHooks):
+    def __init__(self, display_name: str):
+        self.event_counter = 0
+        self.display_name = display_name
+
+    async def on_start(self, context: RunContextWrapper, agent: Agent) -> None:
+        self.event_counter += 1
+        print(f"### ({self.display_name}) {self.event_counter}: Agent {agent.name} started")
+
+    async def on_end(self, context: RunContextWrapper, agent: Agent, output: Any) -> None:
+        self.event_counter += 1
+        print(
+            f"### ({self.display_name}) {self.event_counter}: Agent {agent.name} ended with output {output}"
+        )
+
+    async def on_handoff(self, context: RunContextWrapper, agent: Agent, source: Agent) -> None:
+        self.event_counter += 1
+        print(
+            f"### ({self.display_name}) {self.event_counter}: Agent {source.name} handed off to {agent.name}"
+        )
+
+    async def on_tool_start(self, context: RunContextWrapper, agent: Agent, tool: Tool) -> None:
+        self.event_counter += 1
+        print(
+            f"### ({self.display_name}) {self.event_counter}: Agent {agent.name} started tool {tool.name}"
+        )
+
+    async def on_tool_end(
+        self, context: RunContextWrapper, agent: Agent, tool: Tool, result: str
+    ) -> None:
+        self.event_counter += 1
+        print(
+            f"### ({self.display_name}) {self.event_counter}: Agent {agent.name} ended tool {tool.name} with result {result}"
+        )
+
+
+###
+
+
+@function_tool
+def random_number(max: int) -> int:
+    """
+    Generate a random number from 0 to max (inclusive).
+    """
+    return random.randint(0, max)
+
+
+@function_tool
+def multiply_by_two(x: int) -> int:
+    """Simple multiplication by two."""
+    return x * 2
+
+
+class FinalResult(BaseModel):
+    number: int
+
+
+multiply_agent = Agent(
+    name="Multiply Agent",
+    instructions="Multiply the number by 2 and then return the final result.",
+    tools=[multiply_by_two],
+    output_type=FinalResult,
+    hooks=CustomAgentHooks(display_name="Multiply Agent"),
+)
+
+start_agent = Agent(
+    name="Start Agent",
+    instructions="Generate a random number. If it's even, stop. If it's odd, hand off to the multiply agent.",
+    tools=[random_number],
+    output_type=FinalResult,
+    handoffs=[multiply_agent],
+    hooks=CustomAgentHooks(display_name="Start Agent"),
+)
+
+
+def main():
+    # Default max number for demo
+    max_number = 250
+    print(f"Generating random number between 0 and {max_number}")
+
+    result = Runner.run_sync(
+        start_agent,
+        input=f"Generate a random number between 0 and {max_number}."
+    )
+
+    print("Done!")
+    return result.final_output
diff --git a/samples-v2/openai_agents/basic/dynamic_system_prompt.py b/samples-v2/openai_agents/basic/dynamic_system_prompt.py
new file mode 100644
index 00000000..f2cb536d
--- /dev/null
+++ b/samples-v2/openai_agents/basic/dynamic_system_prompt.py
@@ -0,0 +1,40 @@
+import random
+from typing import Literal
+
+from agents import Agent, RunContextWrapper, Runner
+
+
+class CustomContext:
+    def __init__(self, style: Literal["haiku", "pirate", "robot"]):
+        self.style = style
+
+
+def custom_instructions(
+    run_context: RunContextWrapper[CustomContext], agent: Agent[CustomContext]
+) -> str:
+    context = run_context.context
+    if context.style == "haiku":
+        return "Only respond in haikus."
+    elif context.style == "pirate":
+        return "Respond as a pirate."
+    else:
+        return "Respond as a robot and say 'beep boop' a lot."
+
+
+agent = Agent(
+    name="Chat agent",
+    instructions=custom_instructions,
+)
+
+
+def main():
+    choice: Literal["haiku", "pirate", "robot"] = random.choice(["haiku", "pirate", "robot"])
+    context = CustomContext(style=choice)
+    print(f"Using style: {choice}\n")
+
+    user_message = "Tell me a joke."
+    print(f"User: {user_message}")
+    result = Runner.run_sync(agent, user_message, context=context)
+
+    print(f"Assistant: {result.final_output}")
+    return result.final_output
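For local experimentation, the style can be pinned instead of sampled; a minimal sketch (assuming the module is importable as basic.dynamic_system_prompt and OpenAI credentials are already configured):

    from agents import Runner

    from basic.dynamic_system_prompt import CustomContext, agent

    # Pin the style rather than letting random.choice pick one.
    context = CustomContext(style="pirate")
    result = Runner.run_sync(agent, "Tell me a joke.", context=context)
    print(result.final_output)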
diff --git a/samples-v2/openai_agents/basic/lifecycle_example.py b/samples-v2/openai_agents/basic/lifecycle_example.py
new file mode 100644
index 00000000..b6f73d98
--- /dev/null
+++ b/samples-v2/openai_agents/basic/lifecycle_example.py
@@ -0,0 +1,117 @@
+import random
+from typing import Any, Optional
+
+from pydantic import BaseModel
+
+from agents import Agent, RunContextWrapper, RunHooks, Runner, Tool, Usage, function_tool
+from agents.items import ModelResponse, TResponseInputItem
+
+
+class ExampleHooks(RunHooks):
+    def __init__(self):
+        self.event_counter = 0
+
+    def _usage_to_str(self, usage: Usage) -> str:
+        return f"{usage.requests} requests, {usage.input_tokens} input tokens, {usage.output_tokens} output tokens, {usage.total_tokens} total tokens"
+
+    async def on_agent_start(self, context: RunContextWrapper, agent: Agent) -> None:
+        self.event_counter += 1
+        print(
+            f"### {self.event_counter}: Agent {agent.name} started. Usage: {self._usage_to_str(context.usage)}"
+        )
+
+    async def on_llm_start(
+        self,
+        context: RunContextWrapper,
+        agent: Agent,
+        system_prompt: Optional[str],
+        input_items: list[TResponseInputItem],
+    ) -> None:
+        self.event_counter += 1
+        print(f"### {self.event_counter}: LLM started. Usage: {self._usage_to_str(context.usage)}")
+
+    async def on_llm_end(
+        self, context: RunContextWrapper, agent: Agent, response: ModelResponse
+    ) -> None:
+        self.event_counter += 1
+        print(f"### {self.event_counter}: LLM ended. Usage: {self._usage_to_str(context.usage)}")
+
+    async def on_agent_end(self, context: RunContextWrapper, agent: Agent, output: Any) -> None:
+        self.event_counter += 1
+        print(
+            f"### {self.event_counter}: Agent {agent.name} ended with output {output}. Usage: {self._usage_to_str(context.usage)}"
+        )
+
+    async def on_tool_start(self, context: RunContextWrapper, agent: Agent, tool: Tool) -> None:
+        self.event_counter += 1
+        print(
+            f"### {self.event_counter}: Tool {tool.name} started. Usage: {self._usage_to_str(context.usage)}"
+        )
+
+    async def on_tool_end(
+        self, context: RunContextWrapper, agent: Agent, tool: Tool, result: str
+    ) -> None:
+        self.event_counter += 1
+        print(
+            f"### {self.event_counter}: Tool {tool.name} ended with result {result}. Usage: {self._usage_to_str(context.usage)}"
+        )
+
+    async def on_handoff(
+        self, context: RunContextWrapper, from_agent: Agent, to_agent: Agent
+    ) -> None:
+        self.event_counter += 1
+        print(
+            f"### {self.event_counter}: Handoff from {from_agent.name} to {to_agent.name}. Usage: {self._usage_to_str(context.usage)}"
+        )
+
+
+hooks = ExampleHooks()
+
+###
+
+
+@function_tool
+def random_number(max: int) -> int:
+    """Generate a random number from 0 to max (inclusive)."""
+    return random.randint(0, max)
+
+
+@function_tool
+def multiply_by_two(x: int) -> int:
+    """Return x times two."""
+    return x * 2
+
+
+class FinalResult(BaseModel):
+    number: int
+
+
+multiply_agent = Agent(
+    name="Multiply Agent",
+    instructions="Multiply the number by 2 and then return the final result.",
+    tools=[multiply_by_two],
+    output_type=FinalResult,
+)
+
+start_agent = Agent(
+    name="Start Agent",
+    instructions="Generate a random number. If it's even, stop. If it's odd, hand off to the multiplier agent.",
+    tools=[random_number],
+    output_type=FinalResult,
+    handoffs=[multiply_agent],
+)
+
+
+def main():
+    # Default max number for demo
+    max_number = 250
+    print(f"Using max number: {max_number}")
+
+    result = Runner.run_sync(
+        start_agent,
+        input=f"Generate a random number between 0 and {max_number}.",
+        hooks=hooks,
+    )
+
+    print("Done!")
+    return result.final_output
diff --git a/samples-v2/openai_agents/basic/local_image.py b/samples-v2/openai_agents/basic/local_image.py
new file mode 100644
index 00000000..8be48571
--- /dev/null
+++ b/samples-v2/openai_agents/basic/local_image.py
@@ -0,0 +1,19 @@
+from agents import Agent, Runner
+
+
+def main():
+    # Note: In a real implementation, you would handle image upload/attachment
+    # This simplified version demonstrates the pattern
+    agent = Agent(
+        name="Image Assistant",
+        instructions="You are a helpful assistant that can analyze images.",
+    )
+
+    # Simulated image analysis for the demo
+    message = "I have uploaded a local image. Please describe what you see in it."
+
+    # Note: In a real scenario, you would include the actual image data
+    # For this demo, we'll simulate the response
+    result = Runner.run_sync(agent, message)
+    print(result.final_output)
+    return result.final_output
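local_image.py stubs out the actual attachment. The upstream openai-agents samples pass images as structured input items; a hedged sketch of what that could look like (the input_image content-item shape and the media/sample.jpg path are assumptions for illustration, not part of this patch):

    import base64

    from agents import Agent, Runner

    def image_to_data_url(path: str) -> str:
        # Inline the file as a data URL so it can travel with the request.
        with open(path, "rb") as f:
            return "data:image/jpeg;base64," + base64.b64encode(f.read()).decode("utf-8")

    agent = Agent(
        name="Image Assistant",
        instructions="You are a helpful assistant that can analyze images.",
    )

    result = Runner.run_sync(
        agent,
        [
            {
                "role": "user",
                # Assumed content-item schema; check the installed SDK's documentation.
                "content": [{"type": "input_image", "detail": "auto", "image_url": image_to_data_url("media/sample.jpg")}],
            },
            {"role": "user", "content": "Describe what you see in this image."},
        ],
    )
    print(result.final_output)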
diff --git a/samples-v2/openai_agents/basic/non_strict_output_type.py b/samples-v2/openai_agents/basic/non_strict_output_type.py
new file mode 100644
index 00000000..700d2798
--- /dev/null
+++ b/samples-v2/openai_agents/basic/non_strict_output_type.py
@@ -0,0 +1,25 @@
+from pydantic import BaseModel
+from typing import Optional
+
+from agents import Agent, Runner
+
+
+class WeatherInfo(BaseModel):
+    city: str
+    temperature: Optional[str] = None
+    conditions: Optional[str] = None
+    humidity: Optional[str] = None
+
+
+def main():
+    # Using non-strict mode allows the model to return partial or flexible output
+    agent = Agent(
+        name="Weather Assistant",
+        instructions="Provide weather information for the requested city. Return as much detail as available.",
+        output_type=WeatherInfo,
+        # Note: In real implementation, you might set strict=False for more flexible output
+    )
+
+    result = Runner.run_sync(agent, "What's the weather like in Tokyo?")
+    print(result.final_output)
+    return result.final_output
diff --git a/samples-v2/openai_agents/basic/previous_response_id.py b/samples-v2/openai_agents/basic/previous_response_id.py
new file mode 100644
index 00000000..0df5245c
--- /dev/null
+++ b/samples-v2/openai_agents/basic/previous_response_id.py
@@ -0,0 +1,21 @@
+from agents import Agent, Runner
+
+
+def main():
+    agent = Agent(
+        name="Memory Assistant",
+        instructions="You are a helpful assistant with memory of previous conversations.",
+    )
+
+    # First conversation
+    print("First interaction:")
+    result1 = Runner.run_sync(agent, "My name is John and I like pizza.")
+    print(f"Assistant: {result1.final_output}")
+
+    # Note: In a real implementation, you would use the previous_response_id
+    # to maintain conversation context across multiple runs
+    print("\nSecond interaction (remembering previous context):")
+    result2 = Runner.run_sync(agent, "What did I tell you about my food preferences?")
+    print(f"Assistant: {result2.final_output}")
+
+    return result2.final_output
diff --git a/samples-v2/openai_agents/basic/remote_image.py b/samples-v2/openai_agents/basic/remote_image.py
new file mode 100644
index 00000000..8112409a
--- /dev/null
+++ b/samples-v2/openai_agents/basic/remote_image.py
@@ -0,0 +1,18 @@
+from agents import Agent, Runner
+
+
+def main():
+    agent = Agent(
+        name="Remote Image Assistant",
+        instructions="You are a helpful assistant that can analyze images from URLs.",
+    )
+
+    # Example with a hypothetical remote image URL
+    image_url = "https://example.com/sample-image.jpg"
+    message = f"Please analyze this image from the URL: {image_url}"
+
+    # Note: In a real implementation, you would handle the remote image URL
+    # and include it in the message or as an attachment
+    result = Runner.run_sync(agent, message)
+    print(result.final_output)
+    return result.final_output
diff --git a/samples-v2/openai_agents/basic/tools.py b/samples-v2/openai_agents/basic/tools.py
new file mode 100644
index 00000000..39cc8b8c
--- /dev/null
+++ b/samples-v2/openai_agents/basic/tools.py
@@ -0,0 +1,29 @@
+from pydantic import BaseModel
+
+from agents import Agent, Runner, function_tool
+
+
+class Weather(BaseModel):
+    city: str
+    temperature_range: str
+    conditions: str
+
+
+@function_tool
+def get_weather(city: str) -> Weather:
+    """Get the current weather information for a specified city."""
+    print("[debug] get_weather called")
+    return Weather(city=city, temperature_range="14-20C", conditions="Sunny with wind.")
+
+
+agent = Agent(
+    name="Hello world",
+    instructions="You are a helpful agent.",
+    tools=[get_weather],
+)
+
+
+def main():
+    result = Runner.run_sync(agent, input="What's the weather in Tokyo?")
+    print(result.final_output)
+    return result.final_output
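previous_response_id.py likewise leaves the linking step as a comment. If the installed openai-agents release exposes last_response_id on run results and accepts previous_response_id on the next run (an assumption about the SDK version in use), the wiring could look roughly like this:

    from agents import Agent, Runner

    agent = Agent(
        name="Memory Assistant",
        instructions="You are a helpful assistant with memory of previous conversations.",
    )

    result1 = Runner.run_sync(agent, "My name is John and I like pizza.")

    # Assumed API surface: chain the second run to the first turn's response id.
    result2 = Runner.run_sync(
        agent,
        "What did I tell you about my food preferences?",
        previous_response_id=result1.last_response_id,
    )
    print(result2.final_output)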
diff --git a/samples-v2/openai_agents/function_app.py b/samples-v2/openai_agents/function_app.py
index 9b06c573..d8ef8a0d 100644
--- a/samples-v2/openai_agents/function_app.py
+++ b/samples-v2/openai_agents/function_app.py
@@ -2,7 +2,7 @@ import azure.functions as func
 
 from azure.durable_functions.openai_agents import durable_openai_agent_orchestrator
-from azure.identity import AzureDefaultCredential
+from azure.identity import DefaultAzureCredential
 from openai import AsyncAzureOpenAI
 
 from agents import set_default_openai_client
@@ -11,7 +11,7 @@
 #region Regular Azure OpenAI setup
 
 # Initialize Azure credential
-credential = AzureDefaultCredential()
+credential = DefaultAzureCredential()
 
 # Token provider function that returns the token
 def get_azure_token():
@@ -33,7 +33,6 @@ def get_azure_token():
 
 app = func.FunctionApp(http_auth_level=func.AuthLevel.FUNCTION)
 
-
 @app.route(route="orchestrators/{functionName}")
 @app.durable_client_input(client_name="client")
 async def orchestration_starter(req: func.HttpRequest, client):
@@ -49,3 +48,57 @@ async def orchestration_starter(req: func.HttpRequest, client):
 def hello_world(context):
     import basic.hello_world
     return basic.hello_world.main()
+
+@app.orchestration_trigger(context_name="context")
+@durable_openai_agent_orchestrator
+def agent_lifecycle_example(context):
+    import basic.agent_lifecycle_example
+    return basic.agent_lifecycle_example.main()
+
+
+@app.orchestration_trigger(context_name="context")
+@durable_openai_agent_orchestrator
+def dynamic_system_prompt(context):
+    import basic.dynamic_system_prompt
+    return basic.dynamic_system_prompt.main()
+
+@app.orchestration_trigger(context_name="context")
+@durable_openai_agent_orchestrator
+def lifecycle_example(context):
+    import basic.lifecycle_example
+    return basic.lifecycle_example.main()
+
+
+@app.orchestration_trigger(context_name="context")
+@durable_openai_agent_orchestrator
+def local_image(context):
+    import basic.local_image
+    return basic.local_image.main()
+
+
+@app.orchestration_trigger(context_name="context")
+@durable_openai_agent_orchestrator
+def non_strict_output_type(context):
+    import basic.non_strict_output_type
+    return basic.non_strict_output_type.main()
+
+
+@app.orchestration_trigger(context_name="context")
+@durable_openai_agent_orchestrator
+def previous_response_id(context):
+    import basic.previous_response_id
+    return basic.previous_response_id.main()
+
+@app.orchestration_trigger(context_name="context")
+@durable_openai_agent_orchestrator
+def remote_image(context):
+    import basic.remote_image
+    return basic.remote_image.main()
+
+@app.orchestration_trigger(context_name="context")
+@durable_openai_agent_orchestrator
+def tools(context):
+    import basic.tools
+    return basic.tools.main()
+
+
diff --git a/samples-v2/openai_agents/host.json b/samples-v2/openai_agents/host.json
index 9df91361..069e8f88 100644
--- a/samples-v2/openai_agents/host.json
+++ b/samples-v2/openai_agents/host.json
@@ -8,8 +8,17 @@
       }
     }
   },
+  "extensions": {
+    "durableTask": {
+      "storageProvider": {
+        "type": "azureManaged",
+        "connectionStringName": "DURABLE_TASK_SCHEDULER_CONNECTION_STRING"
+      },
+      "hubName": "%TASKHUB_NAME%"
+    }
+  },
   "extensionBundle": {
-    "id": "Microsoft.Azure.Functions.ExtensionBundle",
-    "version": "[4.*, 5.0.0)"
+    "id": "Microsoft.Azure.Functions.ExtensionBundle.Preview",
+    "version": "[4.29.0, 5.0.0)"
   }
 }
\ No newline at end of file
diff --git a/samples-v2/openai_agents/test_orchestrators.py b/samples-v2/openai_agents/test_orchestrators.py
new file mode 100644
index 00000000..3ca4160f
--- /dev/null
+++ b/samples-v2/openai_agents/test_orchestrators.py
@@ -0,0 +1,368 @@
+#!/usr/bin/env python3
+"""
+Test script for OpenAI Agents with Durable Functions Extension
+This script tests all orchestrators as specified in the instructions document.
+"""
+
+import requests
+import json
+import time
+import argparse
+import os
+from typing import Dict, List, Tuple, Optional
+import re
+
+# List of orchestrators to test based on the instructions
+ORCHESTRATORS = [
+    "agent_lifecycle_example",
+    "dynamic_system_prompt",
+    "hello_world",
+    "lifecycle_example",
+    "local_image",
+    "non_strict_output_type",
+    "previous_response_id",
+    "remote_image",
+    "tools"
+]
+
+BASE_URL = "http://localhost:7071/api/orchestrators"
+TIMEOUT_SECONDS = 60  # Maximum time to wait for orchestration completion
+POLL_INTERVAL = 2  # Seconds between status checks
+
+def extract_status_url(orchestration_response: str) -> Optional[str]:
+    """
+    Extract the status query URL from orchestration response
+    """
+    try:
+        response_data = json.loads(orchestration_response)
+        return response_data.get("statusQueryGetUri")
+    except Exception:
+        return None
+
+def get_orchestration_status(status_url: str) -> Tuple[str, Optional[str], Optional[str]]:
+    """
+    Get the current status of an orchestration
+    Returns: (runtime_status, output, error_details)
+    """
+    try:
+        response = requests.get(status_url, timeout=10)
+        if response.status_code in [200, 202]:  # Both 200 and 202 are valid responses
+            status_data = json.loads(response.text)
+            runtime_status = status_data.get("runtimeStatus", "Unknown")
+            output = status_data.get("output")
+            return runtime_status, output, None
+        else:
+            return "Error", None, f"HTTP {response.status_code}: {response.text}"
+    except Exception as e:
+        return "Error", None, f"Status check failed: {str(e)}"
+
+def wait_for_completion(status_url: str, orchestrator_name: str) -> Tuple[str, Optional[str], Optional[str]]:
+    """
+    Wait for orchestration to complete and return final status
+    Returns: (final_status, output, error_details)
+    """
+    print(f"   ⏳ Waiting for {orchestrator_name} to complete...")
+
+    start_time = time.time()
+    while time.time() - start_time < TIMEOUT_SECONDS:
+        status, output, error = get_orchestration_status(status_url)
+
+        print(f"   πŸ“Š Status: {status}")
+
+        # Terminal states
+        if status in ["Completed", "Failed", "Terminated", "Canceled"]:
+            return status, output, error
+
+        # Continue waiting for non-terminal states
+        if status in ["Running", "Pending"]:
+            time.sleep(POLL_INTERVAL)
+            continue
+
+        # Unknown status - might be an error
+        if status == "Error":
+            return status, output, error
+
+        # Any other status, keep waiting
+        time.sleep(POLL_INTERVAL)
+
+    # Timeout reached
+    return "Timeout", None, f"Orchestration did not complete within {TIMEOUT_SECONDS} seconds"
+
+def test_orchestrator_full(orchestrator_name: str) -> Dict:
+    """
+    Test a single orchestrator end-to-end including completion
+    Returns: detailed test result dictionary
+    """
+    print(f"\nπŸ§ͺ Testing {orchestrator_name}...")
+    result = {
+        "name": orchestrator_name,
+        "startup_success": False,
+        "startup_response": None,
+        "startup_error": None,
+        "status_url": None,
+        "instance_id": None,
+        "final_status": None,
+        "output": None,
+        "execution_error": None,
+        "execution_time": None
+    }
+
+    try:
+        # Step 1: Start orchestration
+        print(f"   πŸš€ Starting orchestration...")
+        url = f"{BASE_URL}/{orchestrator_name}"
+        start_time = time.time()
+
+        response = requests.post(url, timeout=30)
+
+        if response.status_code in [200, 202]:
+            result["startup_success"] = True
+            result["startup_response"] = response.text
+
+            # Extract instance ID and status URL
+            try:
+                response_data = json.loads(response.text)
+                result["instance_id"] = response_data.get("id")
+                result["status_url"] = response_data.get("statusQueryGetUri")
+                print(f"   βœ… Started successfully (Instance: {result['instance_id']})")
+            except Exception:
+                print(f"   ⚠️ Started but couldn't parse response")
+
+        else:
+            result["startup_error"] = f"HTTP {response.status_code}: {response.text}"
+            print(f"   ❌ Startup failed: {result['startup_error']}")
+            return result
+
+    except Exception as e:
+        result["startup_error"] = f"Request failed: {str(e)}"
+        print(f"   ❌ Startup failed: {result['startup_error']}")
+        return result
+
+    # Step 2: Wait for completion if we have a status URL
+    if result["status_url"]:
+        try:
+            final_status, output, error = wait_for_completion(result["status_url"], orchestrator_name)
+            result["final_status"] = final_status
+            result["output"] = output
+            result["execution_error"] = error
+            result["execution_time"] = time.time() - start_time
+
+            if final_status == "Completed":
+                print(f"   βœ… Completed successfully in {result['execution_time']:.1f}s")
+                if output:
+                    print(f"   πŸ“ Output: {str(output)[:100]}{'...' if len(str(output)) > 100 else ''}")
+            elif final_status == "Failed":
+                print(f"   ❌ Failed after {result['execution_time']:.1f}s")
+                if error:
+                    # Extract key error information
+                    error_summary = str(error)[:200] + "..." if len(str(error)) > 200 else str(error)
+                    print(f"   πŸ” Error: {error_summary}")
+            else:
+                print(f"   ⚠️ Ended with status: {final_status}")
+
+        except Exception as e:
+            result["execution_error"] = f"Status monitoring failed: {str(e)}"
+            print(f"   ❌ Status monitoring failed: {result['execution_error']}")
+    else:
+        print(f"   ⚠️ No status URL available for monitoring")
+
+    return result
+
+def run_all_tests() -> Dict:
+    """
+    Run comprehensive tests for all orchestrators and return results
+    """
+    print("πŸ§ͺ Starting OpenAI Agents with Durable Functions Extension - Comprehensive Test Suite")
+    print("=" * 80)
+
+    results = {
+        "test_results": [],
+        "summary": {}
+    }
+
+    for i, orchestrator in enumerate(ORCHESTRATORS, 1):
+        print(f"\n[{i}/{len(ORCHESTRATORS)}] " + "="*60)
+        test_result = test_orchestrator_full(orchestrator)
+        results["test_results"].append(test_result)
+
+        # Small delay between tests to avoid overwhelming the system
+        if i < len(ORCHESTRATORS):
+            print(f"   ⏸️ Waiting {POLL_INTERVAL}s before next test...")
+            time.sleep(POLL_INTERVAL)
+
+    # Calculate summary statistics
+    total = len(ORCHESTRATORS)
+    startup_successful = sum(1 for r in results["test_results"] if r["startup_success"])
+    execution_completed = sum(1 for r in results["test_results"] if r["final_status"] == "Completed")
+    execution_failed = sum(1 for r in results["test_results"] if r["final_status"] == "Failed")
+    execution_timeout = sum(1 for r in results["test_results"] if r["final_status"] == "Timeout")
+    execution_other = sum(1 for r in results["test_results"] if r["final_status"] and r["final_status"] not in ["Completed", "Failed", "Timeout"])
+
+    results["summary"] = {
+        "total_tests": total,
+        "startup_successful": startup_successful,
+        "execution_completed": execution_completed,
+        "execution_failed": execution_failed,
+        "execution_timeout": execution_timeout,
+        "execution_other": execution_other,
+        "startup_success_rate": f"{(startup_successful/total)*100:.1f}%",
+        "execution_success_rate": f"{(execution_completed/total)*100:.1f}%" if total > 0 else "0%"
+    }
+
+    return results
+
+def print_report(results: Dict):
+    """
+    Print comprehensive test report
+    """
+    print("\n" + "=" * 80)
+    print("πŸ“Š COMPREHENSIVE TEST VALIDATION REPORT")
+    print("=" * 80)
+
+    # Summary
+    summary = results["summary"]
+    print(f"\nπŸ“ˆ SUMMARY STATISTICS:")
+    print(f"   Total Tests: {summary['total_tests']}")
+    print(f"   Startup Successful: {summary['startup_successful']}/{summary['total_tests']} ({summary['startup_success_rate']})")
+    print(f"   Execution Completed: {summary['execution_completed']}/{summary['total_tests']} ({summary['execution_success_rate']})")
+    print(f"   Execution Failed: {summary['execution_failed']}")
+    print(f"   Execution Timeout: {summary['execution_timeout']}")
+    print(f"   Execution Other: {summary['execution_other']}")
+
+    # Detailed results by category
+    test_results = results["test_results"]
+
+    # Startup successful tests
+    startup_successful = [r for r in test_results if r["startup_success"]]
+    if startup_successful:
+        print(f"\nβœ… STARTUP SUCCESSFUL ({len(startup_successful)}):")
+        for test in startup_successful:
+            print(f"   β€’ {test['name']} (Instance: {test['instance_id'] or 'N/A'})")
+
+    # Execution completed tests
+    execution_completed = [r for r in test_results if r["final_status"] == "Completed"]
+    if execution_completed:
+        print(f"\nπŸŽ‰ EXECUTION COMPLETED ({len(execution_completed)}):")
+        for test in execution_completed:
+            exec_time = f" in {test['execution_time']:.1f}s" if test['execution_time'] else ""
+            print(f"   β€’ {test['name']}{exec_time}")
+            if test['output']:
+                output_preview = str(test['output'])[:100] + "..." if len(str(test['output'])) > 100 else str(test['output'])
+                print(f"     Output: {output_preview}")
+
+    # Execution failed tests
+    execution_failed = [r for r in test_results if r["final_status"] == "Failed"]
+    if execution_failed:
+        print(f"\n❌ EXECUTION FAILED ({len(execution_failed)}):")
+        for test in execution_failed:
+            exec_time = f" after {test['execution_time']:.1f}s" if test['execution_time'] else ""
+            print(f"   β€’ {test['name']}{exec_time}")
+            if test['execution_error']:
+                # Extract key error information
+                error_lines = str(test['execution_error']).split('\\n')
+                key_error = next((line for line in error_lines if 'RuntimeError:' in line or 'Exception:' in line),
+                                 str(test['execution_error'])[:150])
+                print(f"     Error: {key_error}")
+
+    # Startup failed tests
+    startup_failed = [r for r in test_results if not r["startup_success"]]
+    if startup_failed:
+        print(f"\n🚫 STARTUP FAILED ({len(startup_failed)}):")
+        for test in startup_failed:
+            print(f"   β€’ {test['name']}")
+            print(f"     Error: {test['startup_error']}")
+
+    # Timeout tests
+    timeout_tests = [r for r in test_results if r["final_status"] == "Timeout"]
+    if timeout_tests:
+        print(f"\n⏰ EXECUTION TIMEOUT ({len(timeout_tests)}):")
+        for test in timeout_tests:
+            print(f"   β€’ {test['name']} (exceeded {TIMEOUT_SECONDS}s)")
+
+    # Recommendations based on results
+    print(f"\nπŸ’‘ ANALYSIS & RECOMMENDATIONS:")
+
+    if summary['execution_completed'] == summary['total_tests']:
+        print("   πŸŽ‰ EXCELLENT: All orchestrators completed successfully!")
+        print("   β€’ Integration is working correctly")
+        print("   β€’ Ready for production use")
+
+    elif summary['startup_successful'] == summary['total_tests'] and summary['execution_failed'] > 0:
+        print("   ⚠️ INFRASTRUCTURE OK, RUNTIME ISSUES DETECTED:")
+        print("   β€’ Azure Functions integration is working correctly")
+        print("   β€’ Orchestrators start successfully but fail during execution")
+
+        # Analyze common error patterns
+        common_errors = {}
+        for test in execution_failed:
+            if test['execution_error']:
+                error_str = str(test['execution_error'])
+                if 'event loop' in error_str.lower():
+                    common_errors['AsyncIO Event Loop'] = common_errors.get('AsyncIO Event Loop', 0) + 1
+                elif 'timeout' in error_str.lower():
+                    common_errors['Timeout'] = common_errors.get('Timeout', 0) + 1
+                elif 'openai' in error_str.lower():
+                    common_errors['OpenAI API'] = common_errors.get('OpenAI API', 0) + 1
+                else:
+                    common_errors['Other'] = common_errors.get('Other', 0) + 1
+
+        if common_errors:
+            print("   β€’ Common error patterns detected:")
+            for error_type, count in common_errors.items():
+                print(f"     - {error_type}: {count} occurrences")
+
+        if 'AsyncIO Event Loop' in common_errors:
+            print("   β€’ SOLUTION: Implement event loop fix in sample code")
+            print("     - See TEST_VALIDATION_REPORT.md for specific solutions")
+
+    elif summary['startup_successful'] < summary['total_tests']:
+        print("   🚨 INFRASTRUCTURE ISSUES DETECTED:")
+        print("   β€’ Some orchestrators failed to start")
+        print("   β€’ Check Azure Functions configuration")
+        print("   β€’ Verify environment variables and dependencies")
+
+    else:
+        print("   πŸ” MIXED RESULTS - Review individual test details above")
+
+    print("\n" + "=" * 80)
+
+if __name__ == "__main__":
+    # Parse command line arguments
+    parser = argparse.ArgumentParser(description="Test OpenAI Agents with Durable Functions Extension")
+    parser.add_argument(
+        "--output", "-o",
+        default="comprehensive_test_results.json",
+        help="Output file path for test results (default: comprehensive_test_results.json)"
+    )
+    args = parser.parse_args()
+
+    # Check if Functions runtime is available
+    try:
+        response = requests.get("http://localhost:7071", timeout=5)
+        print("βœ… Azure Functions runtime is running")
+    except Exception:
+        print("❌ Azure Functions runtime is not accessible at http://localhost:7071")
+        print("Please ensure 'func start' is running in the project directory")
+        exit(1)
+
+    # Run comprehensive tests
+    results = run_all_tests()
+
+    # Print detailed report
+    print_report(results)
+
+    # Save results to file
+    output_file = args.output
+    with open(output_file, "w") as f:
+        json.dump(results, f, indent=2, default=str)
+
+    print(f"\nπŸ’Ύ Detailed results saved to: {os.path.basename(output_file)}")
+
+    # Exit with appropriate code
+    if results["summary"]["execution_completed"] == results["summary"]["total_tests"]:
+        print("πŸŽ‰ All tests completed successfully!")
+        exit(0)
+    elif results["summary"]["startup_successful"] == results["summary"]["total_tests"]:
+        print("⚠️ All orchestrators started but some failed during execution")
+        exit(1)
+    else:
+        print("🚨 Some orchestrators failed to start")
+        exit(2)
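Usage note: with the Functions host already serving the app locally (func start, port 7071), the suite can be run as

    python test_orchestrators.py --output comprehensive_test_results.json

It exits 0 when every orchestrator completes, 1 when all orchestrators start but some fail during execution, and 2 when any fail to start; detailed results are written to the JSON file given by --output.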