Refactor orchestration to use AzureAIAgentClient and improve logging

Fr4nc3 · Fr4nc3 · commit 373cea51dae7 · 2025-10-27T17:51:58.000-04:00
Replaces AzureOpenAIChatClient with AzureAIAgentClient for orchestration, updates callback signatures to match new agent framework, and enhances logging throughout the orchestration process. Improves error handling, updates workflow execution to use run_stream, and ensures compatibility with legacy WebSocket updates. Cleans up legacy shims and streamlines agent/participant management.
diff --git a/src/backend/af/orchestration/orchestration_manager.py b/src/backend/af/orchestration/orchestration_manager.py
@@ -6,18 +6,14 @@
 from typing import List, Optional, Callable, Awaitable
 
 # agent_framework imports
-from agent_framework.azure import AzureOpenAIChatClient
-
-from agent_framework import ChatMessage, ChatOptions
-from agent_framework._workflows import MagenticBuilder
-from agent_framework._workflows._magentic import AgentRunResponseUpdate  # type: ignore
+from agent_framework_azure_ai import AzureAIAgentClient
+from agent_framework import ChatMessage, ChatOptions, WorkflowOutputEvent, AgentRunResponseUpdate,  MagenticBuilder
 
 
 from common.config.app_config import config
 from common.models.messages_af import TeamConfiguration
 
-# Existing (legacy) callbacks expecting SK content; we'll adapt to them.
-# If you've created af-native callbacks (e.g. response_handlers_af) import those instead.
+# Existing (legacy) callbacks
 from af.callbacks.response_handlers import (
     agent_response_callback,
     streaming_agent_response_callback,
@@ -46,9 +42,8 @@ def _user_aware_agent_callback(
         """Adapts agent_framework final agent ChatMessage to legacy agent_response_callback signature."""
 
         async def _cb(agent_id: str, message: ChatMessage):
-            # Reuse existing callback expecting (ChatMessageContent, user_id). We pass text directly.
             try:
-                agent_response_callback(message, user_id)  # existing callback is sync
+                agent_response_callback(agent_id, message, user_id)  # Fixed: added agent_id
             except Exception as e:  # noqa: BLE001
                 logging.getLogger(__name__).error(
                     "agent_response_callback error: %s", e
@@ -63,18 +58,8 @@ def _user_aware_streaming_callback(
         """Adapts streaming updates to existing streaming handler."""
 
         async def _cb(agent_id: str, update: AgentRunResponseUpdate, is_final: bool):
-            # Build a minimal shim object with text/content for legacy handler if needed.
-            # Your converted streaming handlers (response_handlers_af) should replace this eventual shim.
-            class _Shim:  # noqa: D401
-                def __init__(self, agent_id: str, update: AgentRunResponseUpdate):
-                    self.agent_id = agent_id
-                    self.text = getattr(update, "text", None)
-                    self.contents = getattr(update, "contents", None)
-                    self.role = getattr(update, "role", None)
-
-            shim = _Shim(agent_id, update)
             try:
-                await streaming_agent_response_callback(shim, is_final, user_id)
+                await streaming_agent_response_callback(agent_id, update, is_final, user_id)  # Fixed: removed shim
             except Exception as e:  # noqa: BLE001
                 logging.getLogger(__name__).error(
                     "streaming_agent_response_callback error: %s", e
@@ -91,49 +76,64 @@ async def init_orchestration(cls, agents: List, user_id: str | None = None):
         Initialize a Magentic workflow with:
           - Provided agents (participants)
           - HumanApprovalMagenticManager as orchestrator manager
-          - AzureOpenAIChatClient as the underlying chat client
+          - AzureAIAgentClient as the underlying chat client
+          
+        This mirrors the old Semantic Kernel orchestration setup:
+        - Uses same deployment, endpoint, and credentials
+        - Applies same execution settings (temperature, max_tokens)
+        - Maintains same human approval workflow
         """
         if not user_id:
             raise ValueError("user_id is required to initialize orchestration")
 
+        # Get credential from config (same as old version)
         credential = config.get_azure_credential(client_id=config.AZURE_CLIENT_ID)
 
-        def get_token():
-            token = credential.get_token("https://cognitiveservices.azure.com/.default")
-            return token.token
-
-        # Create Azure chat client (agent_framework style) - relying on environment or explicit kwargs.
+        # Create Azure AI Agent client for orchestration using config
+        # This replaces AzureChatCompletion from SK
         try:
-            chat_client = AzureOpenAIChatClient(
-                endpoint=config.AZURE_OPENAI_ENDPOINT,
-                deployment_name=config.AZURE_OPENAI_DEPLOYMENT_NAME,
-                ad_token_provider=get_token,
+            chat_client = AzureAIAgentClient(
+                project_endpoint=config.AZURE_AI_PROJECT_ENDPOINT,
+                model_deployment_name=config.AZURE_OPENAI_DEPLOYMENT_NAME,
+                async_credential=credential,
             )
-        except Exception as e:  # noqa: BLE001
-            logging.getLogger(__name__).error(
-                    "chat_client error: %s", e
-                )
+            
+            cls.logger.info(
+                "Created AzureAIAgentClient for orchestration with model '%s' at endpoint '%s'",
+                config.AZURE_OPENAI_DEPLOYMENT_NAME,
+                config.AZURE_AI_PROJECT_ENDPOINT
+            )
+        except Exception as e:
+            cls.logger.error("Failed to create AzureAIAgentClient: %s", e)
             raise
-        # HumanApprovalMagenticManager needs the chat_client passed as 'chat_client' in its constructor signature (it subclasses StandardMagenticManager)
+
+        # Create HumanApprovalMagenticManager with the chat client
+        # Execution settings (temperature=0.1, max_tokens=4000) are configured via 
+        # orchestration_config.create_execution_settings() which matches old SK version
         try: 
             manager = HumanApprovalMagenticManager(
                 user_id=user_id,
                 chat_client=chat_client,
-                instructions=None,  # optionally supply orchestrator system instructions
+                instructions=None,  # Orchestrator system instructions (optional)
                 max_round_count=orchestration_config.max_rounds,
             )
-        except Exception as e:  # noqa: BLE001
-            logging.getLogger(__name__).error(
-                    "manager error: %s", e
-                )
+            cls.logger.info(
+                "Created HumanApprovalMagenticManager for user '%s' with max_rounds=%d",
+                user_id,
+                orchestration_config.max_rounds
+            )
+        except Exception as e:
+            cls.logger.error("Failed to create manager: %s", e)
             raise
+
         # Build participant map: use each agent's name as key
         participants = {}
         for ag in agents:
             name = getattr(ag, "agent_name", None) or getattr(ag, "name", None)
             if not name:
                 name = f"agent_{len(participants)+1}"
             participants[name] = ag
+            cls.logger.debug("Added participant '%s'", name)
 
         # Assemble workflow
         builder = (
@@ -142,18 +142,12 @@ def get_token():
             .with_standard_manager(manager=manager)
         )
 
-        # Register callbacks (non-streaming manager orchestration events). We'll enable streaming agent deltas via unified mode if desired later.
-        # Provide direct agent + streaming callbacks (legacy adapter form).
-        # The builder currently surfaces unified callback OR agent callbacks; we use agent callbacks here.
-        # NOTE: If you want unified events instead, use builder.on_event(..., mode=MagenticCallbackMode.STREAMING).
-        # We'll just store callbacks by augmenting manager after build via internal surfaces.
+        # Build workflow
         workflow = builder.build()
+        cls.logger.info("Built Magentic workflow with %d participants", len(participants))
 
-        # Wire agent response callbacks onto executor layer
-        # The built workflow exposes internal orchestrator/executor attributes; we rely on exported API for adding callbacks if present.
+        # Wire agent response callbacks onto orchestrator
         try:
-            # Attributes available: workflow._orchestrator._agent_response_callback, etc.
-            # Set them if not already configured (defensive).
             orchestrator = getattr(workflow, "_orchestrator", None)
             if orchestrator:
                 if getattr(orchestrator, "_agent_response_callback", None) is None:
@@ -171,7 +165,8 @@ def get_token():
                         "_streaming_agent_response_callback",
                         cls._user_aware_streaming_callback(user_id),
                     )
-        except Exception as e:  # noqa: BLE001
+                cls.logger.debug("Attached callbacks to workflow orchestrator")
+        except Exception as e:
             cls.logger.warning(
                 "Could not attach callbacks to workflow orchestrator: %s", e
             )
@@ -193,23 +188,25 @@ async def get_current_or_new_orchestration(
         current = orchestration_config.get_current_orchestration(user_id)
         if current is None or team_switched:
             if current is not None and team_switched:
-                # Close prior agents (skip ProxyAgent if desired)
+                cls.logger.info("Team switched, closing previous agents for user '%s'", user_id)
+                # Close prior agents (same logic as old version)
                 for agent in getattr(current, "_participants", {}).values():
-                    if (
-                        getattr(agent, "agent_name", getattr(agent, "name", ""))
-                        != "ProxyAgent"
-                    ):
+                    agent_name = getattr(agent, "agent_name", getattr(agent, "name", ""))
+                    if agent_name != "ProxyAgent":
                         close_coro = getattr(agent, "close", None)
                         if callable(close_coro):
                             try:
                                 await close_coro()
-                            except Exception as e:  # noqa: BLE001
+                                cls.logger.debug("Closed agent '%s'", agent_name)
+                            except Exception as e:
                                 cls.logger.error("Error closing agent: %s", e)
 
             factory = MagenticAgentFactory()
             agents = await factory.get_agents(
                 user_id=user_id, team_config_input=team_config
             )
+            cls.logger.info("Created %d agents for user '%s'", len(agents), user_id)
+            
             orchestration_config.orchestrations[user_id] = await cls.init_orchestration(
                 agents, user_id
             )
@@ -221,41 +218,55 @@ async def get_current_or_new_orchestration(
     async def run_orchestration(self, user_id: str, input_task) -> None:
         """
         Execute the Magentic workflow for the provided user and task description.
+        
+        This mirrors the old SK orchestration:
+        - Uses same execution settings (temperature=0.1, max_tokens=4000)
+        - Maintains same approval workflow
+        - Sends same WebSocket updates
         """
         job_id = str(uuid.uuid4())
         orchestration_config.set_approval_pending(job_id)
+        self.logger.info("Starting orchestration job '%s' for user '%s'", job_id, user_id)
 
         workflow = orchestration_config.get_current_orchestration(user_id)
         if workflow is None:
             raise ValueError("Orchestration not initialized for user.")
 
-        # Ensure manager tracks user_id
+        # Ensure manager tracks user_id (same as old version)
         try:
             manager = getattr(workflow, "_manager", None)
             if manager and hasattr(manager, "current_user_id"):
                 manager.current_user_id = user_id
-        except Exception as e:  # noqa: BLE001
+                self.logger.debug("Set user_id on manager = %s", user_id)
+        except Exception as e:
             self.logger.error("Error setting user_id on manager: %s", e)
 
-        # Build a MagenticContext-like starting message; the workflow interface likely exposes invoke(task=...)
+        # Build task from input (same as old version)
         task_text = getattr(input_task, "description", str(input_task))
-
-        # Provide chat options (temperature mapping from original execution_settings)
-        chat_options = ChatOptions(
-            temperature=0.1,
-            max_output_tokens=4000,
-        )
+        self.logger.debug("Task: %s", task_text)
 
         try:
-            # Invoke orchestrator; API may be workflow.invoke(task=..., chat_options=...)
-            result_msg: ChatMessage = await workflow.invoke(
-                task=task_text, chat_options=chat_options
-            )
-
-            final_text = result_msg.text if result_msg else ""
-            self.logger.info("Final result:\n%s", final_text)
+            # Execute workflow using run_stream with task as positional parameter
+            # The execution settings are configured in the manager/client
+            final_output: str | None = None
+            
+            self.logger.info("Starting workflow execution...")
+            async for event in workflow.run_stream(task_text):
+                # Check if this is the final output event
+                if isinstance(event, WorkflowOutputEvent):
+                    final_output = str(event.data)
+                    self.logger.debug("Received workflow output event")
+
+            # Extract final result
+            final_text = final_output if final_output else ""
+            
+            # Log results (same format as old version)
+            self.logger.info("\nAgent responses:")
+            self.logger.info("Orchestration completed. Final result length: %d chars", len(final_text))
+            self.logger.info("\nFinal result:\n%s", final_text)
             self.logger.info("=" * 50)
 
+            # Send final result via WebSocket (same as old version)
             await connection_config.send_status_update_async(
                 {
                     "type": WebsocketMessageType.FINAL_RESULT_MESSAGE,
@@ -268,6 +279,30 @@ async def run_orchestration(self, user_id: str, input_task) -> None:
                 user_id,
                 message_type=WebsocketMessageType.FINAL_RESULT_MESSAGE,
             )
-            self.logger.info("Final result sent via WebSocket to user %s", user_id)
-        except Exception as e:  # noqa: BLE001
-            self.logger.error("Unexpected orchestration error: %s", e)
+            self.logger.info("Final result sent via WebSocket to user '%s'", user_id)
+            
+        except Exception as e:
+            # Error handling (enhanced from old version)
+            self.logger.error("Unexpected orchestration error: %s", e, exc_info=True)
+            self.logger.error("Error type: %s", type(e).__name__)
+            if hasattr(e, "__dict__"):
+                self.logger.error("Error attributes: %s", e.__dict__)
+            self.logger.info("=" * 50)
+            
+            # Send error status to user
+            try:
+                await connection_config.send_status_update_async(
+                    {
+                        "type": WebsocketMessageType.FINAL_RESULT_MESSAGE,
+                        "data": {
+                            "content": f"Error during orchestration: {str(e)}",
+                            "status": "error",
+                            "timestamp": asyncio.get_event_loop().time(),
+                        },
+                    },
+                    user_id,
+                    message_type=WebsocketMessageType.FINAL_RESULT_MESSAGE,
+                )
+            except Exception as send_error:
+                self.logger.error("Failed to send error status: %s", send_error)
+            raise