Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,18 @@
import time
import httpx
import traceback
import json

from langchain_core.messages import AIMessage
from langchain_core.tools import tool
from langgraph.graph.state import CompiledStateGraph
from langgraph.checkpoint.memory import InMemorySaver
from cnoe_agent_utils import LLMFactory
from langchain_mcp_adapters.client import MultiServerMCPClient
from typing import Optional, Dict, Any, List


from ai_platform_engineering.multi_agents.platform_engineer import platform_registry
from ai_platform_engineering.multi_agents.platform_engineer.response_format import PlatformEngineerResponse
from ai_platform_engineering.multi_agents.platform_engineer.prompts import agent_prompts, generate_system_prompt
from ai_platform_engineering.multi_agents.tools import (
reflect_on_output,
Expand Down Expand Up @@ -48,12 +51,24 @@
# Configure root logging once at import time; the module-level logger below
# inherits this level/format for all log calls in this file.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Explicit ResponseFormat tool so Bedrock sees it in tool list
@tool("ResponseFormat", args_schema=PlatformEngineerResponse)
def response_format_tool(**kwargs):
    """Serialize the structured final-answer fields to a JSON string.

    Registered under the name "ResponseFormat" with PlatformEngineerResponse
    as its args schema so Bedrock-hosted models see an explicit tool entry
    for structured responses (enabled via USE_STRUCTURED_RESPONSE).

    Returns:
        A JSON string of the validated fields, or their ``str()`` form as a
        last resort if serialization fails for any reason.
    """
    try:
        # default=str keeps serialization from raising TypeError on
        # non-JSON-native values (e.g. datetimes) — the bare json.dumps
        # would otherwise fall through to a non-JSON repr below.
        return json.dumps(kwargs, default=str)
    except Exception:
        # Never let the tool invocation itself crash the agent run.
        return str(kwargs)

# RAG Configuration
# Strict opt-in: only the literal string "true" (case-insensitive) enables RAG.
# (A duplicate, immediately-overwritten assignment accepting "1"/"yes" was
# removed — it was dead code shadowed by this line.)
ENABLE_RAG = os.getenv("ENABLE_RAG", "false").lower() == "true"
# Slashes are stripped from both ends so endpoint paths can be joined safely.
RAG_SERVER_URL = os.getenv("RAG_SERVER_URL", "http://localhost:9446").strip("/")
RAG_CONNECTIVITY_RETRIES = 5           # attempts before giving up on the RAG server
RAG_CONNECTIVITY_WAIT_SECONDS = 10     # wait between connectivity attempts

# Structured Response Configuration
# When enabled, LLM uses ResponseFormat tool for final answers instead of [FINAL ANSWER] marker
USE_STRUCTURED_RESPONSE = os.getenv("USE_STRUCTURED_RESPONSE", "false").lower() == "true"

class AIPlatformEngineerMAS:
def __init__(self):
# Use existing platform_registry and enable dynamic monitoring
Expand Down Expand Up @@ -248,7 +263,7 @@ def _build_graph(self) -> None:
logger.error(f"Error during RAG setup: {e}")
self.rag_enabled = False

system_prompt = generate_system_prompt(current_agents, self.rag_config)
system_prompt = generate_system_prompt(current_agents, self.rag_config, USE_STRUCTURED_RESPONSE)

logger.info(f"📝 Generated system prompt: {len(system_prompt)} chars")

Expand Down Expand Up @@ -281,6 +296,13 @@ def _build_graph(self) -> None:
list_files, # list_files("/tmp/repo", pattern="*.yaml")
]

# Add ResponseFormat tool only when structured response mode is enabled
if USE_STRUCTURED_RESPONSE:
all_tools.append(response_format_tool)
logger.info("✅ Structured response mode enabled - added ResponseFormat tool")
else:
logger.info("❌ Structured response mode disabled - ResponseFormat tool not added and using [FINAL ANSWER] marker in prompt config")

# Add RAG tools if initially loaded
if self.rag_tools:
all_tools.extend(self.rag_tools)
Expand All @@ -303,14 +325,36 @@ def _build_graph(self) -> None:

logger.info("🎨 Creating deep agent with system prompt")

deep_agent = async_create_deep_agent(
tools=all_tools, # A2A tools + RAG tools + reflect_on_output for validation
instructions=system_prompt, # System prompt enforces TODO-based execution workflow
subagents=subagents, # CustomSubAgents for proper task() delegation
model=base_model,
# response_format=PlatformEngineerResponse # Removed: Causes embedded JSON in streaming output
# Sub-agent DataParts (like Jarvis forms) still work - they're forwarded independently
)
# Response format instruction tells the LLM how to use the ResponseFormat tool
# Only used when USE_STRUCTURED_RESPONSE is enabled
if USE_STRUCTURED_RESPONSE:
response_format_instruction = (
"CRITICAL: You MUST call the ResponseFormat tool for EVERY response - including greetings, simple questions, and informational queries. "
"This is NON-NEGOTIABLE. Never output the final answer as plain text. "
"When you are ready to provide ANY answer (simple or complex), call ResponseFormat directly. "
"Do NOT output the answer as text before calling the tool - put it ONLY in the tool's 'content' field. "
"Normal streaming (tool calls, planning, intermediate outputs) is fine, but the FINAL answer must go through ResponseFormat. "
"Place the final user-facing answer (clean markdown, no thinking/preamble) in the 'content' field. "
"Set 'is_task_complete' to true when done, false otherwise. "
"Set 'require_user_input' to true only when you need more information from the user."
)
else:
# Unstructured mode: rely on [FINAL ANSWER] marker in prompt config
response_format_instruction = None

# Build deep agent kwargs - only include response_format when structured mode is enabled
deep_agent_kwargs = {
"tools": all_tools, # A2A tools + RAG tools + reflect_on_output for validation
"instructions": system_prompt, # System prompt enforces TODO-based execution workflow
"subagents": subagents, # CustomSubAgents for proper task() delegation
"model": base_model,
}

# Add response_format only when structured response mode is enabled
if USE_STRUCTURED_RESPONSE and response_format_instruction:
deep_agent_kwargs["response_format"] = (response_format_instruction, PlatformEngineerResponse)

deep_agent = async_create_deep_agent(**deep_agent_kwargs)

# Check if LANGGRAPH_DEV is defined in the environment
if os.getenv("LANGGRAPH_DEV"):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,25 @@
import logging
logger = logging.getLogger(__name__)

# [FINAL ANSWER] marker section — included in the system prompt only when
# structured response mode is disabled (generate_system_prompt called with
# use_structured_response=False). It instructs the LLM to emit a literal
# "[FINAL ANSWER]" line separating hidden reasoning/tool chatter from the
# user-facing answer, so downstream code can split on that marker.
# NOTE: this string is runtime prompt text — do not edit it casually.
FINAL_ANSWER_MARKER_SECTION = """
**OUTPUT FORMAT - MANDATORY [FINAL ANSWER] Marker:**
- EVERY response to the user MUST start with `[FINAL ANSWER]` on its own line
- This marker separates your internal thinking/planning from the user-facing answer
- Content BEFORE `[FINAL ANSWER]` = hidden (thinking, tool calls, search messages)
- Content AFTER `[FINAL ANSWER]` = shown to user (the actual answer)
- Example format:
```
I'll search the knowledge base...
🔍 search...
[FINAL ANSWER]
## Your Actual Answer Here
The information you requested is...
```
- NEVER include "I'll search...", "Let me...", "🔍 search..." AFTER the marker
"""

# ============================================================================
# Load YAML config
def load_prompt_config(path="prompt_config.yaml"):
Expand Down Expand Up @@ -74,12 +93,14 @@ def load_prompt_config(path="prompt_config.yaml"):
# This allows CustomSubAgents to be created with proper react agent graphs

# Generate system prompt dynamically based on tools and their tasks
def generate_system_prompt(agents: Dict[str, Any], rag_config: Optional[Dict[str, Any]] = None):
def generate_system_prompt(agents: Dict[str, Any], rag_config: Optional[Dict[str, Any]] = None, use_structured_response: bool = False):
"""
Generate system prompt with static RAG tools.

Args:
agents: Dictionary of available agents with their descriptions
rag_config: Optional RAG configuration
use_structured_response: If True, exclude [FINAL ANSWER] marker section (use tool-based structured response instead)

Returns:
System prompt string
Expand Down Expand Up @@ -142,10 +163,20 @@ def generate_system_prompt(agents: Dict[str, Any], rag_config: Optional[Dict[str
logger.debug(f"System Prompt Template: {yaml_template}")
logger.debug(f"Tool Instructions: {tool_instructions_str}")

# Conditionally include [FINAL ANSWER] marker section based on structured response mode
# When structured response is enabled, we use the ResponseFormat tool instead of markers
final_answer_instructions = "" if use_structured_response else FINAL_ANSWER_MARKER_SECTION

if use_structured_response:
logger.info("Structured response mode enabled - excluding [FINAL ANSWER] marker section from prompt")
else:
logger.info("Unstructured response mode - including [FINAL ANSWER] marker section in prompt")

if yaml_template:
return yaml_template.format(
rag_instructions=rag_instructions,
tool_instructions=tool_instructions_str
tool_instructions=tool_instructions_str,
final_answer_instructions=final_answer_instructions
)
else:
return f"""
Expand All @@ -155,6 +186,7 @@ def generate_system_prompt(agents: Dict[str, Any], rag_config: Optional[Dict[str
- Only respond to requests related to the integrated tools. Always call the appropriate agent or tool.
- When responding, use markdown format. Make sure all URLs are presented as clickable links.

{final_answer_instructions}

{tool_instructions_str}
"""
Expand Down
Loading