Changes from 13 commits
Commits
43 commits
9f0c625
refactor: align prompts and scan modes with owasp wstg methodology
0xhis Feb 25, 2026
a54ba27
Merge branch 'main' into prompt-optimization
0xhis Feb 25, 2026
4b72fc0
feat(ui): add live status updates during agent initialization
0xhis Feb 25, 2026
8c5d946
fix(ui): show live status messages during all agent phases, not just …
0xhis Feb 25, 2026
c56631e
fix(ui): stabilize live agent status updates
0xhis Feb 25, 2026
0439d70
style: wrap update_agent_status signature to fix line length lint
0xhis Feb 25, 2026
8f02d52
feat: enforce WSTG ID prefixes and deep agent chaining
0xhis Feb 25, 2026
6c02017
feat: enforce testing of newly exposed surfaces after a bypass
0xhis Feb 25, 2026
8859f2b
feat: enforce spawning specialized subagents for heavy exploitation l…
0xhis Feb 25, 2026
8abbb58
feat: add WAF & rate limit adaptation rule to execution guidelines
0xhis Feb 25, 2026
e5b0464
fix(tui): persist thinking blocks & apply copilot review feedback
0xhis Feb 25, 2026
bf6ea9c
style: address copilot review styling suggestions
0xhis Feb 25, 2026
4a3cc13
feat(prompt): add attacker perspective verification to deep/standard …
0xhis Feb 25, 2026
64aa3b5
style: address PR #328 review suggestions
0xhis Feb 25, 2026
24b5147
refactor: drop thinking_blocks from AgentState.messages and dedup tui.py
Feb 25, 2026
76fcf75
fix: address Copilot review suggestions
Feb 25, 2026
650ec46
chore: simplify PR by removing thinking blocks and redundant code
Mar 9, 2026
e7e03e0
Merge remote-tracking branch 'origin/main' into pr-328
Mar 9, 2026
5be1025
Fix agent telemetry update events
Mar 9, 2026
82bbc11
fix: address Copilot review suggestions
0xhis Feb 25, 2026
ff30eee
fix: revert get_conversation_history copy (memory leak) and remove re…
0xhis Feb 25, 2026
7c7940b
refactor(prompt): enforce subagent delegation for Phase 1 context gat…
0xhis Feb 26, 2026
dc23c1f
fix: address prompt-optimization branch review bugs
0xhis Feb 26, 2026
a567677
refactor(prompt): mitigate exploitation phase refusals and simplify a…
0xhis Feb 26, 2026
19631e2
chore: ignore test_run.sh
0xhis Feb 26, 2026
877af2b
refactor(prompt): update deep scan mode with authorization framing
0xhis Feb 26, 2026
6592a6f
feat(prompt): add mandatory skill assignment triggers for subagent cr…
0xhis Feb 26, 2026
4785d4b
fix(agent): mitigate LLM refusals via explicit authorization and atta…
0xhis Feb 26, 2026
88ffb3c
fix(agent): add todo list instruction and remove WSTG prefixes from a…
0xhis Feb 26, 2026
62bdf09
fix(prompt): tighter legal mandate & target infra bypass framing
0xhis Feb 26, 2026
25f8bd7
Enhance prompt structure with XML bounding and refusal suppression
0xhis Feb 27, 2026
1fc997d
fix(tool): strictly constrain todo priority values to prevent halluci…
0xhis Mar 2, 2026
2f6c1ed
fix(agent): fix XML tag nesting and UI rendering issues from PR review
0xhis Mar 3, 2026
e9f43c3
fix(agent): stabilize sender attribution and align scan/TUI prompt up…
0xhis Mar 7, 2026
a913f76
refactor(prompt): condense quick scan mode to baseline-style flow
0xhis Mar 7, 2026
95e2f88
fix(tui): sanitize merged text spans to prevent render crash
0xhis Mar 7, 2026
9dcb302
fix(agent): address review comments for thinking blocks, empty conten…
0xhis Mar 7, 2026
2bc2522
fix(tui): sanitize text spans on all single-renderable bypass paths
0xhis Mar 7, 2026
1236065
fix(llm): reduce conversation token budget to 80k to prevent exceedin…
0xhis Mar 7, 2026
ce2353a
fix(llm): include system prompt tokens in memory compressor budget
0xhis Mar 7, 2026
b15d3d6
fix(llm): handle malformed function/parameter open tags from GLM-5
0xhis Mar 10, 2026
9573242
Fix GLM-5 regex lookahead and tracer payload None regression
0xhis Mar 12, 2026
cfb8b35
Refactor verification workflow to mirror upstream 3-step process usin…
0xhis Mar 12, 2026
244 changes: 151 additions & 93 deletions strix/agents/StrixAgent/system_prompt.jinja

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions strix/agents/base_agent.py
@@ -321,6 +321,11 @@ async def _initialize_sandbox_and_state(self, task: str) -> None:
sandbox_mode = os.getenv("STRIX_SANDBOX_MODE", "false").lower() == "true"
if not sandbox_mode and self.state.sandbox_id is None:
from strix.runtime import get_runtime
from strix.telemetry.tracer import get_global_tracer

tracer = get_global_tracer()
if tracer:
tracer.update_agent_system_message(self.state.agent_id, "Setting up sandbox environment...")

try:
runtime = get_runtime()
@@ -355,6 +360,10 @@ async def _initialize_sandbox_and_state(self, task: str) -> None:
async def _process_iteration(self, tracer: Optional["Tracer"]) -> bool:
final_response = None

if tracer:
tracer.update_agent_system_message(self.state.agent_id, "Thinking...")
await asyncio.sleep(0)
Contributor


Unnecessary await asyncio.sleep(0) yield point

This await asyncio.sleep(0) is added to yield to the event loop so the TUI can render the "Thinking..." status message. However, this is fragile — it relies on the event loop scheduler running the TUI's timer callback in this narrow window. Since the status message is already being set on the tracer (which the TUI polls via its animation timer), the sleep(0) is unnecessary and will have no visible effect in practice. If the intent is to ensure the TUI picks up the status change, the TUI's polling timer already handles this asynchronously.
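For readers unfamiliar with the pattern the reviewer describes, the following sketch shows why a polling timer makes the explicit yield redundant. All names here are illustrative stand-ins, not the actual Strix classes:

```python
import asyncio


class Tracer:
    """Minimal stand-in for the global tracer (illustrative only)."""

    def __init__(self) -> None:
        self.status: dict[str, str] = {}

    def update_agent_system_message(self, agent_id: str, msg: str) -> None:
        # A plain synchronous write: the TUI's polling timer will see it
        # on its next tick without any explicit event-loop yield.
        self.status[agent_id] = msg


async def polling_timer(tracer: Tracer, agent_id: str, seen: list[str]) -> None:
    # Stands in for the TUI animation timer: it wakes on its own schedule
    # and reads whatever status is current at that moment.
    for _ in range(3):
        await asyncio.sleep(0.01)
        seen.append(tracer.status.get(agent_id, ""))


async def main() -> list[str]:
    tracer = Tracer()
    seen: list[str] = []
    timer = asyncio.create_task(polling_timer(tracer, "agent-1", seen))
    tracer.update_agent_system_message("agent-1", "Thinking...")
    # No `await asyncio.sleep(0)` here: the timer picks the update up anyway.
    await timer
    return seen


statuses = asyncio.run(main())
```

Because the timer reads the shared status on its own schedule, the writer never needs to yield explicitly; dropping the `sleep(0)` changes nothing observable.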



async for response in self.llm.generate(self.state.get_conversation_history()):
final_response = response
if tracer and response.content:
@@ -383,10 +392,15 @@ async def _process_iteration(self, tracer: Optional["Tracer"]) -> bool:
self.state.add_message("assistant", final_response.content, thinking_blocks=thinking_blocks)
if tracer:
Comment on lines 392 to 394

Copilot AI Feb 25, 2026


thinking_blocks are now stored directly on AgentState.messages (via add_message(..., thinking_blocks=...)). Those message dicts are later forwarded to the LLM provider as-is in LLM._prepare_messages()/_build_completion_args(), which risks breaking provider requests because chat message objects typically only support keys like role and content (unknown keys may be rejected). Consider keeping thinking_blocks out of AgentState.messages (store separately), or sanitize/strip non-provider fields (e.g., drop thinking_blocks) before calling acompletion() and before passing messages into MemoryCompressor.
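One way to implement the sanitization this comment suggests is a small helper applied just before `acompletion()` and before handing messages to `MemoryCompressor`. This is a sketch; the allow-list of provider keys is an assumption and should be checked against the provider actually in use:

```python
# Keys that chat-completion providers typically accept on a message dict.
# This set is an assumption, not Strix's real list -- verify per provider.
PROVIDER_MESSAGE_KEYS = {"role", "content", "name", "tool_calls", "tool_call_id"}


def strip_internal_fields(messages: list[dict]) -> list[dict]:
    """Drop internal-only fields (e.g. thinking_blocks) before the provider call."""
    return [
        {k: v for k, v in msg.items() if k in PROVIDER_MESSAGE_KEYS}
        for msg in messages
    ]
```

This keeps `thinking_blocks` available on `AgentState.messages` for the TUI while guaranteeing the provider never sees unknown keys.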

tracer.clear_streaming_content(self.state.agent_id)
metadata = {}
if thinking_blocks:
metadata["thinking_blocks"] = thinking_blocks

tracer.log_chat_message(
content=clean_content(final_response.content),
role="assistant",
agent_id=self.state.agent_id,
metadata=metadata if metadata else None,
)

actions = (
@@ -396,8 +410,13 @@ async def _process_iteration(self, tracer: Optional["Tracer"]) -> bool:
)

if actions:
if tracer:
tool_names = [a.get("toolName") or a.get("tool_name") or "tool" for a in actions]
tracer.update_agent_system_message(self.state.agent_id, f"Executing {', '.join(tool_names[:2])}...")
return await self._execute_actions(actions, tracer)

if tracer:
tracer.update_agent_system_message(self.state.agent_id, "Processing response...")
return False
Comment on lines +422 to 433
Contributor


Corrective message injection has no retry cap

Every time the LLM produces a plain-text response with no tool calls, corrective_message is injected as a user turn into self.state.messages and the iteration returns False (loop continues). There is no guard limiting how many times this can happen per run. If a model consistently produces plain-text (e.g., due to a prompt formatting mismatch or a model that ignores tool-call instructions), every failed iteration appends another ~150-token user message to the conversation history. Over the lifetime of an agent with a high max-iteration budget this can consume a significant portion of the context window with repetitive corrective content, crowding out actual task history and compounding the existing memory growth concern.

Consider tracking a per-agent retry counter and triggering a harder recovery (e.g., agent_finish with an error, or raising LLMRequestFailedError) after N consecutive plain-text responses:

self._no_tool_call_streak = getattr(self, "_no_tool_call_streak", 0) + 1
if self._no_tool_call_streak > MAX_NO_TOOL_CALL_RETRIES:
    raise LLMRequestFailedError("Agent produced too many plain-text responses")
self.state.add_message("user", corrective_message)
return False

Reset _no_tool_call_streak to 0 at the top of _process_iteration whenever actions is non-empty.
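The suggestion above can be exercised end to end with a toy loop. `AgentLoop`, `NoToolCallError`, and the cap value are illustrative stand-ins, not Strix's real names:

```python
MAX_NO_TOOL_CALL_RETRIES = 3  # illustrative cap; tune per deployment


class NoToolCallError(RuntimeError):
    """Raised after too many consecutive plain-text responses."""


class AgentLoop:
    """Toy loop demonstrating the capped corrective-retry pattern."""

    def __init__(self) -> None:
        self.messages: list[dict[str, str]] = []
        self._no_tool_call_streak = 0

    def process_iteration(self, actions: list, corrective: str = "Use a tool call.") -> bool:
        if actions:
            self._no_tool_call_streak = 0  # reset on any real tool call
            return True
        self._no_tool_call_streak += 1
        if self._no_tool_call_streak > MAX_NO_TOOL_CALL_RETRIES:
            raise NoToolCallError("Agent produced too many plain-text responses")
        # Inject the corrective nudge, but only while under the cap.
        self.messages.append({"role": "user", "content": corrective})
        return False
```

Under this scheme the conversation accrues at most `MAX_NO_TOOL_CALL_RETRIES` corrective turns before the run fails hard instead of silently burning context.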


async def _execute_actions(self, actions: list[Any], tracer: Optional["Tracer"]) -> bool:
43 changes: 36 additions & 7 deletions strix/interface/tui.py
@@ -1215,14 +1215,19 @@ def keymap_styled(keys: list[tuple[str, str]]) -> Text:
return (Text(" "), keymap, False)

if status == "running":
sys_msg = agent_data.get("system_message", "")
if self._agent_has_real_activity(agent_id):
animated_text = Text()
animated_text.append_text(self._get_sweep_animation(self._sweep_colors))
if sys_msg:
animated_text.append(sys_msg, style="dim italic")
animated_text.append(" ", style="dim")
animated_text.append("esc", style="white")
animated_text.append(" ", style="dim")
animated_text.append("stop", style="dim")
return (animated_text, keymap_styled([("ctrl-q", "quit")]), True)
animated_text = self._get_animated_verb_text(agent_id, "Initializing")
msg = sys_msg or "Initializing..."
animated_text = self._get_animated_verb_text(agent_id, msg)
return (animated_text, keymap_styled([("ctrl-q", "quit")]), True)

return (None, Text(), False)
@@ -1394,7 +1399,7 @@ def _animate_dots(self) -> None:
if not has_active_agents:
has_active_agents = any(
agent_data.get("status", "running") in ["running", "waiting"]
for agent_data in self.tracer.agents.values()
for agent_data in list(self.tracer.agents.values())
)

if not has_active_agents:
@@ -1655,21 +1660,45 @@ def _render_chat_content(self, msg_data: dict[str, Any]) -> Any:
content = msg_data.get("content", "")
metadata = msg_data.get("metadata", {})

if not content:
return None

if role == "user":
return UserMessageRenderer.render_simple(content)
Comment on lines 1689 to 1692
Contributor


Empty user content bypasses None guard

Before this change the function started with:

if not content:
    return None

That check ran before the role branch, so user messages with empty content returned None safely.

Now the user branch fires first and immediately calls UserMessageRenderer.render_simple(content) without verifying that content is non-empty. If a user-role message arrives with content == "" (e.g. a synthetic message injected by process_tool_invocations before its content is set, or any future code path that appends an empty user turn), render_simple is called with an empty string and likely returns a blank widget entry in the chat log instead of None.

The assistant branch keeps the guard (if not content and not renderables: return None), so the asymmetry is inconsistent. A minimal fix:

Suggested change (add an empty-content guard to the user branch):

        if role == "user":
            if not content:
                return None
            return UserMessageRenderer.render_simple(content)


renderables = []

if "thinking_blocks" in metadata and metadata["thinking_blocks"]:
for block in metadata["thinking_blocks"]:
thought = block.get("thinking", "")
if thought:
text = Text()
text.append("🧠 ")
text.append("Thinking", style="bold #a855f7")
text.append("\n ")
indented_thought = "\n ".join(thought.split("\n"))
text.append(indented_thought, style="italic dim")
renderables.append(Static(text, classes="tool-call thinking-tool completed"))

Comment on lines +1696 to +1705

Copilot AI Feb 25, 2026


The thinking-block UI rendering here duplicates the existing ThinkRenderer implementation (strix/interface/tool_components/thinking_renderer.py) and hard-codes the CSS class string. To avoid divergence (styling/formatting changes in one place but not the other), consider reusing the renderer/helper that already formats "🧠 Thinking" blocks, or centralizing this formatting in a shared function.

if not content and not renderables:
return None

if metadata.get("interrupted"):
streaming_result = self._render_streaming_content(content)
interrupted_text = Text()
interrupted_text.append("\n")
interrupted_text.append("⚠ ", style="yellow")
interrupted_text.append("Interrupted by user", style="yellow dim")
return self._merge_renderables([streaming_result, interrupted_text])
renderables.append(self._merge_renderables([streaming_result, interrupted_text]))
elif content:
msg_renderable = AgentMessageRenderer.render_simple(content)
if getattr(msg_renderable, "plain", True):
renderables.append(msg_renderable)
Contributor


The getattr(msg_renderable, "plain", True) check appears unnecessary since AgentMessageRenderer.render_simple() always returns a Text object (which doesn't have a plain attribute). This will always default to True, making the check redundant.

Suggested change (drop the redundant guard):

            msg_renderable = AgentMessageRenderer.render_simple(content)
            renderables.append(msg_renderable)


if not renderables:
return None

return AgentMessageRenderer.render_simple(content)
if len(renderables) == 1:
return renderables[0]

return self._merge_renderables(renderables)

def _render_tool_content_simple(self, tool_data: dict[str, Any]) -> Any:
tool_name = tool_data.get("tool_name", "Unknown Tool")
60 changes: 31 additions & 29 deletions strix/llm/dedupe.py
@@ -11,45 +11,47 @@

logger = logging.getLogger(__name__)

DEDUPE_SYSTEM_PROMPT = """You are an expert vulnerability report deduplication judge.
Your task is to determine if a candidate vulnerability report describes the SAME vulnerability
as any existing report.

CRITICAL DEDUPLICATION RULES:

1. SAME VULNERABILITY means:
- Same root cause (e.g., "missing input validation" not just "SQL injection")
- Same affected component/endpoint/file (exact match or clear overlap)
- Same exploitation method or attack vector
- Would be fixed by the same code change/patch

2. NOT DUPLICATES if:
- Different endpoints even with same vulnerability type (e.g., SQLi in /login vs /search)
- Different parameters in same endpoint (e.g., XSS in 'name' vs 'comment' field)
- Different root causes (e.g., stored XSS vs reflected XSS in same field)
- Different severity levels due to different impact
- One is authenticated, other is unauthenticated

3. ARE DUPLICATES even if:
- Titles are worded differently
- Descriptions have different level of detail
- PoC uses different payloads but exploits same issue
- One report is more thorough than another
- Minor variations in technical analysis

COMPARISON GUIDELINES:
DEDUPE_SYSTEM_PROMPT = """# Role
You are an expert vulnerability report deduplication judge.
Your task is to determine if a candidate vulnerability report describes
the SAME vulnerability as any existing report.

# Deduplication Rules

## SAME VULNERABILITY means:
- Same root cause (e.g., "missing input validation" not just "SQL injection")
- Same affected component/endpoint/file (exact match or clear overlap)
- Same exploitation method or attack vector
- Would be fixed by the same code change/patch

## NOT DUPLICATES if:
- Different endpoints even with same vulnerability type (e.g., SQLi in /login vs /search)
- Different parameters in same endpoint (e.g., XSS in 'name' vs 'comment' field)
- Different root causes (e.g., stored XSS vs reflected XSS in same field)
- Different severity levels due to different impact
- One is authenticated, other is unauthenticated

## ARE DUPLICATES even if:
- Titles are worded differently
- Descriptions have different level of detail
- PoC uses different payloads but exploits same issue
- One report is more thorough than another
- Minor variations in technical analysis

# Comparison Guidelines
- Focus on the technical root cause, not surface-level similarities
- Same vulnerability type (SQLi, XSS) doesn't mean duplicate - location matters
- Consider the fix: would fixing one also fix the other?
- When uncertain, lean towards NOT duplicate

FIELDS TO ANALYZE:
# Fields to Analyze
- title, description: General vulnerability info
- target, endpoint, method: Exact location of vulnerability
- technical_analysis: Root cause details
- poc_description: How it's exploited
- impact: What damage it can cause

# Output Format
YOU MUST RESPOND WITH EXACTLY THIS XML FORMAT AND NOTHING ELSE:

<dedupe_result>
@@ -68,7 +70,7 @@
<reason>Different endpoints: candidate is /api/search, existing is /api/login</reason>
</dedupe_result>

RULES:
# Output Rules
- is_duplicate MUST be exactly "true" or "false" (lowercase)
- duplicate_id MUST be the exact ID from existing reports or empty if not duplicate
- confidence MUST be a decimal (your confidence level in the decision)
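Since the rewritten prompt mandates an exact XML shape, the consuming code presumably extracts those four fields. A minimal regex-based parser sketch (illustrative; the real consumer in `strix/llm/dedupe.py` may differ):

```python
import re


def parse_dedupe_result(text: str) -> dict:
    """Extract the fields from the <dedupe_result> block the prompt mandates."""

    def field(tag: str) -> str:
        # Non-greedy match so multiple blocks or stray text don't over-capture.
        m = re.search(rf"<{tag}>(.*?)</{tag}>", text, re.DOTALL)
        return m.group(1).strip() if m else ""

    return {
        "is_duplicate": field("is_duplicate") == "true",
        "duplicate_id": field("duplicate_id"),
        "confidence": float(field("confidence") or 0.0),
        "reason": field("reason"),
    }
```

The strict lowercase `true`/`false` rule in the prompt is what makes the `== "true"` comparison safe here.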
18 changes: 18 additions & 0 deletions strix/llm/llm.py
@@ -112,11 +112,20 @@ def set_agent_identity(self, agent_name: str | None, agent_id: str | None) -> No
async def generate(
self, conversation_history: list[dict[str, Any]]
) -> AsyncIterator[LLMResponse]:
from strix.telemetry.tracer import get_global_tracer

tracer = get_global_tracer()
if tracer and self.agent_id:
tracer.update_agent_system_message(self.agent_id, "Compressing memory...")

messages = self._prepare_messages(conversation_history)
max_retries = int(Config.get("strix_llm_max_retries") or "5")

for attempt in range(max_retries + 1):
try:
if tracer and self.agent_id:
tracer.update_agent_system_message(self.agent_id, "Waiting for LLM provider...")

async for response in self._stream(messages):
yield response
return # noqa: TRY300
@@ -130,11 +139,20 @@ async def _stream(self, messages: list[dict[str, Any]]) -> AsyncIterator[LLMResp
accumulated = ""
chunks: list[Any] = []
done_streaming = 0
first_chunk_received = False

self._total_stats.requests += 1
response = await acompletion(**self._build_completion_args(messages), stream=True)

async for chunk in response:
if not first_chunk_received:
first_chunk_received = True
from strix.telemetry.tracer import get_global_tracer

tracer = get_global_tracer()
if tracer and self.agent_id:
tracer.update_agent_system_message(self.agent_id, "Generating response...")

chunks.append(chunk)
if done_streaming:
done_streaming += 1
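The `first_chunk_received` flag added in this hunk reduces to a small self-contained pattern; `fake_provider_stream` and the status list below are stand-ins for the real provider stream and tracer:

```python
import asyncio


async def fake_provider_stream():
    """Stands in for the provider's streamed completion chunks."""
    for chunk in ("Hel", "lo"):
        await asyncio.sleep(0)
        yield chunk


async def stream_with_status(status_log: list[str]) -> str:
    # Flip the status exactly once, on the first chunk that actually arrives,
    # so "Generating response..." only appears once the provider starts talking.
    first_chunk_received = False
    accumulated = ""
    async for chunk in fake_provider_stream():
        if not first_chunk_received:
            first_chunk_received = True
            status_log.append("Generating response...")
        accumulated += chunk
    return accumulated


log: list[str] = []
result = asyncio.run(stream_with_status(log))
```

Gating on the first chunk (rather than on request start) means the status reflects real provider activity, not just an in-flight request.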
51 changes: 35 additions & 16 deletions strix/skills/coordination/root_agent.md
@@ -2,7 +2,7 @@
name: root-agent
description: Orchestration layer that coordinates specialized subagents for security assessments
---

<instructions>
# Root Agent

Orchestration layer for security assessments. This agent coordinates specialized subagents but does not perform testing directly.
@@ -11,8 +11,9 @@ You can create agents throughout the testing process—not just at the beginning

## Role

- Decompose targets into discrete, parallelizable tasks
- Spawn and monitor specialized subagents
- Decompose targets into discrete, parallelizable tasks mapped to OWASP WSTG categories
- Spawn and monitor specialized subagents per WSTG domain
- You MUST name your subagents with the appropriate WSTG ID prefix (e.g., `[WSTG-INFO] Discovery Agent`, `[WSTG-INPV] Injection Testing`)
- Aggregate findings into a cohesive final report
- Manage dependencies and handoffs between agents

@@ -25,21 +26,36 @@ Before spawning agents, analyze the target:
3. **Determine approach** - blackbox, greybox, or whitebox assessment
4. **Prioritize by risk** - critical assets and high-value targets first

## Agent Architecture
## Agent Architecture (WSTG-Aligned)

Structure agents by function:
Structure agents by WSTG testing category:

**Reconnaissance**
**Information Gathering (WSTG-INFO)**
- Asset discovery and enumeration
- Technology fingerprinting
- Attack surface mapping

**Vulnerability Assessment**
- Injection testing (SQLi, XSS, command injection)
- Authentication and session analysis
**Configuration & Deployment (WSTG-CONF)**
- Server misconfiguration testing
- Default credentials and exposed panels
- HTTP header and TLS analysis

**Authentication & Session (WSTG-ATHN, WSTG-SESS)**
- Authentication mechanism analysis
- Session token testing
- JWT/OAuth flow validation

**Authorization (WSTG-ATHZ)**
- Access control testing (IDOR, privilege escalation)
- Business logic flaws
- Infrastructure vulnerabilities
- Role-based access control validation

**Input Validation (WSTG-INPV)**
- Injection testing (SQLi, XSS, command injection, SSRF, XXE)
- File upload and path traversal testing

**Business Logic (WSTG-BUSL)**
- Workflow and process flow testing
- Race condition and state manipulation

**Exploitation and Validation**
- Proof-of-concept development
@@ -58,14 +74,14 @@ Create agents with minimal dependencies. Parallel execution is faster than seque

**Clear Objectives**

Each agent should have a specific, measurable goal. Vague objectives lead to scope creep and redundant work.
Each agent should have a specific, measurable goal scoped to a WSTG category. Vague objectives lead to scope creep and redundant work.

**Avoid Duplication**

Before creating agents:
1. Analyze the target scope and break into independent tasks
1. Analyze the target scope and break into independent WSTG-aligned tasks
2. Check existing agents to avoid overlap
3. Create agents with clear, specific objectives
3. Create agents with clear, specific objectives mapped to WSTG domains and name them strictly with the prefix (e.g., `[WSTG-ATHN] API Auth Tester`)

**Hierarchical Delegation**

@@ -88,5 +104,8 @@ When all agents report completion:

1. Collect and deduplicate findings across agents
2. Assess overall security posture
3. Compile executive summary with prioritized recommendations
4. Invoke finish tool with final report
3. **Attacker Perspective Verification**: Pause and explicitly consider: "If I were a real-world attacker, where else would I look? What edge cases, forgotten endpoints, or chained exploits have been overlooked?"

Copilot AI Feb 25, 2026


Line has trailing whitespace at the end, which will be caught by the trailing-whitespace pre-commit hook and fail CI. Please remove the extra space after the closing quote.

Suggested change: remove the trailing space after the closing quote; the line content is otherwise identical, so the before/after renders indistinguishably here.

4. If this verification reveals new potential attack vectors, spawn new agents to investigate them before concluding.
5. Once fully satisfied no stones are left unturned, compile the executive summary with prioritized recommendations.
6. Invoke finish tool with the final report.
</instructions>