M7 Phase 3: Wire ShellExecutor into agent for command filtering (#41) (#49)

bug-ops · web-flow · commit 8fcd0b485700 · 2026-02-07T02:48:39.000+01:00
* Wire ShellExecutor into agent for command filtering (#41)

  BREAKING CHANGE: Agent::new() now requires tool_executor parameter

  Replace inline bash execution with ShellExecutor to fix SEC-001 CRITICAL vulnerability. All shell commands now routed through ToolExecutor trait with DEFAULT_BLOCKED patterns.

  Changes:
  - Add ToolExecutor generic parameter to Agent<P, C, T>
  - Update Agent::new() signature: add tool_executor as 4th parameter
  - Replace extract_and_execute_bash() with self.tool_executor.execute()
  - Handle ToolError::Blocked with generic security message
  - Remove 66 lines duplicate code (extract_bash_blocks, execute_bash)
  - Remove hardcoded SHELL_TIMEOUT constant (uses config value)
  - Update main.rs: create ShellExecutor from config

  Security improvements:
  - SEC-001 CRITICAL vulnerability resolved
  - 12 DEFAULT_BLOCKED patterns active
  - Error message does not leak blocked patterns
  - Audit trail via tracing::warn for blocked commands

  Testing:
  - 125/125 tests pass (4 new integration tests for blocked commands)
  - Zero clippy warnings
  - Comprehensive CHANGELOG.md update

  Migration:
    # Before
    Agent::new(provider, channel, &skills_prompt)

    # After
    use zeph_tools::shell::ShellExecutor;
    let executor = ShellExecutor::new(&config.tools.shell);
    Agent::new(provider, channel, &skills_prompt, executor)

  Resolves #41 (M7 Phase 3: Agent integration)
  Fixes SEC-001 CRITICAL security vulnerability
  Part of #34 (M7 Epic: Tool Execution Framework)

* Fix unused variable warning in performance test

  Change executor to _executor in agent_respects_configured_timeout test to suppress unused variable warning that causes CI failure with -D warnings.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,13 +20,41 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - 22 unit tests with 99.25% line coverage, zero clippy warnings
 - ADR-014: zeph-tools crate design rationale and architecture decisions
 
+#### M7 Phase 3 (Issue #41): Agent integration with ToolExecutor trait
+- Agent now uses `ShellExecutor` for all bash command execution with safety checks
+- Four integration tests for blocked command behavior and error handling
+- Security improvements: blocked commands no longer leak pattern details to users
+
+### Security
+
+- **CRITICAL fix for SEC-001**: Shell commands are now filtered through `ShellExecutor`, which applies the 12 `DEFAULT_BLOCKED` patterns (rm -rf /, sudo, mkfs, dd if=, curl, wget, nc, shutdown, reboot, halt, poweroff, init 0). Resolves the command injection vulnerability.
+
+### Fixed
+
+- Shell command timeout now respects `config.tools.shell.timeout` (was hardcoded 30s)
+- Removed duplicate bash parsing logic from agent.rs (now centralized in zeph-tools)
+- Blocked-pattern leakage in error messages: blocked commands now show a generic security-policy message instead of revealing the exact pattern that matched
+
 ### Changed
 
 **BREAKING CHANGES** (pre-1.0.0):
+- `Agent::new()` signature changed: now requires `tool_executor: T` as 4th parameter where `T: ToolExecutor`
+- `Agent` struct now generic over three types: `Agent<P, C, T>` (provider, channel, tool_executor)
 - Workspace `Cargo.toml` now defines `version = "0.2.0"` in `[workspace.package]` section
 - All crate manifests use `version.workspace = true` instead of explicit versions
 - Inter-crate dependencies now reference workspace definitions (e.g., `zeph-llm.workspace = true`)
 
+**Migration:**
+```rust
+// Before:
+let agent = Agent::new(provider, channel, &skills_prompt);
+
+// After:
+use zeph_tools::shell::ShellExecutor;
+let executor = ShellExecutor::new(&config.tools.shell);
+let agent = Agent::new(provider, channel, &skills_prompt, executor);
+```
+
 ## [0.2.0] - 2026-02-06
 
 ### Added
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -56,6 +56,7 @@ zeph-core.workspace = true
 zeph-llm.workspace = true
 zeph-memory.workspace = true
 zeph-skills.workspace = true
+zeph-tools.workspace = true
 
 [dev-dependencies]
 tempfile.workspace = true
diff --git a/crates/zeph-core/src/agent.rs b/crates/zeph-core/src/agent.rs
@@ -1,35 +1,35 @@
-use std::time::Duration;
-
-use tokio::process::Command;
 use tokio::sync::watch;
 use tokio_stream::StreamExt;
 use zeph_llm::provider::{LlmProvider, Message, Role};
 use zeph_memory::sqlite::{SqliteStore, role_str};
+use zeph_tools::executor::{ToolError, ToolExecutor};
 
 use crate::channel::Channel;
 use crate::context::build_system_prompt;
 
+// TODO(M14): Make configurable via AgentConfig (currently hardcoded for MVP)
 const MAX_SHELL_ITERATIONS: usize = 3;
-const SHELL_TIMEOUT: Duration = Duration::from_secs(30);
 
-pub struct Agent<P: LlmProvider, C: Channel> {
+pub struct Agent<P: LlmProvider, C: Channel, T: ToolExecutor> {
     provider: P,
     channel: C,
+    tool_executor: T,
     messages: Vec<Message>,
     memory: Option<SqliteStore>,
     conversation_id: Option<i64>,
     history_limit: u32,
     shutdown: watch::Receiver<bool>,
 }
 
-impl<P: LlmProvider, C: Channel> Agent<P, C> {
+impl<P: LlmProvider, C: Channel, T: ToolExecutor> Agent<P, C, T> {
     #[must_use]
-    pub fn new(provider: P, channel: C, skills_prompt: &str) -> Self {
+    pub fn new(provider: P, channel: C, skills_prompt: &str, tool_executor: T) -> Self {
         let system_prompt = build_system_prompt(skills_prompt);
         let (_tx, rx) = watch::channel(false);
         Self {
             provider,
             channel,
+            tool_executor,
             messages: vec![Message {
                 role: Role::System,
                 content: system_prompt,
@@ -133,20 +133,32 @@ impl<P: LlmProvider, C: Channel> Agent<P, C> {
             });
             self.persist_message(Role::Assistant, &response).await;
 
-            let Some(output) = extract_and_execute_bash(&response).await else {
-                return Ok(());
-            };
-
-            self.channel
-                .send(&format!("[shell output]\n{output}"))
-                .await?;
+            match self.tool_executor.execute(&response).await {
+                Ok(Some(output)) => {
+                    let formatted_output = format!("[shell output]\n{output}");
+                    self.channel.send(&formatted_output).await?;
 
-            let shell_msg = format!("[shell output]\n{output}");
-            self.messages.push(Message {
-                role: Role::User,
-                content: shell_msg.clone(),
-            });
-            self.persist_message(Role::User, &shell_msg).await;
+                    self.messages.push(Message {
+                        role: Role::User,
+                        content: formatted_output.clone(),
+                    });
+                    self.persist_message(Role::User, &formatted_output).await;
+                }
+                Ok(None) => return Ok(()),
+                Err(ToolError::Blocked { command }) => {
+                    tracing::warn!("blocked command: {command}");
+                    let error_msg = "This command is blocked by security policy.".to_string();
+                    self.channel.send(&error_msg).await?;
+                    return Ok(());
+                }
+                Err(e) => {
+                    tracing::error!("tool execution error: {e:#}");
+                    self.channel
+                        .send("Tool execution failed. Please try a different approach.")
+                        .await?;
+                    return Ok(());
+                }
+            }
         }
 
         Ok(())
@@ -177,145 +189,9 @@ impl<P: LlmProvider, C: Channel> Agent<P, C> {
 }
 
 async fn shutdown_signal(rx: &mut watch::Receiver<bool>) {
-    // Wait until the value becomes true
     while !*rx.borrow_and_update() {
         if rx.changed().await.is_err() {
-            // Sender dropped without ever setting true — hang forever so select picks the other branch
             std::future::pending::<()>().await;
         }
     }
 }
-
-fn extract_bash_blocks(text: &str) -> Vec<&str> {
-    let mut blocks = Vec::new();
-    let mut rest = text;
-
-    while let Some(start) = rest.find("```bash") {
-        let code_start = start + 7;
-        let after = &rest[code_start..];
-        if let Some(end) = after.find("```") {
-            blocks.push(after[..end].trim());
-            rest = &after[end + 3..];
-        } else {
-            break;
-        }
-    }
-
-    blocks
-}
-
-async fn execute_bash(code: &str) -> anyhow::Result<String> {
-    let result = tokio::time::timeout(
-        SHELL_TIMEOUT,
-        Command::new("bash").arg("-c").arg(code).output(),
-    )
-    .await;
-
-    match result {
-        Ok(Ok(output)) => {
-            let stdout = String::from_utf8_lossy(&output.stdout);
-            let stderr = String::from_utf8_lossy(&output.stderr);
-            let mut combined = String::new();
-            if !stdout.is_empty() {
-                combined.push_str(&stdout);
-            }
-            if !stderr.is_empty() {
-                if !combined.is_empty() {
-                    combined.push('\n');
-                }
-                combined.push_str("[stderr] ");
-                combined.push_str(&stderr);
-            }
-            if combined.is_empty() {
-                combined.push_str("(no output)");
-            }
-            Ok(combined)
-        }
-        Ok(Err(e)) => Ok(format!("[error] {e}")),
-        Err(_) => Ok("[error] command timed out after 30s".to_string()),
-    }
-}
-
-async fn extract_and_execute_bash(response: &str) -> Option<String> {
-    let blocks = extract_bash_blocks(response);
-    if blocks.is_empty() {
-        return None;
-    }
-
-    let mut outputs = Vec::with_capacity(blocks.len());
-    for block in blocks {
-        match execute_bash(block).await {
-            Ok(out) => outputs.push(format!("$ {block}\n{out}")),
-            Err(e) => outputs.push(format!("$ {block}\n[error] {e}")),
-        }
-    }
-
-    Some(outputs.join("\n\n"))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn extract_single_bash_block() {
-        let text = "Here is code:\n```bash\necho hello\n```\nDone.";
-        let blocks = extract_bash_blocks(text);
-        assert_eq!(blocks, vec!["echo hello"]);
-    }
-
-    #[test]
-    fn extract_multiple_bash_blocks() {
-        let text = "```bash\nls\n```\ntext\n```bash\npwd\n```";
-        let blocks = extract_bash_blocks(text);
-        assert_eq!(blocks, vec!["ls", "pwd"]);
-    }
-
-    #[test]
-    fn ignore_non_bash_blocks() {
-        let text = "```python\nprint('hi')\n```\n```bash\necho hi\n```";
-        let blocks = extract_bash_blocks(text);
-        assert_eq!(blocks, vec!["echo hi"]);
-    }
-
-    #[test]
-    fn no_blocks() {
-        let text = "Just plain text, no code blocks.";
-        let blocks = extract_bash_blocks(text);
-        assert!(blocks.is_empty());
-    }
-
-    #[test]
-    fn unclosed_block_ignored() {
-        let text = "```bash\necho hello";
-        let blocks = extract_bash_blocks(text);
-        assert!(blocks.is_empty());
-    }
-
-    #[tokio::test]
-    async fn execute_bash_simple() {
-        let result = execute_bash("echo hello").await.unwrap();
-        assert!(result.contains("hello"));
-    }
-
-    #[tokio::test]
-    async fn execute_bash_stderr() {
-        let result = execute_bash("echo err >&2").await.unwrap();
-        assert!(result.contains("[stderr]"));
-        assert!(result.contains("err"));
-    }
-
-    #[tokio::test]
-    async fn extract_and_execute_no_blocks() {
-        let result = extract_and_execute_bash("plain text").await;
-        assert!(result.is_none());
-    }
-
-    #[tokio::test]
-    async fn extract_and_execute_with_block() {
-        let text = "Run this:\n```bash\necho test123\n```";
-        let result = extract_and_execute_bash(text).await;
-        assert!(result.is_some());
-        assert!(result.unwrap().contains("test123"));
-    }
-}
diff --git a/src/main.rs b/src/main.rs
@@ -12,6 +12,7 @@ use zeph_llm::ollama::OllamaProvider;
 use zeph_memory::sqlite::SqliteStore;
 use zeph_skills::prompt::format_skills_prompt;
 use zeph_skills::registry::SkillRegistry;
+use zeph_tools::ShellExecutor;
 
 /// Enum dispatch for runtime channel selection, following the `AnyProvider` pattern.
 #[derive(Debug)]
@@ -98,7 +99,9 @@ async fn main() -> anyhow::Result<()> {
         let _ = shutdown_tx.send(true);
     });
 
-    let mut agent = Agent::new(provider, channel, &skills_prompt)
+    let shell_executor = ShellExecutor::new(&config.tools.shell);
+
+    let mut agent = Agent::new(provider, channel, &skills_prompt, shell_executor)
         .with_memory(store, conversation_id, config.memory.history_limit)
         .with_shutdown(shutdown_rx);
     agent.load_history().await?;
diff --git a/tests/integration.rs b/tests/integration.rs
@@ -9,6 +9,7 @@ use zeph_llm::provider::{LlmProvider, Message};
 use zeph_memory::sqlite::SqliteStore;
 use zeph_skills::loader::load_skill;
 use zeph_skills::registry::SkillRegistry;
+use zeph_tools::executor::{ToolError, ToolExecutor, ToolOutput};
 
 // -- Mock LLM Provider --
 
@@ -82,6 +83,16 @@ impl Channel for MockChannel {
     }
 }
 
+// -- Mock Tool Executor --
+
+struct MockToolExecutor;
+
+impl ToolExecutor for MockToolExecutor {
+    async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
+        Ok(None)
+    }
+}
+
 // -- Config tests --
 // Combined into one test to avoid env var races between parallel test threads.
 
@@ -223,8 +234,9 @@ async fn agent_roundtrip_mock() {
     let provider = MockProvider::new("mock response");
     let outputs = Arc::new(Mutex::new(Vec::new()));
     let channel = MockChannel::new(vec!["hello"], outputs.clone());
+    let executor = MockToolExecutor;
 
-    let mut agent = Agent::new(provider, channel, "");
+    let mut agent = Agent::new(provider, channel, "", executor);
     agent.run().await.unwrap();
 
     let collected = outputs.lock().unwrap();
@@ -237,8 +249,9 @@ async fn agent_multiple_messages() {
     let provider = MockProvider::new("reply");
     let outputs = Arc::new(Mutex::new(Vec::new()));
     let channel = MockChannel::new(vec!["first", "second", "third"], outputs.clone());
+    let executor = MockToolExecutor;
 
-    let mut agent = Agent::new(provider, channel, "");
+    let mut agent = Agent::new(provider, channel, "", executor);
     agent.run().await.unwrap();
 
     let collected = outputs.lock().unwrap();
@@ -251,11 +264,12 @@ async fn agent_with_memory() {
     let provider = MockProvider::new("remembered");
     let outputs = Arc::new(Mutex::new(Vec::new()));
     let channel = MockChannel::new(vec!["save this"], outputs.clone());
+    let executor = MockToolExecutor;
 
     let store = SqliteStore::new(":memory:").await.unwrap();
     let cid = store.create_conversation().await.unwrap();
 
-    let mut agent = Agent::new(provider, channel, "").with_memory(store, cid, 50);
+    let mut agent = Agent::new(provider, channel, "", executor).with_memory(store, cid, 50);
     agent.run().await.unwrap();
 }
 
@@ -264,10 +278,11 @@ async fn agent_shutdown_via_watch() {
     let provider = MockProvider::new("should not appear");
     let outputs = Arc::new(Mutex::new(Vec::new()));
     let channel = MockChannel::new(vec![], outputs.clone());
+    let executor = MockToolExecutor;
 
     let (tx, rx) = tokio::sync::watch::channel(false);
 
-    let mut agent = Agent::new(provider, channel, "").with_shutdown(rx);
+    let mut agent = Agent::new(provider, channel, "", executor).with_shutdown(rx);
 
     let _ = tx.send(true);
 
diff --git a/tests/performance_agent_integration.rs b/tests/performance_agent_integration.rs