Skip to content

Commit 8fcd0b4

Browse files
authored
M7 Phase 3: Wire ShellExecutor into agent for command filtering (#41) (#49)
* Wire ShellExecutor into agent for command filtering (#41) BREAKING CHANGE: Agent::new() now requires tool_executor parameter Replace inline bash execution with ShellExecutor to fix SEC-001 CRITICAL vulnerability. All shell commands now routed through ToolExecutor trait with DEFAULT_BLOCKED patterns. Changes: - Add ToolExecutor generic parameter to Agent<P, C, T> - Update Agent::new() signature: add tool_executor as 4th parameter - Replace extract_and_execute_bash() with self.tool_executor.execute() - Handle ToolError::Blocked with generic security message - Remove 66 lines duplicate code (extract_bash_blocks, execute_bash) - Remove hardcoded SHELL_TIMEOUT constant (uses config value) - Update main.rs: create ShellExecutor from config Security improvements: - SEC-001 CRITICAL vulnerability resolved - 12 DEFAULT_BLOCKED patterns active - Error message does not leak blocked patterns - Audit trail via tracing::warn for blocked commands Testing: - 125/125 tests pass (4 new integration tests for blocked commands) - Zero clippy warnings - Comprehensive CHANGELOG.md update Migration: # Before Agent::new(provider, channel, &skills_prompt) # After use zeph_tools::shell::ShellExecutor; let executor = ShellExecutor::new(&config.tools.shell); Agent::new(provider, channel, &skills_prompt, executor) Resolves #41 (M7 Phase 3: Agent integration) Fixes SEC-001 CRITICAL security vulnerability Part of #34 (M7 Epic: Tool Execution Framework) * Fix unused variable warning in performance test Change executor to _executor in agent_respects_configured_timeout test to suppress unused variable warning that causes CI failure with -D warnings.
1 parent 0a2872e commit 8fcd0b4

File tree

7 files changed

+585
-161
lines changed

7 files changed

+585
-161
lines changed

CHANGELOG.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,41 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
2020
- 22 unit tests with 99.25% line coverage, zero clippy warnings
2121
- ADR-014: zeph-tools crate design rationale and architecture decisions
2222

23+
#### M7 Phase 3 (Issue #41): Agent integration with ToolExecutor trait
24+
- Agent now uses `ShellExecutor` for all bash command execution with safety checks
25+
- Four integration tests for blocked command behavior and error handling
26+
- Security improvements: blocked commands no longer leak pattern details to users
27+
28+
### Security
29+
30+
- **CRITICAL fix for SEC-001**: Shell commands now filtered through ShellExecutor with DEFAULT_BLOCKED patterns (rm -rf /, sudo, mkfs, dd if=, curl, wget, nc, shutdown, reboot, halt, poweroff, init 0). Resolves command injection vulnerability.
31+
32+
### Fixed
33+
34+
- Shell command timeout now respects `config.tools.shell.timeout` (was hardcoded 30s)
35+
- Removed duplicate bash parsing logic from agent.rs (now centralized in zeph-tools)
36+
- Error message pattern leakage: blocked commands now show generic security policy message instead of leaking exact blocked pattern
37+
2338
### Changed
2439

2540
**BREAKING CHANGES** (pre-1.0.0):
41+
- `Agent::new()` signature changed: now requires `tool_executor: T` as 4th parameter where `T: ToolExecutor`
42+
- `Agent` struct now generic over three types: `Agent<P, C, T>` (provider, channel, tool_executor)
2643
- Workspace `Cargo.toml` now defines `version = "0.2.0"` in `[workspace.package]` section
2744
- All crate manifests use `version.workspace = true` instead of explicit versions
2845
- Inter-crate dependencies now reference workspace definitions (e.g., `zeph-llm.workspace = true`)
2946

47+
**Migration:**
48+
```rust
49+
// Before:
50+
let agent = Agent::new(provider, channel, &skills_prompt);
51+
52+
// After:
53+
use zeph_tools::shell::ShellExecutor;
54+
let executor = ShellExecutor::new(&config.tools.shell);
55+
let agent = Agent::new(provider, channel, &skills_prompt, executor);
56+
```
57+
3058
## [0.2.0] - 2026-02-06
3159

3260
### Added

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ zeph-core.workspace = true
5656
zeph-llm.workspace = true
5757
zeph-memory.workspace = true
5858
zeph-skills.workspace = true
59+
zeph-tools.workspace = true
5960

6061
[dev-dependencies]
6162
tempfile.workspace = true

crates/zeph-core/src/agent.rs

Lines changed: 32 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,35 @@
1-
use std::time::Duration;
2-
3-
use tokio::process::Command;
41
use tokio::sync::watch;
52
use tokio_stream::StreamExt;
63
use zeph_llm::provider::{LlmProvider, Message, Role};
74
use zeph_memory::sqlite::{SqliteStore, role_str};
5+
use zeph_tools::executor::{ToolError, ToolExecutor};
86

97
use crate::channel::Channel;
108
use crate::context::build_system_prompt;
119

10+
// TODO(M14): Make configurable via AgentConfig (currently hardcoded for MVP)
1211
const MAX_SHELL_ITERATIONS: usize = 3;
13-
const SHELL_TIMEOUT: Duration = Duration::from_secs(30);
1412

15-
pub struct Agent<P: LlmProvider, C: Channel> {
13+
pub struct Agent<P: LlmProvider, C: Channel, T: ToolExecutor> {
1614
provider: P,
1715
channel: C,
16+
tool_executor: T,
1817
messages: Vec<Message>,
1918
memory: Option<SqliteStore>,
2019
conversation_id: Option<i64>,
2120
history_limit: u32,
2221
shutdown: watch::Receiver<bool>,
2322
}
2423

25-
impl<P: LlmProvider, C: Channel> Agent<P, C> {
24+
impl<P: LlmProvider, C: Channel, T: ToolExecutor> Agent<P, C, T> {
2625
#[must_use]
27-
pub fn new(provider: P, channel: C, skills_prompt: &str) -> Self {
26+
pub fn new(provider: P, channel: C, skills_prompt: &str, tool_executor: T) -> Self {
2827
let system_prompt = build_system_prompt(skills_prompt);
2928
let (_tx, rx) = watch::channel(false);
3029
Self {
3130
provider,
3231
channel,
32+
tool_executor,
3333
messages: vec![Message {
3434
role: Role::System,
3535
content: system_prompt,
@@ -133,20 +133,32 @@ impl<P: LlmProvider, C: Channel> Agent<P, C> {
133133
});
134134
self.persist_message(Role::Assistant, &response).await;
135135

136-
let Some(output) = extract_and_execute_bash(&response).await else {
137-
return Ok(());
138-
};
139-
140-
self.channel
141-
.send(&format!("[shell output]\n{output}"))
142-
.await?;
136+
match self.tool_executor.execute(&response).await {
137+
Ok(Some(output)) => {
138+
let formatted_output = format!("[shell output]\n{output}");
139+
self.channel.send(&formatted_output).await?;
143140

144-
let shell_msg = format!("[shell output]\n{output}");
145-
self.messages.push(Message {
146-
role: Role::User,
147-
content: shell_msg.clone(),
148-
});
149-
self.persist_message(Role::User, &shell_msg).await;
141+
self.messages.push(Message {
142+
role: Role::User,
143+
content: formatted_output.clone(),
144+
});
145+
self.persist_message(Role::User, &formatted_output).await;
146+
}
147+
Ok(None) => return Ok(()),
148+
Err(ToolError::Blocked { command }) => {
149+
tracing::warn!("blocked command: {command}");
150+
let error_msg = "This command is blocked by security policy.".to_string();
151+
self.channel.send(&error_msg).await?;
152+
return Ok(());
153+
}
154+
Err(e) => {
155+
tracing::error!("tool execution error: {e:#}");
156+
self.channel
157+
.send("Tool execution failed. Please try a different approach.")
158+
.await?;
159+
return Ok(());
160+
}
161+
}
150162
}
151163

152164
Ok(())
@@ -177,145 +189,9 @@ impl<P: LlmProvider, C: Channel> Agent<P, C> {
177189
}
178190

179191
async fn shutdown_signal(rx: &mut watch::Receiver<bool>) {
180-
// Wait until the value becomes true
181192
while !*rx.borrow_and_update() {
182193
if rx.changed().await.is_err() {
183-
// Sender dropped without ever setting true — hang forever so select picks the other branch
184194
std::future::pending::<()>().await;
185195
}
186196
}
187197
}
188-
189-
fn extract_bash_blocks(text: &str) -> Vec<&str> {
190-
let mut blocks = Vec::new();
191-
let mut rest = text;
192-
193-
while let Some(start) = rest.find("```bash") {
194-
let code_start = start + 7;
195-
let after = &rest[code_start..];
196-
if let Some(end) = after.find("```") {
197-
blocks.push(after[..end].trim());
198-
rest = &after[end + 3..];
199-
} else {
200-
break;
201-
}
202-
}
203-
204-
blocks
205-
}
206-
207-
async fn execute_bash(code: &str) -> anyhow::Result<String> {
208-
let result = tokio::time::timeout(
209-
SHELL_TIMEOUT,
210-
Command::new("bash").arg("-c").arg(code).output(),
211-
)
212-
.await;
213-
214-
match result {
215-
Ok(Ok(output)) => {
216-
let stdout = String::from_utf8_lossy(&output.stdout);
217-
let stderr = String::from_utf8_lossy(&output.stderr);
218-
let mut combined = String::new();
219-
if !stdout.is_empty() {
220-
combined.push_str(&stdout);
221-
}
222-
if !stderr.is_empty() {
223-
if !combined.is_empty() {
224-
combined.push('\n');
225-
}
226-
combined.push_str("[stderr] ");
227-
combined.push_str(&stderr);
228-
}
229-
if combined.is_empty() {
230-
combined.push_str("(no output)");
231-
}
232-
Ok(combined)
233-
}
234-
Ok(Err(e)) => Ok(format!("[error] {e}")),
235-
Err(_) => Ok("[error] command timed out after 30s".to_string()),
236-
}
237-
}
238-
239-
async fn extract_and_execute_bash(response: &str) -> Option<String> {
240-
let blocks = extract_bash_blocks(response);
241-
if blocks.is_empty() {
242-
return None;
243-
}
244-
245-
let mut outputs = Vec::with_capacity(blocks.len());
246-
for block in blocks {
247-
match execute_bash(block).await {
248-
Ok(out) => outputs.push(format!("$ {block}\n{out}")),
249-
Err(e) => outputs.push(format!("$ {block}\n[error] {e}")),
250-
}
251-
}
252-
253-
Some(outputs.join("\n\n"))
254-
}
255-
256-
#[cfg(test)]
257-
mod tests {
258-
use super::*;
259-
260-
#[test]
261-
fn extract_single_bash_block() {
262-
let text = "Here is code:\n```bash\necho hello\n```\nDone.";
263-
let blocks = extract_bash_blocks(text);
264-
assert_eq!(blocks, vec!["echo hello"]);
265-
}
266-
267-
#[test]
268-
fn extract_multiple_bash_blocks() {
269-
let text = "```bash\nls\n```\ntext\n```bash\npwd\n```";
270-
let blocks = extract_bash_blocks(text);
271-
assert_eq!(blocks, vec!["ls", "pwd"]);
272-
}
273-
274-
#[test]
275-
fn ignore_non_bash_blocks() {
276-
let text = "```python\nprint('hi')\n```\n```bash\necho hi\n```";
277-
let blocks = extract_bash_blocks(text);
278-
assert_eq!(blocks, vec!["echo hi"]);
279-
}
280-
281-
#[test]
282-
fn no_blocks() {
283-
let text = "Just plain text, no code blocks.";
284-
let blocks = extract_bash_blocks(text);
285-
assert!(blocks.is_empty());
286-
}
287-
288-
#[test]
289-
fn unclosed_block_ignored() {
290-
let text = "```bash\necho hello";
291-
let blocks = extract_bash_blocks(text);
292-
assert!(blocks.is_empty());
293-
}
294-
295-
#[tokio::test]
296-
async fn execute_bash_simple() {
297-
let result = execute_bash("echo hello").await.unwrap();
298-
assert!(result.contains("hello"));
299-
}
300-
301-
#[tokio::test]
302-
async fn execute_bash_stderr() {
303-
let result = execute_bash("echo err >&2").await.unwrap();
304-
assert!(result.contains("[stderr]"));
305-
assert!(result.contains("err"));
306-
}
307-
308-
#[tokio::test]
309-
async fn extract_and_execute_no_blocks() {
310-
let result = extract_and_execute_bash("plain text").await;
311-
assert!(result.is_none());
312-
}
313-
314-
#[tokio::test]
315-
async fn extract_and_execute_with_block() {
316-
let text = "Run this:\n```bash\necho test123\n```";
317-
let result = extract_and_execute_bash(text).await;
318-
assert!(result.is_some());
319-
assert!(result.unwrap().contains("test123"));
320-
}
321-
}

src/main.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use zeph_llm::ollama::OllamaProvider;
1212
use zeph_memory::sqlite::SqliteStore;
1313
use zeph_skills::prompt::format_skills_prompt;
1414
use zeph_skills::registry::SkillRegistry;
15+
use zeph_tools::ShellExecutor;
1516

1617
/// Enum dispatch for runtime channel selection, following the `AnyProvider` pattern.
1718
#[derive(Debug)]
@@ -98,7 +99,9 @@ async fn main() -> anyhow::Result<()> {
9899
let _ = shutdown_tx.send(true);
99100
});
100101

101-
let mut agent = Agent::new(provider, channel, &skills_prompt)
102+
let shell_executor = ShellExecutor::new(&config.tools.shell);
103+
104+
let mut agent = Agent::new(provider, channel, &skills_prompt, shell_executor)
102105
.with_memory(store, conversation_id, config.memory.history_limit)
103106
.with_shutdown(shutdown_rx);
104107
agent.load_history().await?;

tests/integration.rs

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use zeph_llm::provider::{LlmProvider, Message};
99
use zeph_memory::sqlite::SqliteStore;
1010
use zeph_skills::loader::load_skill;
1111
use zeph_skills::registry::SkillRegistry;
12+
use zeph_tools::executor::{ToolError, ToolExecutor, ToolOutput};
1213

1314
// -- Mock LLM Provider --
1415

@@ -82,6 +83,16 @@ impl Channel for MockChannel {
8283
}
8384
}
8485

86+
// -- Mock Tool Executor --
87+
88+
struct MockToolExecutor;
89+
90+
impl ToolExecutor for MockToolExecutor {
91+
async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
92+
Ok(None)
93+
}
94+
}
95+
8596
// -- Config tests --
8697
// Combined into one test to avoid env var races between parallel test threads.
8798

@@ -223,8 +234,9 @@ async fn agent_roundtrip_mock() {
223234
let provider = MockProvider::new("mock response");
224235
let outputs = Arc::new(Mutex::new(Vec::new()));
225236
let channel = MockChannel::new(vec!["hello"], outputs.clone());
237+
let executor = MockToolExecutor;
226238

227-
let mut agent = Agent::new(provider, channel, "");
239+
let mut agent = Agent::new(provider, channel, "", executor);
228240
agent.run().await.unwrap();
229241

230242
let collected = outputs.lock().unwrap();
@@ -237,8 +249,9 @@ async fn agent_multiple_messages() {
237249
let provider = MockProvider::new("reply");
238250
let outputs = Arc::new(Mutex::new(Vec::new()));
239251
let channel = MockChannel::new(vec!["first", "second", "third"], outputs.clone());
252+
let executor = MockToolExecutor;
240253

241-
let mut agent = Agent::new(provider, channel, "");
254+
let mut agent = Agent::new(provider, channel, "", executor);
242255
agent.run().await.unwrap();
243256

244257
let collected = outputs.lock().unwrap();
@@ -251,11 +264,12 @@ async fn agent_with_memory() {
251264
let provider = MockProvider::new("remembered");
252265
let outputs = Arc::new(Mutex::new(Vec::new()));
253266
let channel = MockChannel::new(vec!["save this"], outputs.clone());
267+
let executor = MockToolExecutor;
254268

255269
let store = SqliteStore::new(":memory:").await.unwrap();
256270
let cid = store.create_conversation().await.unwrap();
257271

258-
let mut agent = Agent::new(provider, channel, "").with_memory(store, cid, 50);
272+
let mut agent = Agent::new(provider, channel, "", executor).with_memory(store, cid, 50);
259273
agent.run().await.unwrap();
260274
}
261275

@@ -264,10 +278,11 @@ async fn agent_shutdown_via_watch() {
264278
let provider = MockProvider::new("should not appear");
265279
let outputs = Arc::new(Mutex::new(Vec::new()));
266280
let channel = MockChannel::new(vec![], outputs.clone());
281+
let executor = MockToolExecutor;
267282

268283
let (tx, rx) = tokio::sync::watch::channel(false);
269284

270-
let mut agent = Agent::new(provider, channel, "").with_shutdown(rx);
285+
let mut agent = Agent::new(provider, channel, "", executor).with_shutdown(rx);
271286

272287
let _ = tx.send(true);
273288

0 commit comments

Comments
 (0)