bug-ops
diff --git a/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions b/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 1 addition & 0 deletions b/‎Cargo.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎crates/zeph-config/src/migrate.rs‎
Lines changed: 56 additions & 0 deletions b/‎crates/zeph-config/src/migrate.rs‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎crates/zeph-tools/Cargo.toml‎
Lines changed: 2 additions & 1 deletion b/‎crates/zeph-tools/Cargo.toml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎crates/zeph-tools/src/audit.rs‎
Lines changed: 2 additions & 0 deletions b/‎crates/zeph-tools/src/audit.rs‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎crates/zeph-tools/src/config.rs‎
Lines changed: 27 additions & 0 deletions b/‎crates/zeph-tools/src/config.rs‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎crates/zeph-tools/src/executor.rs‎
Lines changed: 10 additions & 0 deletions b/‎crates/zeph-tools/src/executor.rs‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎crates/zeph-tools/src/shell/mod.rs‎
Lines changed: 96 additions & 1 deletion b/‎crates/zeph-tools/src/shell/mod.rs‎
Lines changed: 96 additions & 1 deletion
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ### Added
 
+- feat(tools): transactional `ShellExecutor` — opt-in snapshot+rollback for shell commands; file-level snapshot is captured before write commands (detected via `WRITE_INDICATORS` heuristic + redirection target extraction); when `auto_rollback` is enabled, rollback restores originals if the exit code is `>= 2`, or matches `auto_rollback_exit_codes` when that list is non-empty; new `ShellConfig` fields: `transactional`, `transaction_scope` (glob-filtered paths), `auto_rollback`, `auto_rollback_exit_codes`, `snapshot_required`; new `ToolError::SnapshotFailed`, `AuditResult::Rollback`, `ToolEvent::Rollback` variants; backed by `tempfile::TempDir` for automatic cleanup on success (closes #2414)
 - feat(core): `/new` slash command — resets conversation context (messages, compaction state, tool caches, focus/sidequest, pending plans) while preserving memory, MCP connections, providers, and skills; creates a new `ConversationId` in SQLite for audit trail; generates a session digest for the outgoing conversation fire-and-forget unless `--no-digest` is passed; active sub-agents and background compression tasks are cancelled; `--keep-plan` preserves a pending plan graph; available in all channels (CLI, TUI, Telegram) via the unified `handle_builtin_command` path (closes #2451)
 - feat(memory): Kumiho AGM-inspired belief revision for graph edges — new `BeliefRevisionConfig` with `similarity_threshold`; `find_superseded_edges()` uses contradiction heuristic (same relation domain + high cosine similarity = supersession); `superseded_by` column added to `graph_edges` for audit trail; `invalidate_edge_with_supersession()` in `GraphStore`; `resolve_edge_typed` accepts optional `BeliefRevisionConfig`; controlled by `[memory.graph.belief_revision] enabled = false` (migration 056, closes #2441)
 - feat(memory): D-MEM RPE-based tiered graph extraction routing — `RpeRouter` computes heuristic surprise score from context similarity and entity novelty; low-RPE turns skip the MAGMA LLM extraction pipeline; `consecutive_skips` safety valve forces extraction after `max_skip_turns` consecutive skips; `extract_candidate_entities()` helper for cheap regex+keyword entity detection; controlled by `[memory.graph.rpe] enabled = false, threshold = 0.3, max_skip_turns = 5` (closes #2442)
 
@@ -78,6 +78,7 @@ serde = "1.0"
 serde_json = "1.0"
 serde_norway = "0.9.42"
 serial_test = "3.4"
+globset = "0.4"
 similar = "2.7"
 sqlx = { version = "0.8", default-features = false }
 subtle = "2.6"
 
@@ -1422,6 +1422,62 @@ pub fn migrate_database_url(toml_src: &str) -> Result<MigrationResult, MigrateEr
     })
 }
 
+/// Annotates `[tools.shell]` with the transactional fields added in #2414.
+///
+/// All 5 new fields have `#[serde(default)]`, so existing configs parse without changes;
+/// this step only appends them as commented-out hints to `[tools.shell]`.
+///
+/// The input is returned untouched (`added_count == 0`) when:
+/// - there is no `[tools.shell]` table,
+/// - `transactional` is already set explicitly, or
+/// - the hint block is already present in the source, so reruns do not duplicate it.
+///
+/// # Errors
+///
+/// Returns `MigrateError` if the TOML cannot be parsed or `[tools.shell]` is not a table.
+pub fn migrate_shell_transactional(toml_src: &str) -> Result<MigrationResult, MigrateError> {
+    // First hint line of the comment block appended below. The key is commented out, so
+    // `contains_key` cannot see it; the raw source is searched instead.
+    // NOTE(review): assumes `append_comment_to_table_suffix` emits the text verbatim — confirm.
+    const HINT_MARKER: &str = "# transactional = false";
+
+    // Result for every "nothing to do" path: source echoed back, no additions reported.
+    let unchanged = || MigrationResult {
+        output: toml_src.to_owned(),
+        added_count: 0,
+        sections_added: Vec::new(),
+    };
+
+    let mut doc = toml_src.parse::<toml_edit::DocumentMut>()?;
+
+    let tools_shell_exists = doc
+        .get("tools")
+        .and_then(toml_edit::Item::as_table)
+        .is_some_and(|t| t.contains_key("shell"));
+    if !tools_shell_exists {
+        // No [tools.shell] section — nothing to annotate; new configs will get defaults.
+        return Ok(unchanged());
+    }
+
+    let shell = doc
+        .get_mut("tools")
+        .and_then(toml_edit::Item::as_table_mut)
+        .and_then(|t| t.get_mut("shell"))
+        .and_then(toml_edit::Item::as_table_mut)
+        .ok_or(MigrateError::InvalidStructure(
+            "[tools.shell] is not a table",
+        ))?;
+
+    // Skip when the user already configured the feature or a previous run left the hints.
+    if shell.contains_key("transactional") || toml_src.contains(HINT_MARKER) {
+        return Ok(unchanged());
+    }
+
+    let comment = "# Transactional shell: snapshot files before write commands, rollback on failure.\n\
+         # transactional = false\n\
+         # transaction_scope = []          # glob patterns; empty = all extracted paths\n\
+         # auto_rollback = false           # rollback when exit code >= 2\n\
+         # auto_rollback_exit_codes = []   # explicit exit codes; overrides >= 2 heuristic\n\
+         # snapshot_required = false       # abort if snapshot fails (default: warn and proceed)\n";
+    append_comment_to_table_suffix(shell, comment);
+
+    Ok(MigrationResult {
+        output: doc.to_string(),
+        added_count: 1,
+        sections_added: vec!["tools.shell.transactional".to_owned()],
+    })
+}
+
 // Helper to create a formatted value (used in tests).
 #[cfg(test)]
 fn make_formatted_str(s: &str) -> Value {
 
@@ -16,12 +16,14 @@ readme = "README.md"
 [dependencies]
 dirs.workspace = true
 glob.workspace = true
+globset.workspace = true
 regex.workspace = true
 reqwest = { workspace = true, features = ["rustls"] }
 schemars.workspace = true
 scrape-core.workspace = true
 serde = { workspace = true, features = ["derive"] }
 serde_json.workspace = true
+tempfile.workspace = true
 thiserror.workspace = true
 toml.workspace = true
 tokio = { workspace = true, features = ["fs", "io-util", "macros", "process", "rt", "sync", "time"] }
@@ -45,7 +47,6 @@ zeph-common = { workspace = true, features = ["treesitter"] }
 [dev-dependencies]
 insta.workspace = true
 proptest.workspace = true
-tempfile.workspace = true
 tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
 toml.workspace = true
 wiremock.workspace = true
 
@@ -68,6 +68,8 @@ pub enum AuditResult {
     Error { message: String },
     #[serde(rename = "timeout")]
     Timeout,
+    #[serde(rename = "rollback")]
+    Rollback { restored: usize, deleted: usize },
 }
 
 impl AuditLogger {
 
@@ -480,6 +480,7 @@ impl ToolsConfig {
 
 /// Shell-specific configuration: timeout, command blocklist, and allowlist overrides.
 #[derive(Debug, Deserialize, Serialize)]
+#[allow(clippy::struct_excessive_bools)]
 pub struct ShellConfig {
     #[serde(default = "default_timeout")]
     pub timeout: u64,
@@ -498,6 +499,27 @@ pub struct ShellConfig {
     /// spawning shell commands. Default covers common credential naming conventions.
     #[serde(default = "ShellConfig::default_env_blocklist")]
     pub env_blocklist: Vec<String>,
+    /// Enable transactional mode: snapshot files before write commands, rollback on failure.
+    #[serde(default)]
+    pub transactional: bool,
+    /// Glob patterns defining which paths are eligible for snapshotting.
+    /// Only files matching these patterns (relative to cwd) are captured.
+    /// Empty = snapshot all files referenced in the command.
+    #[serde(default)]
+    pub transaction_scope: Vec<String>,
+    /// Automatically roll back when the exit code is >= 2. Default: false.
+    /// Exit code 1 is excluded because many tools (grep, diff, test) use it for
+    /// non-error conditions.
+    #[serde(default)]
+    pub auto_rollback: bool,
+    /// Exit codes that trigger auto-rollback. Default: empty (uses >= 2 heuristic).
+    /// When non-empty, only these exact exit codes trigger rollback.
+    #[serde(default)]
+    pub auto_rollback_exit_codes: Vec<i32>,
+    /// When true, snapshot failure aborts execution with an error.
+    /// When false (default), snapshot failure emits a warning and execution proceeds.
+    #[serde(default)]
+    pub snapshot_required: bool,
 }
 
 impl ShellConfig {
@@ -561,6 +583,11 @@ impl Default for ShellConfig {
             allow_network: true,
             confirm_patterns: default_confirm_patterns(),
             env_blocklist: Self::default_env_blocklist(),
+            transactional: false,
+            transaction_scope: Vec::new(),
+            auto_rollback: false,
+            auto_rollback_exit_codes: Vec::new(),
+            snapshot_required: false,
         }
     }
 }
 
@@ -168,6 +168,12 @@ pub enum ToolEvent {
         filter_stats: Option<FilterStats>,
         diff: Option<DiffData>,
     },
+    Rollback {
+        tool_name: String,
+        command: String,
+        restored_count: usize,
+        deleted_count: usize,
+    },
 }
 
 pub type ToolEventTx = tokio::sync::mpsc::UnboundedSender<ToolEvent>;
@@ -233,6 +239,9 @@ pub enum ToolError {
         category: crate::error_taxonomy::ToolErrorCategory,
         message: String,
     },
+
+    #[error("snapshot failed: {reason}")]
+    SnapshotFailed { reason: String },
 }
 
 impl ToolError {
@@ -254,6 +263,7 @@ impl ToolError {
             Self::Http { status, .. } => classify_http_status(*status),
             Self::Execution(io_err) => classify_io_error(io_err),
             Self::Shell { category, .. } => *category,
+            Self::SnapshotFailed { .. } => ToolErrorCategory::PermanentFailure,
         }
     }
 
 
@@ -20,6 +20,9 @@ use crate::executor::{
 use crate::filter::{OutputFilterRegistry, sanitize_output};
 use crate::permissions::{PermissionAction, PermissionPolicy};
 
+mod transaction;
+use transaction::{TransactionSnapshot, affected_paths, build_scope_matchers, is_write_command};
+
 const DEFAULT_BLOCKED: &[&str] = &[
     "rm -rf /", "sudo", "mkfs", "dd if=", "curl", "wget", "nc ", "ncat", "netcat", "shutdown",
     "reboot", "halt",
@@ -105,6 +108,11 @@ pub struct ShellExecutor {
     output_filter_registry: Option<OutputFilterRegistry>,
     cancel_token: Option<CancellationToken>,
     skill_env: std::sync::RwLock<Option<std::collections::HashMap<String, String>>>,
+    transactional: bool,
+    auto_rollback: bool,
+    auto_rollback_exit_codes: Vec<i32>,
+    snapshot_required: bool,
+    transaction_scope_matchers: Vec<globset::GlobMatcher>,
 }
 
 impl ShellExecutor {
@@ -153,6 +161,11 @@ impl ShellExecutor {
             output_filter_registry: None,
             cancel_token: None,
             skill_env: std::sync::RwLock::new(None),
+            transactional: config.transactional,
+            auto_rollback: config.auto_rollback,
+            auto_rollback_exit_codes: config.auto_rollback_exit_codes.clone(),
+            snapshot_required: config.snapshot_required,
+            transaction_scope_matchers: build_scope_matchers(&config.transaction_scope),
         }
     }
 
@@ -256,6 +269,7 @@ impl ShellExecutor {
         }))
     }
 
+    #[allow(clippy::too_many_lines)]
     async fn execute_block(
         &self,
         block: &str,
@@ -264,6 +278,39 @@ impl ShellExecutor {
         self.check_permissions(block, skip_confirm).await?;
         self.validate_sandbox(block)?;
 
+        // Take a transactional snapshot before executing write commands.
+        let mut snapshot_warning: Option<String> = None;
+        let snapshot = if self.transactional && is_write_command(block) {
+            let paths = affected_paths(block, &self.transaction_scope_matchers);
+            if paths.is_empty() {
+                None
+            } else {
+                match TransactionSnapshot::capture(&paths) {
+                    Ok(snap) => {
+                        tracing::debug!(
+                            files = snap.file_count(),
+                            bytes = snap.total_bytes(),
+                            "transaction snapshot captured"
+                        );
+                        Some(snap)
+                    }
+                    Err(e) if self.snapshot_required => {
+                        return Err(ToolError::SnapshotFailed {
+                            reason: e.to_string(),
+                        });
+                    }
+                    Err(e) => {
+                        tracing::warn!(err = %e, "transaction snapshot failed, proceeding without rollback");
+                        snapshot_warning =
+                            Some(format!("[warn] snapshot failed: {e}; rollback unavailable"));
+                        None
+                    }
+                }
+            }
+        } else {
+            None
+        };
+
         if let Some(ref tx) = self.tool_event_tx {
             let _ = tx.send(ToolEvent::Started {
                 tool_name: "bash".to_owned(),
@@ -294,6 +341,49 @@ impl ShellExecutor {
         #[allow(clippy::cast_possible_truncation)]
         let duration_ms = start.elapsed().as_millis() as u64;
 
+        // Perform auto-rollback if configured and the exit code qualifies.
+        if let Some(snap) = snapshot {
+            let should_rollback = self.auto_rollback
+                && if self.auto_rollback_exit_codes.is_empty() {
+                    exit_code >= 2
+                } else {
+                    self.auto_rollback_exit_codes.contains(&exit_code)
+                };
+            if should_rollback {
+                match snap.rollback() {
+                    Ok(report) => {
+                        tracing::info!(
+                            restored = report.restored_count,
+                            deleted = report.deleted_count,
+                            "transaction rollback completed"
+                        );
+                        self.log_audit(
+                            block,
+                            AuditResult::Rollback {
+                                restored: report.restored_count,
+                                deleted: report.deleted_count,
+                            },
+                            duration_ms,
+                            None,
+                        )
+                        .await;
+                        if let Some(ref tx) = self.tool_event_tx {
+                            let _ = tx.send(ToolEvent::Rollback {
+                                tool_name: "bash".to_owned(),
+                                command: block.to_owned(),
+                                restored_count: report.restored_count,
+                                deleted_count: report.deleted_count,
+                            });
+                        }
+                    }
+                    Err(e) => {
+                        tracing::error!(err = %e, "transaction rollback failed");
+                    }
+                }
+            }
+            // On success (no rollback): snapshot dropped here; TempDir auto-cleans.
+        }
+
         let is_timeout = out.contains("[error] command timed out");
         let audit_result = if is_timeout {
             AuditResult::Timeout
@@ -358,7 +448,12 @@ impl ShellExecutor {
             per_block_stats.clone(),
         );
 
-        Ok((format!("$ {block}\n{filtered}"), per_block_stats))
+        let output_line = if let Some(warn) = snapshot_warning {
+            format!("{warn}\n$ {block}\n{filtered}")
+        } else {
+            format!("$ {block}\n{filtered}")
+        };
+        Ok((output_line, per_block_stats))
     }
 
     fn emit_completed(
Original file line number	Diff line number	Diff line change
`@@ -68,6 +68,8 @@ pub enum AuditResult {`
`68`	`68`	`Error { message: String },`
`69`	`69`	`#[serde(rename = "timeout")]`
`70`	`70`	`Timeout,`
	`71`	`+ #[serde(rename = "rollback")]`
	`72`	`+ Rollback { restored: usize, deleted: usize },`
`71`	`73`	`}`
`72`	`74`
`73`	`75`	`impl AuditLogger {`