bug-ops
diff --git a/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎crates/zeph-config/src/experiment.rs‎
Lines changed: 21 additions & 0 deletions b/‎crates/zeph-config/src/experiment.rs‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎crates/zeph-core/src/agent/builder.rs‎
Lines changed: 2 additions & 0 deletions b/‎crates/zeph-core/src/agent/builder.rs‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎crates/zeph-core/src/agent/session_config.rs‎
Lines changed: 2 additions & 0 deletions b/‎crates/zeph-core/src/agent/session_config.rs‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎crates/zeph-core/src/agent/tool_execution/native.rs‎
Lines changed: 164 additions & 0 deletions b/‎crates/zeph-core/src/agent/tool_execution/native.rs‎
Lines changed: 164 additions & 0 deletions
diff --git a/‎crates/zeph-core/src/agent/tool_execution/tests.rs‎
Lines changed: 108 additions & 0 deletions b/‎crates/zeph-core/src/agent/tool_execution/tests.rs‎
Lines changed: 108 additions & 0 deletions
@@ -13,6 +13,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - feat(memory): D-MEM RPE-based tiered graph extraction routing — `RpeRouter` computes heuristic surprise score from context similarity and entity novelty; low-RPE turns skip the MAGMA LLM extraction pipeline; `consecutive_skips` safety valve forces extraction after `max_skip_turns` consecutive skips; `extract_candidate_entities()` helper for cheap regex+keyword entity detection; controlled by `[memory.graph.rpe] enabled = false, threshold = 0.3, max_skip_turns = 5` (closes #2442)
 - feat(llm): BaRP cost-weight dial in bandit router — `cost_weight` now penalises UCB arm scores during provider selection in addition to the existing reward-signal penalty; higher values bias the bandit toward cheaper providers at inference time; static cost tier heuristics based on provider name and model identifier; `cost_weight` is clamped to [0.0, 1.0] at bootstrap (#2415)
 - feat(llm): MAR (Memory-Augmented Routing) — new `[llm.routing.bandit] memory_confidence_threshold` (default 0.9); when the top-1 semantic recall score for the current query meets or exceeds the threshold the bandit biases toward fast/cheap providers; signal propagated from `SemanticMemory::recall` through `ContextSlot::SemanticRecall` to `RouterProvider`; no routing change when `cost_weight = 0.0` (operator intent respected) (#2443)
+- feat(tools): utility-guided tool dispatch gate — `UtilityScorer` scores each candidate tool call before execution using heuristic components (estimated gain, token cost, redundancy, exploration bonus); calls scoring below `[tools.utility] threshold` are skipped, and scoring errors fail closed (the call is skipped); user-requested tools bypass the gate unconditionally; disabled by default (`[tools.utility] enabled = false`) (closes #2424)
+- feat(orchestration): cascade-aware DAG routing — `CascadeDetector` tracks failure rates per root-anchored region; when a region's failure rate exceeds `[orchestration] cascade_failure_threshold`, tasks in that region are deprioritized in the ready queue so healthy branches run first; reset on `inject_tasks()`; disabled by default (closes #2425)
+- feat(orchestration): tree-optimized dispatch — `DispatchStrategy::TreeOptimized` sorts the ready queue by critical-path distance (deepest tasks first) for `FanOut`/`FanIn` topologies when `[orchestration] tree_optimized_dispatch = true`; disabled by default
+- feat(orchestration): `DispatchStrategy::CascadeAware` for `Mixed` topology when `cascade_routing = true`; requires `topology_selection = true` (startup warning emitted otherwise)
 - feat(acp): expose current model in `session/list` and emit `SessionInfoUpdate` on model change — each in-memory `SessionInfo` now carries `meta.currentModel`; after `session/set_config_option` with `configId=model` a `SessionInfoUpdate` notification with `meta.currentModel` is sent in addition to the existing `ConfigOptionUpdate`; same notification is sent after `session/set_session_model` (closes #2435)
 - feat(tools): adversarial policy agent — LLM-based pre-execution tool call validation against plain-language policies; configurable fail-closed/fail-open behavior (`fail_open = false` default); prompt injection hardening via code-fence param quoting; strict allow/deny response parsing; full `ToolExecutor` trait delegation; audit log `adversarial_policy_decision` field; executor chain order `PolicyGateExecutor → AdversarialPolicyGateExecutor → TrustGateExecutor`; gated on `policy-enforcer` feature; config `[tools.adversarial_policy]` (closes #2447)
 - feat(memory): Memex tool output archive — before compaction, `ToolOutput` bodies in the compaction range are saved to `tool_overflow` with `archive_type = 'archive'`; archived UUIDs are appended as a postfix after LLM summarization so references survive compaction; controlled by `[memory.compression] archive_tool_outputs = false`; archives are excluded from the short-lived cleanup job via `archive_type` column (migration 054, closes #2432)
 
@@ -55,6 +55,10 @@ fn default_completeness_threshold() -> f32 {
     0.7
 }
 
+fn default_cascade_failure_threshold() -> f32 {
+    0.5
+}
+
 fn default_plan_cache_similarity_threshold() -> f32 {
     0.90
 }
@@ -199,6 +203,20 @@ pub struct OrchestrationConfig {
     /// Values outside [0.0, 1.0] are rejected at startup by `Config::validate()`.
     #[serde(default = "default_completeness_threshold")]
     pub completeness_threshold: f32,
+    /// Enable cascade-aware routing for Mixed-topology DAGs. Requires `topology_selection = true`.
+    /// When enabled, tasks in failing subtrees are deprioritized in favour of healthy branches.
+    /// Default: false (opt-in).
+    #[serde(default)]
+    pub cascade_routing: bool,
+    /// Failure rate threshold above which a DAG region is considered "cascading".
+    /// Must be in (0.0, 1.0] (0.0 itself is excluded). Default: 0.5.
+    #[serde(default = "default_cascade_failure_threshold")]
+    pub cascade_failure_threshold: f32,
+    /// Enable tree-optimized dispatch for FanOut/FanIn topologies.
+    /// Sorts the ready queue by critical-path distance (deepest tasks first) to minimize
+    /// end-to-end latency. Default: false (opt-in).
+    #[serde(default)]
+    pub tree_optimized_dispatch: bool,
 }
 
 impl Default for OrchestrationConfig {
@@ -224,6 +242,9 @@ impl Default for OrchestrationConfig {
             verify_completeness: false,
             completeness_threshold: default_completeness_threshold(),
             tool_provider: String::new(),
+            cascade_routing: false,
+            cascade_failure_threshold: default_cascade_failure_threshold(),
+            tree_optimized_dispatch: false,
         }
     }
 }
 
@@ -1325,6 +1325,7 @@ impl<C: Channel> Agent<C> {
             graph_config,
             anomaly_config,
             result_cache_config,
+            utility_config,
             orchestration_config,
             // Not applied here: caller clones this before `apply_session_config` and applies
             // it per-session (e.g. `spawn_acp_agent` passes it to `with_debug_config`).
@@ -1381,6 +1382,7 @@ impl<C: Channel> Agent<C> {
         self.runtime.semantic_cache_threshold = semantic_cache_threshold;
         self.runtime.semantic_cache_max_candidates = semantic_cache_max_candidates;
         self = self.with_result_cache_config(&result_cache_config);
+        self.tool_orchestrator.set_utility_config(utility_config);
 
         self
     }
 
@@ -86,6 +86,7 @@ pub struct AgentSessionConfig {
     pub graph_config: GraphConfig,
     pub anomaly_config: zeph_tools::AnomalyConfig,
     pub result_cache_config: zeph_tools::ResultCacheConfig,
+    pub utility_config: zeph_tools::UtilityScoringConfig,
     pub orchestration_config: OrchestrationConfig,
     pub debug_config: DebugConfig,
     pub server_compaction: bool,
@@ -138,6 +139,7 @@ impl AgentSessionConfig {
             graph_config: config.memory.graph.clone(),
             anomaly_config: config.tools.anomaly.clone(),
             result_cache_config: config.tools.result_cache.clone(),
+            utility_config: config.tools.utility.clone(),
             orchestration_config: config.orchestration.clone(),
             debug_config: config.debug.clone(),
             server_compaction: config.llm.providers.iter().any(|e| e.server_compaction),
 
@@ -76,6 +76,7 @@ impl<C: Channel> Agent<C> {
     ) -> Result<(), super::super::error::AgentError> {
         self.tool_orchestrator.clear_doom_history();
         self.tool_orchestrator.clear_recent_tool_calls();
+        self.tool_orchestrator.clear_utility_state();
 
         // `mut` required when context-compression is enabled to inject focus tool definitions.
         #[cfg_attr(not(feature = "context-compression"), allow(unused_mut))]
@@ -791,6 +792,57 @@ impl<C: Channel> Agent<C> {
             }
         }
 
+        // Utility gate: score each call before dispatch. Calls below the threshold are skipped.
+        // Fail-closed on scoring errors (the score is `None` when scoring yields an invalid result).
+        // `user_requested` is only true for explicit /tool slash commands — it is never set from
+        // LLM-requested calls, which prevents a prompt-injection bypass of the gate (C2 fix).
+        let utility_blocked: Vec<bool> = {
+            // Checked conversion: saturate to `usize::MAX` rather than truncate on overflow.
+            let tokens_consumed =
+                usize::try_from(self.providers.cached_prompt_tokens).unwrap_or(usize::MAX);
+            // token_budget = 0 signals "unknown" to UtilityContext — cost component is zeroed.
+            let token_budget: usize = 0;
+            let tool_calls_this_turn = self.tool_orchestrator.recent_tool_calls.len();
+            calls
+                .iter()
+                .enumerate()
+                .map(|(idx, call)| {
+                    if pre_exec_blocked[idx] {
+                        return false; // already blocked, no need to score
+                    }
+                    let ctx = zeph_tools::UtilityContext {
+                        tool_calls_this_turn: tool_calls_this_turn + idx,
+                        tokens_consumed,
+                        token_budget,
+                        // Never set from LLM call content to prevent prompt-injection bypass.
+                        user_requested: false,
+                    };
+                    let score = self.tool_orchestrator.utility_scorer.score(call, &ctx);
+                    tracing::debug!(
+                        tool = %call.tool_id,
+                        score = ?score.as_ref().map(|s| s.total),
+                        threshold = self.tool_orchestrator.utility_scorer.threshold(),
+                        "utility gate: scored tool call"
+                    );
+                    let execute = self
+                        .tool_orchestrator
+                        .utility_scorer
+                        .should_execute(score.as_ref(), false);
+                    if !execute {
+                        tracing::warn!(
+                            tool = %call.tool_id,
+                            score = ?score.as_ref().map(|s| s.total),
+                            threshold = self.tool_orchestrator.utility_scorer.threshold(),
+                            "utility gate: skipping low-utility tool call"
+                        );
+                    }
+                    // Record call regardless so subsequent calls in this batch see it as prior.
+                    self.tool_orchestrator.utility_scorer.record_call(call);
+                    !execute
+                })
+                .collect()
+        };
+
         // Repeat-detection (CRIT-3): record LLM-initiated calls BEFORE execution.
         // Retry re-executions must NOT be pushed here — they are handled inside the retry loop.
         // Build args hashes and check for repeats. Blocked calls get a pre-built error result.
@@ -1041,6 +1093,30 @@ impl<C: Channel> Agent<C> {
                     continue;
                 }
 
+                if utility_blocked[idx] {
+                    let threshold = self.tool_orchestrator.utility_scorer.threshold();
+                    let msg = format!(
+                        "[skipped] Tool call to {} was skipped by the utility gate \
+                         (score below threshold {threshold:.2}). \
+                         Try a different approach or disable the utility gate in config.",
+                        tc.name
+                    );
+                    let out = zeph_tools::ToolOutput {
+                        tool_name: tc.name.clone(),
+                        summary: msg,
+                        blocks_executed: 0,
+                        filter_stats: None,
+                        diff: None,
+                        streamed: false,
+                        terminal_id: None,
+                        locations: None,
+                        raw_response: None,
+                        claim_source: None,
+                    };
+                    tier_futs.push((idx, Box::pin(std::future::ready(Ok(Some(out))))));
+                    continue;
+                }
+
                 if repeat_blocked[idx] {
                     let msg = format!(
                         "[error] Repeated identical call to {} detected. \
@@ -2326,4 +2402,92 @@ mod tests {
             "tool must not enforce min_messages_per_focus: {result}"
         );
     }
+
+    // --- utility gate integration ---
+
+    #[test]
+    fn utility_gate_disabled_by_default_scorer_is_not_enabled() {
+        // No utility config is applied in this test, so the scorer must report itself disabled.
+        let agent = make_agent();
+        assert!(
+            !agent.tool_orchestrator.utility_scorer.is_enabled(),
+            "utility scorer must be disabled by default"
+        );
+    }
+
+    #[test]
+    fn set_utility_config_enables_scorer_on_agent() {
+        // set_utility_config must carry both `enabled` and `threshold` through to the scorer.
+        let mut agent = make_agent();
+        agent
+            .tool_orchestrator
+            .set_utility_config(zeph_tools::UtilityScoringConfig {
+                enabled: true,
+                threshold: 0.5,
+                ..zeph_tools::UtilityScoringConfig::default()
+            });
+        assert!(
+            agent.tool_orchestrator.utility_scorer.is_enabled(),
+            "scorer must be enabled after set_utility_config"
+        );
+        assert!(
+            (agent.tool_orchestrator.utility_scorer.threshold() - 0.5).abs() < f32::EPSILON,
+            "threshold must match config"
+        );
+    }
+
+    #[test]
+    fn clear_utility_state_resets_per_turn_redundancy_tracking() {
+        // clear_utility_state() must drop previously recorded calls, so that a call identical
+        // to a recorded one scores redundancy 0.0 again (no stale redundancy carry-over).
+        use zeph_tools::{ToolCall, UtilityContext};
+
+        let mut agent = make_agent();
+        agent
+            .tool_orchestrator
+            .set_utility_config(zeph_tools::UtilityScoringConfig {
+                enabled: true,
+                threshold: 0.0,
+                ..zeph_tools::UtilityScoringConfig::default()
+            });
+
+        let call = ToolCall {
+            tool_id: "bash".to_owned(),
+            params: serde_json::Map::new(),
+        };
+        let ctx = UtilityContext {
+            tool_calls_this_turn: 0,
+            tokens_consumed: 0,
+            token_budget: 1000,
+            user_requested: false,
+        };
+
+        // Record the call once so that scoring the same call again sees it as a repeat.
+        agent.tool_orchestrator.utility_scorer.record_call(&call);
+
+        // Before clear: the identical call must score redundancy 1.0.
+        let score_before = agent
+            .tool_orchestrator
+            .utility_scorer
+            .score(&call, &ctx)
+            .unwrap();
+        assert!(
+            (score_before.redundancy - 1.0).abs() < f32::EPSILON,
+            "redundancy must be 1.0 before clear"
+        );
+
+        // clear_utility_state simulates turn start.
+        agent.tool_orchestrator.clear_utility_state();
+
+        // After clear: the same call must score redundancy 0.0.
+        let score_after = agent
+            .tool_orchestrator
+            .utility_scorer
+            .score(&call, &ctx)
+            .unwrap();
+        assert!(
+            score_after.redundancy.abs() < f32::EPSILON,
+            "redundancy must be 0.0 after clear_utility_state"
+        );
+    }
 }
@@ -4820,3 +4820,111 @@ async fn sanitize_tool_output_non_acp_session_normal_path() {
         "non-ACP session must NOT emit CrossBoundaryMcpToAcp"
     );
 }
+
+// --- utility gate integration tests ---
+
+#[tokio::test]
+async fn utility_gate_blocks_call_and_produces_skipped_output() {
+    // With the gate enabled at threshold = 1.0 the `bash` call is expected to score below it,
+    // so handle_native_tool_calls must record a ToolResult whose content contains "[skipped]".
+    use super::super::agent_tests::{
+        MockChannel, MockToolExecutor, create_test_registry, mock_provider,
+    };
+    use zeph_llm::provider::{Message, MessagePart, Role, ToolUseRequest};
+
+    let provider = mock_provider(vec![]);
+    let channel = MockChannel::new(vec![]);
+    let registry = create_test_registry();
+    let executor = MockToolExecutor::no_tools();
+    let mut agent = super::super::Agent::new(provider, channel, registry, None, 5, executor);
+
+    // Push a system prompt so the assistant message has a valid preceding context.
+    agent
+        .msg
+        .messages
+        .push(Message::from_legacy(Role::System, "system"));
+
+    // Enable the utility gate with threshold = 1.0 (expected to block this call).
+    agent
+        .tool_orchestrator
+        .set_utility_config(zeph_tools::UtilityScoringConfig {
+            enabled: true,
+            threshold: 1.0,
+            ..zeph_tools::UtilityScoringConfig::default()
+        });
+
+    let tool_calls = vec![ToolUseRequest {
+        id: "call-1".to_owned(),
+        name: "bash".to_owned(),
+        input: serde_json::json!({"command": "ls"}),
+    }];
+
+    agent
+        .handle_native_tool_calls(None, &tool_calls)
+        .await
+        .unwrap();
+
+    // Scan the message history for any ToolResult part carrying the "[skipped]" marker.
+    let skipped = agent.msg.messages.iter().any(|m| {
+        m.parts.iter().any(|p| {
+            if let MessagePart::ToolResult { content, .. } = p {
+                content.contains("[skipped]")
+            } else {
+                false
+            }
+        })
+    });
+    assert!(
+        skipped,
+        "utility gate must produce [skipped] ToolResult when score < threshold"
+    );
+}
+
+#[tokio::test]
+async fn utility_gate_disabled_does_not_produce_skipped_output() {
+    // No utility config is applied, so the gate stays off and no ToolResult may say "[skipped]".
+    use super::super::agent_tests::{
+        MockChannel, MockToolExecutor, create_test_registry, mock_provider,
+    };
+    use zeph_llm::provider::{Message, MessagePart, Role, ToolUseRequest};
+
+    let provider = mock_provider(vec![]);
+    let channel = MockChannel::new(vec![]);
+    let registry = create_test_registry();
+    let executor = MockToolExecutor::no_tools();
+    let mut agent = super::super::Agent::new(provider, channel, registry, None, 5, executor);
+
+    agent
+        .msg
+        .messages
+        .push(Message::from_legacy(Role::System, "system"));
+
+    // Precondition: the scorer reports disabled before any tool call is handled.
+    assert!(!agent.tool_orchestrator.utility_scorer.is_enabled());
+
+    let tool_calls = vec![ToolUseRequest {
+        id: "call-2".to_owned(),
+        name: "bash".to_owned(),
+        input: serde_json::json!({"command": "ls"}),
+    }];
+
+    agent
+        .handle_native_tool_calls(None, &tool_calls)
+        .await
+        .unwrap();
+
+    // Scan the message history: no ToolResult part may carry the "[skipped]" marker.
+    let has_skipped = agent.msg.messages.iter().any(|m| {
+        m.parts.iter().any(|p| {
+            if let MessagePart::ToolResult { content, .. } = p {
+                content.contains("[skipped]")
+            } else {
+                false
+            }
+        })
+    });
+    assert!(
+        !has_skipped,
+        "disabled utility gate must not produce [skipped] ToolResult"
+    );
+}