bug-ops
diff --git a/‎CHANGELOG.md‎
Lines changed: 30 additions & 1 deletion b/‎CHANGELOG.md‎
Lines changed: 30 additions & 1 deletion
diff --git a/‎Cargo.lock‎
Lines changed: 12 additions & 11 deletions b/‎Cargo.lock‎
Lines changed: 12 additions & 11 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 12 additions & 11 deletions b/‎Cargo.toml‎
Lines changed: 12 additions & 11 deletions
diff --git a/‎README.md‎
Lines changed: 6 additions & 6 deletions b/‎README.md‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎crates/zeph-a2a/Cargo.toml‎
Lines changed: 1 addition & 0 deletions b/‎crates/zeph-a2a/Cargo.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎crates/zeph-a2a/src/server/router.rs‎
Lines changed: 13 additions & 2 deletions b/‎crates/zeph-a2a/src/server/router.rs‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎crates/zeph-core/src/agent/context.rs‎
Lines changed: 9 additions & 5 deletions b/‎crates/zeph-core/src/agent/context.rs‎
Lines changed: 9 additions & 5 deletions
@@ -6,6 +6,34 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ## [Unreleased]
 
+## [0.9.7] - 2026-02-15
+
+### Performance
+- Token estimation uses `len() / 3` for improved accuracy (#328)
+- Explicit tokio feature selection replacing broad feature gates (#326)
+- Concurrent skill embedding for faster startup (#327)
+- Pre-allocate strings in hot paths to reduce allocations (#329)
+- Parallel context building via `try_join!` (#331)
+- Criterion benchmark suite for core operations (#330)
+
+### Security
+- Path traversal protection in shell sandbox (#325)
+- Canonical path validation in skill loader (#322)
+- SSRF protection for MCP server connections (#323)
+- Remove MySQL/RSA vulnerable transitive dependencies (#324)
+- Secret redaction patterns for Google and GitLab tokens (#320)
+- TTL-based eviction for rate limiter entries (#321)
+
+### Changed
+- `QdrantOps` shared helper trait for Qdrant collection operations (#304)
+- `delegate_provider!` macro replacing boilerplate provider delegation (#303)
+- Remove `TuiError` in favor of unified error handling (#302)
+- Generic `recv_optional` replacing per-channel optional receive logic (#301)
+
+### Dependencies
+- Upgraded rmcp to 0.15, toml to 1.0, uuid to 1.21 (#296)
+- Cleaned up deny.toml advisory and license configuration (#312)
+
 ## [0.9.6] - 2026-02-15
 
 ### Changed
@@ -720,7 +748,8 @@ let agent = Agent::new(provider, channel, &skills_prompt, executor);
 - Agent calls channel.send_typing() before each LLM request
 - Agent::run() uses tokio::select! to race channel messages against shutdown signal
 
-[Unreleased]: https://github.com/bug-ops/zeph/compare/v0.9.6...HEAD
+[Unreleased]: https://github.com/bug-ops/zeph/compare/v0.9.7...HEAD
+[0.9.7]: https://github.com/bug-ops/zeph/compare/v0.9.6...v0.9.7
 [0.9.6]: https://github.com/bug-ops/zeph/compare/v0.9.5...v0.9.6
 [0.9.5]: https://github.com/bug-ops/zeph/compare/v0.9.4...v0.9.5
 [0.9.4]: https://github.com/bug-ops/zeph/compare/v0.9.3...v0.9.4
 
@@ -5,7 +5,7 @@ resolver = "3"
 [workspace.package]
 edition = "2024"
 rust-version = "1.88"
-version = "0.9.6"
+version = "0.9.7"
 authors = ["bug-ops"]
 license = "MIT"
 repository = "https://github.com/bug-ops/zeph"
@@ -50,6 +50,7 @@ thiserror = "2.0"
 tokenizers = { version = "0.22", default-features = false, features = ["fancy-regex"] }
 tokio = "1"
 tokio-stream = "0.1"
+tokio-util = "0.7"
 toml = "1.0"
 tower = "0.5"
 tower-http = "0.6"
@@ -59,16 +60,16 @@ tree-sitter = "0.26"
 unicode-width = "0.2"
 url = "2.5"
 uuid = "1.21"
-zeph-a2a = { path = "crates/zeph-a2a", version = "0.9.6" }
-zeph-channels = { path = "crates/zeph-channels", version = "0.9.6" }
-zeph-core = { path = "crates/zeph-core", version = "0.9.6" }
-zeph-index = { path = "crates/zeph-index", version = "0.9.6" }
-zeph-llm = { path = "crates/zeph-llm", version = "0.9.6" }
-zeph-mcp = { path = "crates/zeph-mcp", version = "0.9.6" }
-zeph-memory = { path = "crates/zeph-memory", version = "0.9.6" }
-zeph-skills = { path = "crates/zeph-skills", version = "0.9.6" }
-zeph-tools = { path = "crates/zeph-tools", version = "0.9.6" }
-zeph-tui = { path = "crates/zeph-tui", version = "0.9.6" }
+zeph-a2a = { path = "crates/zeph-a2a", version = "0.9.7" }
+zeph-channels = { path = "crates/zeph-channels", version = "0.9.7" }
+zeph-core = { path = "crates/zeph-core", version = "0.9.7" }
+zeph-index = { path = "crates/zeph-index", version = "0.9.7" }
+zeph-llm = { path = "crates/zeph-llm", version = "0.9.7" }
+zeph-mcp = { path = "crates/zeph-mcp", version = "0.9.7" }
+zeph-memory = { path = "crates/zeph-memory", version = "0.9.7" }
+zeph-skills = { path = "crates/zeph-skills", version = "0.9.7" }
+zeph-tools = { path = "crates/zeph-tools", version = "0.9.7" }
+zeph-tui = { path = "crates/zeph-tui", version = "0.9.7" }
 
 [workspace.lints.clippy]
 all = "warn"
 
@@ -15,13 +15,13 @@ Lightweight AI agent that routes tasks across **Ollama, Claude, OpenAI, and Hugg
 
 ## Why Zeph
 
-**Token-efficient by design.** Most agent frameworks inject every tool and instruction into every prompt. Zeph embeds skills and MCP tools as vectors, then selects only the top-K relevant ones per query via cosine similarity. Prompt size stays O(K) — not O(N) — regardless of how many capabilities are installed.
+**Token-efficient by design.** Most agent frameworks inject every tool and instruction into every prompt. Zeph embeds skills and MCP tools as vectors (with concurrent embedding via `buffer_unordered`), then selects only the top-K relevant ones per query via cosine similarity. Prompt size stays O(K) -- not O(N) -- regardless of how many capabilities are installed.
 
-**Intelligent context management.** Two-tier context pruning: Tier 1 selectively removes old tool outputs (clearing bodies from memory after persisting to SQLite) before falling back to Tier 2 LLM-based compaction, reducing unnecessary LLM calls. A token-based protection zone preserves recent context from pruning. Cross-session memory transfers knowledge between conversations with relevance filtering. Proportional budget allocation (8% summaries, 8% semantic recall, 4% cross-session, 30% code context, 50% recent history) keeps conversations efficient. Tool outputs are truncated at 30K chars with optional LLM-based summarization for large outputs. Doom-loop detection breaks runaway tool cycles after 3 identical consecutive outputs, with configurable iteration limits (default 10). ZEPH.md project config discovery walks up the directory tree and injects project-specific context when available. Config hot-reload applies runtime-safe fields (timeouts, security, memory limits) on file change without restart.
+**Intelligent context management.** Two-tier context pruning: Tier 1 selectively removes old tool outputs (clearing bodies from memory after persisting to SQLite) before falling back to Tier 2 LLM-based compaction, reducing unnecessary LLM calls. A token-based protection zone preserves recent context from pruning. Context sources are fetched in parallel via `try_join!`, and token counts are estimated from byte length. Cross-session memory transfers knowledge between conversations with relevance filtering. Proportional budget allocation (8% summaries, 8% semantic recall, 4% cross-session, 30% code context, 50% recent history) keeps conversations efficient. Tool outputs are truncated at 30K chars with optional LLM-based summarization for large outputs. Doom-loop detection breaks runaway tool cycles after 3 identical consecutive outputs, with configurable iteration limits (default 10). ZEPH.md project config discovery walks up the directory tree and injects project-specific context when available. Config hot-reload applies runtime-safe fields (timeouts, security, memory limits) on file change without restart.
 
 **Run anywhere.** Local models via Ollama or Candle (GGUF with Metal/CUDA), cloud APIs (Claude, OpenAI, GPT-compatible endpoints like Together AI and Groq), or all of them at once through the multi-model orchestrator with automatic fallback chains.
 
-**Production-ready security.** Shell sandboxing with path restrictions, pattern-based permission policy per tool, destructive command confirmation, file operation sandbox with path traversal protection, tool output overflow-to-file (with LLM-accessible paths), secret redaction, audit logging, SSRF protection, and Trivy-scanned container images with 0 HIGH/CRITICAL CVEs.
+**Production-ready security.** Shell sandboxing with path restrictions and relative path traversal detection, pattern-based permission policy per tool, destructive command confirmation, file operation sandbox with path traversal protection, tool output overflow-to-file (with LLM-accessible paths), secret redaction (AWS, OpenAI, Anthropic, Google, GitLab), audit logging, SSRF protection (including MCP client), rate limiter with TTL-based eviction, and Trivy-scanned container images with 0 HIGH/CRITICAL CVEs.
 
 **Self-improving.** Skills evolve through failure detection, self-reflection, and LLM-generated improvements — with optional manual approval before activation.
 
@@ -94,13 +94,13 @@ cargo build --release --features tui
 | **Context Engineering** | Two-tier context pruning (selective tool-output pruning before LLM compaction), semantic recall injection, proportional budget allocation, token-based protection zone for recent context, config hot-reload | [Context](https://bug-ops.github.io/zeph/guide/context.html) · [Configuration](https://bug-ops.github.io/zeph/getting-started/configuration.html) |
 | **Semantic Memory** | SQLite + Qdrant vector search for contextual recall | [Memory](https://bug-ops.github.io/zeph/guide/semantic-memory.html) |
 | **Tool Permissions** | Pattern-based permission policy (allow/ask/deny) with glob matching per tool, excluded denied tools from prompts | [Tools](https://bug-ops.github.io/zeph/guide/tools.html) |
-| **MCP Client** | Connect external tool servers (stdio + HTTP), unified matching | [MCP](https://bug-ops.github.io/zeph/guide/mcp.html) |
+| **MCP Client** | Connect external tool servers (stdio + HTTP), unified matching, SSRF protection | [MCP](https://bug-ops.github.io/zeph/guide/mcp.html) |
 | **A2A Protocol** | Agent-to-agent communication via JSON-RPC 2.0 with SSE streaming, delegated task inference through agent pipeline | [A2A](https://bug-ops.github.io/zeph/guide/a2a.html) |
 | **Model Orchestrator** | Route tasks to different providers with fallback chains | [Orchestrator](https://bug-ops.github.io/zeph/guide/orchestrator.html) |
 | **Self-Learning** | Skills evolve via failure detection and LLM-generated improvements | [Self-Learning](https://bug-ops.github.io/zeph/guide/self-learning.html) |
 | **TUI Dashboard** | ratatui terminal UI with markdown rendering, deferred model warmup, scrollbar, mouse scroll, thinking blocks, conversation history, splash screen, live metrics, message queueing (max 10, FIFO with Ctrl+K clear) | [TUI](https://bug-ops.github.io/zeph/guide/tui.html) |
 | **Multi-Channel I/O** | CLI, Telegram, and TUI with streaming support | [Channels](https://bug-ops.github.io/zeph/guide/channels.html) |
-| **Defense-in-Depth** | Shell sandbox, file sandbox with path traversal protection, command filter, secret redaction, audit log, SSRF protection, doom-loop detection | [Security](https://bug-ops.github.io/zeph/security.html) |
+| **Defense-in-Depth** | Shell sandbox with relative path traversal detection, file sandbox, command filter, secret redaction (Google/GitLab patterns), audit log, SSRF protection (agent + MCP), rate limiter TTL eviction, doom-loop detection | [Security](https://bug-ops.github.io/zeph/security.html) |
 
 ## Architecture
 
@@ -120,7 +120,7 @@ zeph (binary) — bootstrap, AnyChannel dispatch, vault resolution (anyhow for t
 └── zeph-tui        — ratatui TUI dashboard with live agent metrics (optional)
 ```
 
-**Error handling:** Typed errors throughout all library crates -- `AgentError` (7 variants), `ChannelError` (4 variants), `LlmError`, `MemoryError`, `SkillError`. `anyhow` is used only in `main.rs` for top-level orchestration.
+**Error handling:** Typed errors throughout all library crates -- `AgentError` (7 variants), `ChannelError` (4 variants), `LlmError`, `MemoryError`, `SkillError`. `anyhow` is used only in `main.rs` for top-level orchestration. Shared Qdrant operations are consolidated in the `QdrantOps` helper trait, and `AnyProvider` dispatch is deduplicated via the `delegate_provider!` macro.
 
 **Agent decomposition:** The agent module in `zeph-core` is split into 7 submodules (`mod.rs`, `context.rs`, `streaming.rs`, `persistence.rs`, `learning.rs`, `mcp.rs`, `index.rs`) with 5 inner field-grouping structs (`MemoryState`, `SkillState`, `ContextState`, `McpState`, `IndexState`).
 
 
@@ -20,6 +20,7 @@ subtle = { workspace = true, optional = true }
 serde_json.workspace = true
 thiserror.workspace = true
 tokio = { workspace = true, features = ["net", "sync"] }
+tokio-util.workspace = true
 url.workspace = true
 tokio-stream.workspace = true
 tower = { workspace = true, optional = true }
 
@@ -38,7 +38,7 @@ struct RateLimitState {
 fn spawn_eviction_task(counters: Arc<Mutex<HashMap<IpAddr, (u32, Instant)>>>) {
     tokio::spawn(async move {
         let mut interval = tokio::time::interval(EVICTION_INTERVAL);
-        interval.tick().await; // skip immediate first tick
+        interval.tick().await;
         loop {
             interval.tick().await;
             let now = Instant::now();
@@ -133,7 +133,18 @@ async fn rate_limit_middleware(
     let mut counters = state.counters.lock().await;
 
     if counters.len() >= MAX_RATE_LIMIT_ENTRIES && !counters.contains_key(&ip) {
-        counters.clear();
+        let before_eviction = counters.len();
+        counters.retain(|_, (_, ts)| now.duration_since(*ts) < RATE_WINDOW);
+        let after_eviction = counters.len();
+
+        if after_eviction >= MAX_RATE_LIMIT_ENTRIES {
+            tracing::warn!(
+                before = before_eviction,
+                after = after_eviction,
+                limit = MAX_RATE_LIMIT_ENTRIES,
+                "rate limiter still at capacity after stale entry eviction"
+            );
+        }
     }
 
     let entry = counters.entry(ip).or_insert((0, now));
 
@@ -65,6 +65,7 @@ impl<P: LlmProvider + Clone + 'static, C: Channel, T: ToolExecutor> Agent<P, C,
                 Role::Assistant => "assistant",
                 Role::System => "system",
             };
+            // write! to String never fails, safe to ignore
             let _ = write!(history_text, "[{role}]: {}", m.content);
         }
 
@@ -530,20 +531,23 @@ impl<P: LlmProvider + Clone + 'static, C: Channel, T: ToolExecutor> Agent<P, C,
         #[cfg(feature = "index")]
         self.remove_code_context_messages();
 
+        // Own the query to satisfy Send bounds when agent.run() is spawned
+        let query = query.to_owned();
+
         // Fetch all context sources concurrently
         #[cfg(not(feature = "index"))]
         let (summaries_msg, cross_session_msg, recall_msg) = tokio::try_join!(
             Self::fetch_summaries(&self.memory_state, alloc.summaries),
-            Self::fetch_cross_session(&self.memory_state, query, alloc.cross_session),
-            Self::fetch_semantic_recall(&self.memory_state, query, alloc.semantic_recall),
+            Self::fetch_cross_session(&self.memory_state, &query, alloc.cross_session),
+            Self::fetch_semantic_recall(&self.memory_state, &query, alloc.semantic_recall),
         )?;
 
         #[cfg(feature = "index")]
         let (summaries_msg, cross_session_msg, recall_msg, code_rag_text) = tokio::try_join!(
             Self::fetch_summaries(&self.memory_state, alloc.summaries),
-            Self::fetch_cross_session(&self.memory_state, query, alloc.cross_session),
-            Self::fetch_semantic_recall(&self.memory_state, query, alloc.semantic_recall),
-            Self::fetch_code_rag(&self.index, query, alloc.code_context),
+            Self::fetch_cross_session(&self.memory_state, &query, alloc.cross_session),
+            Self::fetch_semantic_recall(&self.memory_state, &query, alloc.semantic_recall),
+            Self::fetch_code_rag(&self.index, &query, alloc.code_context),
         )?;
 
         // Insert fetched messages (order: recall, cross-session, summaries at position 1)