Skip to content

Commit aa8bfb8

Browse files
authored
release: v0.9.7 (#333)
* release: prepare v0.9.7 * fix: resolve Send bounds for try_join! with tokio::spawn in TUI mode Run agent future directly in select! instead of spawning to avoid Send lifetime issues with try_join! captured references. * fix: address code review findings - Rate limiter: evict stale entries instead of clearing all - SSRF: use tokio::net::lookup_host instead of blocking DNS - SSRF: add IPv6-mapped IPv4, unique local, link-local checks - Shell sandbox: cache current_dir at function start - Context: document safe write! error ignore - Main: document Box::pin usage for large_futures lint
1 parent 6209781 commit aa8bfb8

File tree

19 files changed

+203
-79
lines changed

19 files changed

+203
-79
lines changed

CHANGELOG.md

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,34 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
66

77
## [Unreleased]
88

9+
## [0.9.7] - 2026-02-15
10+
11+
### Performance
12+
- Token estimation uses `len() / 3` for improved accuracy (#328)
13+
- Explicit tokio feature selection replacing broad feature gates (#326)
14+
- Concurrent skill embedding for faster startup (#327)
15+
- Pre-allocate strings in hot paths to reduce allocations (#329)
16+
- Parallel context building via `try_join!` (#331)
17+
- Criterion benchmark suite for core operations (#330)
18+
19+
### Security
20+
- Path traversal protection in shell sandbox (#325)
21+
- Canonical path validation in skill loader (#322)
22+
- SSRF protection for MCP server connections (#323)
23+
- Remove MySQL/RSA vulnerable transitive dependencies (#324)
24+
- Secret redaction patterns for Google and GitLab tokens (#320)
25+
- TTL-based eviction for rate limiter entries (#321)
26+
27+
### Changed
28+
- `QdrantOps` shared helper trait for Qdrant collection operations (#304)
29+
- `delegate_provider!` macro replacing boilerplate provider delegation (#303)
30+
- Remove `TuiError` in favor of unified error handling (#302)
31+
- Generic `recv_optional` replacing per-channel optional receive logic (#301)
32+
33+
### Dependencies
34+
- Upgraded rmcp to 0.15, toml to 1.0, uuid to 1.21 (#296)
35+
- Cleaned up deny.toml advisory and license configuration (#312)
36+
937
## [0.9.6] - 2026-02-15
1038

1139
### Changed
@@ -720,7 +748,8 @@ let agent = Agent::new(provider, channel, &skills_prompt, executor);
720748
- Agent calls channel.send_typing() before each LLM request
721749
- Agent::run() uses tokio::select! to race channel messages against shutdown signal
722750

723-
[Unreleased]: https://github.com/bug-ops/zeph/compare/v0.9.6...HEAD
751+
[Unreleased]: https://github.com/bug-ops/zeph/compare/v0.9.7...HEAD
752+
[0.9.7]: https://github.com/bug-ops/zeph/compare/v0.9.6...v0.9.7
724753
[0.9.6]: https://github.com/bug-ops/zeph/compare/v0.9.5...v0.9.6
725754
[0.9.5]: https://github.com/bug-ops/zeph/compare/v0.9.4...v0.9.5
726755
[0.9.4]: https://github.com/bug-ops/zeph/compare/v0.9.3...v0.9.4

Cargo.lock

Lines changed: 12 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ resolver = "3"
55
[workspace.package]
66
edition = "2024"
77
rust-version = "1.88"
8-
version = "0.9.6"
8+
version = "0.9.7"
99
authors = ["bug-ops"]
1010
license = "MIT"
1111
repository = "https://github.com/bug-ops/zeph"
@@ -50,6 +50,7 @@ thiserror = "2.0"
5050
tokenizers = { version = "0.22", default-features = false, features = ["fancy-regex"] }
5151
tokio = "1"
5252
tokio-stream = "0.1"
53+
tokio-util = "0.7"
5354
toml = "1.0"
5455
tower = "0.5"
5556
tower-http = "0.6"
@@ -59,16 +60,16 @@ tree-sitter = "0.26"
5960
unicode-width = "0.2"
6061
url = "2.5"
6162
uuid = "1.21"
62-
zeph-a2a = { path = "crates/zeph-a2a", version = "0.9.6" }
63-
zeph-channels = { path = "crates/zeph-channels", version = "0.9.6" }
64-
zeph-core = { path = "crates/zeph-core", version = "0.9.6" }
65-
zeph-index = { path = "crates/zeph-index", version = "0.9.6" }
66-
zeph-llm = { path = "crates/zeph-llm", version = "0.9.6" }
67-
zeph-mcp = { path = "crates/zeph-mcp", version = "0.9.6" }
68-
zeph-memory = { path = "crates/zeph-memory", version = "0.9.6" }
69-
zeph-skills = { path = "crates/zeph-skills", version = "0.9.6" }
70-
zeph-tools = { path = "crates/zeph-tools", version = "0.9.6" }
71-
zeph-tui = { path = "crates/zeph-tui", version = "0.9.6" }
63+
zeph-a2a = { path = "crates/zeph-a2a", version = "0.9.7" }
64+
zeph-channels = { path = "crates/zeph-channels", version = "0.9.7" }
65+
zeph-core = { path = "crates/zeph-core", version = "0.9.7" }
66+
zeph-index = { path = "crates/zeph-index", version = "0.9.7" }
67+
zeph-llm = { path = "crates/zeph-llm", version = "0.9.7" }
68+
zeph-mcp = { path = "crates/zeph-mcp", version = "0.9.7" }
69+
zeph-memory = { path = "crates/zeph-memory", version = "0.9.7" }
70+
zeph-skills = { path = "crates/zeph-skills", version = "0.9.7" }
71+
zeph-tools = { path = "crates/zeph-tools", version = "0.9.7" }
72+
zeph-tui = { path = "crates/zeph-tui", version = "0.9.7" }
7273

7374
[workspace.lints.clippy]
7475
all = "warn"

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ Lightweight AI agent that routes tasks across **Ollama, Claude, OpenAI, and Hugg
1515

1616
## Why Zeph
1717

18-
**Token-efficient by design.** Most agent frameworks inject every tool and instruction into every prompt. Zeph embeds skills and MCP tools as vectors, then selects only the top-K relevant ones per query via cosine similarity. Prompt size stays O(K) not O(N) regardless of how many capabilities are installed.
18+
**Token-efficient by design.** Most agent frameworks inject every tool and instruction into every prompt. Zeph embeds skills and MCP tools as vectors (with concurrent embedding via `buffer_unordered`), then selects only the top-K relevant ones per query via cosine similarity. Prompt size stays O(K) -- not O(N) -- regardless of how many capabilities are installed.
1919

20-
**Intelligent context management.** Two-tier context pruning: Tier 1 selectively removes old tool outputs (clearing bodies from memory after persisting to SQLite) before falling back to Tier 2 LLM-based compaction, reducing unnecessary LLM calls. A token-based protection zone preserves recent context from pruning. Cross-session memory transfers knowledge between conversations with relevance filtering. Proportional budget allocation (8% summaries, 8% semantic recall, 4% cross-session, 30% code context, 50% recent history) keeps conversations efficient. Tool outputs are truncated at 30K chars with optional LLM-based summarization for large outputs. Doom-loop detection breaks runaway tool cycles after 3 identical consecutive outputs, with configurable iteration limits (default 10). ZEPH.md project config discovery walks up the directory tree and injects project-specific context when available. Config hot-reload applies runtime-safe fields (timeouts, security, memory limits) on file change without restart.
20+
**Intelligent context management.** Two-tier context pruning: Tier 1 selectively removes old tool outputs (clearing bodies from memory after persisting to SQLite) before falling back to Tier 2 LLM-based compaction, reducing unnecessary LLM calls. A token-based protection zone preserves recent context from pruning. Context sources are prepared in parallel via `try_join!`, and token counts are estimated with an optimized byte-length heuristic. Cross-session memory transfers knowledge between conversations with relevance filtering. Proportional budget allocation (8% summaries, 8% semantic recall, 4% cross-session, 30% code context, 50% recent history) keeps conversations efficient. Tool outputs are truncated at 30K chars with optional LLM-based summarization for large outputs. Doom-loop detection breaks runaway tool cycles after 3 identical consecutive outputs, with configurable iteration limits (default 10). ZEPH.md project config discovery walks up the directory tree and injects project-specific context when available. Config hot-reload applies runtime-safe fields (timeouts, security, memory limits) on file change without restart.
2121

2222
**Run anywhere.** Local models via Ollama or Candle (GGUF with Metal/CUDA), cloud APIs (Claude, OpenAI, GPT-compatible endpoints like Together AI and Groq), or all of them at once through the multi-model orchestrator with automatic fallback chains.
2323

24-
**Production-ready security.** Shell sandboxing with path restrictions, pattern-based permission policy per tool, destructive command confirmation, file operation sandbox with path traversal protection, tool output overflow-to-file (with LLM-accessible paths), secret redaction, audit logging, SSRF protection, and Trivy-scanned container images with 0 HIGH/CRITICAL CVEs.
24+
**Production-ready security.** Shell sandboxing with path restrictions and relative path traversal detection, pattern-based permission policy per tool, destructive command confirmation, file operation sandbox with path traversal protection, tool output overflow-to-file (with LLM-accessible paths), secret redaction (AWS, OpenAI, Anthropic, Google, GitLab), audit logging, SSRF protection (including MCP client), rate limiter with TTL-based eviction, and Trivy-scanned container images with 0 HIGH/CRITICAL CVEs.
2525

2626
**Self-improving.** Skills evolve through failure detection, self-reflection, and LLM-generated improvements — with optional manual approval before activation.
2727

@@ -94,13 +94,13 @@ cargo build --release --features tui
9494
| **Context Engineering** | Two-tier context pruning (selective tool-output pruning before LLM compaction), semantic recall injection, proportional budget allocation, token-based protection zone for recent context, config hot-reload | [Context](https://bug-ops.github.io/zeph/guide/context.html) · [Configuration](https://bug-ops.github.io/zeph/getting-started/configuration.html) |
9595
| **Semantic Memory** | SQLite + Qdrant vector search for contextual recall | [Memory](https://bug-ops.github.io/zeph/guide/semantic-memory.html) |
9696
| **Tool Permissions** | Pattern-based permission policy (allow/ask/deny) with glob matching per tool, denied tools excluded from prompts | [Tools](https://bug-ops.github.io/zeph/guide/tools.html) |
97-
| **MCP Client** | Connect external tool servers (stdio + HTTP), unified matching | [MCP](https://bug-ops.github.io/zeph/guide/mcp.html) |
97+
| **MCP Client** | Connect external tool servers (stdio + HTTP), unified matching, SSRF protection | [MCP](https://bug-ops.github.io/zeph/guide/mcp.html) |
9898
| **A2A Protocol** | Agent-to-agent communication via JSON-RPC 2.0 with SSE streaming, delegated task inference through agent pipeline | [A2A](https://bug-ops.github.io/zeph/guide/a2a.html) |
9999
| **Model Orchestrator** | Route tasks to different providers with fallback chains | [Orchestrator](https://bug-ops.github.io/zeph/guide/orchestrator.html) |
100100
| **Self-Learning** | Skills evolve via failure detection and LLM-generated improvements | [Self-Learning](https://bug-ops.github.io/zeph/guide/self-learning.html) |
101101
| **TUI Dashboard** | ratatui terminal UI with markdown rendering, deferred model warmup, scrollbar, mouse scroll, thinking blocks, conversation history, splash screen, live metrics, message queueing (max 10, FIFO with Ctrl+K clear) | [TUI](https://bug-ops.github.io/zeph/guide/tui.html) |
102102
| **Multi-Channel I/O** | CLI, Telegram, and TUI with streaming support | [Channels](https://bug-ops.github.io/zeph/guide/channels.html) |
103-
| **Defense-in-Depth** | Shell sandbox, file sandbox with path traversal protection, command filter, secret redaction, audit log, SSRF protection, doom-loop detection | [Security](https://bug-ops.github.io/zeph/security.html) |
103+
| **Defense-in-Depth** | Shell sandbox with relative path traversal detection, file sandbox, command filter, secret redaction (Google/GitLab patterns), audit log, SSRF protection (agent + MCP), rate limiter TTL eviction, doom-loop detection | [Security](https://bug-ops.github.io/zeph/security.html) |
104104

105105
## Architecture
106106

@@ -120,7 +120,7 @@ zeph (binary) — bootstrap, AnyChannel dispatch, vault resolution (anyhow for t
120120
└── zeph-tui — ratatui TUI dashboard with live agent metrics (optional)
121121
```
122122

123-
**Error handling:** Typed errors throughout all library crates -- `AgentError` (7 variants), `ChannelError` (4 variants), `LlmError`, `MemoryError`, `SkillError`. `anyhow` is used only in `main.rs` for top-level orchestration.
123+
**Error handling:** Typed errors throughout all library crates -- `AgentError` (7 variants), `ChannelError` (4 variants), `LlmError`, `MemoryError`, `SkillError`. `anyhow` is used only in `main.rs` for top-level orchestration. Shared Qdrant operations consolidated via `QdrantOps` helper. `AnyProvider` dispatch deduplicated via `delegate_provider!` macro.
124124

125125
**Agent decomposition:** The agent module in `zeph-core` is split into 7 submodules (`mod.rs`, `context.rs`, `streaming.rs`, `persistence.rs`, `learning.rs`, `mcp.rs`, `index.rs`) with 5 inner field-grouping structs (`MemoryState`, `SkillState`, `ContextState`, `McpState`, `IndexState`).
126126

crates/zeph-a2a/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ subtle = { workspace = true, optional = true }
2020
serde_json.workspace = true
2121
thiserror.workspace = true
2222
tokio = { workspace = true, features = ["net", "sync"] }
23+
tokio-util.workspace = true
2324
url.workspace = true
2425
tokio-stream.workspace = true
2526
tower = { workspace = true, optional = true }

crates/zeph-a2a/src/server/router.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ struct RateLimitState {
3838
fn spawn_eviction_task(counters: Arc<Mutex<HashMap<IpAddr, (u32, Instant)>>>) {
3939
tokio::spawn(async move {
4040
let mut interval = tokio::time::interval(EVICTION_INTERVAL);
41-
interval.tick().await; // skip immediate first tick
41+
interval.tick().await;
4242
loop {
4343
interval.tick().await;
4444
let now = Instant::now();
@@ -133,7 +133,18 @@ async fn rate_limit_middleware(
133133
let mut counters = state.counters.lock().await;
134134

135135
if counters.len() >= MAX_RATE_LIMIT_ENTRIES && !counters.contains_key(&ip) {
136-
counters.clear();
136+
let before_eviction = counters.len();
137+
counters.retain(|_, (_, ts)| now.duration_since(*ts) < RATE_WINDOW);
138+
let after_eviction = counters.len();
139+
140+
if after_eviction >= MAX_RATE_LIMIT_ENTRIES {
141+
tracing::warn!(
142+
before = before_eviction,
143+
after = after_eviction,
144+
limit = MAX_RATE_LIMIT_ENTRIES,
145+
"rate limiter still at capacity after stale entry eviction"
146+
);
147+
}
137148
}
138149

139150
let entry = counters.entry(ip).or_insert((0, now));

crates/zeph-core/src/agent/context.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ impl<P: LlmProvider + Clone + 'static, C: Channel, T: ToolExecutor> Agent<P, C,
6565
Role::Assistant => "assistant",
6666
Role::System => "system",
6767
};
68+
// write! to String never fails, safe to ignore
6869
let _ = write!(history_text, "[{role}]: {}", m.content);
6970
}
7071

@@ -530,20 +531,23 @@ impl<P: LlmProvider + Clone + 'static, C: Channel, T: ToolExecutor> Agent<P, C,
530531
#[cfg(feature = "index")]
531532
self.remove_code_context_messages();
532533

534+
// Own the query to satisfy Send bounds when agent.run() is spawned
535+
let query = query.to_owned();
536+
533537
// Fetch all context sources concurrently
534538
#[cfg(not(feature = "index"))]
535539
let (summaries_msg, cross_session_msg, recall_msg) = tokio::try_join!(
536540
Self::fetch_summaries(&self.memory_state, alloc.summaries),
537-
Self::fetch_cross_session(&self.memory_state, query, alloc.cross_session),
538-
Self::fetch_semantic_recall(&self.memory_state, query, alloc.semantic_recall),
541+
Self::fetch_cross_session(&self.memory_state, &query, alloc.cross_session),
542+
Self::fetch_semantic_recall(&self.memory_state, &query, alloc.semantic_recall),
539543
)?;
540544

541545
#[cfg(feature = "index")]
542546
let (summaries_msg, cross_session_msg, recall_msg, code_rag_text) = tokio::try_join!(
543547
Self::fetch_summaries(&self.memory_state, alloc.summaries),
544-
Self::fetch_cross_session(&self.memory_state, query, alloc.cross_session),
545-
Self::fetch_semantic_recall(&self.memory_state, query, alloc.semantic_recall),
546-
Self::fetch_code_rag(&self.index, query, alloc.code_context),
548+
Self::fetch_cross_session(&self.memory_state, &query, alloc.cross_session),
549+
Self::fetch_semantic_recall(&self.memory_state, &query, alloc.semantic_recall),
550+
Self::fetch_code_rag(&self.index, &query, alloc.code_context),
547551
)?;
548552

549553
// Insert fetched messages (order: recall, cross-session, summaries at position 1)

0 commit comments

Comments
 (0)