Skip to content

Commit 3086048

Browse files
committed
feat: Improve formatting consistency, add observability playground, and enhance documentation
- Standardized line formatting across tests, examples, and main codebase for improved readability. - Added observability playground powered by Grafana LGTM for OTLP traces and metrics. - Updated `README.md`, `examples/README.md`, and `docs/GETTING_STARTED.md` to document the playground setup and usage. - Introduced new `playground_pipeline` example showcasing observability-first agent workflows. - Added functional bash scripts (`playground_up.sh`, `up.sh`) and Docker Compose configuration to support seamless playground setup. - Enhanced Grafana health checks and KVM artifact preparation with detailed logs.
1 parent 9e4e313 commit 3086048

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+1939
-677
lines changed

Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@ opentelemetry = ["dep:opentelemetry", "dep:opentelemetry_sdk", "dep:opentelemetr
7676
name = "voidbox"
7777
path = "src/bin/voidbox.rs"
7878

79+
80+
[[example]]
81+
name = "playground_pipeline"
82+
path = "playground/playground_pipeline.rs"
83+
7984
[[test]]
8085
name = "e2e_skill_pipeline"
8186
path = "tests/e2e/skill_pipeline.rs"

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,25 @@ VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz \
7373
cargo test --test e2e_telemetry -- --ignored --test-threads=1
7474
```
7575

76+
## Observability Playground
77+
78+
One command boots Grafana LGTM and runs a pipeline that exports traces and metrics over OTLP.
79+
80+
```bash
81+
playground/up.sh
82+
```
83+
84+
The script starts the Docker Compose services, then runs:
85+
86+
```bash
87+
cargo run --example playground_pipeline --features opentelemetry
88+
```
89+
90+
It then prints the Grafana URL and service filter hints.
91+
It also prompts for a provider mode (`Anthropic`, `Ollama`, `Mock`) and prepares the matching initramfs automatically:
92+
- `Mock` -> `scripts/build_test_image.sh` (`/tmp/void-box-test-rootfs.cpio.gz`, claudio mock)
93+
- `Anthropic` / `Ollama` -> `scripts/build_guest_image.sh` (`/tmp/void-box-rootfs.cpio.gz`)
94+
7695
## Examples
7796

7897
- `boot_diag`: VM boot diagnostics
@@ -82,6 +101,7 @@ cargo test --test e2e_telemetry -- --ignored --test-threads=1
82101
- `remote_skills`: pulls skills from remote repositories
83102
- `claude_workflow`: workflow plan/apply pattern in sandbox
84103
- `claude_in_voidbox_example`: interactive Claude-style session
104+
- `playground_pipeline`: observability-first pipeline demo for Grafana
85105

86106
See `examples/README.md` for per-example notes.
87107

claudio/src/main.rs

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,7 @@ impl DiscoveredSkills {
101101
if let Ok(entries) = fs::read_dir(skills_dir) {
102102
let mut files: Vec<_> = entries
103103
.filter_map(|e| e.ok())
104-
.filter(|e| {
105-
e.path()
106-
.extension()
107-
.map(|ext| ext == "md")
108-
.unwrap_or(false)
109-
})
104+
.filter(|e| e.path().extension().map(|ext| ext == "md").unwrap_or(false))
110105
.collect();
111106
files.sort_by_key(|e| e.file_name());
112107

@@ -121,7 +116,12 @@ impl DiscoveredSkills {
121116
// Read first non-empty, non-frontmatter line as title
122117
let title = read_skill_title(&path);
123118

124-
eprintln!("claudio: discovered skill '{}' ({}) -> {}", name, title, path.display());
119+
eprintln!(
120+
"claudio: discovered skill '{}' ({}) -> {}",
121+
name,
122+
title,
123+
path.display()
124+
);
125125
result.skill_files.push(name);
126126
result.skill_titles.push(title);
127127
}
@@ -162,7 +162,6 @@ impl DiscoveredSkills {
162162

163163
result
164164
}
165-
166165
}
167166

168167
/// Read the first meaningful line from a SKILL.md (skip frontmatter).
@@ -249,9 +248,7 @@ impl Config {
249248
.ok()
250249
.filter(|s| !s.is_empty()),
251250
session_id: format!("mock_sess_{}", std::process::id()),
252-
traceparent: env::var("TRACEPARENT")
253-
.ok()
254-
.filter(|s| !s.is_empty()),
251+
traceparent: env::var("TRACEPARENT").ok().filter(|s| !s.is_empty()),
255252
}
256253
}
257254

docs/GETTING_STARTED.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,20 @@ VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz \
7171
cargo test --test e2e_telemetry -- --ignored --test-threads=1
7272
```
7373

74+
## Grafana Playground
75+
76+
To bring up Grafana + traces + metrics in one command:
77+
78+
```bash
79+
playground/up.sh
80+
```
81+
82+
This uses the OTLP-enabled example:
83+
84+
```bash
85+
cargo run --example playground_pipeline --features opentelemetry
86+
```
87+
7488
## Core Test Commands
7589

7690
```bash

docs/observability.md

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,50 @@
1-
# Observability for Claude-in-void runs
1+
# Observability
22

3-
## User-facing summary
3+
`void-box` captures traces, metrics, and structured logs for workflow runs.
44

5-
Each workflow run returns an **`ObservedResult<WorkflowResult>`**:
5+
## What You Get Per Run
66

7-
- **`result`**: `WorkflowResult` with `output`, `exit_code`, `step_outputs` (per-step stdout/stderr/exit_code), and `duration_ms`.
8-
- **`traces()`**: Spans for the workflow and each step (name, status, duration, attributes such as `stdout_bytes` / `stderr_bytes`).
9-
- **`metrics()`**: `MetricsSnapshot` with step durations (e.g. for dashboards or alerting).
10-
- **`logs()`**: Structured log entries (workflow/step start and finish, errors).
7+
- `ObservedResult.result`: workflow output, step outputs, exit code, duration
8+
- `ObservedResult.traces()`: workflow + step spans
9+
- `ObservedResult.metrics()`: in-memory metrics snapshot
10+
- `ObservedResult.logs()`: structured run logs
1111

12-
Use this to present a clear picture of each run: success/failure, which step failed, how long each step took, and optional export to OTLP for traces.
12+
## OTLP Export
1313

14-
## What is captured
14+
When configured, traces and metrics are exported via OTLP.
1515

16-
- **Per-step spans**: Created by the scheduler for each step. On success, the span records `stdout_bytes`; on failure, `stderr_bytes` and error status. Duration is always recorded and sent to the metrics collector.
17-
- **Workflow span**: Parent of all step spans; total duration.
18-
- **Logs**: Info at workflow start, debug at step start/finish, error when a step fails.
19-
- **Metrics**: Step duration (and any custom counters if added). Use `ObserveConfig::test()` for in-memory capture in tests; use `ObserveConfig::default()` and `.otlp_endpoint(...)` for production trace export.
16+
Required:
2017

21-
## Recording the executed command
18+
- build/run with feature flag: `--features opentelemetry`
19+
- set endpoint env var:
20+
- `VOIDBOX_OTLP_ENDPOINT=http://localhost:4317`
21+
- optional service name:
22+
- `VOIDBOX_SERVICE_NAME=void-box-playground`
2223

23-
`SpanGuard::record_exec(program, args)` exists to record the exact command (e.g. `claude-code plan /workspace`) on a step span. The scheduler does not call it because it does not see the program/args inside the step closure. To have the exec command on spans, either:
24+
Example:
2425

25-
- Thread the observer into `StepContext` and have `ctx.exec` / `ctx.exec_piped` record the command on the current step span, or
26-
- Have step code set a custom attribute via a future API.
26+
```bash
27+
VOIDBOX_OTLP_ENDPOINT=http://localhost:4317 \
28+
VOIDBOX_SERVICE_NAME=void-box-playground \
29+
cargo run --example playground_pipeline --features opentelemetry
30+
```
2731

28-
For now, step spans still give you step name, duration, and output sizes for debugging.
32+
## Fastest Grafana Path
33+
34+
Use the one-command playground script:
35+
36+
```bash
37+
playground/up.sh
38+
```
39+
40+
This will:
41+
42+
1. Start Grafana LGTM via Docker Compose
43+
2. Run `playground_pipeline` with OTLP enabled
44+
3. Print Grafana URL and service filter hints
45+
46+
To stop the stack:
47+
48+
```bash
49+
playground/up.sh --down
50+
```

examples/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,11 @@ Interactive/demo style Claude-compatible session.
6464
```bash
6565
cargo run --example claude_in_voidbox_example
6666
```
67+
68+
## playground_pipeline
69+
70+
Observability-first pipeline for Grafana LGTM with OTLP export.
71+
72+
```bash
73+
cargo run --example playground_pipeline --features opentelemetry
74+
```

examples/claude_in_voidbox_example.rs

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ fn try_kvm_sandbox() -> Result<Option<Arc<Sandbox>>, Box<dyn Error>> {
109109
.memory_mb(512)
110110
.vcpus(1)
111111
.kernel(&kernel)
112-
.network(true); // Enable SLIRP networking for API access
112+
.network(true); // Enable SLIRP networking for API access
113113

114114
if let Some(ref p) = initramfs {
115115
b = b.initramfs(p);
@@ -134,16 +134,18 @@ fn try_kvm_sandbox() -> Result<Option<Arc<Sandbox>>, Box<dyn Error>> {
134134
}
135135

136136
/// Run claude-code with stream-json output and parse the result.
137-
async fn run_claude(
138-
sandbox: &Sandbox,
139-
prompt: &str,
140-
) -> Result<ClaudeExecResult, Box<dyn Error>> {
137+
async fn run_claude(sandbox: &Sandbox, prompt: &str) -> Result<ClaudeExecResult, Box<dyn Error>> {
141138
let out = sandbox
142-
.exec("claude-code", &[
143-
"-p", prompt,
144-
"--output-format", "stream-json",
145-
"--dangerously-skip-permissions",
146-
])
139+
.exec(
140+
"claude-code",
141+
&[
142+
"-p",
143+
prompt,
144+
"--output-format",
145+
"stream-json",
146+
"--dangerously-skip-permissions",
147+
],
148+
)
147149
.await?;
148150

149151
if !out.stderr.is_empty() {
@@ -160,10 +162,19 @@ fn print_telemetry(label: &str, result: &ClaudeExecResult) {
160162
println!(" Session: {}", result.session_id);
161163
println!(" Model: {}", result.model);
162164
println!(" Turns: {}", result.num_turns);
163-
println!(" Tokens: {} in / {} out", result.input_tokens, result.output_tokens);
165+
println!(
166+
" Tokens: {} in / {} out",
167+
result.input_tokens, result.output_tokens
168+
);
164169
println!(" Cost: ${:.6}", result.total_cost_usd);
165-
println!(" Duration: {}ms (API: {}ms)", result.duration_ms, result.duration_api_ms);
166-
println!(" Error: {}", if result.is_error { "YES" } else { "no" });
170+
println!(
171+
" Duration: {}ms (API: {}ms)",
172+
result.duration_ms, result.duration_api_ms
173+
);
174+
println!(
175+
" Error: {}",
176+
if result.is_error { "YES" } else { "no" }
177+
);
167178

168179
if !result.tool_calls.is_empty() {
169180
println!(" Tool calls: {}", result.tool_calls.len());
@@ -174,7 +185,13 @@ fn print_telemetry(label: &str, result: &ClaudeExecResult) {
174185
} else {
175186
output_preview.to_string()
176187
};
177-
println!(" [{}] {} (id={}) -> {}", i + 1, tc.tool_name, tc.tool_use_id, output_short);
188+
println!(
189+
" [{}] {} (id={}) -> {}",
190+
i + 1,
191+
tc.tool_name,
192+
tc.tool_use_id,
193+
output_short
194+
);
178195
}
179196
}
180197

@@ -202,7 +219,7 @@ fn maybe_create_otel_spans(result: &ClaudeExecResult) {
202219

203220
if otlp_configured {
204221
let tracer = void_box::observe::tracer::Tracer::new(
205-
void_box::observe::tracer::TracerConfig::in_memory()
222+
void_box::observe::tracer::TracerConfig::in_memory(),
206223
);
207224
void_box::observe::claude::create_otel_spans(result, None, &tracer);
208225
eprintln!(
@@ -247,8 +264,7 @@ async fn demo_multi_turn(sandbox: Arc<Sandbox>) -> Result<(), Box<dyn Error>> {
247264
// Turn 2: ask Claude to apply the plan
248265
let apply_prompt = format!(
249266
"Apply the following plan in {}. Execute each step.\n\n{}",
250-
WORKSPACE,
251-
plan_result.result_text,
267+
WORKSPACE, plan_result.result_text,
252268
);
253269
println!("\nTurn 2: apply\n prompt: {} bytes\n", apply_prompt.len());
254270

@@ -264,7 +280,10 @@ async fn demo_multi_turn(sandbox: Arc<Sandbox>) -> Result<(), Box<dyn Error>> {
264280

265281
println!("\n=== Session Summary ===");
266282
println!(" Total cost: ${:.6}", total_cost);
267-
println!(" Total tokens: {} in / {} out", total_tokens_in, total_tokens_out);
283+
println!(
284+
" Total tokens: {} in / {} out",
285+
total_tokens_in, total_tokens_out
286+
);
268287
println!(" Total tools: {}", total_tools);
269288

270289
if !apply_result.is_error {
@@ -297,7 +316,8 @@ async fn interactive_session(sandbox: Arc<Sandbox>) -> Result<(), Box<dyn Error>
297316
if input.is_empty() {
298317
continue;
299318
}
300-
if input.eq_ignore_ascii_case("quit") || input.eq_ignore_ascii_case("exit") || input == "q" {
319+
if input.eq_ignore_ascii_case("quit") || input.eq_ignore_ascii_case("exit") || input == "q"
320+
{
301321
break;
302322
}
303323

@@ -324,7 +344,10 @@ async fn interactive_session(sandbox: Arc<Sandbox>) -> Result<(), Box<dyn Error>
324344
if turn_count > 0 {
325345
println!("\n=== Session Summary ({} turns) ===", turn_count);
326346
println!(" Total cost: ${:.6}", total_cost);
327-
println!(" Total tokens: {} in / {} out", total_tokens_in, total_tokens_out);
347+
println!(
348+
" Total tokens: {} in / {} out",
349+
total_tokens_in, total_tokens_out
350+
);
328351
}
329352

330353
Ok(())

examples/claude_workflow.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
1818
})
1919
.step("apply", |ctx| async move {
2020
// Pipe plan output into claude-code apply
21-
ctx.exec_piped("claude-code", &["apply", "/workspace"]).await
21+
ctx.exec_piped("claude-code", &["apply", "/workspace"])
22+
.await
2223
})
2324
.pipe("plan", "apply")
2425
.output("apply")
@@ -37,15 +38,22 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
3738
println!("\n=== Step outputs ===");
3839
for (name, out) in &observed.result.step_outputs {
3940
let stdout = String::from_utf8_lossy(&out.stdout);
40-
println!(" {}: exit={} stdout_len={}", name, out.exit_code, stdout.len());
41+
println!(
42+
" {}: exit={} stdout_len={}",
43+
name,
44+
out.exit_code,
45+
stdout.len()
46+
);
4147
if !stdout.is_empty() && stdout.len() <= 200 {
4248
println!(" -> {}", stdout.trim());
4349
}
4450
}
4551

46-
println!("\n=== Observability ({} traces, {} logs) ===",
52+
println!(
53+
"\n=== Observability ({} traces, {} logs) ===",
4754
observed.traces().len(),
48-
observed.logs().len());
55+
observed.logs().len()
56+
);
4957
for span in observed.traces() {
5058
println!(" span: {} status={:?}", span.name, span.status);
5159
}

examples/remote_skills.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,15 @@ async fn main() -> Result<(), Box<dyn Error>> {
9090
println!("--- Building AgentBox with remote skills ---");
9191
println!();
9292

93-
let reasoning = Skill::agent("claude-code")
94-
.description("Autonomous reasoning and code execution");
93+
let reasoning =
94+
Skill::agent("claude-code").description("Autonomous reasoning and code execution");
9595

9696
let mut builder = AgentBox::new("developer")
9797
.skill(reasoning)
9898
.prompt(
9999
"You are a senior developer. Use your brainstorming, debugging, and TDD skills \
100100
to plan a new CLI tool that converts Markdown to HTML. First brainstorm the design, \
101-
then write tests, then implement."
101+
then write tests, then implement.",
102102
)
103103
.mock();
104104

@@ -139,11 +139,15 @@ async fn main() -> Result<(), Box<dyn Error>> {
139139
println!("--- Result ---");
140140
println!(" Box: {}", result.box_name);
141141
println!(" Error: {}", result.claude_result.is_error);
142-
println!(" Tokens: {} in / {} out",
143-
result.claude_result.input_tokens,
144-
result.claude_result.output_tokens);
142+
println!(
143+
" Tokens: {} in / {} out",
144+
result.claude_result.input_tokens, result.claude_result.output_tokens
145+
);
145146
println!();
146-
println!("Done. All {} remote skills were fetched and provisioned.", fetched_count);
147+
println!(
148+
"Done. All {} remote skills were fetched and provisioned.",
149+
fetched_count
150+
);
147151

148152
Ok(())
149153
}

0 commit comments

Comments
 (0)