the-void-ia
diff --git a/‎Cargo.toml‎
Lines changed: 5 additions & 0 deletions b/‎Cargo.toml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 20 additions & 0 deletions b/‎README.md‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎claudio/src/main.rs‎
Lines changed: 8 additions & 11 deletions b/‎claudio/src/main.rs‎
Lines changed: 8 additions & 11 deletions
diff --git a/‎docs/GETTING_STARTED.md‎
Lines changed: 14 additions & 0 deletions b/‎docs/GETTING_STARTED.md‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎docs/observability.md‎
Lines changed: 40 additions & 18 deletions b/‎docs/observability.md‎
Lines changed: 40 additions & 18 deletions
diff --git a/‎examples/README.md‎
Lines changed: 8 additions & 0 deletions b/‎examples/README.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎examples/claude_in_voidbox_example.rs‎
Lines changed: 43 additions & 20 deletions b/‎examples/claude_in_voidbox_example.rs‎
Lines changed: 43 additions & 20 deletions
diff --git a/‎examples/claude_workflow.rs‎
Lines changed: 12 additions & 4 deletions b/‎examples/claude_workflow.rs‎
Lines changed: 12 additions & 4 deletions
diff --git a/‎examples/remote_skills.rs‎
Lines changed: 11 additions & 7 deletions b/‎examples/remote_skills.rs‎
Lines changed: 11 additions & 7 deletions
@@ -76,6 +76,11 @@ opentelemetry = ["dep:opentelemetry", "dep:opentelemetry_sdk", "dep:opentelemetr
 name = "voidbox"
 path = "src/bin/voidbox.rs"
 
+
+[[example]]
+name = "playground_pipeline"
+path = "playground/playground_pipeline.rs"
+
 [[test]]
 name = "e2e_skill_pipeline"
 path = "tests/e2e/skill_pipeline.rs"
 
@@ -73,6 +73,25 @@ VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz \
 cargo test --test e2e_telemetry -- --ignored --test-threads=1
 ```
 
+## Observability Playground
+
+One command boots Grafana LGTM and runs a pipeline that exports traces and metrics over OTLP.
+
+```bash
+playground/up.sh
+```
+
+The script starts the Docker Compose services and then runs:
+
+```bash
+cargo run --example playground_pipeline --features opentelemetry
+```
+
+It then prints the Grafana URL and service filter hints.
+It also asks for a provider mode (`Anthropic`, `Ollama`, `Mock`) and prepares the initramfs automatically:
+- `Mock` -> `scripts/build_test_image.sh` (`/tmp/void-box-test-rootfs.cpio.gz`, claudio mock)
+- `Anthropic` / `Ollama` -> `scripts/build_guest_image.sh` (`/tmp/void-box-rootfs.cpio.gz`)
+
 ## Examples
 
 - `boot_diag`: VM boot diagnostics
@@ -82,6 +101,7 @@ cargo test --test e2e_telemetry -- --ignored --test-threads=1
 - `remote_skills`: pulls skills from remote repositories
 - `claude_workflow`: workflow plan/apply pattern in sandbox
 - `claude_in_voidbox_example`: interactive Claude-style session
+- `playground_pipeline`: observability-first pipeline demo for Grafana
 
 See `examples/README.md` for per-example notes.
 
 
@@ -101,12 +101,7 @@ impl DiscoveredSkills {
             if let Ok(entries) = fs::read_dir(skills_dir) {
                 let mut files: Vec<_> = entries
                     .filter_map(|e| e.ok())
-                    .filter(|e| {
-                        e.path()
-                            .extension()
-                            .map(|ext| ext == "md")
-                            .unwrap_or(false)
-                    })
+                    .filter(|e| e.path().extension().map(|ext| ext == "md").unwrap_or(false))
                     .collect();
                 files.sort_by_key(|e| e.file_name());
 
@@ -121,7 +116,12 @@ impl DiscoveredSkills {
                     // Read first non-empty, non-frontmatter line as title
                     let title = read_skill_title(&path);
 
-                    eprintln!("claudio: discovered skill '{}' ({}) -> {}", name, title, path.display());
+                    eprintln!(
+                        "claudio: discovered skill '{}' ({}) -> {}",
+                        name,
+                        title,
+                        path.display()
+                    );
                     result.skill_files.push(name);
                     result.skill_titles.push(title);
                 }
@@ -162,7 +162,6 @@ impl DiscoveredSkills {
 
         result
     }
-
 }
 
 /// Read the first meaningful line from a SKILL.md (skip frontmatter).
@@ -249,9 +248,7 @@ impl Config {
                 .ok()
                 .filter(|s| !s.is_empty()),
             session_id: format!("mock_sess_{}", std::process::id()),
-            traceparent: env::var("TRACEPARENT")
-                .ok()
-                .filter(|s| !s.is_empty()),
+            traceparent: env::var("TRACEPARENT").ok().filter(|s| !s.is_empty()),
         }
     }
 
 
@@ -71,6 +71,20 @@ VOID_BOX_INITRAMFS=/tmp/void-box-test-rootfs.cpio.gz \
 cargo test --test e2e_telemetry -- --ignored --test-threads=1
 ```
 
+## Grafana Playground
+
+To bring up Grafana + traces + metrics in one command:
+
+```bash
+playground/up.sh
+```
+
+This uses the OTLP-enabled example:
+
+```bash
+cargo run --example playground_pipeline --features opentelemetry
+```
+
 ## Core Test Commands
 
 ```bash
 
@@ -1,28 +1,50 @@
-# Observability for Claude-in-void runs
+# Observability
 
-## User-facing summary
+`void-box` captures traces, metrics, and structured logs for workflow runs.
 
-Each workflow run returns an **`ObservedResult<WorkflowResult>`**:
+## What You Get Per Run
 
-- **`result`**: `WorkflowResult` with `output`, `exit_code`, `step_outputs` (per-step stdout/stderr/exit_code), and `duration_ms`.
-- **`traces()`**: Spans for the workflow and each step (name, status, duration, attributes such as `stdout_bytes` / `stderr_bytes`).
-- **`metrics()`**: `MetricsSnapshot` with step durations (e.g. for dashboards or alerting).
-- **`logs()`**: Structured log entries (workflow/step start and finish, errors).
+- `ObservedResult.result`: workflow output, step outputs, exit code, duration
+- `ObservedResult.traces()`: workflow + step spans
+- `ObservedResult.metrics()`: in-memory metrics snapshot
+- `ObservedResult.logs()`: structured run logs
 
-Use this to present a clear picture of each run: success/failure, which step failed, how long each step took, and optional export to OTLP for traces.
+## OTLP Export
 
-## What is captured
+When configured, traces and metrics are exported via OTLP.
 
-- **Per-step spans**: Created by the scheduler for each step. On success, the span records `stdout_bytes`; on failure, `stderr_bytes` and error status. Duration is always recorded and sent to the metrics collector.
-- **Workflow span**: Parent of all step spans; total duration.
-- **Logs**: Info at workflow start, debug at step start/finish, error when a step fails.
-- **Metrics**: Step duration (and any custom counters if added). Use `ObserveConfig::test()` for in-memory capture in tests; use `ObserveConfig::default()` and `.otlp_endpoint(...)` for production trace export.
+Required:
 
-## Recording the executed command
+- build/run with feature flag: `--features opentelemetry`
+- set endpoint env var:
+  - `VOIDBOX_OTLP_ENDPOINT=http://localhost:4317`
+- optional service name:
+  - `VOIDBOX_SERVICE_NAME=void-box-playground`
 
-`SpanGuard::record_exec(program, args)` exists to record the exact command (e.g. `claude-code plan /workspace`) on a step span. The scheduler does not call it because it does not see the program/args inside the step closure. To have the exec command on spans, either:
+Example:
 
-- Thread the observer into `StepContext` and have `ctx.exec` / `ctx.exec_piped` record the command on the current step span, or
-- Have step code set a custom attribute via a future API.
+```bash
+VOIDBOX_OTLP_ENDPOINT=http://localhost:4317 \
+VOIDBOX_SERVICE_NAME=void-box-playground \
+cargo run --example playground_pipeline --features opentelemetry
+```
 
-For now, step spans still give you step name, duration, and output sizes for debugging.
+## Fastest Grafana Path
+
+Use the one-command playground script:
+
+```bash
+playground/up.sh
+```
+
+This will:
+
+1. Start Grafana LGTM via Docker Compose
+2. Run `playground_pipeline` with OTLP enabled
+3. Print Grafana URL and service filter hints
+
+To stop the stack:
+
+```bash
+playground/up.sh --down
+```
@@ -64,3 +64,11 @@ Interactive/demo style Claude-compatible session.
 ```bash
 cargo run --example claude_in_voidbox_example
 ```
+
+## playground_pipeline
+
+Observability-first pipeline for Grafana LGTM with OTLP export.
+
+```bash
+cargo run --example playground_pipeline --features opentelemetry
+```
@@ -109,7 +109,7 @@ fn try_kvm_sandbox() -> Result<Option<Arc<Sandbox>>, Box<dyn Error>> {
         .memory_mb(512)
         .vcpus(1)
         .kernel(&kernel)
-        .network(true);  // Enable SLIRP networking for API access
+        .network(true); // Enable SLIRP networking for API access
 
     if let Some(ref p) = initramfs {
         b = b.initramfs(p);
@@ -134,16 +134,18 @@ fn try_kvm_sandbox() -> Result<Option<Arc<Sandbox>>, Box<dyn Error>> {
 }
 
 /// Run claude-code with stream-json output and parse the result.
-async fn run_claude(
-    sandbox: &Sandbox,
-    prompt: &str,
-) -> Result<ClaudeExecResult, Box<dyn Error>> {
+async fn run_claude(sandbox: &Sandbox, prompt: &str) -> Result<ClaudeExecResult, Box<dyn Error>> {
     let out = sandbox
-        .exec("claude-code", &[
-            "-p", prompt,
-            "--output-format", "stream-json",
-            "--dangerously-skip-permissions",
-        ])
+        .exec(
+            "claude-code",
+            &[
+                "-p",
+                prompt,
+                "--output-format",
+                "stream-json",
+                "--dangerously-skip-permissions",
+            ],
+        )
         .await?;
 
     if !out.stderr.is_empty() {
@@ -160,10 +162,19 @@ fn print_telemetry(label: &str, result: &ClaudeExecResult) {
     println!("  Session:     {}", result.session_id);
     println!("  Model:       {}", result.model);
     println!("  Turns:       {}", result.num_turns);
-    println!("  Tokens:      {} in / {} out", result.input_tokens, result.output_tokens);
+    println!(
+        "  Tokens:      {} in / {} out",
+        result.input_tokens, result.output_tokens
+    );
     println!("  Cost:        ${:.6}", result.total_cost_usd);
-    println!("  Duration:    {}ms (API: {}ms)", result.duration_ms, result.duration_api_ms);
-    println!("  Error:       {}", if result.is_error { "YES" } else { "no" });
+    println!(
+        "  Duration:    {}ms (API: {}ms)",
+        result.duration_ms, result.duration_api_ms
+    );
+    println!(
+        "  Error:       {}",
+        if result.is_error { "YES" } else { "no" }
+    );
 
     if !result.tool_calls.is_empty() {
         println!("  Tool calls:  {}", result.tool_calls.len());
@@ -174,7 +185,13 @@ fn print_telemetry(label: &str, result: &ClaudeExecResult) {
             } else {
                 output_preview.to_string()
             };
-            println!("    [{}] {} (id={}) -> {}", i + 1, tc.tool_name, tc.tool_use_id, output_short);
+            println!(
+                "    [{}] {} (id={}) -> {}",
+                i + 1,
+                tc.tool_name,
+                tc.tool_use_id,
+                output_short
+            );
         }
     }
 
@@ -202,7 +219,7 @@ fn maybe_create_otel_spans(result: &ClaudeExecResult) {
 
     if otlp_configured {
         let tracer = void_box::observe::tracer::Tracer::new(
-            void_box::observe::tracer::TracerConfig::in_memory()
+            void_box::observe::tracer::TracerConfig::in_memory(),
         );
         void_box::observe::claude::create_otel_spans(result, None, &tracer);
         eprintln!(
@@ -247,8 +264,7 @@ async fn demo_multi_turn(sandbox: Arc<Sandbox>) -> Result<(), Box<dyn Error>> {
     // Turn 2: ask Claude to apply the plan
     let apply_prompt = format!(
         "Apply the following plan in {}. Execute each step.\n\n{}",
-        WORKSPACE,
-        plan_result.result_text,
+        WORKSPACE, plan_result.result_text,
     );
     println!("\nTurn 2: apply\n  prompt: {} bytes\n", apply_prompt.len());
 
@@ -264,7 +280,10 @@ async fn demo_multi_turn(sandbox: Arc<Sandbox>) -> Result<(), Box<dyn Error>> {
 
     println!("\n=== Session Summary ===");
     println!("  Total cost:   ${:.6}", total_cost);
-    println!("  Total tokens: {} in / {} out", total_tokens_in, total_tokens_out);
+    println!(
+        "  Total tokens: {} in / {} out",
+        total_tokens_in, total_tokens_out
+    );
     println!("  Total tools:  {}", total_tools);
 
     if !apply_result.is_error {
@@ -297,7 +316,8 @@ async fn interactive_session(sandbox: Arc<Sandbox>) -> Result<(), Box<dyn Error>
         if input.is_empty() {
             continue;
         }
-        if input.eq_ignore_ascii_case("quit") || input.eq_ignore_ascii_case("exit") || input == "q" {
+        if input.eq_ignore_ascii_case("quit") || input.eq_ignore_ascii_case("exit") || input == "q"
+        {
             break;
         }
 
@@ -324,7 +344,10 @@ async fn interactive_session(sandbox: Arc<Sandbox>) -> Result<(), Box<dyn Error>
     if turn_count > 0 {
         println!("\n=== Session Summary ({} turns) ===", turn_count);
         println!("  Total cost:   ${:.6}", total_cost);
-        println!("  Total tokens: {} in / {} out", total_tokens_in, total_tokens_out);
+        println!(
+            "  Total tokens: {} in / {} out",
+            total_tokens_in, total_tokens_out
+        );
     }
 
     Ok(())
 
@@ -18,7 +18,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         })
         .step("apply", |ctx| async move {
             // Pipe plan output into claude-code apply
-            ctx.exec_piped("claude-code", &["apply", "/workspace"]).await
+            ctx.exec_piped("claude-code", &["apply", "/workspace"])
+                .await
         })
         .pipe("plan", "apply")
         .output("apply")
@@ -37,15 +38,22 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     println!("\n=== Step outputs ===");
     for (name, out) in &observed.result.step_outputs {
         let stdout = String::from_utf8_lossy(&out.stdout);
-        println!("  {}: exit={} stdout_len={}", name, out.exit_code, stdout.len());
+        println!(
+            "  {}: exit={} stdout_len={}",
+            name,
+            out.exit_code,
+            stdout.len()
+        );
         if !stdout.is_empty() && stdout.len() <= 200 {
             println!("    -> {}", stdout.trim());
         }
     }
 
-    println!("\n=== Observability ({} traces, {} logs) ===",
+    println!(
+        "\n=== Observability ({} traces, {} logs) ===",
         observed.traces().len(),
-        observed.logs().len());
+        observed.logs().len()
+    );
     for span in observed.traces() {
         println!("  span: {} status={:?}", span.name, span.status);
     }
 
@@ -90,15 +90,15 @@ async fn main() -> Result<(), Box<dyn Error>> {
     println!("--- Building AgentBox with remote skills ---");
     println!();
 
-    let reasoning = Skill::agent("claude-code")
-        .description("Autonomous reasoning and code execution");
+    let reasoning =
+        Skill::agent("claude-code").description("Autonomous reasoning and code execution");
 
     let mut builder = AgentBox::new("developer")
         .skill(reasoning)
         .prompt(
             "You are a senior developer. Use your brainstorming, debugging, and TDD skills \
              to plan a new CLI tool that converts Markdown to HTML. First brainstorm the design, \
-             then write tests, then implement."
+             then write tests, then implement.",
         )
         .mock();
 
@@ -139,11 +139,15 @@ async fn main() -> Result<(), Box<dyn Error>> {
     println!("--- Result ---");
     println!("  Box:     {}", result.box_name);
     println!("  Error:   {}", result.claude_result.is_error);
-    println!("  Tokens:  {} in / {} out",
-        result.claude_result.input_tokens,
-        result.claude_result.output_tokens);
+    println!(
+        "  Tokens:  {} in / {} out",
+        result.claude_result.input_tokens, result.claude_result.output_tokens
+    );
     println!();
-    println!("Done. All {} remote skills were fetched and provisioned.", fetched_count);
+    println!(
+        "Done. All {} remote skills were fetched and provisioned.",
+        fetched_count
+    );
 
     Ok(())
 }