Skip to content

Commit 45cda04

Browse files
committed
feat: Add Grafana Explore links, OTLP flushing, and structured logging
- Print direct trace and metrics Grafana Explore URLs on pipeline completion. - Implement `flush_global_otel` to ensure OTLP traces and metrics are exported before process exit. - Save structured logs locally (`/tmp/void-box-playground-last.log` by default). - Update relevant documentation and scripts to reflect new observability features.
1 parent ef18f26 commit 45cda04

File tree

7 files changed

+216
-13
lines changed

7 files changed

+216
-13
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ The script starts Docker Compose services, runs:
8787
cargo run --example playground_pipeline --features opentelemetry
8888
```
8989

90-
Then prints Grafana URL and service filter hints.
90+
Then prints direct Grafana Explore links for traces and metrics.
91+
Playground logs are also written locally to `/tmp/void-box-playground-last.log` by default.
9192
It also asks for provider mode (`Anthropic`, `Ollama`, `Mock`) and prepares initramfs automatically:
9293
- `Mock` -> `scripts/build_test_image.sh` (`/tmp/void-box-test-rootfs.cpio.gz`, claudio mock)
9394
- `Anthropic` / `Ollama` -> `scripts/build_guest_image.sh` (`/tmp/void-box-rootfs.cpio.gz`)

docs/GETTING_STARTED.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ This uses the OTLP-enabled example:
8585
cargo run --example playground_pipeline --features opentelemetry
8686
```
8787

88+
At the end of the run, the script prints:
89+
- direct Trace and Metrics Explore URLs
90+
- local log path (`/tmp/void-box-playground-last.log` by default)
91+
8892
## Core Test Commands
8993

9094
```bash

docs/observability.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
## OTLP Export
1313

1414
When configured, traces and metrics are exported via OTLP.
15+
Structured logs are not exported via OTLP in the current playground flow; they are only written locally.
1516

1617
Required:
1718

@@ -41,7 +42,8 @@ This will:
4142

4243
1. Start Grafana LGTM via Docker Compose
4344
2. Run `playground_pipeline` with OTLP enabled
44-
3. Print Grafana URL and service filter hints
45+
3. Print direct Grafana Explore links for traces and metrics
46+
4. Write full run logs to `/tmp/void-box-playground-last.log` (default)
4547

4648
Stop stack:
4749

playground/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,9 @@ Services:
1919
- Grafana: `http://localhost:3000`
2020
- OTLP gRPC ingest: `localhost:4317`
2121
- OTLP HTTP ingest: `localhost:4318`
22+
23+
After a run, `playground/up.sh` prints direct Grafana links:
24+
- Traces (Tempo Explore)
25+
- Metrics (Prometheus Explore)
26+
27+
Logs are stored locally at `/tmp/void-box-playground-last.log` by default.

playground/playground_pipeline.rs

Lines changed: 105 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@
1111
use std::sync::Arc;
1212
use std::time::{SystemTime, UNIX_EPOCH};
1313

14-
use void_box::observe::ObserveConfig;
14+
use void_box::observe::{flush_global_otel, ObserveConfig};
1515
use void_box::sandbox::Sandbox;
1616
use void_box::workflow::{Workflow, WorkflowExt};
1717

1818
#[tokio::main]
1919
async fn main() -> Result<(), Box<dyn std::error::Error>> {
20+
let started_at_ms = now_ms();
2021
let run_id = SystemTime::now()
2122
.duration_since(UNIX_EPOCH)
2223
.unwrap_or_default()
@@ -60,6 +61,17 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
6061
observe.logs.in_memory = true;
6162

6263
let observed = workflow.observe(observe).run_in(sandbox).await?;
64+
let ended_at_ms = now_ms();
65+
66+
if let Err(e) = flush_global_otel() {
67+
eprintln!("[playground] WARN: failed to flush OTLP exporters: {e}");
68+
}
69+
70+
let grafana_base = std::env::var("PLAYGROUND_GRAFANA_URL")
71+
.unwrap_or_else(|_| "http://localhost:3000".to_string());
72+
let service_name =
73+
std::env::var("VOIDBOX_SERVICE_NAME").unwrap_or_else(|_| "void-box-playground".into());
74+
let workflow_span = format!("workflow:{workflow_name}");
6375

6476
println!("=== Playground Pipeline ===");
6577
println!("workflow: {}", workflow_name);
@@ -73,16 +85,104 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
7385

7486
println!();
7587
println!("=== Explore in Grafana ===");
76-
println!("Grafana: http://localhost:3000");
88+
println!("Grafana: {}", grafana_base);
89+
println!("Service: {}", service_name);
90+
println!("Workflow span: {}", workflow_span);
91+
println!(
92+
"Traces URL: {}",
93+
grafana_trace_url(
94+
&grafana_base,
95+
&service_name,
96+
&workflow_span,
97+
started_at_ms,
98+
ended_at_ms
99+
)
100+
);
77101
println!(
78-
"Service: {}",
79-
std::env::var("VOIDBOX_SERVICE_NAME").unwrap_or_else(|_| "void-box-playground".into())
102+
"Metrics URL: {}",
103+
grafana_metrics_url(&grafana_base, &service_name, started_at_ms, ended_at_ms)
80104
);
81-
println!("Workflow span: workflow:{}", workflow_name);
105+
if let Ok(log_path) = std::env::var("PLAYGROUND_LOG_PATH") {
106+
if !log_path.is_empty() {
107+
println!("Logs (local): {}", log_path);
108+
}
109+
}
82110

83111
Ok(())
84112
}
85113

114+
/// Current wall-clock time expressed as milliseconds since the Unix epoch.
///
/// If the system clock reports a time before the epoch, the elapsed duration
/// falls back to its default (zero), so the function returns `0` rather than
/// panicking.
fn now_ms() -> u64 {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    let millis = since_epoch.as_millis();
    millis as u64
}
120+
121+
fn grafana_trace_url(
122+
grafana_base: &str,
123+
service_name: &str,
124+
workflow_span: &str,
125+
from_ms: u64,
126+
to_ms: u64,
127+
) -> String {
128+
let query = format!(
129+
"{{ resource.service.name = \"{}\" && name = \"{}\" }}",
130+
service_name, workflow_span
131+
);
132+
let left = format!(
133+
"[{}, {}, \"tempo\", {{\"queryType\":\"traceql\",\"query\":\"{}\",\"refId\":\"A\"}}]",
134+
from_ms,
135+
to_ms.saturating_add(1000),
136+
escape_json_string(&query),
137+
);
138+
139+
format!(
140+
"{}/explore?orgId=1&left={}",
141+
grafana_base.trim_end_matches('/'),
142+
percent_encode(&left)
143+
)
144+
}
145+
146+
fn grafana_metrics_url(grafana_base: &str, service_name: &str, from_ms: u64, to_ms: u64) -> String {
147+
let _ = service_name;
148+
let expr = String::from(
149+
"sum by (__name__) ({__name__=~\"(ingest|normalize|score)_duration_ms(_bucket|_sum|_count)?\"})",
150+
);
151+
let left = format!(
152+
"[{}, {}, \"prometheus\", {{\"refId\":\"A\",\"expr\":\"{}\"}}]",
153+
from_ms,
154+
to_ms.saturating_add(1000),
155+
escape_json_string(&expr)
156+
);
157+
158+
format!(
159+
"{}/explore?orgId=1&left={}",
160+
grafana_base.trim_end_matches('/'),
161+
percent_encode(&left)
162+
)
163+
}
164+
165+
fn escape_json_string(input: &str) -> String {
166+
input.replace('\\', "\\\\").replace('"', "\\\"")
167+
}
168+
169+
/// Percent-encode `input` byte by byte for use as a URL query value.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are copied through; every other
/// byte (including each byte of a multi-byte UTF-8 sequence) becomes `%XX` with
/// upper-case hexadecimal digits.
fn percent_encode(input: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(input.len() * 3 / 2);
    for &byte in input.as_bytes() {
        let unreserved =
            byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~');
        if unreserved {
            out.push(byte as char);
            continue;
        }

        let high = HEX_DIGITS[usize::from(byte >> 4)];
        let low = HEX_DIGITS[usize::from(byte & 0x0F)];
        out.push('%');
        out.push(high as char);
        out.push(low as char);
    }
    out
}
185+
86186
fn build_sandbox() -> Result<Arc<Sandbox>, Box<dyn std::error::Error>> {
87187
let has_kvm = std::path::Path::new("/dev/kvm").exists();
88188
let has_kernel = std::env::var("VOID_BOX_KERNEL")

playground/up.sh

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,19 +158,30 @@ configure_kvm_artifacts
158158

159159
export VOIDBOX_OTLP_ENDPOINT="${VOIDBOX_OTLP_ENDPOINT:-http://localhost:4317}"
160160
export VOIDBOX_SERVICE_NAME="${VOIDBOX_SERVICE_NAME:-void-box-playground}"
161+
export PLAYGROUND_GRAFANA_URL="${PLAYGROUND_GRAFANA_URL:-http://localhost:3000}"
162+
export PLAYGROUND_LOG_PATH="${PLAYGROUND_LOG_PATH:-/tmp/void-box-playground-last.log}"
161163
print_run_summary
162164

163165
echo "[playground] running pipeline example..."
164166
(
165167
cd "$ROOT_DIR"
166-
cargo run --example playground_pipeline --features opentelemetry
168+
cargo run --example playground_pipeline --features opentelemetry 2>&1 | tee "$PLAYGROUND_LOG_PATH"
167169
)
168170

171+
TRACES_URL="$(grep -E '^Traces URL:' "$PLAYGROUND_LOG_PATH" | tail -n1 | sed -E 's/^Traces URL:[[:space:]]*//')"
172+
METRICS_URL="$(grep -E '^Metrics URL:' "$PLAYGROUND_LOG_PATH" | tail -n1 | sed -E 's/^Metrics URL:[[:space:]]*//')"
173+
169174
echo
170175
echo "[playground] wow, it's live"
171-
echo " Grafana: http://localhost:3000"
176+
echo " Grafana: $PLAYGROUND_GRAFANA_URL"
172177
echo " Login: admin/admin"
173178
echo " Service filter: service.name=$VOIDBOX_SERVICE_NAME"
174179
echo " Provider: $PLAYGROUND_PROVIDER"
180+
echo " Logs (local): $PLAYGROUND_LOG_PATH"
181+
echo
182+
echo "[playground] direct links"
183+
echo " Traces: ${TRACES_URL:-<not emitted>}"
184+
echo " Metrics: ${METRICS_URL:-<not emitted>}"
185+
echo " Logs: $PLAYGROUND_LOG_PATH"
175186
echo
176187
echo "Tip: run '$0 --down' to stop the stack."

src/observe/mod.rs

Lines changed: 83 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub mod tracer;
3030

3131
use std::sync::Arc;
3232
#[cfg(feature = "opentelemetry")]
33+
use std::sync::Mutex;
34+
#[cfg(feature = "opentelemetry")]
3335
use std::sync::OnceLock;
3436
use std::time::Instant;
3537

@@ -137,9 +139,68 @@ pub struct Observer {
137139
logger: Arc<StructuredLogger>,
138140
}
139141

142+
/// Handles to the OTLP SDK providers, retained so they can be flushed and shut
/// down later by `flush_global_otel`.
#[cfg(feature = "opentelemetry")]
#[derive(Default)]
struct OtlpProviderState {
    /// Tracer provider; `None` until one is stored and again after it is taken.
    tracer: Option<opentelemetry_sdk::trace::SdkTracerProvider>,
    /// Meter provider; `None` until one is stored and again after it is taken.
    meter: Option<opentelemetry_sdk::metrics::SdkMeterProvider>,
}
148+
149+
/// Force-flush globally configured OTLP providers.
150+
///
151+
/// This is primarily useful in short-lived binaries (examples/CLI tools) so
152+
/// traces and metrics are exported before process exit.
153+
#[cfg(feature = "opentelemetry")]
154+
pub fn flush_global_otel() -> crate::Result<()> {
155+
let Some(state) = otlp_provider_state().get() else {
156+
return Ok(());
157+
};
158+
159+
let mut state = state
160+
.lock()
161+
.map_err(|e| crate::Error::Guest(format!("OTLP provider mutex poisoned: {e}")))?;
162+
let tracer = state.tracer.take();
163+
let meter = state.meter.take();
164+
drop(state);
165+
166+
let mut errors = Vec::new();
167+
168+
if let Some(tracer) = tracer {
169+
if let Err(e) = tracer.force_flush() {
170+
errors.push(format!("Failed to flush OTLP tracer: {e}"));
171+
}
172+
if let Err(e) = tracer.shutdown() {
173+
errors.push(format!("Failed to shutdown OTLP tracer: {e}"));
174+
}
175+
}
176+
177+
if let Some(meter) = meter {
178+
if let Err(e) = meter.force_flush() {
179+
errors.push(format!("Failed to flush OTLP metrics: {e}"));
180+
}
181+
if let Err(e) = meter.shutdown() {
182+
errors.push(format!("Failed to shutdown OTLP metrics: {e}"));
183+
}
184+
}
185+
186+
if !errors.is_empty() {
187+
return Err(crate::Error::Guest(errors.join("; ")));
188+
}
189+
190+
Ok(())
191+
}
192+
193+
/// No-op when OpenTelemetry feature is disabled.
194+
#[cfg(not(feature = "opentelemetry"))]
195+
pub fn flush_global_otel() -> crate::Result<()> {
196+
Ok(())
197+
}
198+
140199
impl Observer {
141200
/// Create a new observer with the given configuration
142201
pub fn new(config: ObserveConfig) -> Self {
202+
#[cfg(feature = "opentelemetry")]
203+
maybe_init_global_otel(&config);
143204
let tracer = Arc::new(Tracer::new(config.tracer.clone()));
144205
let metrics = Arc::new(build_metrics_collector(&config));
145206
let logger = Arc::new(StructuredLogger::new(config.logs.clone()));
@@ -255,15 +316,33 @@ fn maybe_init_global_otel(config: &ObserveConfig) {
255316
debug: false,
256317
};
257318

258-
if let Err(e) = crate::observe::otlp::init_otlp_tracer(&otlp_config) {
259-
eprintln!("[observe] WARN: failed to initialize OTLP tracer export: {e}");
319+
let mut state = OtlpProviderState::default();
320+
321+
match crate::observe::otlp::init_otlp_tracer(&otlp_config) {
322+
Ok(provider) => state.tracer = Some(provider),
323+
Err(e) => eprintln!("[observe] WARN: failed to initialize OTLP tracer export: {e}"),
260324
}
261-
if let Err(e) = crate::observe::otlp::init_otlp_metrics(&otlp_config) {
262-
eprintln!("[observe] WARN: failed to initialize OTLP metrics export: {e}");
325+
match crate::observe::otlp::init_otlp_metrics(&otlp_config) {
326+
Ok(provider) => state.meter = Some(provider),
327+
Err(e) => eprintln!("[observe] WARN: failed to initialize OTLP metrics export: {e}"),
328+
}
329+
330+
if state.tracer.is_some() || state.meter.is_some() {
331+
let lock =
332+
otlp_provider_state().get_or_init(|| Mutex::new(OtlpProviderState::default()));
333+
if let Ok(mut slot) = lock.lock() {
334+
*slot = state;
335+
}
263336
}
264337
});
265338
}
266339

340+
/// Accessor for the process-wide cell that holds the OTLP provider handles.
///
/// The cell itself is returned (not its contents) so callers can choose between
/// initialising it with `get_or_init` and merely peeking with `get`.
#[cfg(feature = "opentelemetry")]
fn otlp_provider_state() -> &'static OnceLock<Mutex<OtlpProviderState>> {
    static PROVIDER_CELL: OnceLock<Mutex<OtlpProviderState>> = OnceLock::new();
    &PROVIDER_CELL
}
}
345+
267346
/// RAII guard for spans that records metrics on drop
268347
pub struct SpanGuard {
269348
span: Span,

0 commit comments

Comments
 (0)