Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions crates/configuration/src/global_settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use multiaddr::Multiaddr;
use serde::{Deserialize, Serialize};

use crate::{
observability::{ObservabilityConfig, ObservabilityConfigBuilder},
shared::{
errors::{ConfigError, FieldError},
helpers::{merge_errors, merge_errors_vecs},
Expand All @@ -18,6 +19,10 @@ use crate::{
utils::{default_as_true, default_node_spawn_timeout, default_timeout},
};

/// Predicate for serde's `skip_serializing_if`: returns `true` when `config`
/// compares equal to [`ObservabilityConfig::default()`].
fn is_default_observability(config: &ObservabilityConfig) -> bool {
    let defaults = ObservabilityConfig::default();
    config == &defaults
}

/// Global settings applied to an entire network.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct GlobalSettings {
Expand All @@ -44,6 +49,9 @@ pub struct GlobalSettings {
/// If enabled, will launch a task to monitor nodes' liveness and tear down the network if there are any.
#[serde(default = "default_as_true")]
tear_down_on_failure: bool,
/// Observability stack configuration (Prometheus + Grafana)
#[serde(default, skip_serializing_if = "is_default_observability")]
observability: ObservabilityConfig,
}

impl GlobalSettings {
Expand Down Expand Up @@ -82,6 +90,11 @@ impl GlobalSettings {
pub fn tear_down_on_failure(&self) -> bool {
self.tear_down_on_failure
}

/// Observability stack configuration
pub fn observability(&self) -> &ObservabilityConfig {
&self.observability
}
}

impl Default for GlobalSettings {
Expand All @@ -94,6 +107,7 @@ impl Default for GlobalSettings {
base_dir: Default::default(),
spawn_concurrency: Default::default(),
tear_down_on_failure: true,
observability: ObservabilityConfig::default(),
}
}
}
Expand Down Expand Up @@ -217,6 +231,21 @@ impl GlobalSettingsBuilder {
)
}

/// Configure the observability stack (Prometheus + Grafana).
///
/// `f` receives a fresh [`ObservabilityConfigBuilder`]; the configuration built
/// from the builder it returns replaces the current `observability` value. All
/// other settings, and the errors collected so far, are passed on unchanged.
pub fn with_observability(
    self,
    f: impl FnOnce(ObservabilityConfigBuilder) -> ObservabilityConfigBuilder,
) -> Self {
    let builder = f(ObservabilityConfigBuilder::new());
    let config = GlobalSettings {
        observability: builder.build(),
        ..self.config
    };

    Self::transition(config, self.errors)
}

/// Seals the builder and returns a [`GlobalSettings`] if there are no validation errors, else returns errors.
pub fn build(self) -> Result<GlobalSettings, Vec<anyhow::Error>> {
if !self.errors.is_empty() {
Expand Down
2 changes: 2 additions & 0 deletions crates/configuration/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ mod custom_process;
mod global_settings;
mod hrmp_channel;
mod network;
mod observability;
mod parachain;
mod relaychain;
pub mod shared;
Expand All @@ -91,6 +92,7 @@ pub use custom_process::{CustomProcess, CustomProcessBuilder};
pub use global_settings::{GlobalSettings, GlobalSettingsBuilder};
pub use hrmp_channel::{HrmpChannelConfig, HrmpChannelConfigBuilder};
pub use network::{NetworkConfig, NetworkConfigBuilder, WithRelaychain};
pub use observability::{ObservabilityConfig, ObservabilityConfigBuilder};
pub use parachain::{
states as para_states, ParachainConfig, ParachainConfigBuilder, RegistrationStrategy,
};
Expand Down
193 changes: 193 additions & 0 deletions crates/configuration/src/observability.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
use serde::{Deserialize, Serialize};

use crate::shared::types::Port;

/// Image reference used for Prometheus when `prometheus_image` is not set.
/// Note: `latest` is a floating tag, so the image version is not pinned.
const DEFAULT_PROMETHEUS_IMAGE: &str = "prom/prometheus:latest";
/// Image reference used for Grafana when `grafana_image` is not set.
/// Note: `latest` is a floating tag, so the image version is not pinned.
const DEFAULT_GRAFANA_IMAGE: &str = "grafana/grafana:latest";

/// Configuration for the observability stack (Prometheus + Grafana).
///
/// When enabled, Docker/Podman containers are spawned after the network is up,
/// auto-configured to scrape all nodes' Prometheus metrics endpoints.
///
/// Every field is optional when deserializing: an omitted field takes the same
/// value that [`ObservabilityConfig::default()`] produces for it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ObservabilityConfig {
/// Whether the observability stack is enabled. Defaults to `false`.
#[serde(default)]
enabled: bool,
/// Host port to expose Prometheus on. If `None`, a random available port is used.
#[serde(default)]
prometheus_port: Option<Port>,
/// Host port to expose Grafana on. If `None`, a random available port is used.
#[serde(default)]
grafana_port: Option<Port>,
/// Docker image for Prometheus. Defaults to `prom/prometheus:latest`.
#[serde(default = "default_prometheus_image")]
prometheus_image: String,
/// Docker image for Grafana. Defaults to `grafana/grafana:latest`.
#[serde(default = "default_grafana_image")]
grafana_image: String,
}

/// Serde default for `prometheus_image`: an owned copy of `DEFAULT_PROMETHEUS_IMAGE`.
fn default_prometheus_image() -> String {
    String::from(DEFAULT_PROMETHEUS_IMAGE)
}

/// Serde default for `grafana_image`: an owned copy of `DEFAULT_GRAFANA_IMAGE`.
fn default_grafana_image() -> String {
    String::from(DEFAULT_GRAFANA_IMAGE)
}

impl Default for ObservabilityConfig {
fn default() -> Self {
Self {
enabled: false,
prometheus_port: None,
grafana_port: None,
prometheus_image: default_prometheus_image(),
grafana_image: default_grafana_image(),
}
}
}

impl ObservabilityConfig {
    /// Whether the observability stack is enabled.
    pub fn enabled(&self) -> bool {
        self.enabled
    }

    /// Host port configured for Prometheus, or `None` if no port was set.
    pub fn prometheus_port(&self) -> Option<Port> {
        self.prometheus_port
    }

    /// Host port configured for Grafana, or `None` if no port was set.
    pub fn grafana_port(&self) -> Option<Port> {
        self.grafana_port
    }

    /// Image reference used for the Prometheus container.
    pub fn prometheus_image(&self) -> &str {
        self.prometheus_image.as_str()
    }

    /// Image reference used for the Grafana container.
    pub fn grafana_image(&self) -> &str {
        self.grafana_image.as_str()
    }
}

/// Builder for [`ObservabilityConfig`].
///
/// Starts from [`ObservabilityConfig::default()`]; each `with_*` method overrides
/// a single field and [`build`](ObservabilityConfigBuilder::build) returns the
/// resulting configuration.
// `Debug` is derived so this public type can be inspected in logs and assertions.
#[derive(Debug, Default)]
pub struct ObservabilityConfigBuilder {
    /// Configuration assembled so far.
    config: ObservabilityConfig,
}

impl ObservabilityConfigBuilder {
pub fn new() -> Self {
Self::default()
}

/// Enable or disable the observability stack
pub fn with_enabled(mut self, enabled: bool) -> Self {
self.config.enabled = enabled;
self
}

/// Set the host port for Prometheus
pub fn with_prometheus_port(mut self, port: Port) -> Self {
self.config.prometheus_port = Some(port);
self
}

/// Set the host port for Grafana
pub fn with_grafana_port(mut self, port: Port) -> Self {
self.config.grafana_port = Some(port);
self
}

/// Set a custom Prometheus Docker image
pub fn with_prometheus_image(mut self, image: impl Into<String>) -> Self {
self.config.prometheus_image = image.into();
self
}

/// Set a custom Grafana Docker image
pub fn with_grafana_image(mut self, image: impl Into<String>) -> Self {
self.config.grafana_image = image.into();
self
}

pub fn build(self) -> ObservabilityConfig {
self.config
}
}

// Unit tests for `ObservabilityConfig` defaults, the builder, and TOML (de)serialization.
#[cfg(test)]
mod tests {
use super::*;

// `Default` yields a disabled stack with no fixed ports and the default images.
#[test]
fn default_config_is_disabled() {
let config = ObservabilityConfig::default();
assert!(!config.enabled());
assert_eq!(config.prometheus_port(), None);
assert_eq!(config.grafana_port(), None);
assert_eq!(config.prometheus_image(), "prom/prometheus:latest");
assert_eq!(config.grafana_image(), "grafana/grafana:latest");
}

// A builder with no overrides produces a disabled config without ports.
#[test]
fn builder_defaults_are_disabled() {
let config = ObservabilityConfigBuilder::new().build();
assert!(!config.enabled());
assert_eq!(config.prometheus_port(), None);
assert_eq!(config.grafana_port(), None);
}

// Every `with_*` setter is reflected by the corresponding getter.
#[test]
fn builder_with_all_fields() {
let config = ObservabilityConfigBuilder::new()
.with_enabled(true)
.with_prometheus_port(9090)
.with_grafana_port(3000)
.with_prometheus_image("prom/prometheus:v2.50.0")
.with_grafana_image("grafana/grafana:10.0.0")
.build();

assert!(config.enabled());
assert_eq!(config.prometheus_port(), Some(9090));
assert_eq!(config.grafana_port(), Some(3000));
assert_eq!(config.prometheus_image(), "prom/prometheus:v2.50.0");
assert_eq!(config.grafana_image(), "grafana/grafana:10.0.0");
}

// Serializing to TOML and parsing the result gives back an equal config.
#[test]
fn toml_round_trip() {
let config = ObservabilityConfigBuilder::new()
.with_enabled(true)
.with_prometheus_port(9090)
.with_grafana_port(3000)
.build();

let toml_str = toml::to_string(&config).unwrap();
let deserialized: ObservabilityConfig = toml::from_str(&toml_str).unwrap();
assert_eq!(config, deserialized);
}

// Fields present in the TOML are honoured; the omitted `grafana_image`
// falls back to its default.
#[test]
fn deserialize_from_toml_string() {
let toml_str = r#"
enabled = true
prometheus_port = 9090
grafana_port = 3000
prometheus_image = "prom/prometheus:v2.50.0"
"#;

let config: ObservabilityConfig = toml::from_str(toml_str).unwrap();
assert!(config.enabled());
assert_eq!(config.prometheus_port(), Some(9090));
assert_eq!(config.grafana_port(), Some(3000));
assert_eq!(config.prometheus_image(), "prom/prometheus:v2.50.0");
assert_eq!(config.grafana_image(), "grafana/grafana:latest");
}

// An empty document deserializes successfully and leaves the stack disabled.
#[test]
fn deserialize_empty_toml_defaults_to_disabled() {
let config: ObservabilityConfig = toml::from_str("").unwrap();
assert!(!config.enabled());
}
}
48 changes: 48 additions & 0 deletions crates/examples/examples/observability.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//! Example: Observability add-on (Prometheus + Grafana)
//!
//! The observability stack can be used in two ways:
//!
//! 1. **From config**: Enable it in the global settings, either with
//! `[settings.observability]` in TOML or, as shown below, with
//! `with_observability` on the builder, so the stack starts automatically
//! when the network spawns.
//!
//! 2. **As an add-on**: Call `network.start_observability(..)` on any running
//! network, including one re-attached via `attach_to_live`.
//!
//! This example demonstrates the first approach.
//!
//! Requirements: Docker or Podman must be available on the host.
use zombienet_sdk::{NetworkConfigBuilder, NetworkConfigExt};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Initialise the global `tracing` subscriber.
    tracing_subscriber::fmt::init();

    // Relay chain: `rococo-local` with two validators (alice, bob).
    // Global settings: observability enabled, with fixed Prometheus and Grafana ports.
    let network_config = NetworkConfigBuilder::new()
        .with_relaychain(|relay| {
            relay
                .with_chain("rococo-local")
                .with_default_command("polkadot")
                .with_validator(|node| node.with_name("alice"))
                .with_validator(|node| node.with_name("bob"))
        })
        .with_global_settings(|settings| {
            settings.with_observability(|observability| {
                observability
                    .with_enabled(true)
                    .with_prometheus_port(9090)
                    .with_grafana_port(3000)
            })
        })
        .build()
        .expect("Failed to build network config");

    println!("🚀 Spawning network with observability...");

    let network = network_config.spawn_native().await?;

    // Print the endpoints when the network reports observability info.
    if let Some(stack) = network.observability() {
        println!("📊 Prometheus: {}", stack.prometheus_url);
        println!("📊 Grafana: {}", stack.grafana_url);
    }

    // Keep the network running until Ctrl-C is received.
    tokio::signal::ctrl_c().await?;

    // The result of the teardown is deliberately ignored.
    let _ = network.destroy().await;

    Ok(())
}
16 changes: 16 additions & 0 deletions crates/orchestrator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub mod errors;
pub mod generators;
pub mod network;
pub mod network_helper;
pub mod observability;
pub mod tx_helper;

mod network_spec;
Expand Down Expand Up @@ -524,6 +525,21 @@ where
Parachain::register(register_para_options, &scoped_fs).await?;
}

if network_spec.global_settings.observability().enabled() {
match network
.start_observability(network_spec.global_settings.observability())
.await
{
Ok(obs) => {
info!("📊 Prometheus URL: {}", obs.prometheus_url);
info!("📊 Grafana URL: {}", obs.grafana_url);
},
Err(e) => {
warn!("⚠️ Failed to spawn observability stack: {e}");
},
}
}

// start custom processes if needed
for cp in &network_spec.custom_processes {
if let Err(e) = spawner::spawn_process(cp, ns.clone()).await {
Expand Down
Loading
Loading