diff --git a/Cargo.lock b/Cargo.lock index 813797a24..53b2d35e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2550,7 +2550,7 @@ dependencies = [ "aptos-metrics-core 0.1.0 (git+https://github.com/movementlabsxyz/aptos-core?rev=1d1cdbbd7fabb80dcb95ba5e23213faa072fab67)", "once_cell", "poem", - "prometheus", + "prometheus 0.13.4", "serde", ] @@ -3160,7 +3160,7 @@ dependencies = [ "async-trait", "backtrace", "clap 4.5.21", - "prometheus", + "prometheus 0.13.4", "serde", "serde_yaml 0.8.26", "tempfile", @@ -3219,7 +3219,7 @@ dependencies = [ "itertools 0.12.1", "lz4", "once_cell", - "prometheus", + "prometheus 0.13.4", "prost 0.12.6", "redis", "redis-test", @@ -3263,7 +3263,7 @@ dependencies = [ "futures", "hyper 0.14.31", "once_cell", - "prometheus", + "prometheus 0.13.4", "reqwest 0.11.27", "serde_json", "tokio", @@ -3542,7 +3542,7 @@ dependencies = [ "futures", "hostname", "once_cell", - "prometheus", + "prometheus 0.13.4", "serde", "serde_json", "strum 0.24.1", @@ -3566,7 +3566,7 @@ dependencies = [ "futures", "hostname", "once_cell", - "prometheus", + "prometheus 0.13.4", "serde", "serde_json", "strum 0.24.1", @@ -3736,7 +3736,7 @@ version = "0.1.0" source = "git+https://github.com/movementlabsxyz/aptos-core.git?branch=movement#31d9876e563698049a1ea0b7f1d88b48b921c3e9" dependencies = [ "anyhow", - "prometheus", + "prometheus 0.13.4", ] [[package]] @@ -3745,7 +3745,7 @@ version = "0.1.0" source = "git+https://github.com/movementlabsxyz/aptos-core?rev=1d1cdbbd7fabb80dcb95ba5e23213faa072fab67#1d1cdbbd7fabb80dcb95ba5e23213faa072fab67" dependencies = [ "anyhow", - "prometheus", + "prometheus 0.13.4", ] [[package]] @@ -4213,7 +4213,7 @@ dependencies = [ "cfg-if", "once_cell", "procfs", - "prometheus", + "prometheus 0.13.4", "sysinfo", ] @@ -4229,7 +4229,7 @@ dependencies = [ "cfg-if", "once_cell", "procfs", - "prometheus", + "prometheus 0.13.4", "sysinfo", ] @@ -5344,7 +5344,7 @@ dependencies = [ "flate2", "futures", "once_cell", - "prometheus", + "prometheus 0.13.4", "rand 0.7.3", "rand_core 0.5.1", "reqwest 0.11.27", @@ -5383,7 +5383,7 @@ dependencies = [ "flate2", "futures", "once_cell", - "prometheus", + "prometheus 0.13.4", "rand 0.7.3", "rand_core 0.5.1", "reqwest 0.11.27", @@ -5421,7 +5421,7 @@ dependencies = [ "gcp-bigquery-client", "jsonwebtoken 8.3.0", "once_cell", - "prometheus", + "prometheus 0.13.4", "rand 0.7.3", "rand_core 0.5.1", "reqwest 0.11.27", @@ -5461,7 +5461,7 @@ dependencies = [ "gcp-bigquery-client", "jsonwebtoken 8.3.0", "once_cell", - "prometheus", + "prometheus 0.13.4", "rand 0.7.3", "rand_core 0.5.1", "reqwest 0.11.27", @@ -12048,6 +12048,8 @@ dependencies = [ "movement-da-util", "movement-signer", "movement-signer-loader", + "poem", + "prometheus 0.14.0", "rand 0.7.3", "serde", "serde_derive", @@ -14658,6 +14660,7 @@ dependencies = [ name = "movement-tracing" version = "0.3.4" dependencies = [ + "anyhow", "aptos-config 0.1.0 (git+https://github.com/movementlabsxyz/aptos-core.git?branch=movement)", "aptos-crypto 0.0.3 (git+https://github.com/movementlabsxyz/aptos-core.git?branch=movement)", "aptos-logger 0.1.0 (git+https://github.com/movementlabsxyz/aptos-core.git?branch=movement)", @@ -14669,7 +14672,8 @@ dependencies = [ "lazy_static", "movement-config", "once_cell", - "prometheus", + "poem", + "prometheus 0.13.4", "rand 0.7.3", "ring 0.17.8", "tokio", @@ -16055,7 +16059,7 @@ dependencies = [ "nix 0.26.4", "once_cell", "parking_lot 0.12.3", - "protobuf", + "protobuf 2.28.0", "protobuf-codegen-pure", "smallvec", "symbolic-demangle", @@ -16256,7 +16260,7 @@ dependencies = [ "parquet", "parquet_derive", "postgres-native-tls", - "prometheus", + "prometheus 0.13.4", "prost 0.12.6", "regex", "serde", @@ -16315,7 +16319,7 @@ dependencies = [ "parquet", "parquet_derive", "postgres-native-tls", - "prometheus", + "prometheus 0.13.4", "prost 0.12.6", "regex", "serde", @@ -16359,10 +16363,25 @@ dependencies = [ "lazy_static", "memchr", "parking_lot 0.12.3", - "protobuf", + "protobuf 2.28.0", "thiserror 1.0.69", ] +[[package]] +name = "prometheus" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot 0.12.3", + "protobuf 3.7.2", + "thiserror 2.0.7", +] + [[package]] name = "proptest" version = "1.5.0" @@ -16530,13 +16549,24 @@ version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" +[[package]] +name = "protobuf" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4" +dependencies = [ + "once_cell", + "protobuf-support", + "thiserror 1.0.69", +] + [[package]] name = "protobuf-codegen" version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "033460afb75cf755fcfc16dfaed20b86468082a2ea24e05ac35ab4a099a017d6" dependencies = [ - "protobuf", + "protobuf 2.28.0", ] [[package]] @@ -16545,10 +16575,19 @@ version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865" dependencies = [ - "protobuf", + "protobuf 2.28.0", "protobuf-codegen", ] +[[package]] +name = "protobuf-support" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6" +dependencies = [ + "thiserror 1.0.69", +] + [[package]] name = "protox" version = "0.7.2" @@ -18138,7 +18177,7 @@ dependencies = [ "async-trait", "backtrace", "clap 4.5.21", - "prometheus", + "prometheus 0.13.4", "serde", "serde_yaml 0.8.26", "tempfile", @@ -18159,7 +18198,7 @@ dependencies = [ "async-trait", "backtrace", "clap 4.5.21", - "prometheus", + "prometheus 0.13.4", "serde", "serde_yaml 0.8.26", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index 71e82c390..786d11b60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -359,6 +359,7 @@ hyper-util = { version = "0.1.4" } tower = { version = "0.5" } http-body-util = "0.1" tap = "1.0.1" +prometheus = "0.14.0" # trying to pin diesel diesel = { version = "2.2.7", features = ["postgres", "numeric", "r2d2"] } diff --git a/protocol-units/execution/maptos/util/Cargo.toml b/protocol-units/execution/maptos/util/Cargo.toml index 3e273591a..91c393a1b 100644 --- a/protocol-units/execution/maptos/util/Cargo.toml +++ b/protocol-units/execution/maptos/util/Cargo.toml @@ -29,6 +29,9 @@ hex = { workspace = true } tokio = { workspace = true } url = { workspace = true } tracing = { workspace = true } +poem = { workspace = true } +prometheus = { workspace = true } + aptos-sdk = { workspace = true } movement-signer-loader = { workspace = true } diff --git a/protocol-units/execution/maptos/util/src/config/common.rs b/protocol-units/execution/maptos/util/src/config/common.rs index 1172ec0a2..ca2e8560c 100644 --- a/protocol-units/execution/maptos/util/src/config/common.rs +++ b/protocol-units/execution/maptos/util/src/config/common.rs @@ -246,6 +246,23 @@ env_default!( HashValue::sha3_256_of(b"maptos").to_hex() ); +env_default!( + default_health_server_hostname, + "HEALTH_SERVER_HOSTNAME", + String, + "0.0.0.0".to_string() +); + +env_default!(default_health_server_port, "HEALTH_SERVER_PORT", u16, 18085); + +env_default!( + default_metrics_server_hostname, + "METRICS_SERVER_HOSTNAME", + String, + "0.0.0.0".to_string() +); +env_default!(default_metrics_server_port, "METRICS_SERVER_PORT", u16, 18185); + env_default!(default_batch_production_time, "MAPTOS_BATCH_PRODUCTION_TIME_MS", u64, 2000); env_default!(default_max_transactions_in_flight, "MAPTOS_MAX_TRANSACTIONS_IN_FLIGHT", u64); diff --git a/protocol-units/execution/maptos/util/src/config/health_server.rs b/protocol-units/execution/maptos/util/src/config/health_server.rs new file mode 100644 index 000000000..0032773e3 --- /dev/null +++ b/protocol-units/execution/maptos/util/src/config/health_server.rs @@ -0,0 +1,39 @@ +use super::common::{default_health_server_hostname, default_health_server_port}; +use anyhow::Error; +use poem::listener::TcpListener; +use poem::{get, handler, IntoResponse, Response, Route, Server}; +use serde::{Deserialize, Serialize}; + +// An additional health server to be used by the indexer(or any other service). +// Do not use this with node since it exposes various endpoints to verify the health of the node. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Config { + #[serde(default = "default_health_server_hostname")] + pub hostname: String, + #[serde(default = "default_health_server_port")] + pub port: u16, +} + +impl Default for Config { + fn default() -> Self { + Self { hostname: default_health_server_hostname(), port: default_health_server_port() } + } +} + +impl Config { + pub async fn run(self) -> Result<(), anyhow::Error> { + let url = format!("{}:{}", self.hostname, self.port); + run_service(url).await + } +} + +pub async fn run_service(url: String) -> Result<(), Error> { + let route = Route::new().at("/health", get(health)); + tracing::info!("Start health check access on :{url} ."); + Server::new(TcpListener::bind(url)).run(route).await.map_err(Into::into) +} + +#[handler] +async fn health() -> Response { + "{\"OK\": \"healthy\"}".into_response() +} diff --git a/protocol-units/execution/maptos/util/src/config/metrics_server.rs b/protocol-units/execution/maptos/util/src/config/metrics_server.rs new file mode 100644 index 000000000..ef01212f2 --- /dev/null +++ b/protocol-units/execution/maptos/util/src/config/metrics_server.rs @@ -0,0 +1,64 @@ +use super::common::{default_metrics_server_hostname, default_metrics_server_port}; +use poem::http::StatusCode; +use poem::{get, handler, listener::TcpListener, IntoResponse, Route, Server}; +use prometheus::{gather, Encoder, TextEncoder}; +use serde::{Deserialize, Serialize}; +use tokio::task::JoinHandle; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct MetricsConfig { + #[serde(default = "default_metrics_server_hostname")] + pub listen_hostname: String, + #[serde(default = "default_metrics_server_port")] + pub listen_port: u16, +} + +impl Default for MetricsConfig { + fn default() -> Self { + Self { + listen_hostname: default_metrics_server_hostname(), + listen_port: default_metrics_server_port(), + } + } +} + +impl MetricsConfig { + pub async fn start_metrics_server(&self) -> Result, anyhow::Error> { + let bind_address = format!("{}:{}", self.listen_hostname, self.listen_port); + + let metrics_route = Route::new().at("/metrics", get(metrics_handler)); + + let server_handle = tokio::spawn(async move { + let listener = TcpListener::bind(&bind_address); + tracing::info!("Starting Prometheus metrics server on http://{}/metrics", bind_address); + + if let Err(e) = Server::new(listener).run(metrics_route).await { + tracing::error!("Metrics server error: {}", e); + } + }); + + Ok(server_handle) + } +} + +#[handler] +async fn metrics_handler() -> impl IntoResponse { + let metrics = gather(); + let encoder = TextEncoder::new(); + let mut buffer = vec![]; + + match encoder.encode(&metrics, &mut buffer) { + Ok(_) => match String::from_utf8(buffer) { + Ok(metrics_text) => poem::Response::builder() + .status(StatusCode::OK) + .header("content-type", "text/plain") + .body(metrics_text), + Err(_) => poem::Response::builder() + .status(StatusCode::INTERNAL_SERVER_ERROR) + .body("Error encoding metrics"), + }, + Err(_) => poem::Response::builder() + .status(StatusCode::INTERNAL_SERVER_ERROR) + .body("Error gathering metrics"), + } +} diff --git a/protocol-units/execution/maptos/util/src/config/mod.rs b/protocol-units/execution/maptos/util/src/config/mod.rs index 89e238d02..5863fff5e 100644 --- a/protocol-units/execution/maptos/util/src/config/mod.rs +++ b/protocol-units/execution/maptos/util/src/config/mod.rs @@ -4,10 +4,12 @@ pub mod common; pub mod da_sequencer; pub mod faucet; pub mod fin; +pub mod health_server; pub mod indexer; pub mod indexer_processor; pub mod load_shedding; pub mod mempool; +pub mod metrics_server; use serde::{Deserialize, Serialize}; diff --git a/util/health/Cargo.toml b/util/health/Cargo.toml new file mode 100644 index 000000000..be86dbfcd --- /dev/null +++ b/util/health/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "movement-health" +description = "Health utilities for Movement services" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +homepage.workspace = true +publish.workspace = true +rust-version.workspace = true + +[dependencies] +poem = { workspace = true } +anyhow = { workspace = true } + +[lints] +workspace = true diff --git a/util/health/src/lib.rs b/util/health/src/lib.rs new file mode 100644 index 000000000..e98f38641 --- /dev/null +++ b/util/health/src/lib.rs @@ -0,0 +1,15 @@ +use anyhow::Error; +use poem::listener::TcpListener; +use poem::{get, handler, IntoResponse, Response, Route, Server}; + +pub async fn run_service(hostname: String, port: u16) -> Result<(), Error> { + let route = Route::new().at("/health", get(health)); + let url = format!("{}:{}", hostname, port); + tracing::info!("Start health check access on :{url} ."); + Server::new(TcpListener::bind(url)).run(route).await.map_err(Into::into) +} + +#[handler] +async fn health() -> Response { + "{\"OK\": \"healthy\"}".into_response() +} diff --git a/util/tracing/Cargo.toml b/util/tracing/Cargo.toml index e29428b61..84ddb55b8 100644 --- a/util/tracing/Cargo.toml +++ b/util/tracing/Cargo.toml @@ -25,8 +25,10 @@ hex = { workspace = true } rand = { workspace = true } ring = "0.17.7" warp = "0.3" +poem = { workspace = true } prometheus = "0.13" clap = { workspace = true } +anyhow = { workspace = true } lazy_static = "1.4.0" [lints] diff --git a/util/tracing/src/lib.rs b/util/tracing/src/lib.rs index f7ac37590..28f582edd 100644 --- a/util/tracing/src/lib.rs +++ b/util/tracing/src/lib.rs @@ -14,6 +14,8 @@ use tokio::runtime::Runtime; use tokio::time; use warp::Filter; +pub mod simple_metrics; + // Create a default NodeConfig for telemetry static DEFAULT_NODE_CONFIG: Lazy = Lazy::new(|| { let mut config = NodeConfig::default(); diff --git a/util/tracing/src/simple_metrics.rs b/util/tracing/src/simple_metrics.rs new file mode 100644 index 000000000..fe3ba8f2d --- /dev/null +++ b/util/tracing/src/simple_metrics.rs @@ -0,0 +1,44 @@ +use poem::http::StatusCode; +use poem::{get, handler, listener::TcpListener, IntoResponse, Route, Server}; +use prometheus::{gather, Encoder, TextEncoder}; +use tokio::task::JoinHandle; + +/// Start a simple metrics server on the given hostname and port. This is for the usage other than the node. +pub async fn start_metrics_server(listen_hostname: String, listen_port: u16) -> Result, anyhow::Error> { + let bind_address = format!("{}:{}", listen_hostname, listen_port); + + let metrics_route = Route::new().at("/metrics", get(metrics_handler)); + + let server_handle = tokio::spawn(async move { + let listener = TcpListener::bind(&bind_address); + aptos_logger::info!("Starting Prometheus metrics server on http://{}/metrics", bind_address); + + if let Err(e) = Server::new(listener).run(metrics_route).await { + aptos_logger::error!("Metrics server error: {}", e); + } + }); + + Ok(server_handle) +} + +#[handler] +async fn metrics_handler() -> impl IntoResponse { + let metrics = gather(); + let encoder = TextEncoder::new(); + let mut buffer = vec![]; + + match encoder.encode(&metrics, &mut buffer) { + Ok(_) => match String::from_utf8(buffer) { + Ok(metrics_text) => poem::Response::builder() + .status(StatusCode::OK) + .header("content-type", "text/plain") + .body(metrics_text), + Err(_) => poem::Response::builder() + .status(StatusCode::INTERNAL_SERVER_ERROR) + .body("Error encoding metrics"), + }, + Err(_) => poem::Response::builder() + .status(StatusCode::INTERNAL_SERVER_ERROR) + .body("Error gathering metrics"), + } +}