diff --git a/Cargo.lock b/Cargo.lock index 4dc6099..f3d4a85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2125,7 +2125,7 @@ dependencies = [ "bitflags", "libc", "plain", - "redox_syscall 0.7.3", + "redox_syscall", ] [[package]] @@ -2140,15 +2140,6 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" -[[package]] -name = "lock_api" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" -dependencies = [ - "scopeguard", -] - [[package]] name = "log" version = "0.4.29" @@ -2469,29 +2460,6 @@ dependencies = [ "sha2 0.10.9", ] -[[package]] -name = "parking_lot" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall 0.5.18", - "smallvec", - "windows-link", -] - [[package]] name = "parse-display" version = "0.9.1" @@ -2620,21 +2588,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "prometheus" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a" -dependencies = [ - "cfg-if", - "fnv", - "lazy_static", - "memchr", - "parking_lot", - "protobuf", - "thiserror 2.0.18", -] - [[package]] name = "prost" version = "0.14.3" @@ -2667,26 +2620,6 @@ dependencies = [ "prost", ] -[[package]] -name = "protobuf" -version = "3.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4" -dependencies = [ - "once_cell", - "protobuf-support", - "thiserror 1.0.69", -] - -[[package]] -name = "protobuf-support" -version = "3.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6" -dependencies = [ - "thiserror 1.0.69", -] - [[package]] name = "quick-xml" version = "0.37.5" @@ -2839,15 +2772,6 @@ dependencies = [ "rand 0.10.0", ] -[[package]] -name = "redox_syscall" -version = "0.5.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" -dependencies = [ - "bitflags", -] - [[package]] name = "redox_syscall" version = "0.7.3" @@ -3168,7 +3092,6 @@ dependencies = [ "opentelemetry-appender-tracing", "opentelemetry-otlp", "opentelemetry_sdk", - "prometheus", "rand 0.10.0", "rand_distr", "reqwest 0.13.2", @@ -3294,12 +3217,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - [[package]] name = "sct" version = "0.7.1" diff --git a/Cargo.toml b/Cargo.toml index 8bccc79..582fa03 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,7 +49,6 @@ opentelemetry_sdk = { version = "0.31", default-features = false, features = [ "logs", "metrics", ] } -prometheus = "0.14" s3s = "0.13.0-alpha.3" s3s-aws = "0.13.0-alpha.3" tokio = { version = "1", features = [ diff --git a/src/config.rs b/src/config.rs index 66498d0..90c49e8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -87,10 +87,6 @@ pub struct Config { /// Export logs via OTLP gRPC (requires otel_grpc_endpoint_url) #[arg(long, env = "OTEL_EXPORT_LOGS", default_value_t = false, action = clap::ArgAction::Set)] pub otel_export_logs: bool, - - /// Prometheus textfile collector directory - #[arg(long, env = "PROMETHEUS_TEXTFILE_DIR")] - pub prometheus_textfile_dir: Option, } impl Config { @@ -141,7 +137,7 @@ impl Display for Config { cache_max_entries: {}, cache_max_size_bytes: {}, cache_ttl_seconds: {}, \ max_cacheable_object_size: {}, otel_grpc_endpoint_url: {:?}, \ otel_export_metrics: {}, otel_export_logs: {}, cache_shards: {}, \ - cache_dry_run: {}, worker_threads: {}, prometheus_textfile_dir: {:?} }}", + cache_dry_run: {}, worker_threads: {} }}", self.listen_addr, self.upstream_endpoint, self.upstream_region, @@ -155,7 +151,6 @@ impl Display for Config { self.cache_shards, self.cache_dry_run, self.worker_threads, - self.prometheus_textfile_dir, ) } } @@ -186,7 +181,6 @@ mod tests { otel_grpc_endpoint_url: None, otel_export_metrics: false, otel_export_logs: false, - prometheus_textfile_dir: None, } } diff --git a/src/lib.rs b/src/lib.rs index 6f14e5b..27e1cfc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,7 @@ //! - **Range Request Support**: Caches partial object reads (byte ranges) //! - **Cache Invalidation**: Automatic invalidation on PUT/DELETE operations //! - **Dry-run Mode**: Validate cache correctness without serving cached data -//! - **Telemetry**: OpenTelemetry metrics and Prometheus support +//! - **Telemetry**: OpenTelemetry metrics support use std::sync::Arc; use std::time::Duration; @@ -41,7 +41,6 @@ mod auth; mod config; mod error; mod fifo_cache; -mod metrics_writer; mod proxy; mod s3_cache; mod s3_op; @@ -157,24 +156,6 @@ where S3CachingServiceProxy::new(b.build(), upstream_health_endpoint) }; - // Start Prometheus metrics writer if configured - let metrics_writer_handle = if let Some(textfile_dir) = config.prometheus_textfile_dir.clone() { - info!( - "Starting Prometheus textfile writer to {}/s3_cache.prom", - textfile_dir - ); - Some(tokio::spawn({ - async move { - if let Err(e) = metrics_writer::start_metrics_writer(textfile_dir).await { - error!("Metrics writer failed: {:?}", e); - } - } - })) - } else { - info!("Prometheus textfile writer disabled (PROMETHEUS_TEXTFILE_DIR not set)"); - None - }; - // Start hyper server let listener = TcpListener::bind(config.listen_addr).await?; // Report the bound address before entering the accept loop so callers using @@ -223,12 +204,6 @@ where } } - // Abort metrics writer background task - if let Some(handle) = metrics_writer_handle { - handle.abort(); - info!("Metrics writer task aborted"); - } - telemetry::shutdown_metrics(metrics_provider); telemetry::shutdown_logs(logs_provider); diff --git a/src/metrics_writer.rs b/src/metrics_writer.rs deleted file mode 100644 index 3b9b83e..0000000 --- a/src/metrics_writer.rs +++ /dev/null @@ -1,76 +0,0 @@ -use std::fs::File; -use std::io::Write; -use std::time::Duration; - -use prometheus::{Encoder, TextEncoder}; -use tracing::{debug, error, info}; - -use crate::telemetry; - -/// Start the Prometheus metrics writer background task. -/// -/// This task periodically writes metrics to a text file in Prometheus format, -/// which can be read by node_exporter's textfile collector. -/// -/// # Behavior -/// -/// - Writes metrics every 10 seconds -/// - Uses atomic file operations (write to .tmp, then rename) -/// - Continues running even if individual writes fail -/// - Graceful shutdown on Ctrl+C -pub async fn start_metrics_writer(textfile_dir: String) -> crate::Result<()> { - info!( - "Prometheus metrics writer started, writing to {}/s3_cache.prom", - textfile_dir - ); - - let tmp_path = format!("{}/s3_cache.prom.tmp", textfile_dir); - let final_path = format!("{}/s3_cache.prom", textfile_dir); - - let mut interval = tokio::time::interval(Duration::from_secs(10)); - - loop { - interval.tick().await; - - // Gather all metrics - let metric_families = telemetry::PROMETHEUS_REGISTRY.gather(); - - // Encode to Prometheus text format - let encoder = TextEncoder::new(); - let mut buffer = Vec::new(); - - if let Err(e) = encoder.encode(&metric_families, &mut buffer) { - error!("Failed to encode Prometheus metrics: {}", e); - continue; - } - - // Write atomically: write to temp file, then rename - match write_metrics_atomic(&tmp_path, &final_path, &buffer) { - Ok(_) => { - debug!( - "Successfully wrote {} bytes to {}", - buffer.len(), - final_path - ); - } - Err(e) => { - error!("Failed to write metrics file: {}", e); - } - } - } -} - -/// Write metrics to file atomically using write-to-temp + rename pattern. -/// -/// This ensures node_exporter never reads a partially written file. -fn write_metrics_atomic(tmp_path: &str, final_path: &str, data: &[u8]) -> std::io::Result<()> { - // Write to temporary file - let mut file = File::create(tmp_path)?; - file.write_all(data)?; - file.sync_all()?; // Ensure data is written to disk - - // Atomic rename - std::fs::rename(tmp_path, final_path)?; - - Ok(()) -} diff --git a/src/telemetry.rs b/src/telemetry.rs index 6c471a9..00eb949 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -1,11 +1,9 @@ -use std::collections::HashMap; use std::{sync::LazyLock, time::Duration}; use opentelemetry::KeyValue; use opentelemetry::metrics::{Counter, Gauge, Histogram}; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use opentelemetry_otlp::{Compression, WithExportConfig, WithTonicConfig}; -use prometheus::{HistogramOpts, IntCounter, IntGauge, Registry}; use tracing::{error, info}; use tracing_subscriber::{EnvFilter, layer::SubscriberExt, util::SubscriberInitExt}; @@ -25,15 +23,6 @@ static RESOURCE: LazyLock = LazyLock::new(|| { .build() }); -// Prometheus registry and metrics - -pub(crate) static PROMETHEUS_REGISTRY: LazyLock = LazyLock::new(|| { - let mut labels = HashMap::default(); - labels.insert("service_name".to_string(), "s3_cache".to_string()); - - Registry::new_custom(None, Some(labels)).expect("Failed to create Prometheus registry") -}); - const OBJECT_SIZE_BUCKETS: &[f64] = &[ 10_240.0, // 10 KiB 51_200.0, // 50 KiB @@ -202,21 +191,6 @@ pub(crate) fn record_cache_hit(bytes: u64) { .build() }); - static PROM_CACHE_HIT_BYTES_HISTOGRAM: LazyLock = LazyLock::new(|| { - let histogram = prometheus::Histogram::with_opts( - HistogramOpts::new( - "s3_cache_hit_bytes_histogram", - "Distribution of object sizes on cache hits", - ) - .buckets(OBJECT_SIZE_BUCKETS.to_vec()), - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(histogram.clone())) - .unwrap(); - histogram - }); - static CACHE_HIT_BYTES_TOTAL: LazyLock> = LazyLock::new(|| { opentelemetry::global::meter(CARGO_CRATE_NAME) .u64_counter("s3_cache.hit_bytes_total") @@ -224,20 +198,6 @@ pub(crate) fn record_cache_hit(bytes: u64) { .build() }); - static PROM_CACHE_HIT_BYTES_TOTAL: LazyLock = LazyLock::new(|| { - let counter = IntCounter::new( - "s3_cache_hit_bytes_total", - "Total bytes received from cache hits", - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(counter.clone())) - .unwrap(); - counter - }); - - PROM_CACHE_HIT_BYTES_HISTOGRAM.observe(bytes as f64); - PROM_CACHE_HIT_BYTES_TOTAL.inc_by(bytes); CACHE_HIT_BYTES_HISTOGRAM.record(bytes, &[]); CACHE_HIT_BYTES_TOTAL.add(bytes, &[]); } @@ -253,21 +213,6 @@ pub(crate) fn record_cache_miss(bytes: u64) { .build() }); - static PROM_CACHE_MISS_BYTES_HISTOGRAM: LazyLock = LazyLock::new(|| { - let histogram = prometheus::Histogram::with_opts( - HistogramOpts::new( - "s3_cache_miss_bytes_histogram", - "Distribution of object sizes on cache misses", - ) - .buckets(OBJECT_SIZE_BUCKETS.to_vec()), - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(histogram.clone())) - .unwrap(); - histogram - }); - static CACHE_MISS_BYTES_TOTAL: LazyLock> = LazyLock::new(|| { opentelemetry::global::meter(CARGO_CRATE_NAME) .u64_counter("s3_cache.miss_bytes_total") @@ -275,20 +220,6 @@ pub(crate) fn record_cache_miss(bytes: u64) { .build() }); - static PROM_CACHE_MISS_BYTES_TOTAL: LazyLock = LazyLock::new(|| { - let counter = IntCounter::new( - "s3_cache_miss_bytes_total", - "Total bytes received from cache misses", - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(counter.clone())) - .unwrap(); - counter - }); - - PROM_CACHE_MISS_BYTES_HISTOGRAM.observe(bytes as f64); - PROM_CACHE_MISS_BYTES_TOTAL.inc_by(bytes); CACHE_MISS_BYTES_HISTOGRAM.record(bytes, &[]); CACHE_MISS_BYTES_TOTAL.add(bytes, &[]); } @@ -304,22 +235,6 @@ pub(crate) fn record_cache_eviction(bytes: u64) { .build() }); - static PROM_CACHE_EVICTION_BYTES_HISTOGRAM: LazyLock = - LazyLock::new(|| { - let histogram = prometheus::Histogram::with_opts( - HistogramOpts::new( - "s3_cache_eviction_bytes_histogram", - "Distribution of object sizes on cache evictions", - ) - .buckets(OBJECT_SIZE_BUCKETS.to_vec()), - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(histogram.clone())) - .unwrap(); - histogram - }); - static CACHE_EVICTION_BYTES_TOTAL: LazyLock> = LazyLock::new(|| { opentelemetry::global::meter(CARGO_CRATE_NAME) .u64_counter("s3_cache.eviction_bytes_total") @@ -327,20 +242,6 @@ pub(crate) fn record_cache_eviction(bytes: u64) { .build() }); - static PROM_CACHE_EVICTION_BYTES_TOTAL: LazyLock = LazyLock::new(|| { - let counter = IntCounter::new( - "s3_cache_eviction_bytes_total", - "Total bytes evicted from cache", - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(counter.clone())) - .unwrap(); - counter - }); - - PROM_CACHE_EVICTION_BYTES_HISTOGRAM.observe(bytes as f64); - PROM_CACHE_EVICTION_BYTES_TOTAL.inc_by(bytes); CACHE_EVICTION_BYTES_HISTOGRAM.record(bytes, &[]); CACHE_EVICTION_BYTES_TOTAL.add(bytes, &[]); } @@ -357,23 +258,6 @@ pub(crate) fn record_cache_eviction_age(age_secs: f64) { .build() }); - static PROM_CACHE_EVICTION_AGE_HISTOGRAM: LazyLock = - LazyLock::new(|| { - let histogram = prometheus::Histogram::with_opts( - HistogramOpts::new( - "s3_cache_eviction_age_histogram_seconds", - "Age of objects (in seconds) at the time of eviction, capped at TTL", - ) - .buckets(EVICTION_AGE_BUCKETS.to_vec()), - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(histogram.clone())) - .unwrap(); - histogram - }); - - PROM_CACHE_EVICTION_AGE_HISTOGRAM.observe(age_secs); CACHE_EVICTION_AGE_HISTOGRAM.record(age_secs, &[]); } @@ -388,22 +272,6 @@ pub(crate) fn record_cache_oversized(bytes: u64) { .build() }); - static PROM_CACHE_OVERSIZED_BYTES_HISTOGRAM: LazyLock = - LazyLock::new(|| { - let histogram = prometheus::Histogram::with_opts( - HistogramOpts::new( - "s3_cache_oversized_bytes_histogram", - "Distribution of object sizes that exceeded the max cacheable size", - ) - .buckets(OVERSIZED_OBJECT_SIZE_BUCKETS.to_vec()), - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(histogram.clone())) - .unwrap(); - histogram - }); - static CACHE_OVERSIZED_BYTES_TOTAL: LazyLock> = LazyLock::new(|| { opentelemetry::global::meter(CARGO_CRATE_NAME) .u64_counter("s3_cache.oversized_bytes_total") @@ -413,20 +281,6 @@ pub(crate) fn record_cache_oversized(bytes: u64) { .build() }); - static PROM_CACHE_OVERSIZED_BYTES_TOTAL: LazyLock = LazyLock::new(|| { - let counter = IntCounter::new( - "s3_cache_oversized_bytes_total", - "Total number of objects encountered exceeding the max cacheable size", - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(counter.clone())) - .unwrap(); - counter - }); - - PROM_CACHE_OVERSIZED_BYTES_HISTOGRAM.observe(bytes as f64); - PROM_CACHE_OVERSIZED_BYTES_TOTAL.inc_by(bytes); CACHE_OVERSIZED_BYTES_HISTOGRAM.record(bytes, &[]); CACHE_OVERSIZED_BYTES_TOTAL.add(bytes, &[]); } @@ -442,22 +296,6 @@ pub(crate) fn record_unique_requested(bytes: u64) { .build() }); - static PROM_CACHE_UNIQUE_REQUESTED_BYTES_HISTOGRAM: LazyLock = - LazyLock::new(|| { - let histogram = prometheus::Histogram::with_opts( - HistogramOpts::new( - "s3_cache_estimated_unique_bytes_histogram", - "Distribution of estimated unique object sizes", - ) - .buckets(OBJECT_SIZE_BUCKETS.to_vec()), - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(histogram.clone())) - .unwrap(); - histogram - }); - static CACHE_UNIQUE_REQUESTED_BYTES_TOTAL: LazyLock> = LazyLock::new(|| { opentelemetry::global::meter(CARGO_CRATE_NAME) .u64_counter("s3_cache.estimated_unique_bytes_total") @@ -465,20 +303,6 @@ pub(crate) fn record_unique_requested(bytes: u64) { .build() }); - static PROM_CACHE_UNIQUE_REQUESTED_BYTES_TOTAL: LazyLock = LazyLock::new(|| { - let counter = IntCounter::new( - "s3_cache_estimated_unique_bytes_total", - "Estimated total bytes for unique keys accessed", - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(counter.clone())) - .unwrap(); - counter - }); - - PROM_CACHE_UNIQUE_REQUESTED_BYTES_HISTOGRAM.observe(bytes as f64); - PROM_CACHE_UNIQUE_REQUESTED_BYTES_TOTAL.inc_by(bytes); CACHE_UNIQUE_REQUESTED_BYTES_HISTOGRAM.record(bytes, &[]); CACHE_UNIQUE_REQUESTED_BYTES_TOTAL.add(bytes, &[]); } @@ -493,19 +317,6 @@ pub(crate) fn record_cache_invalidation() { .build() }); - static PROM_CACHE_INVALIDATION_TOTAL: LazyLock = LazyLock::new(|| { - let counter = IntCounter::new( - "s3_cache_invalidation_total", - "Number of cache invalidations", - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(counter.clone())) - .unwrap(); - counter - }); - - PROM_CACHE_INVALIDATION_TOTAL.inc(); CACHE_INVALIDATION_TOTAL.add(1, &[]); } @@ -518,21 +329,6 @@ static SERVICE_ERROR: LazyLock> = LazyLock::new(|| { .build() }); -static PROM_SERVICE_ERROR: LazyLock = LazyLock::new(|| { - let counter = prometheus::IntCounterVec::new( - prometheus::Opts::new( - "service_error_total", - "Internal errors the service can encounter", - ), - &["error_type", "service", "component", "action", "host_name"], - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(counter.clone())) - .unwrap(); - counter -}); - pub(crate) fn record_service_error( error_type: &'static str, component: &'static str, @@ -546,9 +342,6 @@ pub(crate) fn record_service_error( KeyValue::new("host.name", HOSTNAME.clone()), ]; SERVICE_ERROR.add(1, attributes); - PROM_SERVICE_ERROR - .with_label_values(&[error_type, CARGO_CRATE_NAME, component, action, &HOSTNAME]) - .inc(); } // MARK: Upstream Errors @@ -560,21 +353,6 @@ static UPSTREAM_ERROR: LazyLock> = LazyLock::new(|| { .build() }); -static PROM_UPSTREAM_ERROR: LazyLock = LazyLock::new(|| { - let counter = prometheus::IntCounterVec::new( - prometheus::Opts::new( - "upstream_error_total", - "Errors received from the upstream service", - ), - &["error_type", "service", "component", "action", "host_name"], - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(counter.clone())) - .unwrap(); - counter -}); - pub(crate) fn record_upstream_error( error_type: &'static str, component: &'static str, @@ -588,9 +366,6 @@ pub(crate) fn record_upstream_error( KeyValue::new("host.name", HOSTNAME.clone()), ]; UPSTREAM_ERROR.add(1, attributes); - PROM_UPSTREAM_ERROR - .with_label_values(&[error_type, CARGO_CRATE_NAME, component, action, &HOSTNAME]) - .inc(); } // MARK: Size Count @@ -603,16 +378,6 @@ pub(crate) fn record_cache_size_count(size_count: usize) { .build() }); - static PROM_CACHE_SIZE_COUNT: LazyLock = LazyLock::new(|| { - let gauge = - IntGauge::new("s3_cache_size_count", "Current number of objects in cache").unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(gauge.clone())) - .unwrap(); - gauge - }); - - PROM_CACHE_SIZE_COUNT.set(size_count as i64); CACHE_SIZE_COUNT.record(size_count as u64, &[]); } @@ -626,15 +391,6 @@ pub(crate) fn record_cache_size_bytes(size_bytes: usize) { .build() }); - static PROM_CACHE_SIZE_BYTES: LazyLock = LazyLock::new(|| { - let gauge = IntGauge::new("s3_cache_size_bytes", "Current cache size in bytes").unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(gauge.clone())) - .unwrap(); - gauge - }); - - PROM_CACHE_SIZE_BYTES.set(size_bytes as i64); CACHE_SIZE_BYTES.record(size_bytes as u64, &[]); } @@ -665,44 +421,17 @@ pub(crate) fn record_server_request_duration(data: RequestDuration, op_name: &st .build() }); - static PROM_REQUEST_DURATION_MS: LazyLock = LazyLock::new(|| { - let histogram = prometheus::HistogramVec::new( - HistogramOpts::new( - "http_server_request_duration_milliseconds", - "Duration of the request in milliseconds", - ) - .buckets(REQUEST_DURATION_BUCKETS.to_vec()), - &[ - "http_request_method", - "http_response_status_code", - "network_protocol_name", - "network_protocol_version", - "rpc_method", - "url_scheme", - ], - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(histogram.clone())) - .unwrap(); - histogram - }); - let http_request_method: String = data.method; - let http_response_status_code: String = data - .status_code - .map_or(String::default(), |status| status.to_string()); - let network_protocol_name: String = "http".to_owned(); let network_protocol_version: String = data.version.to_owned(); let rpc_method: String = op_name.to_owned(); let url_scheme: String = data.scheme.unwrap_or_else(|| "http".to_owned()); let mut attributes = vec![ - KeyValue::new("network.protocol.version", network_protocol_version.clone()), - KeyValue::new("http.request.method", http_request_method.clone()), - KeyValue::new("network.protocol.name", network_protocol_name.clone()), - KeyValue::new("rpc.method", rpc_method.clone()), - KeyValue::new("url.scheme", url_scheme.clone()), + KeyValue::new("network.protocol.version", network_protocol_version), + KeyValue::new("http.request.method", http_request_method), + KeyValue::new("network.protocol.name", "http"), + KeyValue::new("rpc.method", rpc_method), + KeyValue::new("url.scheme", url_scheme), ]; if let Some(status_code) = data.status_code { @@ -712,19 +441,7 @@ pub(crate) fn record_server_request_duration(data: RequestDuration, op_name: &st )); } - let milliseconds = 1000.0 * data.duration.as_secs_f64(); - - PROM_REQUEST_DURATION_MS - .with_label_values(&[ - http_request_method, - http_response_status_code, - network_protocol_name, - network_protocol_version, - rpc_method, - url_scheme, - ]) - .observe(milliseconds); - REQUEST_DURATION_MS.record(milliseconds, &attributes); + REQUEST_DURATION_MS.record(1000.0 * data.duration.as_secs_f64(), &attributes); } pub(crate) fn record_client_request_duration(data: RequestDuration, op_name: &str) { @@ -737,44 +454,17 @@ pub(crate) fn record_client_request_duration(data: RequestDuration, op_name: &st .build() }); - static PROM_REQUEST_DURATION_MS: LazyLock = LazyLock::new(|| { - let histogram = prometheus::HistogramVec::new( - HistogramOpts::new( - "http_client_request_duration_milliseconds", - "Duration of the request in milliseconds", - ) - .buckets(REQUEST_DURATION_BUCKETS.to_vec()), - &[ - "http_request_method", - "http_response_status_code", - "network_protocol_name", - "network_protocol_version", - "rpc_method", - "url_scheme", - ], - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(histogram.clone())) - .unwrap(); - histogram - }); - let http_request_method: String = data.method; - let http_response_status_code: String = data - .status_code - .map_or(String::default(), |status| status.to_string()); - let network_protocol_name: String = "http".to_owned(); let network_protocol_version: String = data.version.to_owned(); let rpc_method: String = op_name.to_owned(); let url_scheme: String = data.scheme.unwrap_or_else(|| "http".to_owned()); let mut attributes = vec![ - KeyValue::new("network.protocol.version", network_protocol_version.clone()), - KeyValue::new("http.request.method", http_request_method.clone()), - KeyValue::new("network.protocol.name", network_protocol_name.clone()), - KeyValue::new("rpc.method", rpc_method.clone()), - KeyValue::new("url.scheme", url_scheme.clone()), + KeyValue::new("network.protocol.version", network_protocol_version), + KeyValue::new("http.request.method", http_request_method), + KeyValue::new("network.protocol.name", "http"), + KeyValue::new("rpc.method", rpc_method), + KeyValue::new("url.scheme", url_scheme), ]; if let Some(status_code) = data.status_code { @@ -784,19 +474,7 @@ pub(crate) fn record_client_request_duration(data: RequestDuration, op_name: &st )); } - let milliseconds = 1000.0 * data.duration.as_secs_f64(); - - PROM_REQUEST_DURATION_MS - .with_label_values(&[ - http_request_method, - http_response_status_code, - network_protocol_name, - network_protocol_version, - rpc_method, - url_scheme, - ]) - .observe(milliseconds); - REQUEST_DURATION_MS.record(milliseconds, &attributes); + REQUEST_DURATION_MS.record(1000.0 * data.duration.as_secs_f64(), &attributes); } // MARK: Response Body Sizes @@ -810,8 +488,21 @@ pub(crate) struct ResponseBodySize { pub(crate) size: u64, } -static RESPONSE_BODY_SIZE_BUCKETS: LazyLock> = - LazyLock::new(|| prometheus::exponential_buckets(1024.0, 4.0, 10).unwrap()); +// Exponential buckets: start 1024, factor 4.0, count 10 (1 KiB to ~256 GiB) +static RESPONSE_BODY_SIZE_BUCKETS: LazyLock> = LazyLock::new(|| { + vec![ + 1_024.0, + 4_096.0, + 16_384.0, + 65_536.0, + 262_144.0, + 1_048_576.0, + 4_194_304.0, + 16_777_216.0, + 67_108_864.0, + 268_435_456.0, + ] +}); pub(crate) fn record_server_response_body_size(data: ResponseBodySize, op_name: &str) { static RESPONSE_BODY_SIZE_BYTES: LazyLock> = LazyLock::new(|| { @@ -823,45 +514,17 @@ pub(crate) fn record_server_response_body_size(data: ResponseBodySize, op_name: .build() }); - static PROM_RESPONSE_BODY_SIZE_BYTES: LazyLock = - LazyLock::new(|| { - let histogram = prometheus::HistogramVec::new( - HistogramOpts::new( - "http_server_response_body_size_bytes", - "Size of get_object response bodies in bytes", - ) - .buckets(RESPONSE_BODY_SIZE_BUCKETS.to_vec()), - &[ - "http_request_method", - "http_response_status_code", - "network_protocol_name", - "network_protocol_version", - "rpc_method", - "url_scheme", - ], - ) - .unwrap(); - PROMETHEUS_REGISTRY - .register(Box::new(histogram.clone())) - .unwrap(); - histogram - }); - let http_request_method: String = data.method; - let http_response_status_code: String = data - .status_code - .map_or(String::default(), |status| status.to_string()); - let network_protocol_name: String = "http".to_owned(); let network_protocol_version: String = data.version.to_owned(); let rpc_method: String = op_name.to_owned(); let url_scheme: String = data.scheme.unwrap_or_else(|| "http".to_owned()); let mut attributes = vec![ - KeyValue::new("network.protocol.version", network_protocol_version.clone()), - KeyValue::new("http.request.method", http_request_method.clone()), - KeyValue::new("network.protocol.name", network_protocol_name.clone()), - KeyValue::new("rpc.method", rpc_method.clone()), - KeyValue::new("url.scheme", url_scheme.clone()), + KeyValue::new("network.protocol.version", network_protocol_version), + KeyValue::new("http.request.method", http_request_method), + KeyValue::new("network.protocol.name", "http"), + KeyValue::new("rpc.method", rpc_method), + KeyValue::new("url.scheme", url_scheme), ]; if let Some(status_code) = data.status_code { @@ -871,17 +534,5 @@ pub(crate) fn record_server_response_body_size(data: ResponseBodySize, op_name: )); } - let bytes = data.size as f64; - - PROM_RESPONSE_BODY_SIZE_BYTES - .with_label_values(&[ - http_request_method, - http_response_status_code, - network_protocol_name, - network_protocol_version, - rpc_method, - url_scheme, - ]) - .observe(bytes); - RESPONSE_BODY_SIZE_BYTES.record(bytes, &attributes); + RESPONSE_BODY_SIZE_BYTES.record(data.size as f64, &attributes); } diff --git a/tests/common/docker.rs b/tests/common/docker.rs index 8fcfa62..be7966f 100644 --- a/tests/common/docker.rs +++ b/tests/common/docker.rs @@ -92,7 +92,6 @@ pub async fn start_proxy(minio_api_port: u16) -> TestProxy { otel_grpc_endpoint_url: None, // no network side effects in tests otel_export_logs: false, otel_export_metrics: false, - prometheus_textfile_dir: None, // no filesystem side effects in tests }; let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); diff --git a/tests/integration_health.rs b/tests/integration_health.rs index 5a6155d..23db976 100644 --- a/tests/integration_health.rs +++ b/tests/integration_health.rs @@ -30,7 +30,6 @@ async fn start_test_server_with_upstream( otel_grpc_endpoint_url: None, otel_export_metrics: false, otel_export_logs: false, - prometheus_textfile_dir: None, }; let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>();