|
| 1 | +//! Metrics for the sync pipeline. |
| 2 | +//! |
| 3 | +//! This module provides comprehensive metrics collection for the synchronization pipeline, |
| 4 | +//! enabling monitoring and visualization of both individual stages and overall pipeline progress. |
| 5 | +//! |
| 6 | +//! ## Pipeline Metrics |
| 7 | +//! |
| 8 | +//! Pipeline-level metrics track the overall synchronization process: |
| 9 | +//! |
| 10 | +//! - Total chunks processed across all stages |
| 11 | +//! - Total blocks processed across all pipeline runs |
| 12 | +//! - Time taken for each pipeline iteration |
| 13 | +//! - Current tip block being synced to |
| 14 | +//! - Pipeline runs completed |
| 15 | +//! |
| 16 | +//! ## Stage Metrics |
| 17 | +//! |
| 18 | +//! Stage-level metrics are collected per stage and include: |
| 19 | +//! |
| 20 | +//! - Number of executions for each stage |
| 21 | +//! - Total blocks processed by each stage |
| 22 | +//! - Execution time for each stage execution |
| 23 | +//! - Checkpoint updates for each stage |
| 24 | +
|
| 25 | +use std::collections::HashMap; |
| 26 | +use std::sync::Arc; |
| 27 | +use std::time::Instant; |
| 28 | + |
| 29 | +use katana_metrics::metrics::{self, Counter, Gauge, Histogram}; |
| 30 | +use katana_metrics::Metrics; |
| 31 | + |
| 32 | +/// Metrics for the sync pipeline. |
| 33 | +#[derive(Clone)] |
| 34 | +pub struct PipelineMetrics { |
| 35 | + inner: Arc<PipelineMetricsInner>, |
| 36 | +} |
| 37 | + |
| 38 | +impl PipelineMetrics { |
| 39 | + /// Creates a new instance of `PipelineMetrics`. |
| 40 | + pub fn new() -> Self { |
| 41 | + Self { |
| 42 | + inner: Arc::new(PipelineMetricsInner { |
| 43 | + pipeline: PipelineOverallMetrics::default(), |
| 44 | + stages: Default::default(), |
| 45 | + }), |
| 46 | + } |
| 47 | + } |
| 48 | + |
| 49 | + /// Get or create metrics for a specific stage. |
| 50 | + pub fn stage(&self, stage_id: &'static str) -> StageMetrics { |
| 51 | + let mut stages = self.inner.stages.lock().unwrap(); |
| 52 | + stages |
| 53 | + .entry(stage_id) |
| 54 | + .or_insert_with(|| StageMetrics::new_with_labels(&[("stage", stage_id)])) |
| 55 | + .clone() |
| 56 | + } |
| 57 | + |
| 58 | + /// Record a chunk being processed by the pipeline. |
| 59 | + pub fn record_chunk(&self, blocks_in_chunk: u64) { |
| 60 | + self.inner.pipeline.chunks_processed_total.increment(1); |
| 61 | + self.inner.pipeline.blocks_processed_total.increment(blocks_in_chunk); |
| 62 | + } |
| 63 | + |
| 64 | + /// Update the current tip being synced to. |
| 65 | + pub fn set_tip(&self, tip: u64) { |
| 66 | + self.inner.pipeline.current_tip.set(tip as f64); |
| 67 | + } |
| 68 | + |
| 69 | + /// Record a pipeline run completing. |
| 70 | + pub fn record_run_complete(&self) { |
| 71 | + self.inner.pipeline.runs_completed_total.increment(1); |
| 72 | + } |
| 73 | + |
| 74 | + /// Record the time taken for a pipeline iteration. |
| 75 | + pub fn record_iteration_time(&self, duration_seconds: f64) { |
| 76 | + self.inner.pipeline.iteration_time_seconds.record(duration_seconds); |
| 77 | + } |
| 78 | + |
| 79 | + /// Update the lowest checkpoint across all stages. |
| 80 | + pub fn set_lowest_checkpoint(&self, checkpoint: u64) { |
| 81 | + self.inner.pipeline.lowest_checkpoint.set(checkpoint as f64); |
| 82 | + } |
| 83 | + |
| 84 | + /// Update the highest checkpoint across all stages. |
| 85 | + pub fn set_highest_checkpoint(&self, checkpoint: u64) { |
| 86 | + self.inner.pipeline.highest_checkpoint.set(checkpoint as f64); |
| 87 | + } |
| 88 | +} |
| 89 | + |
| 90 | +impl Default for PipelineMetrics { |
| 91 | + fn default() -> Self { |
| 92 | + Self::new() |
| 93 | + } |
| 94 | +} |
| 95 | + |
| 96 | +struct PipelineMetricsInner { |
| 97 | + /// Overall pipeline metrics |
| 98 | + pipeline: PipelineOverallMetrics, |
| 99 | + /// Per-stage metrics |
| 100 | + stages: std::sync::Mutex<HashMap<&'static str, StageMetrics>>, |
| 101 | +} |
| 102 | + |
| 103 | +/// Metrics for the overall pipeline execution. |
| 104 | +#[derive(Metrics, Clone)] |
| 105 | +#[metrics(scope = "sync.pipeline")] |
| 106 | +struct PipelineOverallMetrics { |
| 107 | + /// Total number of chunks processed by the pipeline |
| 108 | + chunks_processed_total: Counter, |
| 109 | + /// Total number of blocks processed by the pipeline |
| 110 | + blocks_processed_total: Counter, |
| 111 | + /// Total number of pipeline runs completed |
| 112 | + runs_completed_total: Counter, |
| 113 | + /// Current tip block being synced to |
| 114 | + current_tip: Gauge, |
| 115 | + /// Lowest checkpoint across all stages |
| 116 | + lowest_checkpoint: Gauge, |
| 117 | + /// Highest checkpoint across all stages |
| 118 | + highest_checkpoint: Gauge, |
| 119 | + /// Time taken for each pipeline iteration |
| 120 | + iteration_time_seconds: Histogram, |
| 121 | +} |
| 122 | + |
| 123 | +/// Metrics for individual stage execution. |
| 124 | +#[derive(Metrics, Clone)] |
| 125 | +#[metrics(scope = "sync.stage")] |
| 126 | +pub struct StageMetrics { |
| 127 | + /// Number of times the stage has been executed |
| 128 | + executions_total: Counter, |
| 129 | + /// Total number of blocks processed by this stage |
| 130 | + blocks_processed_total: Counter, |
| 131 | + /// Number of times the stage was skipped (checkpoint >= target) |
| 132 | + skipped_total: Counter, |
| 133 | + /// Time taken for each stage execution |
| 134 | + execution_time_seconds: Histogram, |
| 135 | + /// Current checkpoint for this stage |
| 136 | + checkpoint: Gauge, |
| 137 | +} |
| 138 | + |
| 139 | +impl StageMetrics { |
| 140 | + /// Record a stage execution starting. |
| 141 | + pub fn execution_started(&self) -> StageExecutionGuard { |
| 142 | + self.executions_total.increment(1); |
| 143 | + StageExecutionGuard { metrics: self.clone(), started_at: Instant::now() } |
| 144 | + } |
| 145 | + |
| 146 | + /// Record blocks processed by this stage. |
| 147 | + pub fn record_blocks_processed(&self, count: u64) { |
| 148 | + self.blocks_processed_total.increment(count); |
| 149 | + } |
| 150 | + |
| 151 | + /// Record a stage being skipped. |
| 152 | + pub fn record_skipped(&self) { |
| 153 | + self.skipped_total.increment(1); |
| 154 | + } |
| 155 | + |
| 156 | + /// Update the checkpoint for this stage. |
| 157 | + pub fn set_checkpoint(&self, checkpoint: u64) { |
| 158 | + self.checkpoint.set(checkpoint as f64); |
| 159 | + } |
| 160 | +} |
| 161 | + |
| 162 | +/// Guard that records the execution time when dropped. |
| 163 | +pub struct StageExecutionGuard { |
| 164 | + metrics: StageMetrics, |
| 165 | + started_at: Instant, |
| 166 | +} |
| 167 | + |
| 168 | +impl Drop for StageExecutionGuard { |
| 169 | + fn drop(&mut self) { |
| 170 | + let duration = self.started_at.elapsed().as_secs_f64(); |
| 171 | + self.metrics.execution_time_seconds.record(duration); |
| 172 | + } |
| 173 | +} |
0 commit comments