Commit f1af5e3
enhancement(observability): Add transform latency metrics
This adds the `component_latency_seconds` histogram and `component_latency_mean_seconds` gauge internal metrics, exposing the time an event spends in a single transform, including the time spent in the transform buffer.
1 parent d0a6823

10 files changed (+277, -5 lines)
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+Added the `component_latency_seconds` histogram and
+`component_latency_mean_seconds` gauge internal metrics, exposing the time an
+event spends in a single transform including the transform buffer.
+
+authors: bruceg

lib/vector-core/src/latency.rs

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
use std::time::Instant;

use metrics::{Histogram, gauge, histogram};
use vector_common::stats::EwmaGauge;

use crate::event::EventArray;

const COMPONENT_LATENCY: &str = "component_latency_seconds";
const COMPONENT_LATENCY_MEAN: &str = "component_latency_mean_seconds";
const DEFAULT_LATENCY_EWMA_ALPHA: f64 = 0.9;

#[derive(Debug)]
pub struct LatencyRecorder {
    histogram: Histogram,
    gauge: EwmaGauge,
}

impl LatencyRecorder {
    pub fn new(ewma_alpha: Option<f64>) -> Self {
        Self {
            histogram: histogram!(COMPONENT_LATENCY),
            gauge: EwmaGauge::new(
                gauge!(COMPONENT_LATENCY_MEAN),
                ewma_alpha.or(Some(DEFAULT_LATENCY_EWMA_ALPHA)),
            ),
        }
    }

    pub fn on_send(&self, events: &mut EventArray) {
        let now = Instant::now();
        let mut sum = 0.0;
        let mut count = 0usize;

        // Since all of the events in the array will most likely have entered and exited the
        // component at close to the same time, we average all the latencies over the entire array
        // and record it just once in the EWMA-backed gauge. If we were to record each latency
        // individually, the gauge would effectively just reflect the latest array's latency,
        // eliminating the utility of the EWMA averaging. However, we record the individual
        // latencies in the histogram to get a more granular view of the latency distribution.
        for mut event in events.iter_events_mut() {
            let metadata = event.metadata_mut();
            if let Some(previous) = metadata.last_transform_timestamp() {
                let latency = now.saturating_duration_since(previous).as_secs_f64();
                sum += latency;
                count += 1;
                self.histogram.record(latency);
            }

            metadata.set_last_transform_timestamp(now);
        }
        if count > 0 {
            #[expect(
                clippy::cast_precision_loss,
                reason = "losing precision is acceptable here"
            )]
            let mean = sum / count as f64;
            self.gauge.record(mean);
        }
    }
}
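
The `EwmaGauge` helper from `vector_common::stats` is used above but is not part of this diff. As a rough sketch of how such an EWMA-backed gauge could behave (the struct shape, interior mutability, and the weighting convention here are assumptions, not the actual implementation):

use std::sync::Mutex;

use metrics::Gauge;

// Hypothetical sketch only; the real `vector_common::stats::EwmaGauge` may differ.
pub struct EwmaGaugeSketch {
    gauge: Gauge,
    alpha: f64,
    // Interior mutability so callers can record through `&self`, as `LatencyRecorder::on_send` does.
    state: Mutex<Option<f64>>,
}

impl EwmaGaugeSketch {
    pub fn new(gauge: Gauge, alpha: Option<f64>) -> Self {
        Self {
            gauge,
            // Mirrors `DEFAULT_LATENCY_EWMA_ALPHA` above; treated here as the decay on the previous mean.
            alpha: alpha.unwrap_or(0.9),
            state: Mutex::new(None),
        }
    }

    pub fn record(&self, sample: f64) {
        let mut state = self.state.lock().unwrap();
        // EWMA update (weighting convention assumed): keep `alpha` of the old mean and
        // blend in `1 - alpha` of the new sample; the first sample seeds the mean.
        let next = match *state {
            Some(prev) => self.alpha * prev + (1.0 - self.alpha) * sample,
            None => sample,
        };
        *state = Some(next);
        self.gauge.set(next);
    }
}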

lib/vector-core/src/lib.rs

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ pub mod config;
pub mod event;
pub mod fanout;
pub mod ipallowlist;
+pub mod latency;
pub mod metrics;
pub mod partition;
pub mod schema;

lib/vector-core/src/transform/outputs.rs

Lines changed: 20 additions & 0 deletions
@@ -220,6 +220,19 @@ impl TransformOutputsBuf
    pub fn take_all_named(&mut self) -> HashMap<String, OutputBuffer> {
        std::mem::take(&mut self.named_buffers)
    }
+
+   /// Applies `f` to each [`EventArray`] currently buffered in this outputs buffer.
+   ///
+   /// This is useful for cross-cutting instrumentation (e.g. latency timestamp propagation)
+   /// that needs mutable access to the buffered arrays before they are sent.
+   pub fn for_each_array_mut(&mut self, mut f: impl FnMut(&mut EventArray)) {
+       if let Some(primary) = self.primary_buffer.as_mut() {
+           primary.for_each_array_mut(&mut f);
+       }
+       for buf in self.named_buffers.values_mut() {
+           buf.for_each_array_mut(&mut f);
+       }
+   }
}

impl ByteSizeOf for TransformOutputsBuf {

@@ -295,6 +308,13 @@ impl OutputBuffer
        self.0.drain(..).flat_map(EventArray::into_events)
    }

+   /// Applies `f` to each [`EventArray`] currently held by this buffer.
+   pub fn for_each_array_mut(&mut self, mut f: impl FnMut(&mut EventArray)) {
+       for array in &mut self.0 {
+           f(array);
+       }
+   }
+
    async fn send(
        &mut self,
        output: &mut Fanout,
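
For orientation, any holder of a `TransformOutputsBuf` can apply per-array instrumentation over the primary and all named outputs in a single pass. A minimal usage sketch (the free function below is illustrative and not part of this commit; it mirrors how `Runner::send_outputs` calls the method in `src/topology/builder.rs` further down):

// Illustrative helper: stamp and record latency on every buffered array
// before the outputs are sent downstream.
fn record_latency(outputs_buf: &mut TransformOutputsBuf, recorder: &LatencyRecorder) {
    outputs_buf.for_each_array_mut(|array| recorder.on_send(array));
}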

lib/vector-lib/src/lib.rs

Lines changed: 2 additions & 2 deletions
@@ -21,8 +21,8 @@ pub use vector_config::impl_generate_config_from_default;
pub use vector_core::compile_vrl;
pub use vector_core::{
    EstimatedJsonEncodedSizeOf, buckets, default_data_dir, emit, event, fanout, ipallowlist,
-   metric_tags, metrics, partition, quantiles, register, samples, schema, serde, sink, source,
-   source_sender, tcp, tls, transform,
+   latency, metric_tags, metrics, partition, quantiles, register, samples, schema, serde, sink,
+   source, source_sender, tcp, tls, transform,
};
pub use vector_lookup as lookup;
pub use vector_stream as stream;

src/topology/builder.rs

Lines changed: 15 additions & 1 deletion
@@ -26,6 +26,7 @@ use vector_lib::{
        },
    },
    internal_event::{self, CountByteSize, EventsSent, InternalEventHandle as _, Registered},
+   latency::LatencyRecorder,
    schema::Definition,
    source_sender::{CHUNK_SIZE, SourceSenderItem},
    transform::update_runtime_schema_definition,

@@ -742,7 +743,14 @@
        let sender = self
            .utilization_registry
            .add_component(node.key.clone(), gauge!("utilization"));
-       let runner = Runner::new(t, input_rx, sender, node.input_details.data_type(), outputs);
+       let runner = Runner::new(
+           t,
+           input_rx,
+           sender,
+           node.input_details.data_type(),
+           outputs,
+           LatencyRecorder::new(self.config.global.latency_ewma_alpha),
+       );
        let transform = if node.enable_concurrency {
            runner.run_concurrently().boxed()
        } else {

@@ -807,6 +815,7 @@
            component: key.clone(),
            port: None,
        });
+       let latency_recorder = LatencyRecorder::new(self.config.global.latency_ewma_alpha);

        // Task transforms can only write to the default output, so only a single schema def map is needed
        let schema_definition_map = outputs

@@ -825,6 +834,7 @@
                for event in events.iter_events_mut() {
                    update_runtime_schema_definition(event, &output_id, &schema_definition_map);
                }
+               latency_recorder.on_send(&mut events);
                (events, Instant::now())
            })
            .inspect(move |(events, _): &(EventArray, Instant)| {

@@ -1110,6 +1120,7 @@ struct Runner {
    input_type: DataType,
    outputs: TransformOutputs,
    timer_tx: UtilizationComponentSender,
+   latency_recorder: LatencyRecorder,
    events_received: Registered<EventsReceived>,
}

@@ -1120,13 +1131,15 @@ impl Runner {
        timer_tx: UtilizationComponentSender,
        input_type: DataType,
        outputs: TransformOutputs,
+       latency_recorder: LatencyRecorder,
    ) -> Self {
        Self {
            transform,
            input_rx: Some(input_rx),
            input_type,
            outputs,
            timer_tx,
+           latency_recorder,
            events_received: register!(EventsReceived),
        }
    }

@@ -1142,6 +1155,7 @@ impl Runner {

    async fn send_outputs(&mut self, outputs_buf: &mut TransformOutputsBuf) -> crate::Result<()> {
        self.timer_tx.try_send_start_wait();
+       outputs_buf.for_each_array_mut(|array| self.latency_recorder.on_send(array));
        self.outputs.send(outputs_buf).await
    }
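
The builder reads `self.config.global.latency_ewma_alpha`, which is not defined in this diff. A hypothetical sketch of how such an optional global setting might be declared (the struct and field placement are assumptions; only the field name comes from the usage above):

// Hypothetical sketch only; Vector's real global options struct is defined elsewhere.
#[derive(Debug, Default)]
pub struct GlobalOptionsSketch {
    /// Smoothing factor for the `component_latency_mean_seconds` EWMA gauge.
    /// `None` falls back to `DEFAULT_LATENCY_EWMA_ALPHA` inside `LatencyRecorder::new`.
    pub latency_ewma_alpha: Option<f64>,
}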

Lines changed: 147 additions & 0 deletions
@@ -0,0 +1,147 @@
use std::time::Instant;
use tokio::{
    sync::oneshot,
    time::{Duration, timeout},
};
use vector_lib::metrics::Controller;

use crate::{
    config::Config,
    event::{Event, LogEvent, Metric, MetricValue},
    test_util::{
        mock::{
            basic_source,
            sinks::CompletionSinkConfig,
            transforms::{NoopTransformConfig, TransformType},
        },
        start_topology, trace_init,
    },
};

const EVENT_COUNT: usize = 100;
const TRANSFORM_DELAY_MS: u64 = 10;
const SOURCE_ID: &str = "latency_source";
const TRANSFORM_ID: &str = "latency_delay";
const TRANSFORM_TYPE: &str = "test_noop";
const TRANSFORM_KIND: &str = "transform";
const SINK_ID: &str = "latency_sink";

struct LatencyTestRun {
    metrics: Vec<Metric>,
    elapsed_time: f64,
}

#[tokio::test]
async fn component_latency_metrics_emitted() {
    let run = run_latency_topology().await;

    assert_histogram_count(
        &run.metrics,
        "component_latency_seconds",
        has_component_tags,
    );
    assert_gauge_range(
        &run.metrics,
        "component_latency_mean_seconds",
        has_component_tags,
        TRANSFORM_DELAY_MS as f64 / 1000.0,
        run.elapsed_time,
    );
}

async fn run_latency_topology() -> LatencyTestRun {
    trace_init();

    let controller = Controller::get().expect("metrics controller");
    controller.reset();

    let (mut source_tx, source_config) = basic_source();
    let transform_config =
        NoopTransformConfig::from(TransformType::Task).with_delay_ms(TRANSFORM_DELAY_MS);
    let (sink_done_tx, sink_done_rx) = oneshot::channel();
    let sink_config = CompletionSinkConfig::new(EVENT_COUNT, sink_done_tx);

    let mut config = Config::builder();
    config.add_source(SOURCE_ID, source_config);
    config.add_transform(TRANSFORM_ID, &[SOURCE_ID], transform_config);
    config.add_sink(SINK_ID, &[TRANSFORM_ID], sink_config);

    let start_time = Instant::now();
    let (topology, _) = start_topology(config.build().unwrap(), false).await;

    for idx in 0..EVENT_COUNT {
        let event = Event::Log(LogEvent::from(format!("payload-{idx}")));
        source_tx.send_event(event).await.unwrap();
    }

    drop(source_tx);

    let completed = timeout(Duration::from_secs(5), sink_done_rx)
        .await
        .expect("timed out waiting for completion sink to finish")
        .expect("completion sink sender dropped");
    assert!(
        completed,
        "completion sink finished before receiving all events"
    );

    topology.stop().await;
    let elapsed_time = start_time.elapsed().as_secs_f64();

    LatencyTestRun {
        metrics: controller.capture_metrics(),
        elapsed_time,
    }
}

fn assert_histogram_count(metrics: &[Metric], metric_name: &str, tags_match: fn(&Metric) -> bool) {
    let histogram = metrics
        .iter()
        .find(|metric| metric.name() == metric_name && tags_match(metric))
        .unwrap_or_else(|| panic!("{metric_name} histogram missing"));

    match histogram.value() {
        MetricValue::AggregatedHistogram { count, .. } => {
            assert_eq!(
                *count, EVENT_COUNT as u64,
                "histogram count should match number of events"
            );
        }
        other => panic!("expected aggregated histogram, got {other:?}"),
    }
}

fn assert_gauge_range(
    metrics: &[Metric],
    metric_name: &str,
    tags_match: fn(&Metric) -> bool,
    expected_min: f64,
    elapsed_time: f64,
) {
    let gauge = metrics
        .iter()
        .find(|metric| metric.name() == metric_name && tags_match(metric))
        .unwrap_or_else(|| panic!("{metric_name} gauge missing"));

    match gauge.value() {
        MetricValue::Gauge { value } => {
            assert!(
                *value >= expected_min,
                "expected mean latency to be >= {expected_min}, got {value}"
            );
            assert!(
                *value < elapsed_time,
                "expected mean latency ({value}) to be less than elapsed time ({elapsed_time})"
            );
        }
        other => panic!("expected gauge metric, got {other:?}"),
    }
}

fn has_component_tags(metric: &Metric) -> bool {
    metric.tags().is_some_and(|tags| {
        tags.get("component_id") == Some(TRANSFORM_ID)
            && tags.get("component_type") == Some(TRANSFORM_TYPE)
            && tags.get("component_kind") == Some(TRANSFORM_KIND)
    })
}

src/topology/test/mod.rs

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@ mod crash;
mod doesnt_reload;
#[cfg(all(feature = "sources-http_server", feature = "sinks-http"))]
mod end_to_end;
+mod latency_metrics;
#[cfg(all(
    feature = "sources-prometheus",
    feature = "sinks-prometheus",

website/cue/reference/components/sources/internal_metrics.cue

Lines changed: 22 additions & 0 deletions
@@ -273,6 +273,28 @@ components: sources: internal_metrics: {
            reason: _reason
        }
    }
+   component_latency_seconds: {
+       description: """
+           The elapsed time, in fractional seconds, that an event spends in a single transform.
+
+           This includes both the time spent queued in the transform’s input buffer and the time spent executing the transform itself.
+           """
+       type:              "histogram"
+       default_namespace: "vector"
+       tags:              _internal_metrics_tags
+   }
+   component_latency_mean_seconds: {
+       description: """
+           The mean elapsed time, in fractional seconds, that an event spends in a single transform.
+
+           This includes both the time spent queued in the transform’s input buffer and the time spent executing the transform itself.
+
+           This value is smoothed over time using an exponentially weighted moving average (EWMA).
+           """
+       type:              "gauge"
+       default_namespace: "vector"
+       tags:              _internal_metrics_tags
+   }
    buffer_byte_size: {
        description: "The number of bytes currently in the buffer."
        type:        "gauge"

website/cue/reference/components/transforms.cue

Lines changed: 4 additions & 2 deletions
@@ -15,11 +15,13 @@ components: transforms: [Name=string]: {
    telemetry: metrics: {
        component_discarded_events_total:     components.sources.internal_metrics.output.metrics.component_discarded_events_total
        component_errors_total:               components.sources.internal_metrics.output.metrics.component_errors_total
+       component_latency_mean_seconds:       components.sources.internal_metrics.output.metrics.component_latency_mean_seconds
+       component_latency_seconds:            components.sources.internal_metrics.output.metrics.component_latency_seconds
+       component_received_event_bytes_total: components.sources.internal_metrics.output.metrics.component_received_event_bytes_total
        component_received_events_count:      components.sources.internal_metrics.output.metrics.component_received_events_count
        component_received_events_total:      components.sources.internal_metrics.output.metrics.component_received_events_total
-       component_received_event_bytes_total: components.sources.internal_metrics.output.metrics.component_received_event_bytes_total
-       component_sent_events_total:          components.sources.internal_metrics.output.metrics.component_sent_events_total
        component_sent_event_bytes_total:     components.sources.internal_metrics.output.metrics.component_sent_event_bytes_total
+       component_sent_events_total:          components.sources.internal_metrics.output.metrics.component_sent_events_total
        transform_buffer_max_byte_size:       components.sources.internal_metrics.output.metrics.transform_buffer_max_byte_size
        transform_buffer_max_event_size:      components.sources.internal_metrics.output.metrics.transform_buffer_max_event_size
        transform_buffer_max_size_bytes:      components.sources.internal_metrics.output.metrics.transform_buffer_max_size_bytes
