Commit 58110c0
feat: [Trace Stats] Add skeleton of concentrator (#842)

## This PR

- Add the skeleton of `StatsConcentrator`, with no implementation yet.
- Add `StatsConcentratorHandle` and `StatsConcentratorService`, which send and process stats requests (`add()` and `get_stats()`) through a queue, so no mutex is needed and lock contention is avoided (a minimal sketch of this pattern follows below). Thanks @duncanista for the suggestion and @astuyve for the example code: DataDog/serverless-components#32

## Next steps

- Implement `StatsConcentrator`, which aggregates stats data into buckets and returns it in batches.
- Add more fields to `AggregationKey` and `Stats`.
- Move the processing of stats to after "obfuscation", as suggested by the APM team. This will involve many code changes, so I'll make it a separate PR, mainly moving code from this draft PR: #827

## Architecture

<img width="1296" height="674" alt="image" src="https://github.com/user-attachments/assets/2d4cb925-6cfc-4581-8ed6-6bd87cf0d87a" />

Jira: https://datadoghq.atlassian.net/browse/SVLS-7593
1 parent 53cba3c commit 58110c0
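
As a rough illustration of the queue-based pattern the description refers to (simplified names, not the actual types — those live in `stats_concentrator_service.rs` below): a single task owns the state and serves both writes and reads from one channel, so callers never take a lock.

```rust
// Illustrative sketch only; the real types are in stats_concentrator_service.rs.
use tokio::sync::{mpsc, oneshot};

enum Command {
    Add(u64),                            // fire-and-forget write
    GetStats(oneshot::Sender<Vec<u64>>), // read, answered over a oneshot channel
}

async fn run_service(mut rx: mpsc::UnboundedReceiver<Command>) {
    // State is owned by this one task, so no Mutex and no lock contention.
    let mut state: Vec<u64> = Vec::new();
    while let Some(cmd) = rx.recv().await {
        match cmd {
            Command::Add(v) => state.push(v),
            Command::GetStats(reply) => {
                // Hand back the accumulated stats and reset.
                let _ = reply.send(std::mem::take(&mut state));
            }
        }
    }
}
```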

8 files changed: +243 −32 lines changed

bottlecap/src/bin/bottlecap/main.rs

Lines changed: 16 additions & 2 deletions

```diff
@@ -57,6 +57,7 @@ use bottlecap::{
     proxy_aggregator,
     proxy_flusher::Flusher as ProxyFlusher,
     stats_aggregator::StatsAggregator,
+    stats_concentrator_service::StatsConcentratorService,
     stats_flusher::{self, StatsFlusher},
     stats_processor, trace_agent,
     trace_aggregator::{self, SendDataBuilderInfo},
@@ -522,6 +523,7 @@ async fn extension_loop_active(
             &proxy_flusher,
             &mut race_flush_interval,
             &metrics_aggr_handle.clone(),
+            false,
         )
         .await;
     }
@@ -537,6 +539,7 @@ async fn extension_loop_active(
             &proxy_flusher,
             &mut race_flush_interval,
             &metrics_aggr_handle.clone(),
+            false,
         )
         .await;
         let next_response =
@@ -606,6 +609,7 @@ async fn extension_loop_active(
             &proxy_flusher,
             &mut race_flush_interval,
             &metrics_aggr_handle,
+            false, // force_flush_trace_stats
         )
         .await;
     }
@@ -639,6 +643,7 @@ async fn extension_loop_active(
             &proxy_flusher,
             &mut race_flush_interval,
             &metrics_aggr_handle,
+            false, // force_flush_trace_stats
        )
        .await;
    }
@@ -697,13 +702,15 @@ async fn extension_loop_active(
                 &proxy_flusher,
                 &mut race_flush_interval,
                 &metrics_aggr_handle,
+                true, // force_flush_trace_stats
             )
             .await;
             return Ok(());
         }
     }
 }
 
+#[allow(clippy::too_many_arguments)]
 async fn blocking_flush_all(
     logs_flusher: &LogsFlusher,
     metrics_flushers: &mut [MetricsFlusher],
@@ -712,6 +719,7 @@ async fn blocking_flush_all(
     proxy_flusher: &ProxyFlusher,
     race_flush_interval: &mut tokio::time::Interval,
     metrics_aggr_handle: &MetricsAggregatorHandle,
+    force_flush_trace_stats: bool,
 ) {
     let flush_response = metrics_aggr_handle
         .flush()
@@ -731,7 +739,7 @@ async fn blocking_flush_all(
         logs_flusher.flush(None),
         futures::future::join_all(metrics_futures),
         trace_flusher.flush(None),
-        stats_flusher.flush(),
+        stats_flusher.flush(force_flush_trace_stats),
         proxy_flusher.flush(None),
     );
     race_flush_interval.reset();
@@ -981,7 +989,12 @@ fn start_trace_agent(
     tokio_util::sync::CancellationToken,
 ) {
     // Stats
-    let stats_aggregator = Arc::new(TokioMutex::new(StatsAggregator::default()));
+    let (stats_concentrator_service, stats_concentrator_handle) =
+        StatsConcentratorService::new(Arc::clone(config));
+    tokio::spawn(stats_concentrator_service.run());
+    let stats_aggregator: Arc<TokioMutex<StatsAggregator>> = Arc::new(TokioMutex::new(
+        StatsAggregator::new_with_concentrator(stats_concentrator_handle.clone()),
+    ));
     let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher::new(
         api_key_factory.clone(),
         stats_aggregator.clone(),
@@ -1029,6 +1042,7 @@ fn start_trace_agent(
         invocation_processor,
         appsec_processor,
         Arc::clone(tags_provider),
+        stats_concentrator_handle,
     );
     let trace_agent_channel = trace_agent.get_sender_copy();
     let shutdown_token = trace_agent.shutdown_token();
```

bottlecap/src/traces/mod.rs

Lines changed: 3 additions & 0 deletions

```diff
@@ -7,12 +7,15 @@ pub mod proxy_aggregator;
 pub mod proxy_flusher;
 pub mod span_pointers;
 pub mod stats_aggregator;
+pub mod stats_concentrator;
+pub mod stats_concentrator_service;
 pub mod stats_flusher;
 pub mod stats_processor;
 pub mod trace_agent;
 pub mod trace_aggregator;
 pub mod trace_flusher;
 pub mod trace_processor;
+pub mod trace_stats_processor;
 
 // URL for a call to the Lambda runtime API. The value may be replaced if `AWS_LAMBDA_RUNTIME_API` is set.
 const LAMBDA_RUNTIME_URL_PREFIX: &str = "http://127.0.0.1:9001";
```

bottlecap/src/traces/stats_aggregator.rs

Lines changed: 40 additions & 22 deletions

```diff
@@ -1,5 +1,7 @@
+use crate::traces::stats_concentrator_service::StatsConcentratorHandle;
 use datadog_trace_protobuf::pb::ClientStatsPayload;
 use std::collections::VecDeque;
+use tracing::error;
 
 #[allow(clippy::empty_line_after_doc_comments)]
 /// Maximum number of entries in a stat payload.
@@ -22,37 +24,44 @@ pub struct StatsAggregator {
     queue: VecDeque<ClientStatsPayload>,
     max_content_size_bytes: usize,
     buffer: Vec<ClientStatsPayload>,
-}
-
-impl Default for StatsAggregator {
-    fn default() -> Self {
-        StatsAggregator {
-            queue: VecDeque::new(),
-            max_content_size_bytes: MAX_CONTENT_SIZE_BYTES,
-            buffer: Vec::new(),
-        }
-    }
+    concentrator: StatsConcentratorHandle,
 }
 
 /// Takes in individual trace stats payloads and aggregates them into batches to be flushed to Datadog.
 impl StatsAggregator {
     #[allow(dead_code)]
     #[allow(clippy::must_use_candidate)]
-    pub fn new(max_content_size_bytes: usize) -> Self {
+    fn new(max_content_size_bytes: usize, concentrator: StatsConcentratorHandle) -> Self {
         StatsAggregator {
             queue: VecDeque::new(),
             max_content_size_bytes,
             buffer: Vec::new(),
+            concentrator,
         }
     }
 
+    #[must_use]
+    pub fn new_with_concentrator(concentrator: StatsConcentratorHandle) -> Self {
+        Self::new(MAX_CONTENT_SIZE_BYTES, concentrator)
+    }
+
     /// Takes in an individual trace stats payload.
     pub fn add(&mut self, payload: ClientStatsPayload) {
         self.queue.push_back(payload);
     }
 
     /// Returns a batch of trace stats payloads, subject to the max content size.
-    pub fn get_batch(&mut self) -> Vec<ClientStatsPayload> {
+    pub async fn get_batch(&mut self, force_flush: bool) -> Vec<ClientStatsPayload> {
+        // Pull stats data from concentrator
+        match self.concentrator.flush(force_flush).await {
+            Ok(stats) => {
+                self.queue.extend(stats);
+            }
+            Err(e) => {
+                error!("Error getting stats from the stats concentrator: {e:?}");
+            }
+        }
+
         let mut batch_size = 0;
 
         // Fill the batch
@@ -80,10 +89,15 @@ impl StatsAggregator {
 #[allow(clippy::unwrap_used)]
 mod tests {
     use super::*;
+    use crate::config::Config;
+    use crate::traces::stats_concentrator_service::StatsConcentratorService;
+    use std::sync::Arc;
 
     #[test]
     fn test_add() {
-        let mut aggregator = StatsAggregator::default();
+        let config = Arc::new(Config::default());
+        let (_, concentrator) = StatsConcentratorService::new(config);
+        let mut aggregator = StatsAggregator::new_with_concentrator(concentrator);
         let payload = ClientStatsPayload {
             hostname: "hostname".to_string(),
             env: "dev".to_string(),
@@ -106,9 +120,11 @@ mod tests {
         assert_eq!(aggregator.queue[0], payload);
     }
 
-    #[test]
-    fn test_get_batch() {
-        let mut aggregator = StatsAggregator::default();
+    #[tokio::test]
+    async fn test_get_batch() {
+        let config = Arc::new(Config::default());
+        let (_, concentrator) = StatsConcentratorService::new(config);
+        let mut aggregator = StatsAggregator::new_with_concentrator(concentrator);
         let payload = ClientStatsPayload {
             hostname: "hostname".to_string(),
             env: "dev".to_string(),
@@ -127,13 +143,15 @@ mod tests {
         };
         aggregator.add(payload.clone());
         assert_eq!(aggregator.queue.len(), 1);
-        let batch = aggregator.get_batch();
+        let batch = aggregator.get_batch(false).await;
         assert_eq!(batch, vec![payload]);
     }
 
-    #[test]
-    fn test_get_batch_full_entries() {
-        let mut aggregator = StatsAggregator::new(640);
+    #[tokio::test]
+    async fn test_get_batch_full_entries() {
+        let config = Arc::new(Config::default());
+        let (_, concentrator) = StatsConcentratorService::new(config);
+        let mut aggregator = StatsAggregator::new(640, concentrator);
         // Payload below is 115 bytes
         let payload = ClientStatsPayload {
             hostname: "hostname".to_string(),
@@ -158,12 +176,12 @@ mod tests {
         aggregator.add(payload.clone());
 
         // The batch should only contain the first 2 payloads
-        let first_batch = aggregator.get_batch();
+        let first_batch = aggregator.get_batch(false).await;
         assert_eq!(first_batch, vec![payload.clone(), payload.clone()]);
         assert_eq!(aggregator.queue.len(), 1);
 
         // The second batch should only contain the last log
-        let second_batch = aggregator.get_batch();
+        let second_batch = aggregator.get_batch(false).await;
         assert_eq!(second_batch, vec![payload]);
         assert_eq!(aggregator.queue.len(), 0);
     }
```
bottlecap/src/traces/stats_concentrator.rs

Lines changed: 38 additions & 0 deletions

```diff
@@ -0,0 +1,38 @@
+use crate::config::Config;
+use datadog_trace_protobuf::pb;
+use std::sync::Arc;
+
+// Event sent to the stats concentrator
+#[derive(Clone, Copy)]
+pub struct StatsEvent {
+    pub time: u64,
+    pub aggregation_key: AggregationKey,
+    pub stats: Stats,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, Hash, Copy)]
+pub struct AggregationKey {}
+
+#[derive(Clone, Debug, Default, Copy)]
+pub struct Stats {}
+
+pub struct StatsConcentrator {
+    _config: Arc<Config>,
+}
+
+// Aggregates stats into buckets, which are then pulled by the stats aggregator.
+impl StatsConcentrator {
+    #[must_use]
+    pub fn new(config: Arc<Config>) -> Self {
+        Self { _config: config }
+    }
+
+    pub fn add(&mut self, _stats_event: StatsEvent) {}
+
+    // force_flush: If true, flush all stats. If false, flush stats except for the few latest
+    // buckets, which may still be getting data.
+    #[must_use]
+    pub fn flush(&mut self, _force_flush: bool) -> Vec<pb::ClientStatsPayload> {
+        vec![]
+    }
+}
```
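
The skeleton leaves `flush` empty, but the comment on it pins down the intended semantics of `force_flush`. A hypothetical sketch of what time-bucketing with held-back "hot" buckets could look like — the bucket width, the hit-count stand-in, and all names here are assumptions, not part of this PR:

```rust
// Hypothetical sketch of the bucket semantics described above; not part of this PR.
use std::collections::BTreeMap;

const BUCKET_WIDTH_SECS: u64 = 10; // assumed bucket width
const HOT_BUCKETS: u64 = 2; // newest buckets that may still be receiving data

struct TimeBuckets {
    // Bucket start time -> aggregated hit count (stand-in for real stats).
    buckets: BTreeMap<u64, u64>,
}

impl TimeBuckets {
    fn add(&mut self, time: u64) {
        let bucket_start = time - time % BUCKET_WIDTH_SECS;
        *self.buckets.entry(bucket_start).or_insert(0) += 1;
    }

    /// force_flush = true drains everything (e.g. on shutdown); false holds
    /// back the newest buckets, mirroring the comment on flush() above.
    fn flush(&mut self, force_flush: bool, now: u64) -> Vec<(u64, u64)> {
        let cutoff = if force_flush {
            u64::MAX
        } else {
            now.saturating_sub(HOT_BUCKETS * BUCKET_WIDTH_SECS)
        };
        // split_off keeps buckets >= cutoff; everything older gets flushed.
        let still_hot = self.buckets.split_off(&cutoff);
        let flushed = std::mem::replace(&mut self.buckets, still_hot);
        flushed.into_iter().collect()
    }
}
```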
bottlecap/src/traces/stats_concentrator_service.rs

Lines changed: 79 additions & 0 deletions

```diff
@@ -0,0 +1,79 @@
+use tokio::sync::{mpsc, oneshot};
+
+use crate::config::Config;
+use crate::traces::stats_concentrator::StatsConcentrator;
+use crate::traces::stats_concentrator::StatsEvent;
+use datadog_trace_protobuf::pb;
+use std::sync::Arc;
+use tracing::error;
+
+#[derive(Debug, thiserror::Error)]
+pub enum StatsError {
+    #[error("Failed to send command to concentrator: {0}")]
+    SendError(mpsc::error::SendError<ConcentratorCommand>),
+    #[error("Failed to receive response from concentrator: {0}")]
+    RecvError(oneshot::error::RecvError),
+}
+
+pub enum ConcentratorCommand {
+    Add(StatsEvent),
+    Flush(bool, oneshot::Sender<Vec<pb::ClientStatsPayload>>),
+}
+
+#[derive(Clone)]
+pub struct StatsConcentratorHandle {
+    tx: mpsc::UnboundedSender<ConcentratorCommand>,
+}
+
+impl StatsConcentratorHandle {
+    pub fn add(
+        &self,
+        stats_event: StatsEvent,
+    ) -> Result<(), mpsc::error::SendError<ConcentratorCommand>> {
+        self.tx.send(ConcentratorCommand::Add(stats_event))
+    }
+
+    pub async fn flush(
+        &self,
+        force_flush: bool,
+    ) -> Result<Vec<pb::ClientStatsPayload>, StatsError> {
+        let (response_tx, response_rx) = oneshot::channel();
+        self.tx
+            .send(ConcentratorCommand::Flush(force_flush, response_tx))
+            .map_err(StatsError::SendError)?;
+        let stats = response_rx.await.map_err(StatsError::RecvError)?;
+        Ok(stats)
+    }
+}
+
+pub struct StatsConcentratorService {
+    concentrator: StatsConcentrator,
+    rx: mpsc::UnboundedReceiver<ConcentratorCommand>,
+}
+
+// A service that handles add() and flush() requests in the same queue,
+// to avoid using mutex, which may cause lock contention.
+impl StatsConcentratorService {
+    #[must_use]
+    pub fn new(config: Arc<Config>) -> (Self, StatsConcentratorHandle) {
+        let (tx, rx) = mpsc::unbounded_channel();
+        let handle = StatsConcentratorHandle { tx };
+        let concentrator = StatsConcentrator::new(config);
+        let service: StatsConcentratorService = Self { concentrator, rx };
+        (service, handle)
+    }
+
+    pub async fn run(mut self) {
+        while let Some(command) = self.rx.recv().await {
+            match command {
+                ConcentratorCommand::Add(stats_event) => self.concentrator.add(stats_event),
+                ConcentratorCommand::Flush(force_flush, response_tx) => {
+                    let stats = self.concentrator.flush(force_flush);
+                    if let Err(e) = response_tx.send(stats) {
+                        error!("Failed to return trace stats: {e:?}");
+                    }
+                }
+            }
+        }
+    }
+}
```
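
A minimal usage sketch of the pair defined above, assuming a Tokio runtime, the default `Config`, and the `bottlecap::` import paths (the flush result is empty for now, since `StatsConcentrator::flush` is still a stub):

```rust
use std::sync::Arc;

use bottlecap::config::Config;
use bottlecap::traces::stats_concentrator::{AggregationKey, Stats, StatsEvent};
use bottlecap::traces::stats_concentrator_service::{StatsConcentratorService, StatsError};

async fn example() -> Result<(), StatsError> {
    let (service, handle) = StatsConcentratorService::new(Arc::new(Config::default()));
    // The service task is the sole owner of the concentrator state.
    tokio::spawn(service.run());

    // Producers clone the handle; add() is a non-blocking channel send.
    handle
        .add(StatsEvent {
            time: 0,
            aggregation_key: AggregationKey {},
            stats: Stats::default(),
        })
        .expect("concentrator service stopped");

    // flush() round-trips through the queue and awaits the oneshot reply.
    let payloads = handle.flush(true).await?;
    assert!(payloads.is_empty()); // the skeleton returns no payloads yet
    Ok(())
}
```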

bottlecap/src/traces/stats_flusher.rs

Lines changed: 5 additions & 4 deletions

```diff
@@ -28,7 +28,7 @@ pub trait StatsFlusher {
     /// Flushes stats to the Datadog trace stats intake.
     async fn send(&self, traces: Vec<pb::ClientStatsPayload>);
 
-    async fn flush(&self);
+    async fn flush(&self, force_flush: bool);
 }
 
 #[allow(clippy::module_name_repetitions)]
@@ -116,14 +116,15 @@ impl StatsFlusher for ServerlessStatsFlusher {
             }
         };
     }
-    async fn flush(&self) {
+
+    async fn flush(&self, force_flush: bool) {
         let mut guard = self.aggregator.lock().await;
 
-        let mut stats = guard.get_batch();
+        let mut stats = guard.get_batch(force_flush).await;
         while !stats.is_empty() {
             self.send(stats).await;
 
-            stats = guard.get_batch();
+            stats = guard.get_batch(force_flush).await;
         }
     }
 }
```
