Commit 0dc05d9

c1lylquerel and Laurent Quérel authored
[otap-dataflow benchmark] filter processor benchmarks + internal telemetry (open-telemetry#1448)
Add filter processor scenarios to the nightly benchmark suite.

Collect internal metrics inside the filter processor, tracking the number of signals before and after the filtering:

```rust
/// Pdata-oriented metrics for the OTAP FilterProcessor
#[metric_set(name = "filter.processor.pdata.metrics")]
#[derive(Debug, Default, Clone)]
pub struct FilterPdataMetrics {
    /// Number of log signals consumed
    #[metric(unit = "{log}")]
    pub log_signals_consumed: Counter<u64>,
    /// Number of span signals consumed
    #[metric(unit = "{span}")]
    pub span_signals_consumed: Counter<u64>,
    /// Number of log signals sent
    #[metric(unit = "{log}")]
    pub log_signals_sent: Counter<u64>,
    /// Number of span signals sent
    #[metric(unit = "{span}")]
    pub span_signals_sent: Counter<u64>,
}
```

---------

Co-authored-by: Laurent Quérel <[email protected]>
1 parent 9e5f4f6 commit 0dc05d9

File tree

14 files changed, +1025 -50 lines

.github/workflows/pipeline-perf-test-nightly.yml

Lines changed: 47 additions & 0 deletions
```diff
@@ -87,6 +87,11 @@ jobs:
           cd tools/pipeline_perf_test
           python orchestrator/run_orchestrator.py --debug --config test_suites/integration/nightly/backpressure-docker.yaml
 
+      - name: Run filter performance test log suite
+        run: |
+          cd tools/pipeline_perf_test
+          python orchestrator/run_orchestrator.py --debug --config test_suites/integration/nightly/filter-docker.yaml
+
       - name: Upload syslog results for processing
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         with:
@@ -99,6 +104,12 @@ jobs:
           name: backpressure-nightly-results
           path: tools/pipeline_perf_test/results/nightly_backpressure/gh-actions-benchmark/*.json
 
+      - name: Upload filter results for processing
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: filter-nightly-results
+          path: tools/pipeline_perf_test/results/nightly_filter/gh-actions-benchmark/*.json
+
       - name: Add benchmark link to job summary
         run: |
           echo "### Benchmark Results" >> $GITHUB_STEP_SUMMARY
@@ -129,6 +140,15 @@ jobs:
           merge-multiple: true
           path: backpressure_results
 
+
+      - name: Download filter artifacts
+        uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
+        with:
+          pattern: filter-nightly-results*
+          merge-multiple: true
+          path: filter_results
+
+
       - name: Consolidate syslog benchmark data
         run: |
           echo "Consolidating benchmark JSON files..."
@@ -159,6 +179,21 @@ jobs:
           echo "Consolidated benchmark data:"
           cat backpressure_output.json
 
+      - name: Consolidate filter benchmark data
+        run: |
+          echo "Consolidating benchmark JSON files..."
+          find filter_results -name "*.json" -type f | while read file; do
+            echo "Processing: $file"
+            cat "$file"
+            echo
+          done
+
+          # Combine all benchmark JSON files into a single output (find them recursively)
+          find filter_results -name "*.json" -type f -exec cat {} \; | jq -s 'map(.[])' > filter_output.json
+
+          echo "Consolidated benchmark data:"
+          cat filter_output.json
+
       - name: Update benchmark data
         uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
         with:
@@ -183,6 +218,18 @@ jobs:
           auto-push: true
           save-data-file: true
 
+      - name: Update filter benchmark data
+        uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
+        with:
+          tool: "customSmallerIsBetter"
+          output-file-path: filter_output.json
+          gh-pages-branch: benchmarks
+          max-items-in-chart: 100
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          benchmark-data-dir-path: "docs/benchmarks/nightly/filter"
+          auto-push: true
+          save-data-file: true
+
       - name: Add benchmark link to job summary
         run: |
           echo "### Benchmark Results" >> $GITHUB_STEP_SUMMARY
```
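The consolidation step does the real work with `jq`: `-s` slurps every matched JSON file into one array of arrays, and `map(.[])` flattens that by one level, so per-run files containing `[A, B]` and `[C]` combine into `[A, B, C]`. That flat array is what the `customSmallerIsBetter` tool of `github-action-benchmark` later reads from `output-file-path`.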

rust/otap-dataflow/configs/fake-filter-debug-noop.yaml

Lines changed: 6 additions & 3 deletions
```diff
@@ -14,9 +14,10 @@ nodes:
     dispatch_strategy: round_robin
     config:
       traffic_config:
-        max_signal_count: 1000
-        max_batch_size: 100
-        signals_per_second: 100
+        max_batch_size: 1000
+        signals_per_second: 100000
+        metric_weight: 0
+        trace_weight: 0
         log_weight: 100
       registry_path: https://github.com/open-telemetry/semantic-conventions.git[model]
   filter:
@@ -50,6 +51,8 @@ nodes:
       record_attributes:
         - key: gen_ai.system
           value: openai
+        - key: ios.app.state
+          value: active
       severity_texts: []
       severity_number: null
      bodies: []
```
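Pinning `metric_weight` and `trace_weight` to 0 while `log_weight` stays at 100 makes the fake signal generator emit logs only, so the scenario isolates the log-filter path at the much higher rate of 100000 signals per second.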

rust/otap-dataflow/crates/otap/src/filter_processor.rs

Lines changed: 63 additions & 35 deletions
```diff
@@ -9,31 +9,35 @@
 //! ToDo: Collect telemetry like number of filtered data is removed datapoints
 
 use self::config::Config;
+use self::metrics::FilterPdataMetrics;
 use crate::{OTAP_PROCESSOR_FACTORIES, pdata::OtapPdata};
 use async_trait::async_trait;
 use linkme::distributed_slice;
-
 use otap_df_config::SignalType;
 use otap_df_config::error::Error as ConfigError;
 use otap_df_config::node::NodeUserConfig;
 use otap_df_engine::config::ProcessorConfig;
 use otap_df_engine::context::PipelineContext;
+use otap_df_engine::control::NodeControlMsg;
 use otap_df_engine::error::{Error, ProcessorErrorKind, format_error_sources};
 use otap_df_engine::local::processor as local;
 use otap_df_engine::message::Message;
 use otap_df_engine::node::NodeId;
 use otap_df_engine::processor::ProcessorWrapper;
 use otap_df_pdata::otap::OtapArrowRecords;
+use otap_df_telemetry::metrics::MetricSet;
 use serde_json::Value;
 use std::sync::Arc;
 
 mod config;
+mod metrics;
 /// The URN for the filter processor
 pub const FILTER_PROCESSOR_URN: &str = "urn:otel:filter:processor";
 
 /// processor that outputs all data received to stdout
 pub struct FilterProcessor {
     config: Config,
+    metrics: MetricSet<FilterPdataMetrics>,
 }
 
 /// Factory function to create a FilterProcessor.
@@ -71,20 +75,19 @@ impl FilterProcessor {
     /// Creates a new FilterProcessor
     #[must_use]
     #[allow(dead_code)]
-    pub fn new(config: Config, _pipeline_ctx: PipelineContext) -> Self {
-        FilterProcessor { config }
+    pub fn new(config: Config, pipeline_ctx: PipelineContext) -> Self {
+        let metrics = pipeline_ctx.register_metrics::<FilterPdataMetrics>();
+        FilterProcessor { config, metrics }
     }
 
     /// Creates a new FilterProcessor from a configuration object
-    pub fn from_config(
-        _pipeline_ctx: PipelineContext,
-        config: &Value,
-    ) -> Result<Self, ConfigError> {
+    pub fn from_config(pipeline_ctx: PipelineContext, config: &Value) -> Result<Self, ConfigError> {
+        let metrics = pipeline_ctx.register_metrics::<FilterPdataMetrics>();
         let config: Config =
             serde_json::from_value(config.clone()).map_err(|e| ConfigError::InvalidUserConfig {
                 error: e.to_string(),
             })?;
-        Ok(FilterProcessor { config })
+        Ok(FilterProcessor { config, metrics })
     }
 }
 
@@ -96,8 +99,13 @@ impl local::Processor<OtapPdata> for FilterProcessor {
         effect_handler: &mut local::EffectHandler<OtapPdata>,
     ) -> Result<(), Error> {
         match msg {
-            Message::Control(_control) => {
-                // ToDo: add internal telemetry that will be sent out here
+            Message::Control(control) => {
+                if let NodeControlMsg::CollectTelemetry {
+                    mut metrics_reporter,
+                } = control
+                {
+                    _ = metrics_reporter.report(&mut self.metrics);
+                }
                 Ok(())
             }
             Message::PData(pdata) => {
@@ -113,32 +121,52 @@ impl local::Processor<OtapPdata> for FilterProcessor {
                         arrow_records
                     }
                     SignalType::Logs => {
-                        self.config
-                            .log_filters()
-                            .filter(arrow_records)
-                            .map_err(|e| {
-                                let source_detail = format_error_sources(&e);
-                                Error::ProcessorError {
-                                    processor: effect_handler.processor_id(),
-                                    kind: ProcessorErrorKind::Other,
-                                    error: format!("Filter error: {e}"),
-                                    source_detail,
-                                }
-                            })?
+                        // get logs
+                        let (filtered_arrow_records, log_signals_consumed, log_signals_filtered) =
+                            self.config
+                                .log_filters()
+                                .filter(arrow_records)
+                                .map_err(|e| {
+                                    let source_detail = format_error_sources(&e);
+                                    Error::ProcessorError {
+                                        processor: effect_handler.processor_id(),
+                                        kind: ProcessorErrorKind::Other,
+                                        error: format!("Filter error: {e}"),
+                                        source_detail,
+                                    }
+                                })?;
+
+                        // get logs after
+                        self.metrics.log_signals_consumed.add(log_signals_consumed);
+                        self.metrics.log_signals_filtered.add(log_signals_filtered);
+
+                        filtered_arrow_records
+                    }
+                    SignalType::Traces => {
+                        // get spans
+                        let (filtered_arrow_records, span_signals_consumed, span_signals_filtered) =
+                            self.config
+                                .trace_filters()
+                                .filter(arrow_records)
+                                .map_err(|e| {
+                                    let source_detail = format_error_sources(&e);
+                                    Error::ProcessorError {
+                                        processor: effect_handler.processor_id(),
+                                        kind: ProcessorErrorKind::Other,
+                                        error: format!("Filter error: {e}"),
+                                        source_detail,
+                                    }
+                                })?;
+
+                        self.metrics
+                            .span_signals_consumed
+                            .add(span_signals_consumed);
+                        self.metrics
+                            .span_signals_filtered
+                            .add(span_signals_filtered);
+
+                        filtered_arrow_records
                     }
-                    SignalType::Traces => self
-                        .config
-                        .trace_filters()
-                        .filter(arrow_records)
-                        .map_err(|e| {
-                            let source_detail = format_error_sources(&e);
-                            Error::ProcessorError {
-                                processor: effect_handler.processor_id(),
-                                kind: ProcessorErrorKind::Other,
-                                error: format!("Filter error: {e}"),
-                                source_detail,
-                            }
-                        })?,
                 };
                 effect_handler
                     .send_message(OtapPdata::new(context, filtered_arrow_records.into()))
```
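The control-path change above is the heart of the telemetry wiring: counters accumulate locally on the hot path and are handed over only when the engine sends a `CollectTelemetry` control message. Here is a minimal self-contained sketch of that pull-style flow, with toy `ControlMsg` and `Processor` types invented for illustration (the real code uses `NodeControlMsg::CollectTelemetry`, `MetricSet`, and the engine-supplied `metrics_reporter`):

```rust
/// Toy stand-in for the generated metric set; real counters come from
/// otap_df_telemetry's `Counter<u64>`.
#[derive(Debug, Default)]
struct FilterPdataMetrics {
    log_signals_consumed: u64,
    log_signals_filtered: u64,
}

/// Toy stand-in for NodeControlMsg.
enum ControlMsg {
    CollectTelemetry,
    Shutdown,
}

struct Processor {
    metrics: FilterPdataMetrics,
}

impl Processor {
    /// Hot path: just bump local counters, no I/O.
    fn on_logs_batch(&mut self, consumed: u64, removed: u64) {
        self.metrics.log_signals_consumed += consumed;
        self.metrics.log_signals_filtered += removed;
    }

    /// Control path: emit a snapshot only when the engine asks for one.
    fn on_control(&mut self, msg: ControlMsg) {
        if let ControlMsg::CollectTelemetry = msg {
            println!("reporting: {:?}", self.metrics);
        }
    }
}

fn main() {
    let mut p = Processor { metrics: FilterPdataMetrics::default() };
    p.on_logs_batch(1000, 600); // 1000 rows in, 600 dropped by the filter
    p.on_control(ControlMsg::CollectTelemetry);
    p.on_control(ControlMsg::Shutdown); // non-telemetry control messages are ignored here
}
```

Keeping the hot path down to plain counter increments and letting the engine decide when to collect is what makes it cheap enough to meter every batch.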
rust/otap-dataflow/crates/otap/src/filter_processor/metrics.rs

Lines changed: 25 additions & 0 deletions
```diff
@@ -0,0 +1,25 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+//! Metrics for the OTAP FilterProcessor node.
+use otap_df_telemetry::instrument::Counter;
+use otap_df_telemetry_macros::metric_set;
+
+/// Pdata-oriented metrics for the OTAP FilterProcessor
+#[metric_set(name = "filter.processor.pdata.metrics")]
+#[derive(Debug, Default, Clone)]
+pub struct FilterPdataMetrics {
+    /// Number of log signals consumed
+    #[metric(unit = "{log}")]
+    pub log_signals_consumed: Counter<u64>,
+    /// Number of span signals consumed
+    #[metric(unit = "{span}")]
+    pub span_signals_consumed: Counter<u64>,
+
+    /// Number of log signals filtered
+    #[metric(unit = "{log}")]
+    pub log_signals_filtered: Counter<u64>,
+    /// Number of span signals filtered
+    #[metric(unit = "{span}")]
+    pub span_signals_filtered: Counter<u64>,
+}
```
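A note on the units: `{log}` and `{span}` are UCUM-style annotations, the convention OpenTelemetry metrics use for dimensionless counts of a named thing; the braces mean the unit carries no dimension and only documents what is being counted.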

rust/otap-dataflow/crates/pdata/src/otap/filter.rs

Lines changed: 4 additions & 2 deletions
```diff
@@ -511,14 +511,16 @@ fn apply_filter(
     payload: &mut OtapArrowRecords,
     payload_type: ArrowPayloadType,
     filter: &BooleanArray,
-) -> Result<()> {
+) -> Result<(u64, u64)> {
     let record_batch = payload
         .get(payload_type)
         .ok_or_else(|| Error::RecordBatchNotFound { payload_type })?;
+    let num_rows_before = record_batch.num_rows() as u64;
     let filtered_record_batch = arrow::compute::filter_record_batch(record_batch, filter)
         .map_err(|e| Error::ColumnLengthMismatch { source: e })?;
+    let num_rows_removed = num_rows_before - (filtered_record_batch.num_rows() as u64);
     payload.set(payload_type, filtered_record_batch);
-    Ok(())
+    Ok((num_rows_before, num_rows_removed))
 }
 
 /// update_child_record_batch_filter() takes an child record batch, with it's respective filter
```
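The two values added to the return type are the row count before filtering and the number of rows the mask dropped (before minus after). A self-contained toy version of that accounting over a plain `Vec` (a hypothetical stand-in; the real function filters an Arrow `RecordBatch` with a `BooleanArray` via `arrow::compute::filter_record_batch`):

```rust
/// Filter `rows` by `mask`, returning (rows_before, rows_removed),
/// mirroring the counting added to `apply_filter`.
fn apply_filter_counts(rows: &mut Vec<i64>, mask: &[bool]) -> (u64, u64) {
    assert_eq!(rows.len(), mask.len(), "mask must match row count");
    let before = rows.len() as u64;
    let mut keep = mask.iter();
    // retain visits elements in order, so the mask iterator stays aligned
    rows.retain(|_| *keep.next().unwrap());
    let removed = before - rows.len() as u64;
    (before, removed)
}

fn main() {
    let mut rows = vec![1, 2, 3, 4, 5];
    let mask = [true, false, true, false, false];
    let (before, removed) = apply_filter_counts(&mut rows, &mask);
    assert_eq!((before, removed), (5, 3));
    println!("kept {:?}: {before} before, {removed} removed", rows);
}
```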

rust/otap-dataflow/crates/pdata/src/otap/filter/logs.rs

Lines changed: 14 additions & 5 deletions
```diff
@@ -103,7 +103,10 @@ impl LogFilter {
     }
 
     /// take a logs payload and return the filtered result
-    pub fn filter(&self, mut logs_payload: OtapArrowRecords) -> Result<OtapArrowRecords> {
+    pub fn filter(
+        &self,
+        mut logs_payload: OtapArrowRecords,
+    ) -> Result<(OtapArrowRecords, u64, u64)> {
         let (resource_attr_filter, log_record_filter, log_attr_filter) = if let Some(include_config) =
             &self.include
             && let Some(exclude_config) = &self.exclude
@@ -136,7 +139,13 @@ impl LogFilter {
             include_config.create_filters(&logs_payload, false)?
         } else {
             // both include and exclude is none
-            return Ok(logs_payload);
+            let num_rows = logs_payload
+                .get(ArrowPayloadType::Logs)
+                .ok_or_else(|| Error::RecordBatchNotFound {
+                    payload_type: ArrowPayloadType::Logs,
+                })?
+                .num_rows() as u64;
+            return Ok((logs_payload, num_rows, num_rows));
         };
 
         let (log_record_filter, child_record_batch_filters) = self.sync_up_filters(
@@ -146,17 +155,17 @@ impl LogFilter {
             log_attr_filter,
         )?;
 
-        apply_filter(
+        let (log_rows_before, log_rows_removed) = apply_filter(
             &mut logs_payload,
             ArrowPayloadType::Logs,
             &log_record_filter,
         )?;
 
         for (payload_type, filter) in child_record_batch_filters {
-            apply_filter(&mut logs_payload, payload_type, &filter)?;
+            let (_, _) = apply_filter(&mut logs_payload, payload_type, &filter)?;
        }
 
-        Ok(logs_payload)
+        Ok((logs_payload, log_rows_before, log_rows_removed))
     }
 
     /// this function takes the filters for each record batch and makes sure that incomplete
```
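One consequence of the new return type shows up in the fast path above: when neither `include` nor `exclude` is configured, the function must now fetch the `Logs` record batch purely to report its `num_rows` in both count slots of the tuple, where previously it returned the payload untouched without looking at it.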
