From b17f5c426e9d99b095c4072fdd3c815edb07ec1f Mon Sep 17 00:00:00 2001 From: Zhivko Kirishev Date: Wed, 10 Sep 2025 16:22:18 +0300 Subject: [PATCH 1/5] Update the RDI openapi and observability documentation by adding information for the newly exposed processor performance metrics --- .../redis-data-integration/observability.md | 4 ++++ .../reference/api-reference/openapi.json | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/content/integrate/redis-data-integration/observability.md b/content/integrate/redis-data-integration/observability.md index ebb8368bde..b062ddd6b9 100644 --- a/content/integrate/redis-data-integration/observability.md +++ b/content/integrate/redis-data-integration/observability.md @@ -113,6 +113,9 @@ RDI reports with their descriptions. | `monitor_time_elapsed_created` | Gauge | Timestamp when the monitor time elapsed counter was created | Informational - no alerting needed | | `rdi_incoming_entries` | Gauge | Count of incoming events by `data_source` and `operation` type (pending, inserted, updated, deleted, filtered, rejected) | Informational - monitor for trends, alert only on "rejected" > 0 | | `rdi_stream_event_latency_ms` | Gauge | Latency in milliseconds of the oldest event in each data stream, labeled by `data_source` | Informational - monitor based on business SLA requirements | +| `rdi_processed_batches_total` | Counter | Count of the total processed batches | Informational - use for data ingestion and load tracking | +| `rdi_processed_batches_created` | Gauge | Timestamp when the processed batches counter was created | Informational - no alerting needed | +| `rdi_processor_performance_batch_avg` | Gauge | Average processor performance for all batches | Informational - use for data ingestion tracking and debugging purposes | {{< note >}} **Additional information about stream processor metrics:** @@ -121,6 +124,7 @@ RDI reports with their descriptions. 
- Metrics with the `_created` suffix are automatically generated by Prometheus for counters and gauges to track when they were first created. - The `rdi_incoming_entries` metric provides a detailed breakdown for each data source by operation type. - The `rdi_stream_event_latency_ms` metric helps monitor data freshness and processing delays. +- The `rdi_processor_performance_batch_avg` metric provides detailed performance insights. {{< /note >}} ## Recommended alerting strategy diff --git a/content/integrate/redis-data-integration/reference/api-reference/openapi.json b/content/integrate/redis-data-integration/reference/api-reference/openapi.json index 6c4808d85e..4dc13a24dd 100644 --- a/content/integrate/redis-data-integration/reference/api-reference/openapi.json +++ b/content/integrate/redis-data-integration/reference/api-reference/openapi.json @@ -4627,6 +4627,22 @@ 10.5 ] }, + "transform_time_avg": { + "type": "number", + "minimum": 0.0, + "title": "Transform Time Avg", + "examples": [ + 2.3 + ] + }, + "write_time_avg": { + "type": "number", + "minimum": 0.0, + "title": "Write Time Avg", + "examples": [ + 4.4 + ] + }, "process_time_avg": { "type": "number", "minimum": 0.0, @@ -4665,6 +4681,8 @@ "total_batches", "batch_size_avg", "read_time_avg", + "transform_time_avg", + "write_time_avg", "process_time_avg", "ack_time_avg", "total_time_avg", From 37e0fdcf1691cdc97073f073bb7ec3a0ff791b56 Mon Sep 17 00:00:00 2001 From: Zhivko Kirishev Date: Tue, 16 Sep 2025 13:47:11 +0300 Subject: [PATCH 2/5] Replace rdi_processor_performance_batch_avg with multiple standalone metrics and write a description for them --- .../redis-data-integration/observability.md | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/content/integrate/redis-data-integration/observability.md b/content/integrate/redis-data-integration/observability.md index b062ddd6b9..789c2f7df3 100644 --- a/content/integrate/redis-data-integration/observability.md +++ 
b/content/integrate/redis-data-integration/observability.md @@ -115,7 +115,24 @@ RDI reports with their descriptions. | `rdi_stream_event_latency_ms` | Gauge | Latency in milliseconds of the oldest event in each data stream, labeled by `data_source` | Informational - monitor based on business SLA requirements | | `rdi_processed_batches_total` | Counter | Count of the total processed batches | Informational - use for data ingestion and load tracking | | `rdi_processed_batches_created` | Gauge | Timestamp when the processed batches counter was created | Informational - no alerting needed | -| `rdi_processor_performance_batch_avg` | Gauge | Average processor performance for all batches | Informational - use for data ingestion tracking and debugging purposes | +| **Processor Performance Total Metrics** | | | | +| `rdi_processor_batch_size_total` | Counter | Total batch size across all processed batches | Informational - use for throughput analysis | +| `rdi_processor_read_time_ms_total` | Counter | Total read time in milliseconds across all batches | Informational - use for performance analysis | +| `rdi_processor_transform_time_ms_total` | Counter | Total transform time in milliseconds across all batches | Informational - use for performance analysis | +| `rdi_processor_write_time_ms_total` | Counter | Total write time in milliseconds across all batches | Informational - use for performance analysis | +| `rdi_processor_process_time_ms_total` | Counter | Total process time in milliseconds across all batches | Informational - use for performance analysis | +| `rdi_processor_ack_time_ms_total` | Counter | Total acknowledgment time in milliseconds across all batches | Informational - use for performance analysis | +| `rdi_processor_total_time_ms_total` | Counter | Total total time in milliseconds across all batches | Informational - use for performance analysis | +| `rdi_processor_rec_per_sec_total` | Counter | Total records per second across all batches | Informational - 
use for throughput analysis | +| **Processor Performance Last Batch Metrics** | | | | +| `rdi_processor_batch_size_last` | Gauge | Last batch size processed | Informational - use for real-time monitoring | +| `rdi_processor_read_time_ms_last` | Gauge | Last batch read time in milliseconds | Informational - use for real-time performance monitoring | +| `rdi_processor_transform_time_ms_last` | Gauge | Last batch transform time in milliseconds | Informational - use for real-time performance monitoring | +| `rdi_processor_write_time_ms_last` | Gauge | Last batch write time in milliseconds | Informational - use for real-time performance monitoring | +| `rdi_processor_process_time_ms_last` | Gauge | Last batch process time in milliseconds | Informational - use for real-time performance monitoring | +| `rdi_processor_ack_time_ms_last` | Gauge | Last batch acknowledgment time in milliseconds | Informational - use for real-time performance monitoring | +| `rdi_processor_total_time_ms_last` | Gauge | Last batch total time in milliseconds | Informational - use for real-time performance monitoring | +| `rdi_processor_rec_per_sec_last` | Gauge | Last batch records per second | Informational - use for real-time throughput monitoring | {{< note >}} **Additional information about stream processor metrics:** @@ -124,12 +141,14 @@ RDI reports with their descriptions. - Metrics with the `_created` suffix are automatically generated by Prometheus for counters and gauges to track when they were first created. - The `rdi_incoming_entries` metric provides a detailed breakdown for each data source by operation type. - The `rdi_stream_event_latency_ms` metric helps monitor data freshness and processing delays. -- The `rdi_processor_performance_batch_avg` metric provides detailed performance insights. 
+- The processor performance metrics are divided into two categories: + - **Total metrics** (Counters): Accumulate values across all processed batches for historical analysis + - **Last batch metrics** (Gauges): Show real-time performance data for the most recently processed batch {{< /note >}} ## Recommended alerting strategy -The alerting strategy described in the sections below focuses on system failures and data integrity issues that require immediate attention. Most ther metrics are informational, so you should monitor them for trends rather than trigger alerts. +The alerting strategy described in the sections below focuses on system failures and data integrity issues that require immediate attention. Most other metrics are informational, so you should monitor them for trends rather than trigger alerts. ### Critical alerts (immediate response required) From 2e5fd47fa0f1ee22c2b29d6cfe5cee8c3299989f Mon Sep 17 00:00:00 2001 From: Zhivko Kirishev Date: Fri, 19 Sep 2025 15:32:27 +0300 Subject: [PATCH 3/5] Document `rdi_processor_rec_per_sec_total` as Gauge --- content/integrate/redis-data-integration/observability.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/integrate/redis-data-integration/observability.md b/content/integrate/redis-data-integration/observability.md index 789c2f7df3..270bfdd269 100644 --- a/content/integrate/redis-data-integration/observability.md +++ b/content/integrate/redis-data-integration/observability.md @@ -123,7 +123,7 @@ RDI reports with their descriptions. 
| `rdi_processor_process_time_ms_total` | Counter | Total process time in milliseconds across all batches | Informational - use for performance analysis | | `rdi_processor_ack_time_ms_total` | Counter | Total acknowledgment time in milliseconds across all batches | Informational - use for performance analysis | | `rdi_processor_total_time_ms_total` | Counter | Total total time in milliseconds across all batches | Informational - use for performance analysis | -| `rdi_processor_rec_per_sec_total` | Counter | Total records per second across all batches | Informational - use for throughput analysis | +| `rdi_processor_rec_per_sec_total` | Gauge | Total records per second across all batches | Informational - use for throughput analysis | | **Processor Performance Last Batch Metrics** | | | | | `rdi_processor_batch_size_last` | Gauge | Last batch size processed | Informational - use for real-time monitoring | | `rdi_processor_read_time_ms_last` | Gauge | Last batch read time in milliseconds | Informational - use for real-time performance monitoring | @@ -142,8 +142,8 @@ RDI reports with their descriptions. - The `rdi_incoming_entries` metric provides a detailed breakdown for each data source by operation type. - The `rdi_stream_event_latency_ms` metric helps monitor data freshness and processing delays. 
- The processor performance metrics are divided into two categories: - - **Total metrics** (Counters): Accumulate values across all processed batches for historical analysis - - **Last batch metrics** (Gauges): Show real-time performance data for the most recently processed batch + - **Total metrics**: Accumulate values across all processed batches for historical analysis + - **Last batch metrics**: Show real-time performance data for the most recently processed batch {{< /note >}} ## Recommended alerting strategy From 86270218412f5561b727e5a342f4733711169727 Mon Sep 17 00:00:00 2001 From: Zhivko Kirishev Date: Wed, 24 Sep 2025 07:42:38 +0300 Subject: [PATCH 4/5] Move `rdi_processed_batches_total` to the `Processor Performance Total Metrics` section --- content/integrate/redis-data-integration/observability.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/content/integrate/redis-data-integration/observability.md b/content/integrate/redis-data-integration/observability.md index 270bfdd269..1c704e429e 100644 --- a/content/integrate/redis-data-integration/observability.md +++ b/content/integrate/redis-data-integration/observability.md @@ -113,9 +113,8 @@ RDI reports with their descriptions. 
| `monitor_time_elapsed_created` | Gauge | Timestamp when the monitor time elapsed counter was created | Informational - no alerting needed | | `rdi_incoming_entries` | Gauge | Count of incoming events by `data_source` and `operation` type (pending, inserted, updated, deleted, filtered, rejected) | Informational - monitor for trends, alert only on "rejected" > 0 | | `rdi_stream_event_latency_ms` | Gauge | Latency in milliseconds of the oldest event in each data stream, labeled by `data_source` | Informational - monitor based on business SLA requirements | -| `rdi_processed_batches_total` | Counter | Count of the total processed batches | Informational - use for data ingestion and load tracking | -| `rdi_processed_batches_created` | Gauge | Timestamp when the processed batches counter was created | Informational - no alerting needed | | **Processor Performance Total Metrics** | | | | +| `rdi_processed_batches_total` | Counter | Count of the total processed batches | Informational - use for data ingestion and load tracking | | `rdi_processor_batch_size_total` | Counter | Total batch size across all processed batches | Informational - use for throughput analysis | | `rdi_processor_read_time_ms_total` | Counter | Total read time in milliseconds across all batches | Informational - use for performance analysis | | `rdi_processor_transform_time_ms_total` | Counter | Total transform time in milliseconds across all batches | Informational - use for performance analysis | From d92bf109432d6d01eeafa52657751b2227164bb7 Mon Sep 17 00:00:00 2001 From: Zhivko Kirishev Date: Thu, 25 Sep 2025 10:27:06 +0300 Subject: [PATCH 5/5] Address review comments about wording --- content/integrate/redis-data-integration/observability.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/integrate/redis-data-integration/observability.md b/content/integrate/redis-data-integration/observability.md index 1c704e429e..5235950c5f 100644 --- 
a/content/integrate/redis-data-integration/observability.md +++ b/content/integrate/redis-data-integration/observability.md @@ -114,14 +114,14 @@ RDI reports with their descriptions. | `rdi_incoming_entries` | Gauge | Count of incoming events by `data_source` and `operation` type (pending, inserted, updated, deleted, filtered, rejected) | Informational - monitor for trends, alert only on "rejected" > 0 | | `rdi_stream_event_latency_ms` | Gauge | Latency in milliseconds of the oldest event in each data stream, labeled by `data_source` | Informational - monitor based on business SLA requirements | | **Processor Performance Total Metrics** | | | | -| `rdi_processed_batches_total` | Counter | Count of the total processed batches | Informational - use for data ingestion and load tracking | +| `rdi_processed_batches_total` | Counter | Total number of processed batches | Informational - use for data ingestion and load tracking | | `rdi_processor_batch_size_total` | Counter | Total batch size across all processed batches | Informational - use for throughput analysis | | `rdi_processor_read_time_ms_total` | Counter | Total read time in milliseconds across all batches | Informational - use for performance analysis | | `rdi_processor_transform_time_ms_total` | Counter | Total transform time in milliseconds across all batches | Informational - use for performance analysis | | `rdi_processor_write_time_ms_total` | Counter | Total write time in milliseconds across all batches | Informational - use for performance analysis | | `rdi_processor_process_time_ms_total` | Counter | Total process time in milliseconds across all batches | Informational - use for performance analysis | | `rdi_processor_ack_time_ms_total` | Counter | Total acknowledgment time in milliseconds across all batches | Informational - use for performance analysis | -| `rdi_processor_total_time_ms_total` | Counter | Total total time in milliseconds across all batches | Informational - use for performance analysis 
| +| `rdi_processor_total_time_ms_total` | Counter | Total time in milliseconds across all batches, calculated as the sum of the `read_time`, `process_time`, and `ack_time` values | Informational - use for performance analysis | | `rdi_processor_rec_per_sec_total` | Gauge | Total records per second across all batches | Informational - use for throughput analysis | | **Processor Performance Last Batch Metrics** | | | | | `rdi_processor_batch_size_last` | Gauge | Last batch size processed | Informational - use for real-time monitoring |