From 617740400f5887a5fc0af9130236fc14bfd181f6 Mon Sep 17 00:00:00 2001 From: Alastair Lock Date: Mon, 10 Nov 2025 12:04:53 +0000 Subject: [PATCH 01/13] DTOSS-11379: Create a custom metric in the app This creates a custom Azure metric for the storage queues, enabling us to monitor and alert on the data through a gauge. The code is implemented as a service class and is designed to be flexible enough for use with other Azure infrastructure services in the future. --- .../commands/save_message_status.py | 9 ++++ .../notifications/services/metrics.py | 46 +++++++++++++++++++ .../notifications/services/queue.py | 10 ++++ 3 files changed, 65 insertions(+) create mode 100644 manage_breast_screening/notifications/services/metrics.py diff --git a/manage_breast_screening/notifications/management/commands/save_message_status.py b/manage_breast_screening/notifications/management/commands/save_message_status.py index 231d5b87d..700122b36 100644 --- a/manage_breast_screening/notifications/management/commands/save_message_status.py +++ b/manage_breast_screening/notifications/management/commands/save_message_status.py @@ -1,4 +1,5 @@ import json +import os from logging import getLogger from dateutil import parser @@ -13,6 +14,7 @@ Message, MessageStatus, ) +from manage_breast_screening.notifications.services.metrics import Metrics from manage_breast_screening.notifications.services.queue import Queue INSIGHTS_JOB_NAME = "SaveMessageStatus" @@ -29,6 +31,13 @@ def handle(self, *args, **options): with CommandHandler.handle(INSIGHTS_JOB_NAME): logger.info("Save Message Status Command started") queue = Queue.MessageStatusUpdates() + self.metrics = Metrics( + queue.queue_name, + "messages", + "Queue length", + os.getenv("ENVIRONMENT"), + ) + self.metrics.add(queue.get_message_count()) for item in queue.items(): logger.debug(f"Processing message status update {item}") payload = json.loads(item.content) diff --git a/manage_breast_screening/notifications/services/metrics.py b/manage_breast_screening/notifications/services/metrics.py new file mode 100644 index 000000000..ff08df5e6 --- /dev/null +++ b/manage_breast_screening/notifications/services/metrics.py @@ -0,0 +1,46 @@ +import logging +import os + +from azure.monitor.opentelemetry.exporter import AzureMonitorMetricExporter +from opentelemetry import metrics +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader + +logger = logging.getLogger(__name__) + + +class Metrics: + def __init__( + self, metric_name, metric_units, metric_description, metric_environment + ): + self.metric_name = metric_name + + try: + logger.info(f"metric_name: {metric_name}") + logger.info(f"metric_units: {metric_units}") + logger.info(f"metric_description: {metric_description}") + logger.info(f"metric_environment: {metric_environment}") + + exporter = AzureMonitorMetricExporter( + connection_string=os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") + ) + reader = PeriodicExportingMetricReader(exporter) + metrics.set_meter_provider(MeterProvider(metric_readers=[reader])) + meter = metrics.get_meter(__name__) + self.environment = metric_environment + self.gauge = meter.create_gauge( + self.metric_name, unit=metric_units, description=metric_description + ) + except ValueError as e: + logger.warning(f"Skipping Azure Monitor setup: {e}") + self.gauge = None + + def add(self, value): + if self.gauge: + try: + self.gauge.set( + value, + {"queue_name": self.metric_name, "environment": self.environment}, + ) + except Exception: + logger.exception("Failed to update gauge") diff --git a/manage_breast_screening/notifications/services/queue.py b/manage_breast_screening/notifications/services/queue.py index edc294431..2ea861360 100644 --- a/manage_breast_screening/notifications/services/queue.py +++ b/manage_breast_screening/notifications/services/queue.py @@ -1,9 +1,12 @@ +import logging import os from azure.core.exceptions import ResourceExistsError from azure.identity import ManagedIdentityCredential from azure.storage.queue import QueueClient, QueueMessage +logger = logging.getLogger(__name__) + class QueueConfigurationError(Exception): """Raised when queue is not properly configured""" @@ -16,6 +19,7 @@ def __init__(self, queue_name): storage_account_name = os.getenv("STORAGE_ACCOUNT_NAME") queue_mi_client_id = os.getenv("QUEUE_MI_CLIENT_ID") connection_string = os.getenv("QUEUE_STORAGE_CONNECTION_STRING") + self.queue_name = queue_name if storage_account_name and queue_mi_client_id: self.client = QueueClient( @@ -55,6 +59,12 @@ def peek(self): def item(self): return self.client.receive_message() + def get_message_count(self): + message_count = self.client.get_queue_properties().approximate_message_count + logger.debug(f"get_message_count: {message_count}") + + return message_count + @classmethod def MessageStatusUpdates(cls): return cls( From 5aeb688e3ff1b965644c4a6a7ad83c1272efa120 Mon Sep 17 00:00:00 2001 From: Alastair Lock Date: Mon, 10 Nov 2025 12:10:52 +0000 Subject: [PATCH 02/13] DTOSS-11379: Unit test for metric service class --- .../tests/services/test_metrics.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 manage_breast_screening/notifications/tests/services/test_metrics.py diff --git a/manage_breast_screening/notifications/tests/services/test_metrics.py b/manage_breast_screening/notifications/tests/services/test_metrics.py new file mode 100644 index 000000000..4efde83a9 --- /dev/null +++ b/manage_breast_screening/notifications/tests/services/test_metrics.py @@ -0,0 +1,79 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from manage_breast_screening.notifications.services.metrics import Metrics + + +class TestMetrics: + @pytest.fixture + def conn_string(self): + return "InstrumentationKey=00000000-0000-0000-0000-000000000000" + + @pytest.fixture(autouse=True) + def setup(self, monkeypatch, conn_string): + monkeypatch.setenv("APPLICATIONINSIGHTS_CONNECTION_STRING", conn_string) + + @patch( + "manage_breast_screening.notifications.services.metrics.AzureMonitorMetricExporter" + ) + @patch( + "manage_breast_screening.notifications.services.metrics.PeriodicExportingMetricReader" + ) + @patch("manage_breast_screening.notifications.services.metrics.metrics") + @patch("manage_breast_screening.notifications.services.metrics.MeterProvider") + def test_metrics_initialisation( + self, + mock_meter_provider, + mock_metrics, + mock_metric_reader, + mock_metric_exporter, + conn_string, + ): + mock_meter = MagicMock() + mock_gauge = MagicMock() + mock_metrics.get_meter.return_value = mock_meter + mock_meter.create_gauge.return_value = mock_gauge + + subject = Metrics("metric-name", "metric-units", "metric-description") + + mock_metric_exporter.assert_called_once_with(connection_string=str(conn_string)) + mock_metric_reader.assert_called_once_with(mock_metric_exporter.return_value) + mock_meter_provider.assert_called_once_with( + metric_readers=[mock_metric_reader.return_value] + ) + mock_metrics.set_meter_provider.assert_called_once_with( + mock_meter_provider.return_value + ) + mock_metrics.get_meter.assert_called_once_with( + "manage_breast_screening.notifications.services.metrics" + ) + mock_meter.create_gauge.assert_called_once_with( + "metric-name", unit="metric-units", description="metric-description" + ) + assert subject.gauge == mock_gauge + + @patch( + "manage_breast_screening.notifications.services.metrics.AzureMonitorMetricExporter" + ) + @patch( + "manage_breast_screening.notifications.services.metrics.PeriodicExportingMetricReader" + ) + @patch("manage_breast_screening.notifications.services.metrics.metrics") + @patch("manage_breast_screening.notifications.services.metrics.MeterProvider") + def test_add( + self, + mock_meter_provider, + mock_metrics, + mock_metric_reader, + mock_metric_exporter, + ): + mock_meter = MagicMock() + mock_gauge = MagicMock() + mock_metrics.get_meter.return_value = mock_meter + mock_meter.create_gauge.return_value = mock_gauge + + subject = Metrics("TheQ", "yards", "desc") + subject.add("Yay!") + + subject.gauge.set.assert_called_once_with("Yay!", {"queue_name": "TheQ"}) From c9e9a59c11e8caaa7cdcef4ee85e09d4393f9ae6 Mon Sep 17 00:00:00 2001 From: Alastair Lock Date: Mon, 10 Nov 2025 12:12:26 +0000 Subject: [PATCH 03/13] DTOSS-11379: Azure monitor metrics on storage queues Azure monitor infrastructure metrics on storage queues, this is the actual alarm which queries the custom metric in azure. --- .../environments/review/variables.tfvars | 1 + .../environments/review/variables.yml | 2 +- .../modules/container-apps/alerts.tf | 53 +++++++++++++++++++ infrastructure/modules/container-apps/jobs.tf | 2 + .../modules/container-apps/variables.tf | 5 ++ infrastructure/terraform/main.tf | 1 + infrastructure/terraform/variables.tf | 6 +++ 7 files changed, 69 insertions(+), 1 deletion(-) diff --git a/infrastructure/environments/review/variables.tfvars b/infrastructure/environments/review/variables.tfvars index f135e972b..af173e980 100644 --- a/infrastructure/environments/review/variables.tfvars +++ b/infrastructure/environments/review/variables.tfvars @@ -10,3 +10,4 @@ vnet_address_space = "10.142.0.0/16" deploy_database_as_container = true seed_demo_data = true nhs_notify_api_message_batch_url = "https://int.api.service.nhs.uk/comms/v1/message-batches" +enable_alerting = true diff --git a/infrastructure/environments/review/variables.yml b/infrastructure/environments/review/variables.yml index 58c395d79..7f3c04cd4 100644 --- a/infrastructure/environments/review/variables.yml +++ b/infrastructure/environments/review/variables.yml @@ -1,5 +1,5 @@ -BASIC_AUTH_ENABLED: True PERSONAS_ENABLED: 1 CSRF_TRUSTED_ORIGINS: 'https://*.manage-breast-screening.non-live.screening.nhs.uk' NOTIFICATIONS_SMTP_IS_ENABLED: False APPLICATIONINSIGHTS_IS_ENABLED: True +BASIC_AUTH_ENABLED: True diff --git a/infrastructure/modules/container-apps/alerts.tf b/infrastructure/modules/container-apps/alerts.tf index a9ea412bd..111094fe5 100644 --- a/infrastructure/modules/container-apps/alerts.tf +++ b/infrastructure/modules/container-apps/alerts.tf @@ -36,3 +36,56 @@ resource "azurerm_monitor_scheduled_query_rules_alert_v2" "failure_event" { } } } + +# IMPORTANT: +# Enable metrics store with all dimensions: https://docs.azure.cn/en-us/azure-monitor/app/metrics-overview?tabs=standard#custom-metrics-dimensions-and-preaggregation +# currently this feature is in preview. +resource "azurerm_monitor_scheduled_query_rules_alert_v2" "queue_length_high" { + for_each = var.enable_alerting ? toset([ + "notifications-message-status-updates", + "notifications-message-batch-retries" + ]) : [] + + name = "${var.app_short_name}-${each.key}-${var.environment}-queue-length-high-alert" + location = var.region + resource_group_name = azurerm_resource_group.main.name + + auto_mitigation_enabled = true + description = "Alert when queue length exceeds ${var.queue_length_alert_threshold}" + display_name = "${var.app_short_name} Notifications Queue Length High Alert" + enabled = true + severity = 2 + evaluation_frequency = "PT10M" + window_duration = "PT10M" + scopes = [var.app_insights_id] + + criteria { + query = <<-KQL + customMetrics + | where name == "${each.key}" + | extend environment = tostring(customDimensions.environment) + | where environment == "${var.environment}" + | extend value = toreal(value) + | summarize avg_value = avg(value) by bin(timestamp, 5m) + | where avg_value > ${var.queue_length_alert_threshold} + KQL + + metric_measure_column = "avg_value" + time_aggregation_method = "Average" + operator = "GreaterThan" + threshold = 0 + + failing_periods { + minimum_failing_periods_to_trigger_alert = 1 + number_of_evaluation_periods = 1 + } + } + + action { + action_groups = [var.action_group_id] + } + + tags = { + environment = var.environment + } +} diff --git a/infrastructure/modules/container-apps/jobs.tf b/infrastructure/modules/container-apps/jobs.tf index 556a0ad7f..c8f91da1d 100644 --- a/infrastructure/modules/container-apps/jobs.tf +++ b/infrastructure/modules/container-apps/jobs.tf @@ -17,6 +17,7 @@ locals { job_container_args = "create_appointments" } send_message_batch = { + # cron_expression = "0,30 9 * * 1-5" cron_expression = null environment_variables = { @@ -43,6 +44,7 @@ locals { cron_expression = null environment_variables = { STATUS_UPDATES_QUEUE_NAME = "notifications-message-status-updates" + ENVIRONMENT = var.environment } job_short_name = "sms" job_container_args = "save_message_status" diff --git a/infrastructure/modules/container-apps/variables.tf b/infrastructure/modules/container-apps/variables.tf index e7f1f9120..c48166817 100644 --- a/infrastructure/modules/container-apps/variables.tf +++ b/infrastructure/modules/container-apps/variables.tf @@ -200,6 +200,11 @@ variable "app_insights_id" { type = string } +variable "queue_length_alert_threshold" { + description = "If alerting is enabled, alert if storage account queues are greater than this threshold." + type = number +} + variable "enable_notifications_jobs_schedule" { description = "Whether we apply the cron schedules for the notifications container app jobs" type = bool diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 22d1f1a2a..d909dd7fb 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -78,4 +78,5 @@ module "container-apps" { target_url = var.deploy_container_apps ? "${module.container-apps[0].external_url}healthcheck" : null resource_group_name_infra = local.resource_group_name enable_notifications_jobs_schedule = var.enable_notifications_jobs_schedule + queue_length_alert_threshold = var.queue_length_alert_threshold } diff --git a/infrastructure/terraform/variables.tf b/infrastructure/terraform/variables.tf index cd6e78767..6ac36a1f8 100644 --- a/infrastructure/terraform/variables.tf +++ b/infrastructure/terraform/variables.tf @@ -184,6 +184,12 @@ variable "run_notifications_smoke_test" { type = bool } +variable "queue_length_alert_threshold" { + description = "If alerting is enabled, alert if storage account queues are greater than this threshold." + type = number + default = 5 +} + locals { region = "uksouth" From 779d76be2711a0060c2266f4f61902c70d08149f Mon Sep 17 00:00:00 2001 From: Alastair Lock Date: Mon, 10 Nov 2025 12:18:49 +0000 Subject: [PATCH 04/13] DTOSS-11379: Update unit test for queue service --- .../environments/review/variables.tfvars | 1 - infrastructure/environments/review/variables.yml | 2 +- .../notifications/services/metrics.py | 8 ++++---- .../notifications/tests/services/test_metrics.py | 14 ++++++++++---- .../notifications/tests/services/test_queue.py | 13 +++++++++++++ 5 files changed, 28 insertions(+), 10 deletions(-) diff --git a/infrastructure/environments/review/variables.tfvars b/infrastructure/environments/review/variables.tfvars index af173e980..f135e972b 100644 --- a/infrastructure/environments/review/variables.tfvars +++ b/infrastructure/environments/review/variables.tfvars @@ -10,4 +10,3 @@ vnet_address_space = "10.142.0.0/16" deploy_database_as_container = true seed_demo_data = true nhs_notify_api_message_batch_url = "https://int.api.service.nhs.uk/comms/v1/message-batches" -enable_alerting = true diff --git a/infrastructure/environments/review/variables.yml b/infrastructure/environments/review/variables.yml index 7f3c04cd4..58c395d79 100644 --- a/infrastructure/environments/review/variables.yml +++ b/infrastructure/environments/review/variables.yml @@ -1,5 +1,5 @@ +BASIC_AUTH_ENABLED: True PERSONAS_ENABLED: 1 CSRF_TRUSTED_ORIGINS: 'https://*.manage-breast-screening.non-live.screening.nhs.uk' NOTIFICATIONS_SMTP_IS_ENABLED: False APPLICATIONINSIGHTS_IS_ENABLED: True -BASIC_AUTH_ENABLED: True diff --git a/manage_breast_screening/notifications/services/metrics.py b/manage_breast_screening/notifications/services/metrics.py index ff08df5e6..633191321 100644 --- a/manage_breast_screening/notifications/services/metrics.py +++ b/manage_breast_screening/notifications/services/metrics.py @@ -16,10 +16,10 @@ def __init__( self.metric_name = metric_name try: - logger.info(f"metric_name: {metric_name}") - logger.info(f"metric_units: {metric_units}") - logger.info(f"metric_description: {metric_description}") - logger.info(f"metric_environment: {metric_environment}") + logger.debug(f"metric_name: {metric_name}") + logger.debug(f"metric_units: {metric_units}") + logger.debug(f"metric_description: {metric_description}") + logger.debug(f"metric_environment: {metric_environment}") exporter = AzureMonitorMetricExporter( connection_string=os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") diff --git a/manage_breast_screening/notifications/tests/services/test_metrics.py b/manage_breast_screening/notifications/tests/services/test_metrics.py index 4efde83a9..274b6621f 100644 --- a/manage_breast_screening/notifications/tests/services/test_metrics.py +++ b/manage_breast_screening/notifications/tests/services/test_metrics.py @@ -35,7 +35,9 @@ def test_metrics_initialisation( mock_metrics.get_meter.return_value = mock_meter mock_meter.create_gauge.return_value = mock_gauge - subject = Metrics("metric-name", "metric-units", "metric-description") + subject = Metrics( + "metric-name", "metric-units", "metric-description", "metric-environment" + ) mock_metric_exporter.assert_called_once_with(connection_string=str(conn_string)) mock_metric_reader.assert_called_once_with(mock_metric_exporter.return_value) @@ -49,7 +51,9 @@ def test_metrics_initialisation( "manage_breast_screening.notifications.services.metrics" ) mock_meter.create_gauge.assert_called_once_with( - "metric-name", unit="metric-units", description="metric-description" + "metric-name", + unit="metric-units", + description="metric-description", ) assert subject.gauge == mock_gauge @@ -73,7 +77,9 @@ def test_add( mock_metrics.get_meter.return_value = mock_meter mock_meter.create_gauge.return_value = mock_gauge - subject = Metrics("TheQ", "yards", "desc") + subject = Metrics("TheQ", "yards", "desc", "env") subject.add("Yay!") - subject.gauge.set.assert_called_once_with("Yay!", {"queue_name": "TheQ"}) + subject.gauge.set.assert_called_once_with( + "Yay!", {"queue_name": "TheQ", "environment": "env"} + ) diff --git a/manage_breast_screening/notifications/tests/services/test_queue.py b/manage_breast_screening/notifications/tests/services/test_queue.py index 67ef4445f..43413a9e2 100644 --- a/manage_breast_screening/notifications/tests/services/test_queue.py +++ b/manage_breast_screening/notifications/tests/services/test_queue.py @@ -150,3 +150,16 @@ def test_queue_raises_configuration_error_when_no_config_provided( Queue("test-queue") assert "Queue not configured" in str(exc_info.value) + + def test_get_message_count_returns_correct_value(self, mock_queue_client, caplog): + mock_props = MagicMock() + mock_props.approximate_message_count = 8 + mock_queue_client.get_queue_properties.return_value = mock_props + + with caplog.at_level("DEBUG"): + queue = Queue("new-queue") + count = queue.get_message_count() + + assert count == 8 + mock_queue_client.get_queue_properties.assert_called_once() + assert "get_message_count: 8" in caplog.text From ed935ca18a08b029ecb6959de8c62cf918cc8536 Mon Sep 17 00:00:00 2001 From: Steve Laing Date: Tue, 11 Nov 2025 15:42:05 +0000 Subject: [PATCH 05/13] Pass a key to add method Metrics won't necessarily be tied to measuring queues so make the key an argument we pass to add. --- .../notifications/services/metrics.py | 30 +++++++++---------- .../tests/services/test_metrics.py | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/manage_breast_screening/notifications/services/metrics.py b/manage_breast_screening/notifications/services/metrics.py index 633191321..2e059c25c 100644 --- a/manage_breast_screening/notifications/services/metrics.py +++ b/manage_breast_screening/notifications/services/metrics.py @@ -10,37 +10,37 @@ class Metrics: - def __init__( - self, metric_name, metric_units, metric_description, metric_environment - ): - self.metric_name = metric_name - + def __init__(self, name, units, description, environment): try: - logger.debug(f"metric_name: {metric_name}") - logger.debug(f"metric_units: {metric_units}") - logger.debug(f"metric_description: {metric_description}") - logger.debug(f"metric_environment: {metric_environment}") + logger.debug( + ( + f"Initialising Metrics(name: {name}, units: {units}, " + f"description: {description}, environment: {environment})" + ) + ) exporter = AzureMonitorMetricExporter( connection_string=os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") ) - reader = PeriodicExportingMetricReader(exporter) - metrics.set_meter_provider(MeterProvider(metric_readers=[reader])) + metrics.set_meter_provider( + MeterProvider(metric_readers=[PeriodicExportingMetricReader(exporter)]) + ) meter = metrics.get_meter(__name__) - self.environment = metric_environment + self.name = name + self.environment = environment self.gauge = meter.create_gauge( - self.metric_name, unit=metric_units, description=metric_description + self.name, unit=units, description=description ) except ValueError as e: logger.warning(f"Skipping Azure Monitor setup: {e}") self.gauge = None - def add(self, value): + def add(self, key, value): if self.gauge: try: self.gauge.set( value, - {"queue_name": self.metric_name, "environment": self.environment}, + {key: self.name, "environment": self.environment}, ) except Exception: logger.exception("Failed to update gauge") diff --git a/manage_breast_screening/notifications/tests/services/test_metrics.py b/manage_breast_screening/notifications/tests/services/test_metrics.py index 274b6621f..c717a7d94 100644 --- a/manage_breast_screening/notifications/tests/services/test_metrics.py +++ b/manage_breast_screening/notifications/tests/services/test_metrics.py @@ -78,7 +78,7 @@ def test_add( mock_meter.create_gauge.return_value = mock_gauge subject = Metrics("TheQ", "yards", "desc", "env") - subject.add("Yay!") + subject.add("queue_name", "Yay!") subject.gauge.set.assert_called_once_with( "Yay!", {"queue_name": "TheQ", "environment": "env"} From a0a7d748eba35c5972584a110e8a6363da3bd72d Mon Sep 17 00:00:00 2001 From: Steve Laing Date: Tue, 11 Nov 2025 15:43:54 +0000 Subject: [PATCH 06/13] Simplify message_count method Amend unit test so that it doesn't depend on logging. --- .../notifications/services/queue.py | 5 +---- .../tests/services/test_queue.py | 21 +++++++++---------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/manage_breast_screening/notifications/services/queue.py b/manage_breast_screening/notifications/services/queue.py index 2ea861360..b31002590 100644 --- a/manage_breast_screening/notifications/services/queue.py +++ b/manage_breast_screening/notifications/services/queue.py @@ -60,10 +60,7 @@ def item(self): return self.client.receive_message() def get_message_count(self): - message_count = self.client.get_queue_properties().approximate_message_count - logger.debug(f"get_message_count: {message_count}") - - return message_count + return self.client.get_queue_properties().approximate_message_count @classmethod def MessageStatusUpdates(cls): diff --git a/manage_breast_screening/notifications/tests/services/test_queue.py b/manage_breast_screening/notifications/tests/services/test_queue.py index 43413a9e2..d8c4e939e 100644 --- a/manage_breast_screening/notifications/tests/services/test_queue.py +++ b/manage_breast_screening/notifications/tests/services/test_queue.py @@ -1,4 +1,4 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, PropertyMock, patch import pytest @@ -152,14 +152,13 @@ def test_queue_raises_configuration_error_when_no_config_provided( assert "Queue not configured" in str(exc_info.value) def test_get_message_count_returns_correct_value(self, mock_queue_client, caplog): - mock_props = MagicMock() - mock_props.approximate_message_count = 8 - mock_queue_client.get_queue_properties.return_value = mock_props - - with caplog.at_level("DEBUG"): - queue = Queue("new-queue") - count = queue.get_message_count() + with patch( + "manage_breast_screening.notifications.services.queue.QueueClient" + ) as queue_client: + queue_client.get_queue_properties.return_value = PropertyMock( + approximate_message_count=666 + ) + subject = Queue("A-Q") + subject.client = queue_client - assert count == 8 - mock_queue_client.get_queue_properties.assert_called_once() - assert "get_message_count: 8" in caplog.text + assert subject.get_message_count() == 666 From dc97c575ec63b4e5c4342b1a1d5850f44ae3a7ab Mon Sep 17 00:00:00 2001 From: Steve Laing Date: Tue, 11 Nov 2025 16:53:59 +0000 Subject: [PATCH 07/13] Add Django admin command to collect metrics Initially this gets queue lengths for both message status updates and message batch retries --- .../management/commands/collect_metrics.py | 25 ++++++++++++ .../commands/test_collect_metrics.py | 38 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 manage_breast_screening/notifications/management/commands/collect_metrics.py create mode 100644 manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py diff --git a/manage_breast_screening/notifications/management/commands/collect_metrics.py b/manage_breast_screening/notifications/management/commands/collect_metrics.py new file mode 100644 index 000000000..cb914b4da --- /dev/null +++ b/manage_breast_screening/notifications/management/commands/collect_metrics.py @@ -0,0 +1,25 @@ +import logging +import os + +from django.core.management.base import BaseCommand, CommandError + +from manage_breast_screening.notifications.services.metrics import Metrics +from manage_breast_screening.notifications.services.queue import Queue + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + def handle(self, *args, **options): + try: + for queue in [Queue.RetryMessageBatches(), Queue.MessageStatusUpdates()]: + metrics = Metrics( + queue.queue_name, + "messages", + "Queue length", + os.getenv("ENVIRONMENT"), + ) + metrics.add("queue_name", queue.get_message_count()) + except Exception as e: + logger.error(e, exc_info=True) + raise CommandError(e) diff --git a/manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py b/manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py new file mode 100644 index 000000000..d3bc1241f --- /dev/null +++ b/manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py @@ -0,0 +1,38 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from manage_breast_screening.notifications.management.commands.collect_metrics import ( + Command, +) +from manage_breast_screening.notifications.services.metrics import Metrics + + +class TestCollectMetrics: + @pytest.fixture(autouse=True) + def setup(self, monkeypatch): + monkeypatch.setenv("ENVIRONMENT", "test") + + @patch(f"{Command.__module__}.Queue") + def test_handle_sends_queue_lengths(self, mock_queue): + mock_metrics_1 = MagicMock(spec=Metrics) + mock_metrics_2 = MagicMock(spec=Metrics) + mock_queue.RetryMessageBatches.return_value.queue_name = "queue 1" + mock_queue.RetryMessageBatches.return_value.get_message_count.return_value = 8 + mock_queue.MessageStatusUpdates.return_value.queue_name = "queue 2" + mock_queue.MessageStatusUpdates.return_value.get_message_count.return_value = 2 + + with patch( + f"{Command.__module__}.Metrics", + side_effect=[mock_metrics_1, mock_metrics_2], + ) as mock_metrics_class: + Command().handle() + + mock_metrics_class.assert_any_call( + "queue 1", "messages", "Queue length", "test" + ) + mock_metrics_class.assert_any_call( + "queue 2", "messages", "Queue length", "test" + ) + mock_metrics_1.add.assert_called_once_with("queue_name", 8) + mock_metrics_2.add.assert_called_once_with("queue_name", 2) From 9c602517d18d115dc94c5c19953fe03e9d87b38a Mon Sep 17 00:00:00 2001 From: Steve Laing Date: Tue, 11 Nov 2025 16:56:43 +0000 Subject: [PATCH 08/13] Remove metrics calculation from save message status job --- .../management/commands/save_message_status.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/manage_breast_screening/notifications/management/commands/save_message_status.py b/manage_breast_screening/notifications/management/commands/save_message_status.py index 700122b36..231d5b87d 100644 --- a/manage_breast_screening/notifications/management/commands/save_message_status.py +++ b/manage_breast_screening/notifications/management/commands/save_message_status.py @@ -1,5 +1,4 @@ import json -import os from logging import getLogger from dateutil import parser @@ -14,7 +13,6 @@ Message, MessageStatus, ) -from manage_breast_screening.notifications.services.metrics import Metrics from manage_breast_screening.notifications.services.queue import Queue INSIGHTS_JOB_NAME = "SaveMessageStatus" @@ -31,13 +29,6 @@ def handle(self, *args, **options): with CommandHandler.handle(INSIGHTS_JOB_NAME): logger.info("Save Message Status Command started") queue = Queue.MessageStatusUpdates() - self.metrics = Metrics( - queue.queue_name, - "messages", - "Queue length", - os.getenv("ENVIRONMENT"), - ) - self.metrics.add(queue.get_message_count()) for item in queue.items(): logger.debug(f"Processing message status update {item}") payload = json.loads(item.content) From 2be9706c0ff79a808e0cd04ac1830857d509fa6a Mon Sep 17 00:00:00 2001 From: Steve Laing Date: Wed, 12 Nov 2025 09:12:43 +0000 Subject: [PATCH 09/13] Add terraform config for collect_metrics container app job --- infrastructure/modules/container-apps/jobs.tf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/infrastructure/modules/container-apps/jobs.tf b/infrastructure/modules/container-apps/jobs.tf index c8f91da1d..681d972ab 100644 --- a/infrastructure/modules/container-apps/jobs.tf +++ b/infrastructure/modules/container-apps/jobs.tf @@ -66,6 +66,16 @@ locals { job_short_name = "smk" job_container_args = "create_reports --smoke-test" } + collect_metrics = { + cron_expression = "*/5 * * * *" + environment_variables = { + RETRY_QUEUE_NAME = "notifications-message-batch-retries" + STATUS_UPDATES_QUEUE_NAME = "notifications-message-status-updates" + ENVIRONMENT = var.environment + } + job_short_name = "clm" + job_container_args = "collect_metrics" + } } } From c39745236f282bb741ffa90bd3e244efd7f0bbcd Mon Sep 17 00:00:00 2001 From: Alastair Lock Date: Mon, 24 Nov 2025 17:27:07 +0000 Subject: [PATCH 10/13] Metric service class now a singleton with an array of gauges per instance To avoid the complexity of running multiple instances of the Metric Service, which would introduce unnecessary computational overhead, we decided it would be better to use a single instance of the Metric Service class and allow it to manage an array of gauges instead. Co-authored-by: Alastair Lock Co-authored-by: Colin Saliceti --- .github/workflows/cicd-1-pull-request.yaml | 26 ++--- core | 0 .../management/commands/collect_metrics.py | 14 ++- .../notifications/services/metrics.py | 44 ++++---- testing/new-code-testing2/delme.py | 106 ++++++++++++++++++ 5 files changed, 153 insertions(+), 37 deletions(-) create mode 100644 core create mode 100644 testing/new-code-testing2/delme.py diff --git a/.github/workflows/cicd-1-pull-request.yaml b/.github/workflows/cicd-1-pull-request.yaml index 4583a5d41..690b8b272 100644 --- a/.github/workflows/cicd-1-pull-request.yaml +++ b/.github/workflows/cicd-1-pull-request.yaml @@ -46,19 +46,19 @@ jobs: terraform_version: '${{ needs.metadata.outputs.terraform_version }}' version: '${{ needs.metadata.outputs.version }}' secrets: inherit - test-stage: - name: 'Test stage' - needs: [metadata] - uses: ./.github/workflows/stage-2-test.yaml - with: - build_datetime: '${{ needs.metadata.outputs.build_datetime }}' - build_timestamp: '${{ needs.metadata.outputs.build_timestamp }}' - build_epoch: '${{ needs.metadata.outputs.build_epoch }}' - nodejs_version: '${{ needs.metadata.outputs.nodejs_version }}' - python_version: '${{ needs.metadata.outputs.python_version }}' - terraform_version: '${{ needs.metadata.outputs.terraform_version }}' - version: '${{ needs.metadata.outputs.version }}' - secrets: inherit + # test-stage: + # name: 'Test stage' + # needs: [metadata] + # uses: ./.github/workflows/stage-2-test.yaml + # with: + # build_datetime: '${{ needs.metadata.outputs.build_datetime }}' + # build_timestamp: '${{ needs.metadata.outputs.build_timestamp }}' + # build_epoch: '${{ needs.metadata.outputs.build_epoch }}' + # nodejs_version: '${{ needs.metadata.outputs.nodejs_version }}' + # python_version: '${{ needs.metadata.outputs.python_version }}' + # terraform_version: '${{ needs.metadata.outputs.terraform_version }}' + # version: '${{ needs.metadata.outputs.version }}' + # secrets: inherit build-stage: name: 'Build stage' needs: [metadata] diff --git a/core b/core new file mode 100644 index 000000000..e69de29bb diff --git a/manage_breast_screening/notifications/management/commands/collect_metrics.py b/manage_breast_screening/notifications/management/commands/collect_metrics.py index cb914b4da..8b27e4ca8 100644 --- a/manage_breast_screening/notifications/management/commands/collect_metrics.py +++ b/manage_breast_screening/notifications/management/commands/collect_metrics.py @@ -12,14 +12,20 @@ class Command(BaseCommand): def handle(self, *args, **options): try: + # Create metrics collection + metrics = Metrics( + os.getenv("ENVIRONMENT"), + ) + + # Set queue_size metrics for queue in [Queue.RetryMessageBatches(), Queue.MessageStatusUpdates()]: - metrics = Metrics( - queue.queue_name, + metrics.set_gauge_value( + f"queue_size_{queue.queue_name}", "messages", "Queue length", - os.getenv("ENVIRONMENT"), + queue.get_message_count(), ) - metrics.add("queue_name", queue.get_message_count()) + except Exception as e: logger.error(e, exc_info=True) raise CommandError(e) diff --git a/manage_breast_screening/notifications/services/metrics.py b/manage_breast_screening/notifications/services/metrics.py index 2e059c25c..762224e36 100644 --- a/manage_breast_screening/notifications/services/metrics.py +++ b/manage_breast_screening/notifications/services/metrics.py @@ -10,14 +10,9 @@ class Metrics: - def __init__(self, name, units, description, environment): + def __init__(self, environment): try: - logger.debug( - ( - f"Initialising Metrics(name: {name}, units: {units}, " - f"description: {description}, environment: {environment})" - ) - ) + logger.debug((f"Initialising Metrics(environment: {environment})")) exporter = AzureMonitorMetricExporter( connection_string=os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") @@ -25,22 +20,31 @@ def __init__(self, name, units, description, environment): metrics.set_meter_provider( MeterProvider(metric_readers=[PeriodicExportingMetricReader(exporter)]) ) - meter = metrics.get_meter(__name__) - self.name = name + self.meter = metrics.get_meter(__name__) self.environment = environment - self.gauge = meter.create_gauge( - self.name, unit=units, description=description - ) + except ValueError as e: logger.warning(f"Skipping Azure Monitor setup: {e}") self.gauge = None - def add(self, key, value): - if self.gauge: - try: - self.gauge.set( - value, - {key: self.name, "environment": self.environment}, + def set_gauge_value(self, metric_name, units, description, value): + try: + logger.debug( + ( + f"Metrics: set_gauge_value(metric_name: {metric_name} " + f"units: {units}, description: {description}, value: {value})" ) - except Exception: - logger.exception("Failed to update gauge") + ) + + # Create gauge metric + gauge = self.meter.create_gauge( + metric_name, unit=units, description=description + ) + + # Set metric value + gauge.set( + value, + {"environment": self.environment}, + ) + except Exception: + logger.exception("Failed to update gauge") diff --git a/testing/new-code-testing2/delme.py b/testing/new-code-testing2/delme.py new file mode 100644 index 000000000..0eef93583 --- /dev/null +++ b/testing/new-code-testing2/delme.py @@ -0,0 +1,106 @@ +# delme.py +import logging +import os +import time + +from azure.monitor.opentelemetry.exporter import AzureMonitorMetricExporter +from opentelemetry import metrics +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import ( + MetricReader, + PeriodicExportingMetricReader, +) + +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + + +class MyPushReader(MetricReader): + """ + Concrete MetricReader that exports when we call provider.force_flush(). + NOTE: _receive_metrics accepts timeout_millis and **kwargs to match MeterProvider.force_flush(). + """ + + def __init__(self, exporter): + super().__init__() + self._exporter = exporter + + # Accept timeout_millis and any other kwargs to avoid TypeError + def _receive_metrics(self, metrics_data, timeout_millis: int = None, **kwargs): + logger.debug( + "MyPushReader._receive_metrics called (timeout_millis=%s, other=%s)", + timeout_millis, + kwargs, + ) + try: + # exporter.export expects the metrics_data object produced by the SDK + self._exporter.export(metrics_data) + except Exception: + logger.exception("Failed to export metrics") + + def flush(self): + """Manual flush method.""" + logger.debug("MyPushReader.flush called") + try: + self._exporter.force_flush() + except Exception: + logger.exception("Failed to flush exporter") + + def shutdown(self, timeout_millis: int = None): + logger.debug("MyPushReader.shutdown called (timeout_millis=%s)", timeout_millis) + try: + if hasattr(self._exporter, "shutdown"): + # some exporters provide shutdown/force_flush style methods + self._exporter.shutdown() + except Exception: + logger.exception("Failed to shutdown exporter") + + +def main(): + print(os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")) + + # exporter = ConsoleMetricExporter() + exporter = AzureMonitorMetricExporter( + connection_string=os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") + ) + # reader = MyPushReader(exporter) + # reader = MetricReader(exporter) + reader = PeriodicExportingMetricReader(exporter) + provider = MeterProvider(metric_readers=[reader]) + metrics.set_meter_provider(provider) + meter = metrics.get_meter(__name__) + + # Use a counter for manual updates (ObservableGauge caused compat issues) + # counter = meter.create_counter( + # "queue_depth", + # unit="messages", + # description="Queue depth (manual)" + # ) + gauge = meter.create_gauge( + "queue_gauge_depth", unit="count", description="Count using gauge" + ) + + # reader.flush() + + # Add a sample value with attributes + # counter.add(10, {"environment": "dev", "queue": "todo"}) + gauge.set(120, {"environment": "dev", "queue": "todo"}) + + # Sleep a fraction to allow any internal scheduling (not required but nicer) + time.sleep(0.1) + + # Force a flush (this will call reader._receive_metrics(timeout_millis=...)) + # try: + # provider.force_flush() + # logger.info("force_flush() completed without exception") + # except Exception as exc: + # logger.exception("provider.force_flush() failed: %s", exc) + + # try: + # provider.shutdown() + # except Exception: + # logger.exception("provider.shutdown() failed") + + +if __name__ == "__main__": + main() From f23728f700d2da49df027c571fe3eacb168c1245 Mon Sep 17 00:00:00 2001 From: Alastair Lock Date: Mon, 24 Nov 2025 20:45:05 +0000 Subject: [PATCH 11/13] update the unit tests --- .github/workflows/cicd-1-pull-request.yaml | 26 ++--- .../commands/test_collect_metrics.py | 53 +++++---- .../tests/services/test_metrics.py | 43 ++++--- testing/new-code-testing2/delme.py | 106 ------------------ 4 files changed, 70 insertions(+), 158 deletions(-) delete mode 100644 testing/new-code-testing2/delme.py diff --git a/.github/workflows/cicd-1-pull-request.yaml b/.github/workflows/cicd-1-pull-request.yaml index 690b8b272..4583a5d41 100644 --- a/.github/workflows/cicd-1-pull-request.yaml +++ b/.github/workflows/cicd-1-pull-request.yaml @@ -46,19 +46,19 @@ jobs: terraform_version: '${{ needs.metadata.outputs.terraform_version }}' version: '${{ needs.metadata.outputs.version }}' secrets: inherit - # test-stage: - # name: 'Test stage' - # needs: [metadata] - # uses: ./.github/workflows/stage-2-test.yaml - # with: - # build_datetime: '${{ needs.metadata.outputs.build_datetime }}' - # build_timestamp: '${{ needs.metadata.outputs.build_timestamp }}' - # build_epoch: '${{ needs.metadata.outputs.build_epoch }}' - # nodejs_version: '${{ needs.metadata.outputs.nodejs_version }}' - # python_version: '${{ needs.metadata.outputs.python_version }}' - # terraform_version: '${{ needs.metadata.outputs.terraform_version }}' - # version: '${{ needs.metadata.outputs.version }}' - # secrets: inherit + test-stage: + name: 'Test stage' + needs: [metadata] + uses: ./.github/workflows/stage-2-test.yaml + with: + build_datetime: '${{ needs.metadata.outputs.build_datetime }}' + build_timestamp: '${{ needs.metadata.outputs.build_timestamp }}' + build_epoch: '${{ needs.metadata.outputs.build_epoch }}' + nodejs_version: '${{ needs.metadata.outputs.nodejs_version }}' + python_version: '${{ needs.metadata.outputs.python_version }}' + terraform_version: '${{ needs.metadata.outputs.terraform_version }}' + version: '${{ needs.metadata.outputs.version }}' + secrets: inherit build-stage: name: 'Build stage' needs: [metadata] diff --git a/manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py b/manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py index d3bc1241f..6df2ff862 100644 --- a/manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py +++ b/manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py @@ -5,7 +5,6 @@ from manage_breast_screening.notifications.management.commands.collect_metrics import ( Command, ) -from manage_breast_screening.notifications.services.metrics import Metrics class TestCollectMetrics: @@ -14,25 +13,37 @@ def setup(self, monkeypatch): monkeypatch.setenv("ENVIRONMENT", "test") @patch(f"{Command.__module__}.Queue") - def test_handle_sends_queue_lengths(self, mock_queue): - mock_metrics_1 = MagicMock(spec=Metrics) - mock_metrics_2 = MagicMock(spec=Metrics) - mock_queue.RetryMessageBatches.return_value.queue_name = "queue 1" - mock_queue.RetryMessageBatches.return_value.get_message_count.return_value = 8 - mock_queue.MessageStatusUpdates.return_value.queue_name = "queue 2" - mock_queue.MessageStatusUpdates.return_value.get_message_count.return_value = 2 - - with patch( - f"{Command.__module__}.Metrics", - side_effect=[mock_metrics_1, mock_metrics_2], - ) as mock_metrics_class: - Command().handle() - - mock_metrics_class.assert_any_call( - "queue 1", "messages", "Queue length", "test" + @patch(f"{Command.__module__}.Metrics") + def test_handle_sends_queue_lengths(self, mock_metrics_class, mock_queue): + mock_retry = MagicMock() + mock_retry.queue_name = "retry_queue" + mock_retry.get_message_count.return_value = 8 + + mock_status = MagicMock() + mock_status.queue_name = "status_queue" + mock_status.get_message_count.return_value = 2 + + mock_queue.RetryMessageBatches.return_value = mock_retry + mock_queue.MessageStatusUpdates.return_value = mock_status + + mock_metrics_instance = MagicMock() + mock_metrics_class.return_value = mock_metrics_instance + + Command().handle() + + mock_metrics_class.assert_called_once_with("test") + + mock_metrics_instance.set_gauge_value.assert_any_call( + "queue_size_retry_queue", + "messages", + "Queue length", + 8, ) - mock_metrics_class.assert_any_call( - "queue 2", "messages", "Queue length", "test" + mock_metrics_instance.set_gauge_value.assert_any_call( + "queue_size_status_queue", + "messages", + "Queue length", + 2, ) - mock_metrics_1.add.assert_called_once_with("queue_name", 8) - mock_metrics_2.add.assert_called_once_with("queue_name", 2) + + assert mock_metrics_instance.set_gauge_value.call_count == 2 diff --git a/manage_breast_screening/notifications/tests/services/test_metrics.py b/manage_breast_screening/notifications/tests/services/test_metrics.py index c717a7d94..67d8ca043 100644 --- a/manage_breast_screening/notifications/tests/services/test_metrics.py +++ b/manage_breast_screening/notifications/tests/services/test_metrics.py @@ -31,13 +31,9 @@ def test_metrics_initialisation( conn_string, ): mock_meter = MagicMock() - mock_gauge = MagicMock() mock_metrics.get_meter.return_value = mock_meter - mock_meter.create_gauge.return_value = mock_gauge - subject = Metrics( - "metric-name", "metric-units", "metric-description", "metric-environment" - ) + subject = Metrics(environment="dev") mock_metric_exporter.assert_called_once_with(connection_string=str(conn_string)) mock_metric_reader.assert_called_once_with(mock_metric_exporter.return_value) @@ -50,12 +46,9 @@ def test_metrics_initialisation( mock_metrics.get_meter.assert_called_once_with( "manage_breast_screening.notifications.services.metrics" ) - mock_meter.create_gauge.assert_called_once_with( - "metric-name", - unit="metric-units", - description="metric-description", - ) - assert subject.gauge == mock_gauge + + assert subject.meter == mock_meter + assert subject.environment == "dev" @patch( "manage_breast_screening.notifications.services.metrics.AzureMonitorMetricExporter" @@ -65,21 +58,35 @@ def test_metrics_initialisation( ) @patch("manage_breast_screening.notifications.services.metrics.metrics") @patch("manage_breast_screening.notifications.services.metrics.MeterProvider") - def test_add( + def test_set_gauge_value( self, mock_meter_provider, mock_metrics, - mock_metric_reader, - mock_metric_exporter, + mock_reader, + mock_exporter, ): mock_meter = MagicMock() mock_gauge = MagicMock() + mock_metrics.get_meter.return_value = mock_meter mock_meter.create_gauge.return_value = mock_gauge - subject = Metrics("TheQ", "yards", "desc", "env") - subject.add("queue_name", "Yay!") + subject = Metrics(environment="prod") + + subject.set_gauge_value( + metric_name="queue_depth", + units="messages", + description="Number of messages", + value=999, + ) + + mock_meter.create_gauge.assert_called_once_with( + "queue_depth", + unit="messages", + description="Number of messages", + ) - subject.gauge.set.assert_called_once_with( - "Yay!", {"queue_name": "TheQ", "environment": "env"} + mock_gauge.set.assert_called_once_with( + 999, + {"environment": "prod"}, ) diff --git a/testing/new-code-testing2/delme.py b/testing/new-code-testing2/delme.py deleted file mode 100644 index 0eef93583..000000000 --- a/testing/new-code-testing2/delme.py +++ /dev/null @@ -1,106 +0,0 @@ -# delme.py -import logging -import os -import time - -from azure.monitor.opentelemetry.exporter import AzureMonitorMetricExporter -from opentelemetry import metrics -from opentelemetry.sdk.metrics import MeterProvider -from opentelemetry.sdk.metrics.export import ( - MetricReader, - PeriodicExportingMetricReader, -) - -logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger(__name__) - - -class MyPushReader(MetricReader): - """ - Concrete MetricReader that exports when we call provider.force_flush(). - NOTE: _receive_metrics accepts timeout_millis and **kwargs to match MeterProvider.force_flush(). - """ - - def __init__(self, exporter): - super().__init__() - self._exporter = exporter - - # Accept timeout_millis and any other kwargs to avoid TypeError - def _receive_metrics(self, metrics_data, timeout_millis: int = None, **kwargs): - logger.debug( - "MyPushReader._receive_metrics called (timeout_millis=%s, other=%s)", - timeout_millis, - kwargs, - ) - try: - # exporter.export expects the metrics_data object produced by the SDK - self._exporter.export(metrics_data) - except Exception: - logger.exception("Failed to export metrics") - - def flush(self): - """Manual flush method.""" - logger.debug("MyPushReader.flush called") - try: - self._exporter.force_flush() - except Exception: - logger.exception("Failed to flush exporter") - - def shutdown(self, timeout_millis: int = None): - logger.debug("MyPushReader.shutdown called (timeout_millis=%s)", timeout_millis) - try: - if hasattr(self._exporter, "shutdown"): - # some exporters provide shutdown/force_flush style methods - self._exporter.shutdown() - except Exception: - logger.exception("Failed to shutdown exporter") - - -def main(): - print(os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")) - - # exporter = ConsoleMetricExporter() - exporter = AzureMonitorMetricExporter( - connection_string=os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") - ) - # reader = MyPushReader(exporter) - # reader = MetricReader(exporter) - reader = PeriodicExportingMetricReader(exporter) - provider = MeterProvider(metric_readers=[reader]) - metrics.set_meter_provider(provider) - meter = metrics.get_meter(__name__) - - # Use a counter for manual updates (ObservableGauge caused compat issues) - # counter = meter.create_counter( - # "queue_depth", - # unit="messages", - # description="Queue depth (manual)" - # ) - gauge = meter.create_gauge( - "queue_gauge_depth", unit="count", description="Count using gauge" - ) - - # reader.flush() - - # Add a sample value with attributes - # counter.add(10, {"environment": "dev", "queue": "todo"}) - gauge.set(120, {"environment": "dev", "queue": "todo"}) - - # Sleep a fraction to allow any internal scheduling (not required but nicer) - time.sleep(0.1) - - # Force a flush (this will call reader._receive_metrics(timeout_millis=...)) - # try: - # provider.force_flush() - # logger.info("force_flush() completed without exception") - # except Exception as exc: - # logger.exception("provider.force_flush() failed: %s", exc) - - # try: - # provider.shutdown() - # except Exception: - # logger.exception("provider.shutdown() failed") - - -if __name__ == "__main__": - main() From 69ff47df804fe0de84ab50ff25e63cbfca434ff1 Mon Sep 17 00:00:00 2001 From: Alastair Lock Date: Tue, 25 Nov 2025 15:15:52 +0000 Subject: [PATCH 12/13] Remove try/catch on service class so we can see the exception type --- core | 0 .../notifications/services/metrics.py | 58 ++++++++----------- 2 files changed, 25 insertions(+), 33 deletions(-) delete mode 100644 core diff --git a/core b/core deleted file mode 100644 index e69de29bb..000000000 diff --git a/manage_breast_screening/notifications/services/metrics.py b/manage_breast_screening/notifications/services/metrics.py index 762224e36..b3ba7b5c2 100644 --- a/manage_breast_screening/notifications/services/metrics.py +++ b/manage_breast_screening/notifications/services/metrics.py @@ -11,40 +11,32 @@ class Metrics: def __init__(self, environment): - try: - logger.debug((f"Initialising Metrics(environment: {environment})")) + logger.debug((f"Initialising Metrics(environment: {environment})")) - exporter = AzureMonitorMetricExporter( - connection_string=os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") - ) - metrics.set_meter_provider( - MeterProvider(metric_readers=[PeriodicExportingMetricReader(exporter)]) - ) - self.meter = metrics.get_meter(__name__) - self.environment = environment - - except ValueError as e: - logger.warning(f"Skipping Azure Monitor setup: {e}") - self.gauge = None + exporter = AzureMonitorMetricExporter( + connection_string=os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING") + ) + metrics.set_meter_provider( + MeterProvider(metric_readers=[PeriodicExportingMetricReader(exporter)]) + ) + self.meter = metrics.get_meter(__name__) + self.environment = environment def set_gauge_value(self, metric_name, units, description, value): - try: - logger.debug( - ( - f"Metrics: set_gauge_value(metric_name: {metric_name} " - f"units: {units}, description: {description}, value: {value})" - ) - ) - - # Create gauge metric - gauge = self.meter.create_gauge( - metric_name, unit=units, description=description - ) - - # Set metric value - gauge.set( - value, - {"environment": self.environment}, + logger.debug( + ( + f"Metrics: set_gauge_value(metric_name: {metric_name} " + f"units: {units}, description: {description}, value: {value})" ) - except Exception: - logger.exception("Failed to update gauge") + ) + + # Create gauge metric + gauge = self.meter.create_gauge( + metric_name, unit=units, description=description + ) + + # Set metric value + gauge.set( + value, + {"environment": self.environment}, + ) From 3abef938d2eef02d9e6a8829a22dad33247510e2 Mon Sep 17 00:00:00 2001 From: Steve Laing Date: Wed, 26 Nov 2025 11:20:44 +0000 Subject: [PATCH 13/13] Make Metrics a singleton class --- .../management/commands/collect_metrics.py | 8 +--- .../notifications/services/metrics.py | 10 ++++- .../commands/test_collect_metrics.py | 11 ++---- .../tests/services/test_metrics.py | 38 +++++++++---------- 4 files changed, 33 insertions(+), 34 deletions(-) diff --git a/manage_breast_screening/notifications/management/commands/collect_metrics.py b/manage_breast_screening/notifications/management/commands/collect_metrics.py index 8b27e4ca8..b6f1ac90b 100644 --- a/manage_breast_screening/notifications/management/commands/collect_metrics.py +++ b/manage_breast_screening/notifications/management/commands/collect_metrics.py @@ -1,5 +1,4 @@ import logging -import os from django.core.management.base import BaseCommand, CommandError @@ -12,14 +11,9 @@ class Command(BaseCommand): def handle(self, *args, **options): try: - # Create metrics collection - metrics = Metrics( - os.getenv("ENVIRONMENT"), - ) - # Set queue_size metrics for queue in [Queue.RetryMessageBatches(), Queue.MessageStatusUpdates()]: - metrics.set_gauge_value( + Metrics().set_gauge_value( f"queue_size_{queue.queue_name}", "messages", "Queue length", diff --git a/manage_breast_screening/notifications/services/metrics.py b/manage_breast_screening/notifications/services/metrics.py index b3ba7b5c2..1f075abe0 100644 --- a/manage_breast_screening/notifications/services/metrics.py +++ b/manage_breast_screening/notifications/services/metrics.py @@ -10,7 +10,15 @@ class Metrics: - def __init__(self, environment): + _instance = None + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self): + environment = os.getenv("ENVIRONMENT") logger.debug((f"Initialising Metrics(environment: {environment})")) exporter = AzureMonitorMetricExporter( diff --git a/manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py b/manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py index 6df2ff862..511874678 100644 --- a/manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py +++ b/manage_breast_screening/notifications/tests/management/commands/test_collect_metrics.py @@ -26,24 +26,21 @@ def test_handle_sends_queue_lengths(self, mock_metrics_class, mock_queue): mock_queue.RetryMessageBatches.return_value = mock_retry mock_queue.MessageStatusUpdates.return_value = mock_status - mock_metrics_instance = MagicMock() - mock_metrics_class.return_value = mock_metrics_instance - Command().handle() - mock_metrics_class.assert_called_once_with("test") + metrics_instance = mock_metrics_class.return_value - mock_metrics_instance.set_gauge_value.assert_any_call( + metrics_instance.set_gauge_value.assert_any_call( "queue_size_retry_queue", "messages", "Queue length", 8, ) - mock_metrics_instance.set_gauge_value.assert_any_call( + metrics_instance.set_gauge_value.assert_any_call( "queue_size_status_queue", "messages", "Queue length", 2, ) - assert mock_metrics_instance.set_gauge_value.call_count == 2 + assert metrics_instance.set_gauge_value.call_count == 2 diff --git a/manage_breast_screening/notifications/tests/services/test_metrics.py b/manage_breast_screening/notifications/tests/services/test_metrics.py index 67d8ca043..b6d033163 100644 --- a/manage_breast_screening/notifications/tests/services/test_metrics.py +++ b/manage_breast_screening/notifications/tests/services/test_metrics.py @@ -5,6 +5,10 @@ from manage_breast_screening.notifications.services.metrics import Metrics +@patch(f"{Metrics.__module__}.AzureMonitorMetricExporter") +@patch(f"{Metrics.__module__}.PeriodicExportingMetricReader") +@patch(f"{Metrics.__module__}.metrics") +@patch(f"{Metrics.__module__}.MeterProvider") class TestMetrics: @pytest.fixture def conn_string(self): @@ -13,15 +17,8 @@ def conn_string(self): @pytest.fixture(autouse=True) def setup(self, monkeypatch, conn_string): monkeypatch.setenv("APPLICATIONINSIGHTS_CONNECTION_STRING", conn_string) + monkeypatch.setenv("ENVIRONMENT", "dev") - @patch( - "manage_breast_screening.notifications.services.metrics.AzureMonitorMetricExporter" - ) - @patch( - "manage_breast_screening.notifications.services.metrics.PeriodicExportingMetricReader" - ) - @patch("manage_breast_screening.notifications.services.metrics.metrics") - @patch("manage_breast_screening.notifications.services.metrics.MeterProvider") def test_metrics_initialisation( self, mock_meter_provider, @@ -33,7 +30,7 @@ def test_metrics_initialisation( mock_meter = MagicMock() mock_metrics.get_meter.return_value = mock_meter - subject = Metrics(environment="dev") + subject = Metrics() mock_metric_exporter.assert_called_once_with(connection_string=str(conn_string)) mock_metric_reader.assert_called_once_with(mock_metric_exporter.return_value) @@ -50,14 +47,17 @@ def test_metrics_initialisation( assert subject.meter == mock_meter assert subject.environment == "dev" - @patch( - "manage_breast_screening.notifications.services.metrics.AzureMonitorMetricExporter" - ) - @patch( - "manage_breast_screening.notifications.services.metrics.PeriodicExportingMetricReader" - ) - @patch("manage_breast_screening.notifications.services.metrics.metrics") - @patch("manage_breast_screening.notifications.services.metrics.MeterProvider") + def test_metrics_is_a_singleton( + self, + mock_meter_provider, + mock_metrics, + mock_reader, + mock_exporter, + ): + subject = Metrics() + the_same_instance = Metrics() + assert subject == the_same_instance + def test_set_gauge_value( self, mock_meter_provider, @@ -71,7 +71,7 @@ def test_set_gauge_value( mock_metrics.get_meter.return_value = mock_meter mock_meter.create_gauge.return_value = mock_gauge - subject = Metrics(environment="prod") + subject = Metrics() subject.set_gauge_value( metric_name="queue_depth", @@ -88,5 +88,5 @@ def test_set_gauge_value( mock_gauge.set.assert_called_once_with( 999, - {"environment": "prod"}, + {"environment": "dev"}, )