From b43fdc6d9b741a6d133ff61e72a8d6ee7c2b7038 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Mon, 16 Mar 2026 14:29:46 +0000 Subject: [PATCH 1/4] WIP Otel --- entrypoint.sh | 2 +- requirements.in | 2 ++ requirements.txt | 56 ++++++++++++++++++++++++++++++- requirements_for_test.txt | 69 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+), 2 deletions(-) diff --git a/entrypoint.sh b/entrypoint.sh index b371815eab..0996d8446c 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -5,7 +5,7 @@ export PROMETHEUS_MULTIPROC_DIR="/tmp" CONCURRENCY=${CONCURRENCY:-4} # Define a common command prefix -WORKER_CMD="celery --quiet -A run_celery.notify_celery worker --logfile=/dev/null --concurrency=$CONCURRENCY" +WORKER_CMD="opentelemetry-instrument celery --quiet -A run_celery.notify_celery worker --logfile=/dev/null --concurrency=$CONCURRENCY" COMMON_CMD="$WORKER_CMD -Q" case "$1" in diff --git a/requirements.in b/requirements.in index df9f45e3d7..b0f05cf002 100644 --- a/requirements.in +++ b/requirements.in @@ -30,3 +30,5 @@ notifications-utils @ git+https://github.com/alphagov/notifications-utils.git@11 git+https://github.com/alphagov/gds_metrics_python.git@6f1840a57b6fb1ee40b7e84f2f18ec229de8aa72 sentry-sdk[flask,celery,sqlalchemy]~=1.45 + +opentelemetry-distro[otlp] diff --git a/requirements.txt b/requirements.txt index 3c6eaaea46..f1680ac6bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -94,10 +94,16 @@ fqdn==1.5.1 # via jsonschema gds-metrics @ git+https://github.com/alphagov/gds_metrics_python.git@6f1840a57b6fb1ee40b7e84f2f18ec229de8aa72 # via -r requirements.in +googleapis-common-protos==1.73.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http govuk-bank-holidays==0.19 # via notifications-utils greenlet==3.3.2 # via eventlet +grpcio==1.78.0 + # via opentelemetry-exporter-otlp-proto-grpc gunicorn==25.1.0 # via # -r requirements.in @@ -155,7 +161,42 @@ notifications-python-client==10.0.1 notifications-utils @ git+https://github.com/alphagov/notifications-utils.git@900cb380b9f92b23886586207f44c99893d2b56b # via -r requirements.in opentelemetry-api==1.40.0 - # via notifications-utils + # via + # notifications-utils + # opentelemetry-distro + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-distro==0.61b0 + # via -r requirements.in +opentelemetry-exporter-otlp==1.40.0 + # via opentelemetry-distro +opentelemetry-exporter-otlp-proto-common==1.40.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.40.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.40.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.61b0 + # via opentelemetry-distro +opentelemetry-proto==1.40.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.40.0 + # via + # opentelemetry-distro + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.61b0 + # via + # opentelemetry-instrumentation + # opentelemetry-sdk ordered-set==4.1.0 # via notifications-utils packaging==26.0 @@ -163,12 +204,17 @@ packaging==26.0 # gunicorn # kombu # marshmallow + # opentelemetry-instrumentation phonenumbers==9.0.25 # via notifications-utils prometheus-client==0.24.1 # via gds-metrics prompt-toolkit==3.0.52 # via click-repl +protobuf==6.33.5 + # via + # googleapis-common-protos + # opentelemetry-proto psutil==6.1.1 # via -r requirements.in psycopg2-binary==2.9.10 @@ -206,6 +252,7 @@ requests==2.32.5 # govuk-bank-holidays # notifications-python-client # notifications-utils + # opentelemetry-exporter-otlp-proto-http rfc3339-validator==0.1.4 # via jsonschema rfc3987==1.3.8 @@ -238,7 +285,12 @@ statsd==4.0.1 typing-extensions==4.15.0 # via # alembic + # grpcio # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-sdk + # opentelemetry-semantic-conventions # sqlalchemy tzdata==2025.3 # via @@ -268,5 +320,7 @@ werkzeug==3.1.6 # via flask wheel==0.44.0 # via click-datetime +wrapt==1.17.3 + # via opentelemetry-instrumentation zipp==3.23.0 # via importlib-metadata diff --git a/requirements_for_test.txt b/requirements_for_test.txt index d2c4602207..b0a1e1512e 100644 --- a/requirements_for_test.txt +++ b/requirements_for_test.txt @@ -143,6 +143,11 @@ freezegun==1.5.5 # via -r requirements_for_test_common.in gds-metrics @ git+https://github.com/alphagov/gds_metrics_python.git@6f1840a57b6fb1ee40b7e84f2f18ec229de8aa72 # via -r requirements.txt +googleapis-common-protos==1.73.0 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http govuk-bank-holidays==0.19 # via # -r requirements.txt @@ -151,6 +156,10 @@ greenlet==3.3.2 # via # -r requirements.txt # eventlet +grpcio==1.78.0 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp-proto-grpc gunicorn==25.1.0 # via # -r requirements.txt @@ -234,6 +243,50 @@ opentelemetry-api==1.40.0 # via # -r requirements.txt # notifications-utils + # opentelemetry-distro + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-distro==0.61b0 + # via -r requirements.txt +opentelemetry-exporter-otlp==1.40.0 + # via -r requirements.txt +opentelemetry-exporter-otlp-proto-common==1.40.0 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.40.0 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.40.0 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.61b0 + # via + # -r requirements.txt + # opentelemetry-distro +opentelemetry-proto==1.40.0 + # via + # -r requirements.txt + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.40.0 + # via + # -r requirements.txt + # opentelemetry-distro + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.61b0 + # via + # -r requirements.txt + # opentelemetry-instrumentation + # opentelemetry-sdk ordered-set==4.1.0 # via # -r requirements.txt @@ -244,6 +297,7 @@ packaging==26.0 # gunicorn # kombu # marshmallow + # opentelemetry-instrumentation # pytest pathspec==1.0.4 # via mypy @@ -261,6 +315,11 @@ prompt-toolkit==3.0.52 # via # -r requirements.txt # click-repl +protobuf==6.33.5 + # via + # -r requirements.txt + # googleapis-common-protos + # opentelemetry-proto psutil==6.1.1 # via -r requirements.txt psycopg2-binary==2.9.10 @@ -334,6 +393,7 @@ requests==2.32.5 # moto # notifications-python-client # notifications-utils + # opentelemetry-exporter-otlp-proto-http # requests-mock # responses requests-mock==1.12.1 @@ -399,8 +459,13 @@ typing-extensions==4.15.0 # -r requirements.txt # alembic # beautifulsoup4 + # grpcio # mypy # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-sdk + # opentelemetry-semantic-conventions # sqlalchemy tzdata==2025.3 # via @@ -443,6 +508,10 @@ wheel==0.44.0 # via # -r requirements.txt # click-datetime +wrapt==1.17.3 + # via + # -r requirements.txt + # opentelemetry-instrumentation xmltodict==1.0.4 # via moto zipp==3.23.0 From 9b8581db2f755d8f412ee97f76d453c10fc178db Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 18 Mar 2026 10:08:54 +0000 Subject: [PATCH 2/4] SmsClient: Inline function record_outcome --- app/clients/sms/__init__.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/app/clients/sms/__init__.py b/app/clients/sms/__init__.py index c4df4a0f99..c684249c4d 100644 --- a/app/clients/sms/__init__.py +++ b/app/clients/sms/__init__.py @@ -45,8 +45,11 @@ def __init__(self, current_app, statsd_client): **adapter.poolmanager.connection_pool_kw, } - def record_outcome(self, success): - if success: + def send_sms(self, to, content, reference, international, sender): + start_time = monotonic() + + try: + response = self.try_send_sms(to, content, reference, international, sender) self.current_app.logger.info( "Provider request for %s succeeded", self.name, @@ -55,7 +58,7 @@ def record_outcome(self, success): }, ) self.statsd_client.incr(f"clients.{self.name}.success") - else: + except SmsClientResponseException as e: self.statsd_client.incr(f"clients.{self.name}.error") self.current_app.logger.warning( "Provider request for %s failed", @@ -64,15 +67,6 @@ def record_outcome(self, success): "provider_name": self.name, }, ) - - def send_sms(self, to, content, reference, international, sender): - start_time = monotonic() - - try: - response = self.try_send_sms(to, content, reference, international, sender) - self.record_outcome(True) - except SmsClientResponseException as e: - self.record_outcome(False) raise e finally: elapsed_time = monotonic() - start_time From 6f953bb4c70c0b6d018d95386cce477e23ce56fe Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 18 Mar 2026 10:11:03 +0000 Subject: [PATCH 3/4] SmsClient: Merge succeeded/failed log messages into finished message We capture these messages at exactly the same time, down to the millisecond, but only annotate the "finished" message with notification_id and duration. Let's just merge them all together into a single message capturing everything. --- app/clients/sms/__init__.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/app/clients/sms/__init__.py b/app/clients/sms/__init__.py index c684249c4d..cbfe185f49 100644 --- a/app/clients/sms/__init__.py +++ b/app/clients/sms/__init__.py @@ -47,40 +47,30 @@ def __init__(self, current_app, statsd_client): def send_sms(self, to, content, reference, international, sender): start_time = monotonic() + status = "succeeded" try: response = self.try_send_sms(to, content, reference, international, sender) - self.current_app.logger.info( - "Provider request for %s succeeded", - self.name, - extra={ - "provider_name": self.name, - }, - ) self.statsd_client.incr(f"clients.{self.name}.success") except SmsClientResponseException as e: + status = "failed" self.statsd_client.incr(f"clients.{self.name}.error") - self.current_app.logger.warning( - "Provider request for %s failed", - self.name, - extra={ - "provider_name": self.name, - }, - ) raise e finally: elapsed_time = monotonic() - start_time self.statsd_client.timing(f"clients.{self.name}.request-time", elapsed_time) self.current_app.logger.info( - "%s request for %s finished in %.4g", + "Provider %s request for %s %s in %.4g", self.name, reference, + status, elapsed_time, extra={ "provider_name": self.name, # for SMS, we happen to use the notification id as the "reference" for the provider "notification_id": reference, "duration": elapsed_time, + "status": status, }, ) From 501086835d13074ad9825336306f526d1f9a220c Mon Sep 17 00:00:00 2001 From: James Robinson Date: Thu, 19 Mar 2026 16:14:20 +0000 Subject: [PATCH 4/4] utils --- .pre-commit-config.yaml | 2 +- requirements.in | 2 +- requirements.txt | 2 +- requirements_for_test.txt | 2 +- requirements_for_test_common.in | 2 +- ruff.toml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5c312deb36..9ef1f0e0c2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -# This file was automatically copied from notifications-utils@111.0.0 +# This file was automatically copied from notifications-utils@112.0.0 repos: - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/requirements.in b/requirements.in index b0f05cf002..01e3ed2263 100644 --- a/requirements.in +++ b/requirements.in @@ -25,7 +25,7 @@ psutil>=6.0.0,<7.0.0 notifications-python-client==10.0.1 # Run `make bump-utils` to update to the latest version -notifications-utils @ git+https://github.com/alphagov/notifications-utils.git@111.0.0 +notifications-utils @ git+https://github.com/alphagov/notifications-utils.git@jar-otel-semconv git+https://github.com/alphagov/gds_metrics_python.git@6f1840a57b6fb1ee40b7e84f2f18ec229de8aa72 diff --git a/requirements.txt b/requirements.txt index f1680ac6bb..648b5b122b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -158,7 +158,7 @@ mistune==0.8.4 # via notifications-utils notifications-python-client==10.0.1 # via -r requirements.in -notifications-utils @ git+https://github.com/alphagov/notifications-utils.git@900cb380b9f92b23886586207f44c99893d2b56b +notifications-utils @ git+https://github.com/alphagov/notifications-utils.git@70e245b5bdd9234ddd8f1f1adbee3401ed1909cf # via -r requirements.in opentelemetry-api==1.40.0 # via diff --git a/requirements_for_test.txt b/requirements_for_test.txt index b0a1e1512e..1ad9d963be 100644 --- a/requirements_for_test.txt +++ b/requirements_for_test.txt @@ -237,7 +237,7 @@ mypy-extensions==1.1.0 # via mypy notifications-python-client==10.0.1 # via -r requirements.txt -notifications-utils @ git+https://github.com/alphagov/notifications-utils.git@900cb380b9f92b23886586207f44c99893d2b56b +notifications-utils @ git+https://github.com/alphagov/notifications-utils.git@70e245b5bdd9234ddd8f1f1adbee3401ed1909cf # via -r requirements.txt opentelemetry-api==1.40.0 # via diff --git a/requirements_for_test_common.in b/requirements_for_test_common.in index 6e80cf349f..60a2f39559 100644 --- a/requirements_for_test_common.in +++ b/requirements_for_test_common.in @@ -1,4 +1,4 @@ -# This file was automatically copied from notifications-utils@111.0.0 +# This file was automatically copied from notifications-utils@112.0.0 beautifulsoup4 pytest diff --git a/ruff.toml b/ruff.toml index c4c9a25c33..204c904a59 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,4 +1,4 @@ -# This file was automatically copied from notifications-utils@111.0.0 +# This file was automatically copied from notifications-utils@112.0.0 extend-exclude = [ "migrations/versions/",