Skip to content

Commit b68dc0a

Browse files
author
Ben Corlett
committed
Otel spike
1 parent 745fccf commit b68dc0a

File tree

9 files changed

+552
-58
lines changed

9 files changed

+552
-58
lines changed

app/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@
135135
from app.notify_client.upload_api_client import upload_api_client # noqa
136136
from app.notify_client.user_api_client import user_api_client # noqa
137137
from app.notify_session import NotifyAdminSessionInterface
138+
from app.otel.metrics import otel_metrics
139+
from app.otel.traces import otel_traces
138140
from app.s3_client.logo_client import logo_client
139141
from app.template_previews import template_preview_client # noqa
140142
from app.url_converters import (
@@ -179,6 +181,9 @@ def create_app(application):
179181

180182
init_app(application)
181183

184+
otel_metrics.init_app(application)
185+
otel_traces.init_app(application)
186+
182187
if "extensions" not in application.jinja_options:
183188
application.jinja_options["extensions"] = []
184189

app/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ class Config:
99
DANGEROUS_SALT = os.environ.get("DANGEROUS_SALT")
1010
ZENDESK_API_KEY = os.environ.get("ZENDESK_API_KEY")
1111

12+
OTEL_EXPORT_TYPE = os.getenv("OTEL_EXPORT_TYPE", "otlp")
13+
OTEL_COLLECTOR_ENDPOINT = os.getenv("OTEL_COLLECTOR_ENDPOINT", "localhost:4317")
14+
OTEL_INSTRUMENTATIONS = os.getenv("OTEL_INSTRUMENTATIONS", "wsgi,celery,flask,redis,sqlalchemy,requests")
15+
1216
# If we're not on Cloud Foundry, we can reach this app from localhost, but on Cloud Foundry it's different.
1317
ADMIN_BASE_URL = os.environ.get("ADMIN_BASE_URL", "http://localhost:6012")
1418

@@ -118,6 +122,7 @@ class Development(Config):
118122
S3_BUCKET_REPORT_REQUESTS_DOWNLOAD = "development-report-requests-download"
119123

120124
LOGO_CDN_DOMAIN = "static-logos.notify.tools"
125+
OTEL_EXPORT_TYPE = os.getenv("OTEL_EXPORT_TYPE", "none")
121126

122127
ADMIN_CLIENT_SECRET = "dev-notify-secret-key"
123128
DANGEROUS_SALT = "dev-notify-salt"
@@ -155,6 +160,7 @@ class Test(Development):
155160

156161
ASSET_DOMAIN = "static.example.com"
157162
ASSET_PATH = "https://static.example.com/"
163+
OTEL_EXPORT_TYPE = os.getenv("OTEL_EXPORT_TYPE", "none")
158164

159165

160166
class CloudFoundryConfig(Config):

app/main/views/templates.py

Lines changed: 71 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
from notifications_utils.pdf import pdf_page_count
2323
from notifications_utils.s3 import s3download
2424
from notifications_utils.template import Template
25+
from opentelemetry import trace
26+
from opentelemetry.baggage import set_baggage
27+
from opentelemetry.context import attach, detach
2528
from pypdf.errors import PdfReadError
2629
from requests import RequestException
2730

@@ -719,69 +722,80 @@ def abort_for_unauthorised_bilingual_letters_or_invalid_options(language: str |
719722
def edit_service_template(service_id, template_id, language=None):
720723
template = current_service.get_template_with_user_permission_or_403(template_id, current_user)
721724

722-
if template.template_type not in current_service.available_template_types:
723-
return redirect(
724-
url_for(
725-
".action_blocked",
726-
service_id=service_id,
727-
notification_type=template.template_type,
728-
return_to="view_template",
729-
template_id=template.id,
730-
)
731-
)
725+
ctx = set_baggage("template_id", str(template_id))
726+
token = attach(ctx)
732727

733-
abort_for_unauthorised_bilingual_letters_or_invalid_options(language, template)
728+
with trace.get_tracer(__name__).start_as_current_span("edit_service_template") as span:
729+
try:
730+
if template.template_type not in current_service.available_template_types:
731+
return redirect(
732+
url_for(
733+
".action_blocked",
734+
service_id=service_id,
735+
notification_type=template.template_type,
736+
return_to="view_template",
737+
template_id=template.id,
738+
)
739+
)
734740

735-
form = get_template_form(template.template_type, language=language)(**template._template)
741+
abort_for_unauthorised_bilingual_letters_or_invalid_options(language, template)
736742

737-
if form.validate_on_submit():
738-
new_template = get_template(
739-
template._template | form.new_template_data,
740-
current_service,
741-
)
742-
template_change = template.compare_to(new_template)
743+
form = get_template_form(template.template_type, language=language)(**template._template)
743744

744-
if template_change.placeholders_added and not request.form.get("confirm") and current_service.api_keys:
745-
return render_template(
746-
"views/templates/breaking-change.html",
747-
template_change=template_change,
748-
new_template=new_template,
749-
form=form,
750-
)
751-
try:
752-
service_api_client.update_service_template(
753-
service_id=service_id,
754-
template_id=template_id,
755-
**form.new_template_data,
756-
)
757-
except HTTPError as e:
758-
if e.status_code == 400:
759-
if "content" in e.message and any("character count greater than" in x for x in e.message["content"]):
760-
form.template_content.errors.extend(e.message["content"])
761-
elif "content" in e.message and any(x == QR_CODE_TOO_LONG for x in e.message["content"]):
762-
form.template_content.errors.append(
763-
"Cannot create a usable QR code - the link you entered is too long"
745+
if form.validate_on_submit():
746+
new_template = get_template(
747+
template._template | form.new_template_data,
748+
current_service,
749+
)
750+
template_change = template.compare_to(new_template)
751+
752+
span.add_event("This is an example span event")
753+
754+
if template_change.placeholders_added and not request.form.get("confirm") and current_service.api_keys:
755+
return render_template(
756+
"views/templates/breaking-change.html",
757+
template_change=template_change,
758+
new_template=new_template,
759+
form=form,
764760
)
761+
try:
762+
service_api_client.update_service_template(
763+
service_id=service_id,
764+
template_id=template_id,
765+
**form.new_template_data,
766+
)
767+
except HTTPError as e:
768+
if e.status_code == 400:
769+
if "content" in e.message and any(
770+
"character count greater than" in x for x in e.message["content"]
771+
):
772+
form.template_content.errors.extend(e.message["content"])
773+
elif "content" in e.message and any(x == QR_CODE_TOO_LONG for x in e.message["content"]):
774+
form.template_content.errors.append(
775+
"Cannot create a usable QR code - the link you entered is too long"
776+
)
777+
else:
778+
raise e
779+
else:
780+
raise e
765781
else:
766-
raise e
767-
else:
768-
raise e
769-
else:
770-
editing_english_content_in_bilingual_letter = (
771-
template.template_type == "letter" and template.welsh_page_count and language != "welsh"
772-
)
773-
return redirect(
774-
url_for(
775-
"main.view_template",
776-
service_id=service_id,
777-
template_id=template_id,
778-
**(
779-
{"_anchor": "first-page-of-english-in-bilingual-letter"}
780-
if editing_english_content_in_bilingual_letter
781-
else {}
782-
),
783-
)
784-
)
782+
editing_english_content_in_bilingual_letter = (
783+
template.template_type == "letter" and template.welsh_page_count and language != "welsh"
784+
)
785+
return redirect(
786+
url_for(
787+
"main.view_template",
788+
service_id=service_id,
789+
template_id=template_id,
790+
**(
791+
{"_anchor": "first-page-of-english-in-bilingual-letter"}
792+
if editing_english_content_in_bilingual_letter
793+
else {}
794+
),
795+
)
796+
)
797+
finally:
798+
detach(token)
785799

786800
return render_template(
787801
f"views/edit-{template.template_type}-template.html",

app/otel/decorators.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import functools
2+
import time
3+
4+
from app.otel.metrics import otel_metrics
5+
6+
7+
def otel(counter_name=None, histogram_name=None, attributes=None):
    """Build a decorator that counts and times calls to the wrapped function.

    Each call adds 1 to a counter and records the elapsed time (a
    ``time.monotonic()`` difference) in a histogram. Both measurements carry
    the caller-supplied ``attributes`` plus ``function_name`` and a ``status``
    of ``"success"`` or ``"error"``.

    Instruments are stored as attributes on the shared ``otel_metrics`` object
    and created from ``otel_metrics.meter`` the first time they are needed.
    Because the lookup is a plain attribute lookup, an attribute that already
    exists under the same name is reused as the instrument, so names must not
    collide with unrelated attributes of ``otel_metrics``.

    If no meter has been configured yet (``otel_metrics.meter`` is ``None``)
    and the instrument does not exist, the call is simply not measured; the
    wrapped function still runs normally.

    Args:
        counter_name: Name of the counter. Defaults to the wrapped function's
            ``__name__``.
        histogram_name: Name of the histogram. Defaults to
            ``"<function __name__>_time"``.
        attributes: Optional mapping of extra attributes attached to every
            measurement.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns and re-raises whatever it raises.
    """
    if attributes is None:
        attributes = {}

    def time_function(func):
        function_name = func.__name__
        # The instrument names are fixed at decoration time; no need to
        # rebuild them on every call.
        c_name = counter_name or function_name
        h_name = histogram_name or f"{function_name}_time"

        def get_instrument(name, create):
            """Return the instrument stored as ``otel_metrics.<name>``.

            Creates and stores it via ``create(meter)`` on first use. Returns
            ``None`` when it does not exist and no meter is available yet.
            """
            if not hasattr(otel_metrics, name):
                meter = getattr(otel_metrics, "meter", None)
                if meter is None:
                    # Metrics were never initialised: instrumentation must not
                    # break the function it decorates.
                    return None
                setattr(otel_metrics, name, create(meter))
            return getattr(otel_metrics, name)

        def create_counter(meter):
            return meter.create_counter(c_name, description=f"Calls to the {func.__name__} task")

        def create_histogram(meter):
            return meter.create_histogram(
                h_name,
                description=f"time taken to execute {func.__name__} function",
                explicit_bucket_boundaries_advisory=getattr(otel_metrics, "default_histogram_bucket", None),
            )

        def record(counter, histogram, start_time, status):
            """Record one call with the given status on both instruments."""
            elapsed_time = time.monotonic() - start_time
            labels = {**attributes, "function_name": function_name, "status": status}
            if counter is not None:
                counter.add(amount=1, attributes=labels)
            if histogram is not None:
                histogram.record(amount=elapsed_time, attributes=labels)

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            start_time = time.monotonic()
            counter = get_instrument(c_name, create_counter)
            histogram = get_instrument(h_name, create_histogram)

            try:
                result = func(*args, **kwargs)
            except Exception:
                record(counter, histogram, start_time, "error")
                # Bare raise re-raises the active exception unchanged.
                raise

            # Recorded outside the try block so that a failure while recording
            # a successful call is not also counted as an "error" call.
            record(counter, histogram, start_time, "success")
            return result

        return wrapper

    return time_function

app/otel/metrics.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
from opentelemetry import metrics
2+
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
3+
from opentelemetry.sdk.metrics import MeterProvider
4+
from opentelemetry.sdk.metrics.export import (
5+
ConsoleMetricExporter,
6+
PeriodicExportingMetricReader,
7+
)
8+
from opentelemetry.sdk.resources import Resource
9+
10+
11+
class Metrics:
    """Holder for the application's OpenTelemetry meter and shared instruments.

    A module-level instance, ``otel_metrics``, is created at import time.
    ``init_app`` must be called before ``meter`` can be used.
    """

    def __init__(self):
        # Set by init_app(); None until then.
        self.meter = None
        # Passed as the advisory bucket boundaries for histograms created by
        # the ``otel`` decorator in app/otel/decorators.py.
        # NOTE(review): explicit bucket boundaries normally omit +inf because
        # the overflow bucket is implicit; confirm the trailing float("inf")
        # is accepted by the collector/backend in use.
        self.default_histogram_bucket = [
            0.005,
            0.01,
            0.025,
            0.05,
            0.075,
            0.1,
            0.25,
            0.5,
            0.75,
            1.0,
            2.5,
            5.0,
            7.5,
            10.0,
            float("inf"),
        ]

    def init_app(self, app):
        """Configure the global meter provider from ``app.config``.

        Config keys read:
            OTEL_EXPORT_TYPE: ``"console"`` exports to the console, ``"otlp"``
                exports to an OTLP collector, ``"none"`` (the default when
                unset) installs a provider with no reader. Any other value is
                logged as a warning and treated like ``"none"``.
            OTEL_COLLECTOR_ENDPOINT: collector address for ``"otlp"``
                (default ``"localhost:4317"``); the connection is insecure.
            OTEL_SERVICE_NAME: value of the ``service.name`` resource
                attribute (default ``"notifications-admin"``).

        Args:
            app: The Flask application; ``app.config`` and ``app.logger`` are used.
        """
        # ``or "none"`` also covers a config value that is explicitly None.
        export_mode = (app.config.get("OTEL_EXPORT_TYPE") or "none").lower()
        metric_readers = []

        if export_mode == "console":
            app.logger.info("OpenTelemetry metrics will be exported to console")
            metric_readers.append(PeriodicExportingMetricReader(ConsoleMetricExporter()))
        elif export_mode == "otlp":
            endpoint = app.config.get("OTEL_COLLECTOR_ENDPOINT", "localhost:4317")
            app.logger.info("OpenTelemetry metrics will be exported to OTLP collector at %s", endpoint)
            otlp_exporter = OTLPMetricExporter(endpoint=endpoint, insecure=True)
            # Metrics will be exported every 60 seconds with a 30 second timeout by default.
            # The following environment variables can be used to change this:
            # OTEL_METRIC_EXPORT_INTERVAL
            # OTEL_METRIC_EXPORT_TIMEOUT
            metric_readers.append(PeriodicExportingMetricReader(otlp_exporter))
        elif export_mode != "none":
            # A typo in the setting would otherwise silently disable export.
            app.logger.warning(
                "Unrecognised OTEL_EXPORT_TYPE %r; OpenTelemetry metrics will not be exported",
                export_mode,
            )

        # This is the admin app; the previous hard-coded "notifications-api"
        # attributed its telemetry to a different service.
        service_name = app.config.get("OTEL_SERVICE_NAME", "notifications-admin")
        resource = Resource.create({"service.name": service_name})
        provider = MeterProvider(metric_readers=metric_readers, resource=resource)
        metrics.set_meter_provider(provider)
        self.meter = metrics.get_meter(__name__)

        self.create_counters()
        self.create_histograms()
        self.create_gauges()

    def create_counters(self):
        """Hook for creating shared counters; currently a no-op."""
        pass

    def create_histograms(self):
        """Hook for creating shared histograms; currently a no-op."""
        pass

    def create_gauges(self):
        """Hook for creating shared gauges; currently a no-op."""
        pass


# Initialize the metrics instance singleton
otel_metrics = Metrics()

0 commit comments

Comments
 (0)