Skip to content

Commit 678b488

Browse files
committed
Merge branch 'feat/otel-test' into dev
2 parents 67d563b + 128612b commit 678b488

File tree

18 files changed

+12711
-16423
lines changed

18 files changed

+12711
-16423
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
{
  "uid": "monitor-dashboard",
  "title": "Monitor",
  "timezone": "browser",
  "schemaVersion": 39,
  "version": 1,
  "refresh": "10s",
  "panels": [
    {
      "id": 1,
      "type": "timeseries",
      "title": "HIBP notify total requests (rate)",
      "gridPos": {
        "x": 0,
        "y": 0,
        "w": 24,
        "h": 8
      },
      "targets": [
        {
          "refId": "A",
          "expr": "sum(rate(hibp_notify_requests_total[1m]))",
          "legendFormat": "requests/s"
        }
      ]
    },
    {
      "id": 2,
      "type": "timeseries",
      "title": "HIBP notify failures (rate)",
      "gridPos": {
        "x": 0,
        "y": 8,
        "w": 24,
        "h": 8
      },
      "targets": [
        {
          "refId": "B",
          "expr": "sum(rate(hibp_notify_request_failures_total[1m]))",
          "legendFormat": "failures/s"
        }
      ]
    },
    {
      "id": 3,
      "type": "timeseries",
      "title": "HIBP notify failures by error (rate)",
      "gridPos": {
        "x": 0,
        "y": 16,
        "w": 24,
        "h": 10
      },
      "targets": [
        {
          "refId": "C",
          "expr": "sum by (error) (rate(hibp_notify_request_failures_total[1m]))",
          "legendFormat": "{{error}}"
        }
      ]
    },
    {
      "id": 4,
      "type": "barchart",
      "title": "Failures by error (increase, last 15m)",
      "gridPos": {
        "x": 0,
        "y": 26,
        "w": 12,
        "h": 8
      },
      "targets": [
        {
          "refId": "D",
          "expr": "sum by (error) (increase(hibp_notify_request_failures_total[15m]))",
          "legendFormat": "{{error}}"
        }
      ]
    },
    {
      "id": 5,
      "type": "stat",
      "title": "Failure rate %",
      "gridPos": {
        "x": 12,
        "y": 26,
        "w": 12,
        "h": 8
      },
      "targets": [
        {
          "refId": "E",
          "expr": "100 * (sum(rate(hibp_notify_request_failures_total[5m])) / clamp_min(sum(rate(hibp_notify_requests_total[5m])), 1e-9))"
        }
      ],
      "options": {
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "orientation": "horizontal",
        "textMode": "auto"
      }
    }
  ]
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
---
# Grafana dashboard provisioning: a single file-based provider.
apiVersion: 1

providers:
  - name: 'Local Dashboards'
    type: 'file'
    # Provisioned dashboards may be deleted and edited from the UI.
    disableDeletion: false
    editable: true
    options:
      # Directory the provider loads dashboard definitions from.
      path: '/etc/grafana/provisioning/dashboards-json'
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
---
# Grafana data source provisioning for the local observability stack.
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    # Pin the UID so other data sources can reference this one. Without an
    # explicit value Grafana generates one, and the `datasourceUid: prometheus`
    # reference in the Tempo entry below would not resolve.
    uid: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: true

  - name: Tempo
    type: tempo
    access: proxy
    url: http://tempo:3200
    editable: true
    jsonData:
      # Prometheus is a metrics backend, so link traces to metrics.
      # (`tracesToLogsV2` expects a logs data source, which this stack
      # does not provision.)
      tracesToMetrics:
        datasourceUid: prometheus
      nodeGraph:
        enabled: true
      search:
        hide: false
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
---
# OpenTelemetry Collector configuration: one OTLP/HTTP receiver feeding a
# traces pipeline and a metrics pipeline.
receivers:
  otlp:
    protocols:
      http:
        # http port for receiving traces, metrics, and logs over http
        endpoint: "0.0.0.0:4318"

exporters:
  # No settings given for the debug exporter.
  debug:

  # Traces are forwarded to Tempo over OTLP without TLS.
  otlp/tempo:
    endpoint: "tempo:4317"
    tls:
      insecure: true

  # Metrics are exposed on this address in Prometheus format.
  prometheus:
    endpoint: "0.0.0.0:9464"

service:
  pipelines:
    traces:
      receivers:
        - otlp
      exporters:
        - debug
        - otlp/tempo
    metrics:
      receivers:
        - otlp
      exporters:
        - debug
        - prometheus

.docker/otel/prometheus.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
---
# Prometheus configuration: one scrape job against a single static target.
global:
  scrape_interval: 5s

scrape_configs:
  - job_name: "otelcol"
    static_configs:
      - targets:
          - "otel-collector:9464"

.docker/otel/tempo.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
---
# Tempo configuration: HTTP server, OTLP/gRPC ingest, local trace storage.
server:
  http_listen_port: 3200

distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          endpoint: "0.0.0.0:4317"

storage:
  trace:
    backend: local
    local:
      path: "/tmp/tempo/traces"

.env.local.example

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,8 @@ GCP_PUBSUB_SUBSCRIPTION_NAME=hibp-cron
6464
PUBSUB_HOST=localhost
6565
PUBSUB_PORT=8085
6666
PUBSUB_EMULATOR_HOST=localhost:8085
67+
68+
# OpenTelemetry configuration
69+
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
70+
OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
71+
OTEL_SERVICE_NAME=monitor

.github/dependabot.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ updates:
7070
- "stylelint"
7171
- "stylelint-scss"
7272
- "stylelint-config-recommended-scss"
73+
otel:
74+
patterns:
75+
- "@opentelemetry/*"
7376
- package-ecosystem: "docker"
7477
directory: "/"
7578
schedule:

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,19 @@ This will automatically provision a pubsub topic named 'hibp-breaches' with a su
194194
docker compose --env-file .env.local up -d
195195
```
196196

197+
### Observability
198+
199+
We use OpenTelemetry for manual and auto-instrumentation of app code. We use Sentry for error tracking and some alerting; other alerts are configured on metrics through Grafana ([Yardstick](https://yardstick.mozilla.org/)). Error-level logs are automatically captured and sent to Sentry. Trace IDs are forwarded to Sentry and can be searched in [Yardstick](https://yardstick.mozilla.org/) for more detailed trace data.
200+
201+
The infrastructure for viewing traces and metrics locally is automatically set up when you follow the [Docker Compose setup](#docker-compose-setup) instructions. It starts 4 services:
202+
203+
- Otel collector (collects metrics, traces, and logs using OTLP)
204+
- Tempo (stores traces for Grafana; in GCP environment we use Cloud Trace)
205+
- Prometheus (scrapes metrics for Grafana; in GCP environment we use Google-Managed Prometheus)
206+
- Grafana (visualization)
207+
208+
To view metrics locally, visit [Grafana](http://localhost:3000/d/monitor-dashboard/monitor?orgId=1). Some default dashboard panels are seeded. To see traces, navigate to the [Explore](http://localhost:3000/explore) pane in Grafana and select the Tempo datasource. Note that the data won't propagate immediately, so wait a minute if you're not seeing expected activity show up.
209+
197210
### In a different shell, set the environment to point at the emulator and run Monitor in dev mode
198211

199212
```sh

docker-compose.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,60 @@ services:
4040
start_interval: 10s
4141
networks:
4242
- shared
43+
# OpenTelemetry Collector service; publishes the OTLP/HTTP and Prometheus ports.
otel-collector:
  image: otel/opentelemetry-collector:0.142.0
  restart: always
  command: ["--config=/etc/otel-collector-config.yaml"]
  volumes:
    # The collector only reads its config, so mount it read-only.
    - ./.docker/otel/otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
  networks:
    - shared
  ports:
    - "4318:4318"  # OTLP HTTP receiver
    - "9464:9464"  # Prometheus scrape endpoint
55+
# Tempo service; config is bind-mounted read-only.
tempo:
  image: "grafana/tempo:2.4.1"
  command:
    - "-config.file=/etc/tempo.yaml"
  volumes:
    - "./.docker/otel/tempo.yaml:/etc/tempo.yaml:ro"
  ports:
    - "3200:3200"  # Tempo query endpoint
  networks:
    - shared
65+
66+
# Prometheus service; config is bind-mounted read-only.
prometheus:
  image: "prom/prometheus:v2.49.1"
  command:
    - "--config.file=/etc/prometheus/prometheus.yaml"
  volumes:
    - "./.docker/otel/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro"
  ports:
    - "9090:9090"
  networks:
    - shared
  # Start after the collector service.
  depends_on:
    - otel-collector
78+
# Grafana service with anonymous Admin access and read-only provisioning mounts.
grafana:
  image: "grafana/grafana:10.3.3"
  environment:
    GF_AUTH_ANONYMOUS_ENABLED: "true"
    GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
    # Skip the initial admin password change UI
    GF_SECURITY_ADMIN_USER: "admin"
    GF_SECURITY_ADMIN_PASSWORD: "admin"
  volumes:
    - "./.docker/otel/grafana/provisioning:/etc/grafana/provisioning:ro"
    - "./.docker/otel/grafana/provisioning/dashboards-json:/etc/grafana/provisioning/dashboards-json:ro"
  ports:
    - "3000:3000"
  networks:
    - shared
  # Start after both backing services.
  depends_on:
    - tempo
    - prometheus
4397

4498
networks:
4599
shared:

0 commit comments

Comments
 (0)