Skip to content

Commit ce66ab8

Browse files
authored
feat(localenv): add trace collection (with Tempo) in local playground (#2816)
* feat: add telemetry stack to localenv under command * chore: remove tempo * chore(localenv): update prometheus scrape interval * chore: explicitly set otel collector endpoint * chore: change prometheus scrape interval to 15s * chore: update dashboard queries * chore: add readme * chore: set auto-refresh within grafana dashboard * chore: add psql command * feat(backend): add trace auto-instrumentation * feat(localenv): add tempo to telemetry stack * feat(localenv): add example panel of traces in the grafana dashboard * chore: format for example dashboard * chore: rearrange dashboard * chore(backend): add instrumentation only if enabled
1 parent e65e032 commit ce66ab8

File tree

12 files changed

+683
-68
lines changed

12 files changed

+683
-68
lines changed

localenv/telemetry/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ The telemetry components include:
88

99
- **OpenTelemetry Collector**: Collects and processes telemetry data from `cloud-nine-backend` and `happy-life-backend` services.
1010
- **Prometheus**: Scrapes metrics from the OpenTelemetry collector and stores them.
11-
- **Grafana**: Visualizes metrics from Prometheus.
11+
- **Tempo**: Ingests traces from the OpenTelemetry collector and stores them.
12+
- **Grafana**: Visualizes metrics from Prometheus and traces from Tempo.
1213

1314
## Usage
1415

localenv/telemetry/docker-compose.yml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,20 @@ services:
22
cloud-nine-backend:
33
environment:
44
ENABLE_TELEMETRY: true
5+
ENABLE_TELEMETRY_TRACES: true
56
LIVENET: false
67
OPEN_TELEMETRY_COLLECTOR_URLS: http://otel-collector:4317
8+
OPEN_TELEMETRY_TRACE_COLLECTOR_URLS: http://otel-collector:4317
79

810
happy-life-backend:
911
environment:
1012
ENABLE_TELEMETRY: true
13+
ENABLE_TELEMETRY_TRACES: true
1114
LIVENET: false
1215
OPEN_TELEMETRY_COLLECTOR_URLS: http://otel-collector:4317
16+
OPEN_TELEMETRY_TRACE_COLLECTOR_URLS: http://otel-collector:4317
1317

1418
otel-collector:
15-
hostname: otel-collector
1619
image: otel/opentelemetry-collector:latest
1720
command: "--config=/etc/otel-collector-config.yaml"
1821
networks:
@@ -30,6 +33,17 @@ services:
3033
ports:
3134
- "9090:9090"
3235

36+
tempo:
37+
image: grafana/tempo:latest
38+
command: "-config.file=/etc/tempo.yaml"
39+
networks:
40+
- rafiki
41+
ports:
42+
- "3200:3200"
43+
volumes:
44+
- ../telemetry/tempo.yaml:/etc/tempo.yaml
45+
- tempo-data:/var/tempo
46+
3347
grafana:
3448
image: grafana/grafana:latest
3549
networks:
@@ -45,3 +59,4 @@ services:
4559

4660
volumes:
4761
grafana_storage:
62+
tempo-data:

localenv/telemetry/grafana/provisioning/dashboards/example.json

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
"editable": true,
1919
"fiscalYearStartMonth": 0,
2020
"graphTooltip": 0,
21-
"id": 1,
2221
"links": [],
2322
"panels": [
2423
{
@@ -119,6 +118,109 @@
119118
"title": "Transaction Amount",
120119
"type": "timeseries"
121120
},
121+
{
122+
"datasource": {
123+
"type": "tempo",
124+
"uid": "P214B5B846CF3925F"
125+
},
126+
"fieldConfig": {
127+
"defaults": {
128+
"color": {
129+
"mode": "thresholds"
130+
},
131+
"custom": {
132+
"align": "auto",
133+
"cellOptions": {
134+
"type": "auto"
135+
},
136+
"inspect": false
137+
},
138+
"mappings": [],
139+
"thresholds": {
140+
"mode": "absolute",
141+
"steps": [
142+
{
143+
"color": "green",
144+
"value": null
145+
},
146+
{
147+
"color": "red",
148+
"value": 80
149+
}
150+
]
151+
}
152+
},
153+
"overrides": []
154+
},
155+
"gridPos": {
156+
"h": 8,
157+
"w": 12,
158+
"x": 12,
159+
"y": 0
160+
},
161+
"id": 4,
162+
"options": {
163+
"cellHeight": "sm",
164+
"footer": {
165+
"countRows": false,
166+
"fields": "",
167+
"reducer": ["sum"],
168+
"show": false
169+
},
170+
"showHeader": true,
171+
"sortBy": [
172+
{
173+
"desc": true,
174+
"displayName": "Duration"
175+
}
176+
]
177+
},
178+
"pluginVersion": "11.1.0",
179+
"targets": [
180+
{
181+
"datasource": {
182+
"type": "tempo",
183+
"uid": "P214B5B846CF3925F"
184+
},
185+
"filters": [
186+
{
187+
"id": "9bab4a0a",
188+
"operator": "=",
189+
"scope": "span"
190+
},
191+
{
192+
"id": "service-name",
193+
"operator": "=",
194+
"scope": "resource",
195+
"tag": "service.name",
196+
"value": ["RAFIKI_NETWORK"],
197+
"valueType": "string"
198+
},
199+
{
200+
"id": "span-name",
201+
"operator": "=",
202+
"scope": "span",
203+
"tag": "name",
204+
"value": [],
205+
"valueType": "string"
206+
},
207+
{
208+
"id": "min-duration",
209+
"operator": ">",
210+
"tag": "duration",
211+
"value": "100ms",
212+
"valueType": "duration"
213+
}
214+
],
215+
"limit": 20,
216+
"queryType": "traceqlSearch",
217+
"refId": "A",
218+
"tableType": "traces"
219+
}
220+
],
221+
"title": "Traces > 100ms",
222+
"type": "table"
223+
},
122224
{
123225
"datasource": {
124226
"type": "prometheus",
@@ -339,6 +441,6 @@
339441
"timezone": "browser",
340442
"title": "Example Dashboard",
341443
"uid": "fdr58stwkr6yof",
342-
"version": 1,
444+
"version": 2,
343445
"weekStart": ""
344446
}

localenv/telemetry/grafana/provisioning/datasources/datasources.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,8 @@ datasources:
99
isDefault: true
1010
version: 1
1111
editable: true
12+
- name: Tempo
13+
type: tempo
14+
access: proxy
15+
url: http://tempo:3200
16+
editable: true

localenv/telemetry/otel-collector-config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,18 @@ exporters:
1212
verbosity: detailed
1313
prometheus:
1414
endpoint: 0.0.0.0:8491
15+
otlp:
16+
endpoint: http://tempo:8492
17+
tls:
18+
insecure: true
1519

1620
service:
1721
pipelines:
1822
metrics:
1923
receivers: [otlp]
2024
processors: [batch]
2125
exporters: [prometheus, debug]
26+
traces:
27+
receivers: [otlp]
28+
processors: [batch]
29+
exporters: [otlp, debug]

localenv/telemetry/tempo.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Tempo configuration for the local playground telemetry stack.

server:
  # HTTP API port; the Grafana "Tempo" datasource points at http://tempo:3200
  # and docker-compose publishes this port as 3200:3200.
  http_listen_port: 3200

distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          # OTLP/gRPC ingest; the OpenTelemetry collector's `otlp` exporter
          # is configured to send traces to tempo:8492.
          endpoint: 0.0.0.0:8492

storage:
  trace:
    # Keep traces on local disk. Both paths live under /var/tempo, which
    # docker-compose backs with the `tempo-data` named volume.
    backend: local
    local:
      path: /var/tempo/blocks
    wal:
      path: /var/tempo/wal

packages/backend/Dockerfile.prod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,4 @@ COPY --from=builder /home/rafiki/packages/backend/dist ./packages/backend/dist
5959
COPY --from=builder /home/rafiki/packages/token-introspection/dist ./packages/token-introspection/dist
6060
COPY --from=builder /home/rafiki/packages/backend/knexfile.js ./packages/backend/knexfile.js
6161

62-
CMD ["node", "/home/rafiki/packages/backend/dist/index.js"]
62+
CMD ["node", "-r", "/home/rafiki/packages/backend/dist/telemetry/index.js", "/home/rafiki/packages/backend/dist/index.js"]

packages/backend/package.json

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"copy-op-schemas": "cp ./node_modules/@interledger/open-payments/dist/openapi/specs/schemas.yaml ./src/openapi/specs/",
1616
"prepack": "pnpm build",
1717
"postinstall": "pnpm copy-op-schemas",
18-
"dev": "ts-node-dev --inspect=0.0.0.0:9229 --respawn --transpile-only src/index.ts"
18+
"dev": "ts-node-dev --inspect=0.0.0.0:9229 --respawn --transpile-only --require ./src/telemetry/index.ts src/index.ts"
1919
},
2020
"devDependencies": {
2121
"@apollo/client": "^3.9.9",
@@ -64,8 +64,16 @@
6464
"@koa/router": "^12.0.0",
6565
"@opentelemetry/api": "^1.8.0",
6666
"@opentelemetry/exporter-metrics-otlp-grpc": "^0.49.1",
67+
"@opentelemetry/exporter-trace-otlp-grpc": "^0.52.1",
68+
"@opentelemetry/instrumentation": "^0.52.1",
69+
"@opentelemetry/instrumentation-graphql": "^0.42.0",
70+
"@opentelemetry/instrumentation-http": "^0.52.1",
71+
"@opentelemetry/instrumentation-pg": "^0.43.0",
72+
"@opentelemetry/instrumentation-undici": "^0.4.0",
6773
"@opentelemetry/resources": "^1.22.0",
68-
"@opentelemetry/sdk-metrics": "^1.22.0",
74+
"@opentelemetry/sdk-metrics": "^1.25.1",
75+
"@opentelemetry/sdk-node": "^0.52.1",
76+
"@opentelemetry/sdk-trace-node": "^1.25.1",
6977
"ajv": "^8.12.0",
7078
"axios": "1.6.8",
7179
"base64url": "^3.0.1",

packages/backend/src/config/app.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ const privateKeyFileValue = loadOrGenerateKey(privateKeyFileEnv)
5050
export const Config = {
5151
logLevel: envString('LOG_LEVEL', 'info'),
5252
enableTelemetry: envBool('ENABLE_TELEMETRY', false),
53+
enableTelemetryTraces: envBool('ENABLE_TELEMETRY_TRACES', false),
54+
openTelemetryTraceCollectorUrls: envStringArray(
55+
'OPEN_TELEMETRY_TRACE_COLLECTOR_URLS',
56+
[]
57+
),
5358
livenet: envBool('LIVENET', false),
5459
openTelemetryCollectors: envStringArray(
5560
'OPEN_TELEMETRY_COLLECTOR_URLS',
packages/backend/src/telemetry/index.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import { Config } from '../config/app'
2+
import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-grpc'
3+
import { Resource } from '@opentelemetry/resources'
4+
import {
5+
MeterProvider,
6+
PeriodicExportingMetricReader
7+
} from '@opentelemetry/sdk-metrics'
8+
9+
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc'
10+
import { api } from '@opentelemetry/sdk-node'
11+
import { PgInstrumentation } from '@opentelemetry/instrumentation-pg'
12+
import { GraphQLInstrumentation } from '@opentelemetry/instrumentation-graphql'
13+
14+
import { HttpInstrumentation } from '@opentelemetry/instrumentation-http'
15+
import {
16+
BatchSpanProcessor,
17+
NodeTracerProvider
18+
} from '@opentelemetry/sdk-trace-node'
19+
import { registerInstrumentations } from '@opentelemetry/instrumentation'
20+
import { UndiciInstrumentation } from '@opentelemetry/instrumentation-undici'
21+
22+
// debug logger:
23+
// diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.DEBUG)
24+
25+
// Value reported as `service.name`; the example Grafana dashboard's trace
// panel filters on this exact string.
const SERVICE_NAME = 'RAFIKI_NETWORK'

// Resource attributes shared by the meter provider and the tracer provider
// created below in this file.
const resourceAttributes = {
  'service.name': SERVICE_NAME,
  instance: Config.instanceName
}

const rafikiResource = new Resource(resourceAttributes)
30+
31+
// Metrics: only wired up when Config.enableTelemetry is true.
if (Config.enableTelemetry) {
  // One periodic reader per configured collector URL, each with its own
  // OTLP exporter. The export interval falls back to 15000 ms when
  // Config.openTelemetryExportInterval is null or undefined.
  const readers = Config.openTelemetryCollectors.map(
    (url) =>
      new PeriodicExportingMetricReader({
        exporter: new OTLPMetricExporter({ url }),
        exportIntervalMillis: Config.openTelemetryExportInterval ?? 15000
      })
  )

  // Install a meter provider carrying the shared resource and all readers
  // as the global meter provider.
  api.metrics.setGlobalMeterProvider(
    new MeterProvider({
      resource: rafikiResource,
      readers
    })
  )
}
52+
53+
// Traces: only wired up when Config.enableTelemetryTraces is true.
if (Config.enableTelemetryTraces) {
  const provider = new NodeTracerProvider({ resource: rafikiResource })

  // Attach one batching span processor (with its own OTLP exporter) per
  // configured trace collector URL.
  Config.openTelemetryTraceCollectorUrls.forEach((url) => {
    provider.addSpanProcessor(
      new BatchSpanProcessor(new OTLPTraceExporter({ url }))
    )
  })

  // Register the provider before enabling the instrumentations below.
  provider.register()

  // Auto-instrumentation for undici, http, pg and GraphQL. The GraphQL
  // options are passed through exactly as configured here.
  const instrumentations = [
    new UndiciInstrumentation(),
    new HttpInstrumentation(),
    new PgInstrumentation(),
    new GraphQLInstrumentation({
      mergeItems: true,
      ignoreTrivialResolveSpans: true,
      ignoreResolveSpans: true
    })
  ]

  registerInstrumentations({ instrumentations })
}

0 commit comments

Comments
 (0)