Skip to content

Commit 6b47159

Browse files
authored
Add support for OpenTelemetry tracing (#1612)
* Add support for OpenTelemetry tracing * Use `open_telemetry_decorator` * Disable tracing exporter during testing * Add a custom OTel filtered sampler
1 parent 7ca6076 commit 6b47159

File tree

10 files changed

+224
-5
lines changed

10 files changed

+224
-5
lines changed

config/runtime.exs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,27 @@ config :sentry,
387387
]
388388
]
389389

390+
# Name the OpenTelemetry resource after the running NervesHub app.
config :opentelemetry, :resource, service: %{name: nerves_hub_app}

# Tracing is only exported when OTLP_ENDPOINT is set; otherwise the traces
# exporter is disabled entirely.
if otlp_endpoint = System.get_env("OTLP_ENDPOINT") do
  # Only send an auth header when both its name and its value are configured.
  # Without this check an unset pair would be passed on as `{nil, nil}`.
  otlp_headers =
    case {System.get_env("OTLP_AUTH_HEADER"), System.get_env("OTLP_AUTH_HEADER_VALUE")} do
      {name, value} when is_binary(name) and is_binary(value) -> [{name, value}]
      _ -> []
    end

  config :opentelemetry_exporter,
    otlp_protocol: :http_protobuf,
    otlp_endpoint: otlp_endpoint,
    otlp_headers: otlp_headers

  # Optional sampling ratio. `Float.parse/1` accepts both "1" and "0.25",
  # whereas `String.to_float/1` raises on values without a decimal point.
  otlp_sampler_ratio =
    case System.get_env("OTLP_SAMPLER_RATIO") do
      nil ->
        nil

      ratio ->
        case Float.parse(ratio) do
          {value, ""} ->
            value

          _ ->
            raise ArgumentError,
                  "OTLP_SAMPLER_RATIO must be a number, got: #{inspect(ratio)}"
        end
    end

  config :opentelemetry,
    sampler: {:parent_based, %{root: {NervesHub.Telemetry.FilteredSampler, otlp_sampler_ratio}}}
else
  config :opentelemetry, traces_exporter: :none
end
410+
390411
if host = System.get_env("STATSD_HOST") do
391412
config :nerves_hub, :statsd,
392413
host: System.get_env("STATSD_HOST"),

lib/nerves_hub/application.ex

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ defmodule NervesHub.Application do
1212
raise "fwup could not be found in the $PATH. This is a requirement of NervesHubWeb and cannot start otherwise"
1313
end
1414

15+
setup_open_telemetry()
16+
1517
_ =
1618
:logger.add_handler(:my_sentry_handler, Sentry.LoggerHandler, %{
1719
config: %{metadata: [:file, :line]}
@@ -47,6 +49,25 @@ defmodule NervesHub.Application do
4749
Supervisor.start_link(children, opts)
4850
end
4951

52+
# Attaches the OpenTelemetry instrumentation handlers used by the application
# (custom Bandit tweaks, Bandit, Phoenix, Oban and Ecto). Each setup call is
# asserted to return `:ok`, so a failed attachment raises a `MatchError`.
defp setup_open_telemetry() do
  ipv6_env = System.get_env("ECTO_IPV6")

  # Any value of ECTO_IPV6 switches :httpc to `:inet6fb4`.
  # NOTE(review): presumably needed so the OTLP exporter can reach IPv6-only
  # endpoints - confirm.
  if ipv6_env do
    :httpc.set_option(:ipfamily, :inet6fb4)
  end

  :ok = NervesHub.Telemetry.Customizations.setup()

  :ok = OpentelemetryBandit.setup()
  :ok = OpentelemetryPhoenix.setup(adapter: :bandit)
  :ok = OpentelemetryOban.setup(trace: [:jobs])

  # The Ecto instrumentation is keyed on the repo's configured telemetry prefix.
  repo_telemetry_prefix = Keyword.fetch!(NervesHub.Repo.config(), :telemetry_prefix)
  :ok = OpentelemetryEcto.setup(repo_telemetry_prefix, db_statement: :enabled)

  :ok
end
70+
5071
def config_change(changed, _new, removed) do
5172
NervesHubWeb.Endpoint.config_change(changed, removed)
5273
:ok

lib/nerves_hub/deployments/orchestrator.ex

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ defmodule NervesHub.Deployments.Orchestrator do
99
"""
1010

1111
use GenServer
12+
use OpenTelemetryDecorator
1213

1314
require Logger
1415

@@ -48,6 +49,7 @@ defmodule NervesHub.Deployments.Orchestrator do
4849
As devices update and reconnect, the new orchestrator is told that the update
4950
was successful, and the process is repeated.
5051
"""
52+
@decorate with_span("Deployments.Orchestrator.trigger_update")
5153
def trigger_update(deployment) do
5254
:telemetry.execute([:nerves_hub, :deployment, :trigger_update], %{count: 1})
5355

@@ -106,6 +108,7 @@ defmodule NervesHub.Deployments.Orchestrator do
106108
{:ok, deployment, {:continue, :boot}}
107109
end
108110

111+
@decorate with_span("Deployments.Orchestrator.boot")
109112
def handle_continue(:boot, deployment) do
110113
_ = PubSub.subscribe(NervesHub.PubSub, "deployment:#{deployment.id}")
111114

@@ -126,6 +129,7 @@ defmodule NervesHub.Deployments.Orchestrator do
126129
{:noreply, deployment}
127130
end
128131

132+
@decorate with_span("Deployments.Orchestrator.handle_info:deployments/update")
129133
def handle_info(%Broadcast{event: "deployments/update"}, deployment) do
130134
deployment =
131135
deployment
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
defmodule NervesHub.Telemetry.Customizations do
  @moduledoc """
  Telemetry handlers that customise the OpenTelemetry spans produced for
  Bandit requests.
  """

  alias OpenTelemetry.Tracer
  require OpenTelemetry.Tracer

  @doc """
  Attaches `handle_request/4` to the `[:bandit, :request, :stop]` telemetry event.

  Returns the result of `:telemetry.attach_many/4`.
  """
  def setup() do
    :telemetry.attach_many(
      {__MODULE__, :bandit_customizations},
      [
        [:bandit, :request, :stop]
      ],
      &__MODULE__.handle_request/4,
      nil
    )
  end

  @doc """
  Renames the current span to `"WEBSOCKET <request_path>"` when the request
  path ends in `/websocket`.

  Always returns `:ok`. Events whose metadata carries no `:conn` are ignored.
  """
  def handle_request([:bandit, :request, :stop], _measurements, %{conn: conn}, _config) do
    if conn.request_path =~ ~r/\/websocket$/ do
      Tracer.update_name("WEBSOCKET #{conn.request_path}")
    end

    :ok
  end

  # Bandit's telemetry docs note that `conn` may be missing from the stop
  # event's metadata when an error prevented the conn from being built. A
  # handler that raises is detached by :telemetry, so unmatched events must be
  # swallowed here rather than crash with a FunctionClauseError.
  def handle_request(_event, _measurements, _metadata, _config), do: :ok
end
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
defmodule NervesHub.Telemetry.FilteredSampler do
  @moduledoc """
  An `:otel_sampler` that drops spans we never want to trace.

  A span is dropped when its name is in the ignored span list, or when it is a
  `:GET` span whose `:"url.path"` attribute is an ignored URL or a static asset
  path. Every other span is either handed to
  `:otel_sampler_trace_id_ratio_based` (when a probability was given to
  `setup/1`) or always recorded and sampled.

  Inspired by https://arathunku.com/b/2024/notes-on-adding-opentelemetry-to-an-elixir-app/
  """

  require OpenTelemetry.Tracer, as: Tracer
  require Logger

  @behaviour :otel_sampler

  @ignored_static_paths ~r/^\/(assets|fonts|images|css)\/.*/

  @ignored_url_paths [
    "/status/alive",
    "/phoenix/live_reload/socket/websocket",
    "/live/websocket",
    "/favicon.ico",
    "/"
  ]

  @ignored_span_names [
    "Channels.DeviceSocket.heartbeat",
    "nerves_hub.repo.query:schema_migrations"
  ]

  # Builds the sampler config. With no probability the config is empty; with a
  # probability it carries the ratio-based sampler's own config.
  @impl :otel_sampler
  def setup(probability \\ nil)

  def setup(probability) when probability in [nil, false], do: []

  def setup(probability) do
    [ratio_sampler_config: :otel_sampler_trace_id_ratio_based.setup(probability)]
  end

  @impl :otel_sampler
  def description(_sampler_config), do: "NervesHub.Sampler"

  # Decides whether a span is dropped, delegated to the ratio sampler, or
  # always recorded and sampled.
  @impl :otel_sampler
  def should_sample(
        ctx,
        trace_id,
        links,
        span_name,
        span_kind,
        attributes,
        sampler_config
      ) do
    dropped? = drop_trace?(span_name, attributes)

    tracestate = ctx |> Tracer.current_span_ctx() |> OpenTelemetry.Span.tracestate()

    if dropped? do
      {:drop, [], tracestate}
    else
      case sampler_config[:ratio_sampler_config] do
        unset when unset in [nil, false] ->
          {:record_and_sample, [], tracestate}

        ratio_config ->
          :otel_sampler_trace_id_ratio_based.should_sample(
            ctx,
            trace_id,
            links,
            span_name,
            span_kind,
            attributes,
            ratio_config
          )
      end
    end
  end

  @doc """
  Returns `true` when a span with the given name and attributes should be dropped.
  """
  def drop_trace?(span_name, attributes) do
    Enum.member?(@ignored_span_names, span_name) or
      (span_name == :GET and ignored_path?(attributes[:"url.path"]))
  end

  # True for the explicitly ignored URLs and for static asset paths. A missing
  # path is treated as the empty string for the static-path regex.
  defp ignored_path?(url_path) do
    Enum.member?(@ignored_url_paths, url_path) or (url_path || "") =~ @ignored_static_paths
  end
end

lib/nerves_hub_web/channels/device_channel.ex

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ defmodule NervesHubWeb.DeviceChannel do
66
"""
77

88
use Phoenix.Channel
9+
use OpenTelemetryDecorator
910

1011
require Logger
1112

@@ -19,6 +20,7 @@ defmodule NervesHubWeb.DeviceChannel do
1920
alias NervesHub.Repo
2021
alias Phoenix.Socket.Broadcast
2122

23+
@decorate with_span("Channels.DeviceChannel.join")
2224
def join("device", params, %{assigns: %{device: device}} = socket) do
2325
with {:ok, device} <- update_metadata(device, params) do
2426
send(self(), {:after_join, params})
@@ -31,6 +33,7 @@ defmodule NervesHubWeb.DeviceChannel do
3133
end
3234
end
3335

36+
@decorate with_span("Channels.DeviceChannel.handle_info:after_join")
3437
def handle_info({:after_join, params}, %{assigns: %{device: device}} = socket) do
3538
device = maybe_update_deployment(device)
3639

@@ -77,6 +80,7 @@ defmodule NervesHubWeb.DeviceChannel do
7780
{:stop, :shutdown, socket}
7881
end
7982

83+
@decorate with_span("Channels.DeviceChannel.handle_info:device_registration")
8084
def handle_info({:device_registation, attempt}, socket) do
8185
%{assigns: %{device: device}} = socket
8286

@@ -98,6 +102,7 @@ defmodule NervesHubWeb.DeviceChannel do
98102

99103
# We can save a fairly expensive query by checking the incoming deployment's payload
100104
# If it matches, we can set the deployment directly and only do 3 queries (update, two preloads)
105+
@decorate with_span("Channels.DeviceChannel.handle_info:deployments/changed,deployment:none")
101106
def handle_info(
102107
%Broadcast{event: "deployments/changed", topic: "deployment:none", payload: payload},
103108
%{assigns: %{device: device}} = socket
@@ -121,6 +126,7 @@ defmodule NervesHubWeb.DeviceChannel do
121126
{:noreply, assign_deployment(socket, payload)}
122127
end
123128

129+
@decorate with_span("Channels.DeviceChannel.handle_info:deployments/changed")
124130
def handle_info(
125131
%Broadcast{event: "deployments/changed", payload: payload},
126132
%{assigns: %{device: device}} = socket
@@ -138,6 +144,7 @@ defmodule NervesHubWeb.DeviceChannel do
138144
end
139145
end
140146

147+
@decorate with_span("Channels.DeviceChannel.handle_info:resolve_changed_deployment")
141148
def handle_info(:resolve_changed_deployment, %{assigns: %{device: device}} = socket) do
142149
:telemetry.execute([:nerves_hub, :devices, :deployment, :changed], %{count: 1})
143150

@@ -171,6 +178,7 @@ defmodule NervesHubWeb.DeviceChannel do
171178
{:noreply, socket}
172179
end
173180

181+
@decorate with_span("Channels.DeviceChannel.handle_info:deployments/update")
174182
def handle_info({"deployments/update", inflight_update}, %{assigns: %{device: device}} = socket) do
175183
device = deployment_preload(device)
176184

@@ -215,6 +223,7 @@ defmodule NervesHubWeb.DeviceChannel do
215223
end
216224

217225
# Update local state and tell the various servers of the new information
226+
@decorate with_span("Channels.DeviceChannel.handle_info:devices-updated")
218227
def handle_info(%Broadcast{event: "devices/updated"}, %{assigns: %{device: device}} = socket) do
219228
device = Repo.reload(device)
220229

@@ -355,6 +364,7 @@ defmodule NervesHubWeb.DeviceChannel do
355364
end
356365
end
357366

367+
@decorate with_span("Channels.DeviceChannel.handle_in:location:update")
358368
def handle_in("location:update", location, %{assigns: %{device: device}} = socket) do
359369
metadata = Map.put(device.connection_metadata, "location", location)
360370

@@ -405,6 +415,7 @@ defmodule NervesHubWeb.DeviceChannel do
405415
{:noreply, socket}
406416
end
407417

418+
@decorate with_span("Channels.DeviceChannel.handle_in:health_check_report")
408419
def handle_in("health_check_report", %{"value" => device_status}, socket) do
409420
device_meta =
410421
for {key, val} <- Map.from_struct(socket.assigns.device.firmware_metadata),
@@ -474,6 +485,7 @@ defmodule NervesHubWeb.DeviceChannel do
474485
:ok
475486
end
476487

488+
@decorate with_span("Channels.DeviceChannel.maybe_update_deployment")
477489
defp maybe_update_deployment(device) do
478490
device
479491
|> Deployments.preload_with_firmware_and_archive()

lib/nerves_hub_web/channels/device_socket.ex

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
defmodule NervesHubWeb.DeviceSocket do
22
use Phoenix.Socket
3+
use OpenTelemetryDecorator
34

45
require Logger
56

@@ -28,6 +29,7 @@ defmodule NervesHubWeb.DeviceSocket do
2829
end
2930

3031
@impl Phoenix.Socket.Transport
32+
@decorate with_span("Channels.DeviceSocket.terminate")
3133
def terminate(reason, {_channels_info, socket} = state) do
3234
on_disconnect(reason, socket)
3335
super(reason, state)
@@ -42,6 +44,7 @@ defmodule NervesHubWeb.DeviceSocket do
4244
super(msg, {state, socket})
4345
end
4446

47+
@decorate with_span("Channels.DeviceSocket.heartbeat")
4548
defp heartbeat(
4649
%Phoenix.Socket.Message{topic: "phoenix", event: "heartbeat"},
4750
%{
@@ -84,6 +87,7 @@ defmodule NervesHubWeb.DeviceSocket do
8487

8588
# Used by Devices connecting with SSL certificates
8689
@impl Phoenix.Socket
90+
@decorate with_span("Channels.DeviceSocket.connect")
8791
def connect(_params, socket, %{peer_data: %{ssl_cert: ssl_cert}})
8892
when not is_nil(ssl_cert) do
8993
X509.Certificate.from_der!(ssl_cert)
@@ -103,6 +107,7 @@ defmodule NervesHubWeb.DeviceSocket do
103107
end
104108

105109
# Used by Devices connecting with HMAC Shared Secrets
110+
@decorate with_span("Channels.DeviceSocket.connect")
106111
def connect(_params, socket, %{x_headers: x_headers})
107112
when is_list(x_headers) and length(x_headers) > 0 do
108113
headers = Map.new(x_headers)
@@ -210,12 +215,14 @@ defmodule NervesHubWeb.DeviceSocket do
210215
{:ok, socket}
211216
end
212217

218+
@decorate with_span("Channels.DeviceSocket.on_connect#registered")
213219
defp on_connect(%{assigns: %{device: %{status: :registered} = device}} = socket) do
214220
socket
215221
|> assign(device: Devices.set_as_provisioned!(device))
216222
|> on_connect()
217223
end
218224

225+
@decorate with_span("Channels.DeviceSocket.on_connect#provisioned")
219226
defp on_connect(%{assigns: %{device: device}} = socket) do
220227
# Report connection and use connection id as reference
221228
{:ok, %DeviceConnection{id: connection_id}} =
@@ -235,6 +242,9 @@ defmodule NervesHubWeb.DeviceSocket do
235242
|> assign(:reference_id, connection_id)
236243
end
237244

245+
@decorate with_span("Channels.DeviceSocket.on_disconnect")
246+
defp on_disconnect(exit_reason, socket)
247+
238248
defp on_disconnect({:error, reason}, %{
239249
assigns: %{
240250
device: device,
@@ -262,6 +272,7 @@ defmodule NervesHubWeb.DeviceSocket do
262272
shutdown(device, reference_id)
263273
end
264274

275+
@decorate with_span("Channels.DeviceSocket.shutdown")
265276
defp shutdown(device, reference_id) do
266277
:telemetry.execute([:nerves_hub, :devices, :disconnect], %{count: 1}, %{
267278
ref_id: reference_id,

0 commit comments

Comments
 (0)