Skip to content

Commit 9f826a7

Browse files
committed
Add sweeping of expired spans
1 parent 0359cb4 commit 9f826a7

File tree

2 files changed

+210
-16
lines changed

2 files changed

+210
-16
lines changed

lib/sentry/opentelemetry/span_storage.ex

Lines changed: 67 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,64 +2,76 @@ defmodule Sentry.OpenTelemetry.SpanStorage do
22
@moduledoc false
33
use GenServer
44

5+
require Logger
6+
57
@table :span_storage
8+
@cleanup_interval :timer.minutes(5)
9+
@span_ttl :timer.minutes(30)
610

711
@spec start_link(keyword()) :: GenServer.on_start()
812
def start_link(opts \\ []) do
913
name = Keyword.get(opts, :name, __MODULE__)
10-
GenServer.start_link(__MODULE__, nil, name: name)
14+
GenServer.start_link(__MODULE__, opts, name: name)
1115
end
1216

1317
@impl true
14-
def init(nil) do
18+
def init(opts) do
1519
_table =
1620
if :ets.whereis(@table) == :undefined do
1721
:ets.new(@table, [:named_table, :public, :bag])
1822
end
1923

20-
{:ok, :no_state}
24+
cleanup_interval = Keyword.get(opts, :cleanup_interval, @cleanup_interval)
25+
schedule_cleanup(cleanup_interval)
26+
27+
{:ok, %{cleanup_interval: cleanup_interval}}
2128
end
2229

2330
def store_span(span_data) when span_data.parent_span_id == nil do
31+
stored_at = System.system_time(:second)
32+
2433
case :ets.lookup(@table, {:root_span, span_data.span_id}) do
25-
[] -> :ets.insert(@table, {{:root_span, span_data.span_id}, span_data})
34+
[] -> :ets.insert(@table, {{:root_span, span_data.span_id}, {span_data, stored_at}})
2635
_ -> :ok
2736
end
2837
end
2938

3039
def store_span(span_data) do
31-
_ = :ets.insert(@table, {span_data.parent_span_id, span_data})
40+
stored_at = System.system_time(:second)
41+
_ = :ets.insert(@table, {span_data.parent_span_id, {span_data, stored_at}})
3242
end
3343

3444
def get_root_span(span_id) do
3545
case :ets.lookup(@table, {:root_span, span_id}) do
36-
[{{:root_span, ^span_id}, span}] -> span
46+
[{{:root_span, ^span_id}, {span, _stored_at}}] -> span
3747
[] -> nil
3848
end
3949
end
4050

4151
def get_child_spans(parent_span_id) do
4252
:ets.lookup(@table, parent_span_id)
43-
|> Enum.map(fn {_parent_id, span} -> span end)
53+
|> Enum.map(fn {_parent_id, {span, _stored_at}} -> span end)
4454
end
4555

4656
def update_span(span_data) do
57+
stored_at = System.system_time(:second)
58+
4759
if span_data.parent_span_id == nil do
4860
case :ets.lookup(@table, {:root_span, span_data.span_id}) do
4961
[] ->
50-
:ets.insert(@table, {{:root_span, span_data.span_id}, span_data})
62+
:ets.insert(@table, {{:root_span, span_data.span_id}, {span_data, stored_at}})
5163

5264
_ ->
5365
:ets.delete(@table, {:root_span, span_data.span_id})
54-
:ets.insert(@table, {{:root_span, span_data.span_id}, span_data})
66+
:ets.insert(@table, {{:root_span, span_data.span_id}, {span_data, stored_at}})
5567
end
5668
else
5769
existing_spans = :ets.lookup(@table, span_data.parent_span_id)
5870

59-
Enum.each(existing_spans, fn {parent_id, span} ->
71+
Enum.each(existing_spans, fn {parent_id, {span, stored_at}} ->
6072
if span.span_id == span_data.span_id do
61-
:ets.delete_object(@table, {parent_id, span})
62-
:ets.insert(@table, {span_data.parent_span_id, span_data})
73+
:ets.delete_object(@table, {parent_id, {span, stored_at}})
74+
:ets.insert(@table, {span_data.parent_span_id, {span_data, stored_at}})
6375
end
6476
end)
6577
end
@@ -69,7 +81,9 @@ defmodule Sentry.OpenTelemetry.SpanStorage do
6981

7082
def remove_span(span_id) do
7183
case get_root_span(span_id) do
72-
nil -> :ok
84+
nil ->
85+
:ok
86+
7387
_root_span ->
7488
:ets.delete(@table, {:root_span, span_id})
7589
remove_child_spans(span_id)
@@ -80,4 +94,44 @@ defmodule Sentry.OpenTelemetry.SpanStorage do
8094
:ets.delete(@table, parent_span_id)
8195
:ok
8296
end
97+
98+
@impl true
99+
def handle_info(:cleanup_stale_spans, state) do
100+
cleanup_stale_spans()
101+
schedule_cleanup(state.cleanup_interval)
102+
{:noreply, state}
103+
end
104+
105+
defp schedule_cleanup(interval) do
106+
Process.send_after(self(), :cleanup_stale_spans, interval)
107+
end
108+
109+
defp cleanup_stale_spans do
110+
now = System.system_time(:second)
111+
cutoff_time = now - @span_ttl
112+
113+
:ets.match_object(@table, {{:root_span, :_}, {:_, :_}})
114+
|> Enum.each(fn {{:root_span, span_id}, {_span, stored_at}} ->
115+
if stored_at < cutoff_time do
116+
Logger.debug("Cleaning up stale root span: #{span_id}")
117+
remove_span(span_id)
118+
end
119+
end)
120+
121+
:ets.match_object(@table, {:_, {:_, :_}})
122+
|> Enum.each(fn {parent_id, {span, stored_at}} = object ->
123+
cond do
124+
get_root_span(parent_id) != nil and stored_at < cutoff_time ->
125+
Logger.debug("Cleaning up stale child span: #{span.span_id}")
126+
:ets.delete_object(@table, object)
127+
128+
get_root_span(parent_id) == nil and stored_at < cutoff_time ->
129+
Logger.debug("Cleaning up stale orphaned child span: #{span.span_id}")
130+
:ets.delete_object(@table, object)
131+
132+
true ->
133+
:ok
134+
end
135+
end)
136+
end
83137
end

test/sentry/opentelemetry/span_storage_test.exs

Lines changed: 143 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,11 @@ defmodule Sentry.OpenTelemetry.SpanStorageTest do
9595
SpanStorage.store_span(child_span1)
9696
SpanStorage.store_span(child_span2)
9797

98-
# Verify initial state
9998
assert root_span == SpanStorage.get_root_span("root123")
10099
assert length(SpanStorage.get_child_spans("root123")) == 2
101100

102-
# Remove root span should remove everything
103101
SpanStorage.remove_span("root123")
104102

105-
# Verify everything is removed
106103
assert nil == SpanStorage.get_root_span("root123")
107104
assert [] == SpanStorage.get_child_spans("root123")
108105
end
@@ -218,4 +215,147 @@ defmodule Sentry.OpenTelemetry.SpanStorageTest do
218215
assert nil == SpanStorage.get_root_span("root123")
219216
assert [] == SpanStorage.get_child_spans("root123")
220217
end
218+
219+
describe "stale span cleanup" do
220+
test "cleans up stale spans" do
221+
start_supervised!({SpanStorage, cleanup_interval: 100, name: :cleanup_test})
222+
223+
root_span = %SpanRecord{
224+
span_id: "stale_root",
225+
parent_span_id: nil,
226+
trace_id: "trace123",
227+
name: "stale_root_span"
228+
}
229+
230+
child_span = %SpanRecord{
231+
span_id: "stale_child",
232+
parent_span_id: "stale_root",
233+
trace_id: "trace123",
234+
name: "stale_child_span"
235+
}
236+
237+
old_time = System.system_time(:second) - :timer.minutes(31)
238+
:ets.insert(:span_storage, {{:root_span, "stale_root"}, {root_span, old_time}})
239+
:ets.insert(:span_storage, {"stale_root", {child_span, old_time}})
240+
241+
fresh_root_span = %SpanRecord{
242+
span_id: "fresh_root",
243+
parent_span_id: nil,
244+
trace_id: "trace123",
245+
name: "fresh_root_span"
246+
}
247+
248+
SpanStorage.store_span(fresh_root_span)
249+
250+
Process.sleep(200)
251+
252+
assert nil == SpanStorage.get_root_span("stale_root")
253+
assert [] == SpanStorage.get_child_spans("stale_root")
254+
255+
assert SpanStorage.get_root_span("fresh_root")
256+
end
257+
258+
test "cleans up orphaned child spans" do
259+
start_supervised!({SpanStorage, cleanup_interval: 100, name: :cleanup_test})
260+
261+
child_span = %SpanRecord{
262+
span_id: "stale_child",
263+
parent_span_id: "non_existent_parent",
264+
trace_id: "trace123",
265+
name: "stale_child_span"
266+
}
267+
268+
old_time = System.system_time(:second) - :timer.minutes(31)
269+
:ets.insert(:span_storage, {"non_existent_parent", {child_span, old_time}})
270+
271+
Process.sleep(200)
272+
273+
assert [] == SpanStorage.get_child_spans("non_existent_parent")
274+
end
275+
276+
test "cleans up expired root spans with all their children regardless of child timestamps" do
277+
start_supervised!({SpanStorage, cleanup_interval: 100, name: :cleanup_test})
278+
279+
root_span = %SpanRecord{
280+
span_id: "root123",
281+
parent_span_id: nil,
282+
trace_id: "trace123",
283+
name: "root_span"
284+
}
285+
286+
old_child = %SpanRecord{
287+
span_id: "old_child",
288+
parent_span_id: "root123",
289+
trace_id: "trace123",
290+
name: "old_child_span"
291+
}
292+
293+
fresh_child = %SpanRecord{
294+
span_id: "fresh_child",
295+
parent_span_id: "root123",
296+
trace_id: "trace123",
297+
name: "fresh_child_span"
298+
}
299+
300+
old_time = System.system_time(:second) - :timer.minutes(31)
301+
:ets.insert(:span_storage, {{:root_span, "root123"}, {root_span, old_time}})
302+
303+
:ets.insert(:span_storage, {"root123", {old_child, old_time}})
304+
SpanStorage.store_span(fresh_child)
305+
306+
Process.sleep(200)
307+
308+
assert nil == SpanStorage.get_root_span("root123")
309+
assert [] == SpanStorage.get_child_spans("root123")
310+
end
311+
312+
test "handles mixed expiration times in child spans" do
313+
start_supervised!({SpanStorage, cleanup_interval: 100, name: :cleanup_test})
314+
315+
root_span = %SpanRecord{
316+
span_id: "root123",
317+
parent_span_id: nil,
318+
trace_id: "trace123",
319+
name: "root_span"
320+
}
321+
322+
old_child1 = %SpanRecord{
323+
span_id: "old_child1",
324+
parent_span_id: "root123",
325+
trace_id: "trace123",
326+
name: "old_child_span_1"
327+
}
328+
329+
old_child2 = %SpanRecord{
330+
span_id: "old_child2",
331+
parent_span_id: "root123",
332+
trace_id: "trace123",
333+
name: "old_child_span_2"
334+
}
335+
336+
fresh_child = %SpanRecord{
337+
span_id: "fresh_child",
338+
parent_span_id: "root123",
339+
trace_id: "trace123",
340+
name: "fresh_child_span"
341+
}
342+
343+
SpanStorage.store_span(root_span)
344+
345+
old_time = System.system_time(:second) - :timer.minutes(31)
346+
:ets.insert(:span_storage, {"root123", {old_child1, old_time}})
347+
:ets.insert(:span_storage, {"root123", {old_child2, old_time}})
348+
349+
SpanStorage.store_span(fresh_child)
350+
351+
Process.sleep(200)
352+
353+
assert root_span == SpanStorage.get_root_span("root123")
354+
children = SpanStorage.get_child_spans("root123")
355+
assert length(children) == 1
356+
assert fresh_child in children
357+
refute old_child1 in children
358+
refute old_child2 in children
359+
end
360+
end
221361
end

0 commit comments

Comments
 (0)