Skip to content

Commit e09d281

Browse files
committed
wip - add pending children tracking to SpanStorage
1 parent 1b6248b commit e09d281

File tree

2 files changed

+202
-0
lines changed

2 files changed

+202
-0
lines changed

lib/sentry/opentelemetry/span_storage.ex

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,25 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do
6161
end
6262
end
6363

64+
@doc """
Checks whether a span with the given `span_id` is present in the storage table.

Root spans are checked first with a direct lookup on the `{:root_span, span_id}` key.
When that misses, child span entries are searched for any key of the form
`{:child_span, parent_span_id, span_id}`, regardless of the parent.

## Options

  * `:table_name` - the ETS table to query. Defaults to the module's default table.

Returns `true` if a root or child entry exists for `span_id`, `false` otherwise.
"""
@spec span_exists?(String.t(), keyword()) :: boolean()
def span_exists?(span_id, opts \\ []) do
  table_name = Keyword.get(opts, :table_name, default_table_name())

  case :ets.lookup(table_name, {:root_span, span_id}) do
    [_] ->
      true

    [] ->
      # The parent id is part of the child key, so when the parent is not known
      # (e.g. the span has a remote parent) the entry can only be found by scanning.
      # Limiting the select to a single result lets the scan stop at the first hit,
      # and returning `true` from the match spec avoids copying the stored span
      # record out of ETS just to test for existence.
      child_match_spec = [{{{:child_span, :_, span_id}, :_, :_}, [], [true]}]

      case :ets.select(table_name, child_match_spec, 1) do
        {[_ | _], _continuation} -> true
        :"$end_of_table" -> false
      end
  end
end
82+
6483
@spec get_child_spans(String.t(), keyword()) :: [SpanRecord.t()]
6584
def get_child_spans(parent_span_id, opts \\ []) do
6685
table_name = Keyword.get(opts, :table_name, default_table_name())
@@ -121,6 +140,119 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do
121140
:ok
122141
end
123142

143+
@doc """
Deletes the child span entry stored under `{:child_span, parent_span_id, span_id}`.

## Options

  * `:table_name` - the ETS table to modify. Defaults to the module's default table.

Always returns `:ok`, whether or not an entry existed.
"""
@spec remove_child_span(String.t(), String.t(), keyword()) :: :ok
def remove_child_span(parent_span_id, span_id, opts \\ []) do
  opts
  |> Keyword.get(:table_name, default_table_name())
  |> :ets.delete({:child_span, parent_span_id, span_id})

  :ok
end
152+
153+
# Pending children tracking functions
154+
#
155+
# These functions track spans that have started (on_start) but not yet ended (on_end).
156+
# This is crucial for handling the race condition where a parent span's on_end is called
157+
# before its child spans' on_end callbacks.
158+
#
159+
# The key insight is that when a child span starts, we record its existence in ETS.
160+
# When it ends, we remove the pending record. This allows us to:
161+
# 1. Know how many children are still in-flight when a parent ends
162+
# 2. Defer transaction building until all pending children have ended
163+
# 3. Have the last child trigger the transaction build
164+
165+
@doc """
Records that a child span has started but has not ended yet.

Intended to be called from the `on_start` callback. Inserts a lightweight entry
keyed by `{:pending_child, parent_span_id, child_span_id}` and stamped with the
current system time in seconds.

## Options

  * `:table_name` - the ETS table to write to. Defaults to the module's default table.
"""
@spec store_pending_child(String.t(), String.t(), keyword()) :: true
def store_pending_child(parent_span_id, child_span_id, opts \\ []) do
  table_name = Keyword.get(opts, :table_name, default_table_name())

  pending_entry =
    {{:pending_child, parent_span_id, child_span_id}, System.system_time(:second)}

  :ets.insert(table_name, pending_entry)
end
177+
178+
@doc """
Removes a pending child span when it ends (via the `on_end` callback).

## Options

  * `:table_name` - the ETS table to modify. Defaults to the module's default table.

Returns `true` if a pending entry for the child was found and removed, `false` if
no such entry existed.
"""
@spec remove_pending_child(String.t(), String.t(), keyword()) :: boolean()
def remove_pending_child(parent_span_id, child_span_id, opts \\ []) do
  table_name = Keyword.get(opts, :table_name, default_table_name())
  key = {:pending_child, parent_span_id, child_span_id}

  # `:ets.delete/2` returns `true` whether or not the key was present, so it cannot
  # report whether anything was actually removed. `:ets.take/2` deletes the entry and
  # returns the removed objects in one operation; a non-empty result means the child
  # really was pending.
  :ets.take(table_name, key) != []
end
191+
192+
@doc """
Tells whether at least one pending (in-flight) child is recorded for `parent_span_id`.

## Options

  * `:table_name` - the ETS table to query. Defaults to the module's default table.
"""
@spec has_pending_children?(String.t(), keyword()) :: boolean()
def has_pending_children?(parent_span_id, opts \\ []) do
  table_name = Keyword.get(opts, :table_name, default_table_name())
  pending_pattern = {{:pending_child, parent_span_id, :_}, :_}

  # A limit of 1 is enough: only existence matters, not the number of children.
  first_match = :ets.match_object(table_name, pending_pattern, 1)

  case first_match do
    :"$end_of_table" -> false
    {[_ | _], _continuation} -> true
  end
end
204+
205+
@doc """
Parks a finished parent span until its pending children have ended.

The record is stored under `{:waiting_parent, span_record.span_id}` together with
the current system time in seconds.

## Options

  * `:table_name` - the ETS table to write to. Defaults to the module's default table.
"""
@spec store_waiting_parent(SpanRecord.t(), keyword()) :: true
def store_waiting_parent(span_record, opts \\ []) do
  table_name = Keyword.get(opts, :table_name, default_table_name())
  now = System.system_time(:second)

  :ets.insert(table_name, {{:waiting_parent, span_record.span_id}, span_record, now})
end
217+
218+
@doc """
Fetches the waiting parent span stored for `span_id`.

## Options

  * `:table_name` - the ETS table to query. Defaults to the module's default table.

Returns the stored span record, or `nil` when no waiting parent exists for `span_id`.
"""
@spec get_waiting_parent(String.t(), keyword()) :: SpanRecord.t() | nil
def get_waiting_parent(span_id, opts \\ []) do
  table_name = Keyword.get(opts, :table_name, default_table_name())
  key = {:waiting_parent, span_id}

  case :ets.lookup(table_name, key) do
    [] -> nil
    [{^key, waiting_record, _stored_at}] -> waiting_record
  end
end
230+
231+
@doc """
Deletes the waiting parent entry for `span_id`, once its transaction has been
built and sent.

## Options

  * `:table_name` - the ETS table to modify. Defaults to the module's default table.

Always returns `:ok`.
"""
@spec remove_waiting_parent(String.t(), keyword()) :: :ok
def remove_waiting_parent(span_id, opts \\ []) do
  opts
  |> Keyword.get(:table_name, default_table_name())
  |> :ets.delete({:waiting_parent, span_id})

  :ok
end
240+
241+
@doc """
Drops every pending child entry recorded for `parent_span_id`.

Used during cleanup when a transaction is sent.

## Options

  * `:table_name` - the ETS table to modify. Defaults to the module's default table.

Always returns `:ok`.
"""
@spec remove_pending_children(String.t(), keyword()) :: :ok
def remove_pending_children(parent_span_id, opts \\ []) do
  table_name = Keyword.get(opts, :table_name, default_table_name())

  # Matches any child id and any timestamp under the given parent.
  :ets.match_delete(table_name, {{:pending_child, parent_span_id, :_}, :_})

  :ok
end
255+
124256
# Sends `:cleanup_stale_spans` to the calling process after `interval` milliseconds
# and returns the timer reference from `Process.send_after/3`.
defp schedule_cleanup(interval),
  do: Process.send_after(self(), :cleanup_stale_spans, interval)
@@ -144,6 +276,20 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do
144276
]
145277

146278
:ets.select_delete(table_name, child_match_spec)
279+
280+
# Cleanup stale pending children
281+
pending_child_match_spec = [
282+
{{{:pending_child, :_, :_}, :"$1"}, [{:<, :"$1", cutoff_time}], [true]}
283+
]
284+
285+
:ets.select_delete(table_name, pending_child_match_spec)
286+
287+
# Cleanup stale waiting parents
288+
waiting_parent_match_spec = [
289+
{{{:waiting_parent, :_}, :_, :"$1"}, [{:<, :"$1", cutoff_time}], [true]}
290+
]
291+
292+
:ets.select_delete(table_name, waiting_parent_match_spec)
147293
end
148294

149295
defp default_table_name do

test/sentry/opentelemetry/span_storage_test.exs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,62 @@ defmodule Sentry.OpenTelemetry.SpanStorageTest do
9393
end
9494
end
9595

96+
describe "span_exists?" do
  # Every test in this group carries the same tag, so it is declared once for the
  # whole describe block instead of once per test.
  @describetag span_storage: true

  test "returns true for existing root span", %{table_name: table_name} do
    storage_opts = [table_name: table_name]

    SpanStorage.store_span(
      %SpanRecord{
        span_id: "root123",
        parent_span_id: nil,
        trace_id: "trace123",
        name: "root_span"
      },
      storage_opts
    )

    assert SpanStorage.span_exists?("root123", storage_opts) == true
  end

  test "returns true for existing child span", %{table_name: table_name} do
    storage_opts = [table_name: table_name]

    SpanStorage.store_span(
      %SpanRecord{
        span_id: "child123",
        parent_span_id: "parent123",
        trace_id: "trace123",
        name: "child_span"
      },
      storage_opts
    )

    assert SpanStorage.span_exists?("child123", storage_opts) == true
  end

  test "returns false for non-existent span", %{table_name: table_name} do
    assert SpanStorage.span_exists?("nonexistent", table_name: table_name) == false
  end

  test "returns true for HTTP server span with remote parent (distributed tracing)", %{
    table_name: table_name
  } do
    storage_opts = [table_name: table_name]

    # An HTTP server span whose parent lives in another service (distributed tracing)
    # is stored as a child span rather than as a root span.
    SpanStorage.store_span(
      %SpanRecord{
        span_id: "http_span_123",
        parent_span_id: "remote_parent_456",
        trace_id: "trace123",
        name: "GET /users"
      },
      storage_opts
    )

    # The span itself must be found even though its parent is remote...
    assert SpanStorage.span_exists?("http_span_123", storage_opts) == true
    # ...while the remote parent, which was never stored, must not be.
    assert SpanStorage.span_exists?("remote_parent_456", storage_opts) == false
  end
end
151+
96152
describe "child spans" do
97153
@tag span_storage: true
98154
test "stores and retrieves child spans", %{table_name: table_name} do

0 commit comments

Comments
 (0)