Skip to content

Commit 9bb4116

Browse files
committed
wip - add pending children tracking to SpanStorage
1 parent 1b6248b commit 9bb4116

File tree

2 files changed

+200
-0
lines changed

2 files changed

+200
-0
lines changed

lib/sentry/opentelemetry/span_storage.ex

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,25 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do
6161
end
6262
end
6363

64+
@doc """
Returns `true` when a span with the given `span_id` is stored, either as a
root span or as a child span under any parent, and `false` otherwise.

## Options

  * `:table_name` - the ETS table to query. Defaults to the module's default table.
"""
@spec span_exists?(String.t(), keyword()) :: boolean()
def span_exists?(span_id, opts \\ []) do
  table_name = Keyword.get(opts, :table_name, default_table_name())

  # Child spans are keyed by {:child_span, parent_span_id, span_id}. The parent
  # is unknown here (for example when the span has a remote parent), so the key
  # is only partially bound and the table has to be scanned. The match spec
  # returns the constant `true` instead of the stored record, and the limit of 1
  # lets the scan stop at the first hit, so no span record is copied out of ETS.
  child_match_spec = [{{{:child_span, :_, span_id}, :_, :_}, [], [true]}]

  # Root spans are keyed by span_id alone, so a plain key-membership check is
  # enough; the child scan only runs when no root span is found.
  :ets.member(table_name, {:root_span, span_id}) or
    match?({[_ | _], _continuation}, :ets.select(table_name, child_match_spec, 1))
end
82+
6483
@spec get_child_spans(String.t(), keyword()) :: [SpanRecord.t()]
6584
def get_child_spans(parent_span_id, opts \\ []) do
6685
table_name = Keyword.get(opts, :table_name, default_table_name())
@@ -121,6 +140,117 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do
121140
:ok
122141
end
123142

143+
# Deletes the child-span row stored under {parent_span_id, span_id} from the
# table selected via the `:table_name` option. Always returns :ok.
@spec remove_child_span(String.t(), String.t(), keyword()) :: :ok
def remove_child_span(parent_span_id, span_id, opts \\ []) do
  table = Keyword.get(opts, :table_name, default_table_name())

  :ets.delete(table, {:child_span, parent_span_id, span_id})

  :ok
end
152+
153+
# Pending children tracking functions
154+
#
155+
# These functions track spans that have started (on_start) but not yet ended (on_end).
156+
# This is crucial for handling the race condition where a parent span's on_end is called
157+
# before its child spans' on_end callbacks.
158+
#
159+
# The key insight is that when a child span starts, we record its existence in ETS.
160+
# When it ends, we remove the pending record. This allows us to:
161+
# 1. Know how many children are still in-flight when a parent ends
162+
# 2. Defer transaction building until all pending children have ended
163+
# 3. Have the last child trigger the transaction build
164+
165+
@doc """
Marks `child_span_id` as an in-flight child of `parent_span_id`.

Meant to be called when the child span starts (on_start callback). Only a
lightweight marker row is written: the composite key plus the insertion time
in seconds.
"""
@spec store_pending_child(String.t(), String.t(), keyword()) :: true
def store_pending_child(parent_span_id, child_span_id, opts \\ []) do
  table = Keyword.get(opts, :table_name, default_table_name())
  inserted_at = System.system_time(:second)

  :ets.insert(table, {{:pending_child, parent_span_id, child_span_id}, inserted_at})
end
177+
178+
@doc """
Drops the pending marker for `child_span_id` under `parent_span_id`.

Meant to be called when the child span ends (on_end callback). Always returns `:ok`.
"""
@spec remove_pending_child(String.t(), String.t(), keyword()) :: :ok
def remove_pending_child(parent_span_id, child_span_id, opts \\ []) do
  table = Keyword.get(opts, :table_name, default_table_name())

  :ets.delete(table, {:pending_child, parent_span_id, child_span_id})

  :ok
end
189+
190+
@doc """
Tells whether at least one pending (in-flight) child is recorded for `parent_span_id`.
"""
@spec has_pending_children?(String.t(), keyword()) :: boolean()
def has_pending_children?(parent_span_id, opts \\ []) do
  table = Keyword.get(opts, :table_name, default_table_name())
  pending_pattern = {{:pending_child, parent_span_id, :_}, :_}

  # A limit of 1 is sufficient: only the existence of a match matters.
  case :ets.match_object(table, pending_pattern, 1) do
    :"$end_of_table" -> false
    {[_ | _], _continuation} -> true
  end
end
202+
203+
@doc """
Parks a finished parent span until its children have completed.

Intended for the case where the parent span has ended while some of its
children are still pending. The record is stored under its own `span_id`
together with the insertion time in seconds.
"""
@spec store_waiting_parent(SpanRecord.t(), keyword()) :: true
def store_waiting_parent(span_record, opts \\ []) do
  table = Keyword.get(opts, :table_name, default_table_name())
  inserted_at = System.system_time(:second)
  row = {{:waiting_parent, span_record.span_id}, span_record, inserted_at}

  :ets.insert(table, row)
end
215+
216+
@doc """
Looks up the waiting parent span stored under `span_id`.

Returns the stored span record, or `nil` when no waiting parent exists for that id.
"""
@spec get_waiting_parent(String.t(), keyword()) :: SpanRecord.t() | nil
def get_waiting_parent(span_id, opts \\ []) do
  table = Keyword.get(opts, :table_name, default_table_name())
  rows = :ets.lookup(table, {:waiting_parent, span_id})

  case rows do
    [] -> nil
    [{{:waiting_parent, ^span_id}, waiting_span, _inserted_at}] -> waiting_span
  end
end
228+
229+
@doc """
Deletes the waiting parent entry for `span_id`.

Meant to be called once the transaction has been built and sent. Always returns `:ok`.
"""
@spec remove_waiting_parent(String.t(), keyword()) :: :ok
def remove_waiting_parent(span_id, opts \\ []) do
  opts
  |> Keyword.get(:table_name, default_table_name())
  |> :ets.delete({:waiting_parent, span_id})

  :ok
end
238+
239+
@doc """
Deletes every pending-child marker recorded under `parent_span_id`.

Used as part of cleanup when a transaction is sent. Always returns `:ok`.
"""
@spec remove_pending_children(String.t(), keyword()) :: :ok
def remove_pending_children(parent_span_id, opts \\ []) do
  table = Keyword.get(opts, :table_name, default_table_name())

  # The head matches any pending-child row of this parent; a body of `true`
  # tells select_delete/2 to delete the matched row.
  pending_rows_of_parent = {{:pending_child, parent_span_id, :_}, :_}
  _deleted_count = :ets.select_delete(table, [{pending_rows_of_parent, [], [true]}])

  :ok
end
253+
124254
# Arms a timer that delivers :cleanup_stale_spans to the calling process
# after `interval` milliseconds.
defp schedule_cleanup(interval) do
  recipient = self()
  Process.send_after(recipient, :cleanup_stale_spans, interval)
end
@@ -144,6 +274,20 @@ if Sentry.OpenTelemetry.VersionChecker.tracing_compatible?() do
144274
]
145275

146276
:ets.select_delete(table_name, child_match_spec)
277+
278+
# Cleanup stale pending children
279+
pending_child_match_spec = [
280+
{{{:pending_child, :_, :_}, :"$1"}, [{:<, :"$1", cutoff_time}], [true]}
281+
]
282+
283+
:ets.select_delete(table_name, pending_child_match_spec)
284+
285+
# Cleanup stale waiting parents
286+
waiting_parent_match_spec = [
287+
{{{:waiting_parent, :_}, :_, :"$1"}, [{:<, :"$1", cutoff_time}], [true]}
288+
]
289+
290+
:ets.select_delete(table_name, waiting_parent_match_spec)
147291
end
148292

149293
defp default_table_name do

test/sentry/opentelemetry/span_storage_test.exs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,62 @@ defmodule Sentry.OpenTelemetry.SpanStorageTest do
9393
end
9494
end
9595

96+
describe "span_exists?" do
  @tag span_storage: true
  test "returns true for existing root span", %{table_name: table_name} do
    storage_opts = [table_name: table_name]

    # No parent_span_id, so this record is a root span.
    record = %SpanRecord{
      name: "root_span",
      trace_id: "trace123",
      span_id: "root123",
      parent_span_id: nil
    }

    SpanStorage.store_span(record, storage_opts)

    assert SpanStorage.span_exists?("root123", storage_opts) == true
  end

  @tag span_storage: true
  test "returns true for existing child span", %{table_name: table_name} do
    storage_opts = [table_name: table_name]

    record = %SpanRecord{
      name: "child_span",
      trace_id: "trace123",
      span_id: "child123",
      parent_span_id: "parent123"
    }

    SpanStorage.store_span(record, storage_opts)

    assert SpanStorage.span_exists?("child123", storage_opts) == true
  end

  @tag span_storage: true
  test "returns false for non-existent span", %{table_name: table_name} do
    # Nothing has been stored, so the lookup must come back empty.
    assert SpanStorage.span_exists?("nonexistent", table_name: table_name) == false
  end

  @tag span_storage: true
  test "returns true for HTTP server span with remote parent (distributed tracing)", %{
    table_name: table_name
  } do
    storage_opts = [table_name: table_name]

    # With distributed tracing, an HTTP server span carries a parent id that
    # belongs to another service, so it is stored as a child span rather than
    # as a root span.
    record = %SpanRecord{
      name: "GET /users",
      trace_id: "trace123",
      span_id: "http_span_123",
      parent_span_id: "remote_parent_456"
    }

    SpanStorage.store_span(record, storage_opts)

    # The span itself is found despite its remote parent...
    assert SpanStorage.span_exists?("http_span_123", storage_opts) == true
    # ...while the remote parent, which was never stored, is not.
    assert SpanStorage.span_exists?("remote_parent_456", storage_opts) == false
  end
end
151+
96152
describe "child spans" do
97153
@tag span_storage: true
98154
test "stores and retrieves child spans", %{table_name: table_name} do

0 commit comments

Comments
 (0)