Commit b964a45

ref(spans): Use provided segment_id for buffering (#97832)
Now that the span buffer will be used in the ingestion pipeline, we can use the `segment_id` provided by Relay on most spans. This guarantees correct output, since Relay takes the `segment_id` from the transaction event. For OTel spans, the buffer still constructs span trees recursively via the `parent_id`.

This particularly makes a difference for transactions/segments where some spans are disconnected from the tree: they will now be correctly assigned to the segment. Metrics on mismatches and the "original_segment_id" attribute are removed, since the IDs will now always match whenever they are available; by definition, the match rate would be 100%.
1 parent 00cbd95 commit b964a45
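As a minimal, self-contained sketch of the new behavior (a hypothetical `SketchSpan` class and made-up IDs, not code from this repo), the grouping key now prefers the Relay-provided `segment_id` and only falls back to `parent_span_id` for spans without one, such as OTel spans:

from typing import NamedTuple


class SketchSpan(NamedTuple):
    # Hypothetical stand-in for the buffer's Span tuple, trimmed to the
    # fields relevant to grouping.
    span_id: str
    parent_span_id: str | None
    segment_id: str | None  # provided by Relay for spans extracted from transactions
    is_segment_span: bool = False

    @property
    def effective_parent_id(self) -> str:
        # Segment spans anchor themselves.
        if self.is_segment_span:
            return self.span_id
        # New fallback order: Relay-provided segment_id first, then the
        # parent span, then the span itself (orphans).
        return self.segment_id or self.parent_span_id or self.span_id


# A span whose parent never arrived ("disconnected from the tree") still lands
# in the right segment because Relay supplied the segment_id.
relay_span = SketchSpan(span_id="c" * 16, parent_span_id="f" * 16, segment_id="a" * 16)
assert relay_span.effective_parent_id == "a" * 16

# An OTel span carries no segment_id, so grouping keeps walking parent_span_id.
otel_span = SketchSpan(span_id="d" * 16, parent_span_id="b" * 16, segment_id=None)
assert otel_span.effective_parent_id == "b" * 16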

File tree

5 files changed: +101 / -30 lines

src/sentry/spans/buffer.py

Lines changed: 14 additions & 24 deletions
@@ -126,6 +126,7 @@ class Span(NamedTuple):
     trace_id: str
     span_id: str
     parent_span_id: str | None
+    segment_id: str | None
     project_id: int
     payload: bytes
     end_timestamp_precise: float
@@ -138,7 +139,7 @@ def effective_parent_id(self):
         if self.is_segment_span:
             return self.span_id
         else:
-            return self.parent_span_id or self.span_id
+            return self.segment_id or self.parent_span_id or self.span_id


 class OutputSpan(NamedTuple):
@@ -308,9 +309,14 @@ def _group_by_parent(self, spans: Sequence[Span]) -> dict[tuple[str, str], list[
         top-most known parent, and the value is a flat list of all its
         transitive children.

+        For spans with a known segment_id, the grouping is done by the
+        segment_id instead of the parent_span_id. This is the case for spans
+        extracted from transaction events, or if in the future SDKs provide
+        segment IDs.
+
         :param spans: List of spans to be grouped.
-        :return: Dictionary of grouped spans. The key is a tuple of
-            the `project_and_trace`, and the `parent_span_id`.
+        :return: Dictionary of grouped spans. The key is a tuple of the
+            `project_and_trace`, and the `parent_span_id`.
         """
         trees: dict[tuple[str, str], list[Span]] = {}
         redirects: dict[str, dict[str, str]] = {}
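A simplified sketch of the grouping contract documented above (the `group_by_parent_sketch` helper is hypothetical and operates on plain dicts; the real `_group_by_parent` additionally merges subtrees through a redirect table and handles segment spans):

from collections import defaultdict


def group_by_parent_sketch(spans, project_and_trace):
    # Group span dicts under (project_and_trace, grouping key), preferring a
    # known segment_id over the parent_span_id.
    trees = defaultdict(list)
    for span in spans:
        key = span.get("segment_id") or span.get("parent_span_id") or span["span_id"]
        trees[(project_and_trace, key)].append(span)
    return dict(trees)


spans = [
    {"span_id": "a" * 16, "parent_span_id": None, "segment_id": "a" * 16},
    # Parent "e" * 16 was never ingested; the Relay-provided segment_id still
    # places this span under "a" * 16 instead of opening a stray group.
    {"span_id": "c" * 16, "parent_span_id": "e" * 16, "segment_id": "a" * 16},
]
grouped = group_by_parent_sketch(spans, "1:" + "f" * 32)
assert set(grouped) == {("1:" + "f" * 32, "a" * 16)}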
@@ -423,31 +429,15 @@ def flush_segments(self, now: int) -> dict[SegmentKey, FlushedSegment]:
             metrics.timing("spans.buffer.flush_segments.num_spans_per_segment", len(segment))
             for payload in segment:
                 val = orjson.loads(payload)
-                old_segment_id = val.get("segment_id")
-                outcome = "same" if old_segment_id == segment_span_id else "different"

-                is_segment = val["is_segment"] = segment_span_id == val["span_id"]
+                if not val.get("segment_id"):
+                    val["segment_id"] = segment_span_id
+
+                is_segment = segment_span_id == val["span_id"]
+                val["is_segment"] = is_segment
                 if is_segment:
                     has_root_span = True

-                val_data = val.setdefault("data", {})
-                if isinstance(val_data, dict):
-                    val_data["sentry._internal.span_buffer_segment_id_outcome"] = outcome
-
-                    if old_segment_id:
-                        val_data["sentry._internal.span_buffer_old_segment_id"] = old_segment_id
-
-                val["segment_id"] = segment_span_id
-
-                metrics.incr(
-                    "spans.buffer.flush_segments.is_same_segment",
-                    tags={
-                        "outcome": outcome,
-                        "is_segment_span": is_segment,
-                        "old_segment_is_null": "true" if old_segment_id is None else "false",
-                    },
-                )
-
                 output_spans.append(OutputSpan(payload=val))

             metrics.incr(
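For the `flush_segments` change, a small sketch of the backfill step on an already-decoded payload dict (the `backfill_segment_id` helper is hypothetical; the real code decodes with orjson and records metrics around this loop):

def backfill_segment_id(val, segment_span_id):
    # Keep a segment_id that Relay already provided; only fill in the gap.
    if not val.get("segment_id"):
        val["segment_id"] = segment_span_id
    # The root span of the segment is the one whose own id equals the segment id.
    val["is_segment"] = val["span_id"] == segment_span_id
    return val


relay_payload = {"span_id": "b" * 16, "segment_id": "a" * 16}
otel_payload = {"span_id": "c" * 16}

assert backfill_segment_id(relay_payload, "a" * 16)["segment_id"] == "a" * 16
assert backfill_segment_id(otel_payload, "a" * 16) == {
    "span_id": "c" * 16,
    "segment_id": "a" * 16,
    "is_segment": False,
}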

src/sentry/spans/consumers/process/factory.py

Lines changed: 1 addition & 0 deletions
@@ -182,6 +182,7 @@ def process_batch(
                 trace_id=val["trace_id"],
                 span_id=val["span_id"],
                 parent_span_id=val.get("parent_span_id"),
+                segment_id=cast(str | None, val.get("segment_id")),
                 project_id=val["project_id"],
                 payload=payload.value,
                 end_timestamp_precise=val["end_timestamp_precise"],
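The `cast` in the hunk above only informs the type checker; it has no runtime effect. A tiny illustration (the `val` dict here is a stand-in, not the consumer's actual message type):

from typing import Any, cast

val: dict[str, Any] = {"segment_id": "a" * 16}
# cast(str | None, ...) returns the value unchanged; it only narrows the type
# for static analysis of the loosely typed payload dict.
segment_id = cast(str | None, val.get("segment_id"))
assert segment_id == "a" * 16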

tests/sentry/spans/consumers/process/test_consumer.py

Lines changed: 0 additions & 3 deletions
@@ -76,9 +76,6 @@ def add_commit(offsets, force=False):
     assert orjson.loads(msg.value) == {
         "spans": [
             {
-                "data": {
-                    "sentry._internal.span_buffer_segment_id_outcome": "different",
-                },
                 "is_segment": True,
                 "project_id": 12,
                 "segment_id": "aaaaaaaaaaaaaaaa",

tests/sentry/spans/consumers/process/test_flusher.py

Lines changed: 4 additions & 0 deletions
@@ -48,6 +48,7 @@ def append(msg):
                 trace_id=trace_id,
                 span_id="a" * 16,
                 parent_span_id="b" * 16,
+                segment_id=None,
                 project_id=1,
                 end_timestamp_precise=now,
             ),
@@ -56,6 +57,7 @@ def append(msg):
                 trace_id=trace_id,
                 span_id="d" * 16,
                 parent_span_id="b" * 16,
+                segment_id=None,
                 project_id=1,
                 end_timestamp_precise=now,
             ),
@@ -64,6 +66,7 @@ def append(msg):
                 trace_id=trace_id,
                 span_id="c" * 16,
                 parent_span_id="b" * 16,
+                segment_id=None,
                 project_id=1,
                 end_timestamp_precise=now,
             ),
@@ -73,6 +76,7 @@ def append(msg):
                 span_id="b" * 16,
                 parent_span_id=None,
                 is_segment_span=True,
+                segment_id=None,
                 project_id=1,
                 end_timestamp_precise=now,
             ),
