Skip to content

Commit b957929

Browse files
ref(llm-detection): Send enhanced span data to support improved Seer analysis (#103871)
Extends the span data sent to Seer with the additional fields needed for more accurate LLM issue detection. This provides the telemetry improvements requested through `missing_telemetry` feedback and supports the stricter detection controls in getsentry/seer#4096.

Changes:
- Add the `EvidenceSpan` model (matches the model defined in Seer).
- Add a new function for getting the trace for a transaction.
- Add additional columns to `selected_columns` when searching for traces.

---------

Co-authored-by: Richard Roggenkemper <[email protected]>
1 parent 5183c5d commit b957929

File tree

6 files changed

+293
-30
lines changed

6 files changed

+293
-30
lines changed

src/sentry/seer/explorer/index_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def get_trace_for_transaction(transaction_name: str, project_id: int) -> TraceDa
195195
sampling_mode="NORMAL",
196196
)
197197

198-
# Step 4: Build span objects
198+
# Step 3: Build span objects
199199
spans = []
200200
for row in spans_result.get("data", []):
201201
span_id = row.get("span_id")

src/sentry/seer/sentry_data_models.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,16 @@ class Span(BaseModel):
2222
span_description: str | None
2323

2424

class EvidenceSpan(BaseModel):
    """
    A single span in the shape sent to Seer for LLM issue detection.

    Every field is optional and defaults to ``None``.
    """

    span_id: str | None = None
    parent_span_id: str | None = None
    timestamp: float | None = None
    op: str | None = None
    description: str | None = None
    exclusive_time: float | None = None  # duration in milliseconds
    # Free-form extra attributes for the span.
    data: dict[str, Any] | None = None


2535
class TraceData(BaseModel):
2636
trace_id: str
2737
project_id: int
@@ -30,6 +40,14 @@ class TraceData(BaseModel):
3040
spans: list[Span]
3141

3242

class EvidenceTraceData(BaseModel):
    """
    One trace and its spans, in the shape sent to Seer for LLM issue detection.
    """

    trace_id: str
    project_id: int
    transaction_name: str
    # Span count reported alongside `spans`.
    total_spans: int
    spans: list[EvidenceSpan]


3351
class EAPTrace(BaseModel):
3452
"""
3553
Based on the Seer model. `trace` can contain both span and error events (see `SerializedEvent`).
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
"""
Package entry point for LLM issue detection.

Re-exports the public names of the ``detection`` submodule so they can be
imported directly from ``sentry.tasks.llm_issue_detection``.
"""

from sentry.tasks.llm_issue_detection.detection import (
    DetectedIssue,
    create_issue_occurrence_from_detection,
    detect_llm_issues_for_project,
    run_llm_issue_detection,
)

__all__ = [
    "DetectedIssue",
    "create_issue_occurrence_from_detection",
    "detect_llm_issues_for_project",
    "run_llm_issue_detection",
]

src/sentry/tasks/llm_issue_detection.py renamed to src/sentry/tasks/llm_issue_detection/detection.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@
1515
from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka
1616
from sentry.models.project import Project
1717
from sentry.net.http import connection_from_url
18-
from sentry.seer.explorer.index_data import get_trace_for_transaction, get_transactions_for_project
18+
from sentry.seer.explorer.index_data import get_transactions_for_project
1919
from sentry.seer.models import SeerApiError
20-
from sentry.seer.sentry_data_models import TraceData
20+
from sentry.seer.sentry_data_models import EvidenceTraceData
2121
from sentry.seer.signed_seer_api import make_signed_seer_api_request
2222
from sentry.tasks.base import instrumented_task
23+
from sentry.tasks.llm_issue_detection.trace_data import get_evidence_trace_for_llm_detection
2324
from sentry.taskworker.namespaces import issues_tasks
2425
from sentry.utils import json
2526

@@ -73,7 +74,7 @@ def __init__(
7374

7475
def create_issue_occurrence_from_detection(
7576
detected_issue: DetectedIssue,
76-
trace: TraceData,
77+
trace: EvidenceTraceData,
7778
project_id: int,
7879
transaction_name: str,
7980
) -> None:
@@ -181,6 +182,7 @@ def detect_llm_issues_for_project(project_id: int) -> None:
181182
"""
182183
project = Project.objects.get_from_cache(id=project_id)
183184
organization = project.organization
185+
organization_id = organization.id
184186

185187
has_access = features.has("organizations:gen-ai-features", organization) and not bool(
186188
organization.get_option("sentry:hide_ai_features")
@@ -203,9 +205,8 @@ def detect_llm_issues_for_project(project_id: int) -> None:
203205
break
204206

205207
try:
206-
trace: TraceData | None = get_trace_for_transaction(
207-
transaction.name, transaction.project_id
208-
)
208+
trace = get_evidence_trace_for_llm_detection(transaction.name, transaction.project_id)
209+
209210
if (
210211
not trace
211212
or trace.total_spans < LOWER_SPAN_LIMIT
@@ -226,7 +227,7 @@ def detect_llm_issues_for_project(project_id: int) -> None:
226227

227228
seer_request = {
228229
"telemetry": [{**trace.dict(), "kind": "trace"}],
229-
"organization_id": organization.id,
230+
"organization_id": organization_id,
230231
"project_id": project_id,
231232
}
232233
response = make_signed_seer_api_request(
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
"""
2+
Functions for fetching trace data optimized for LLM issue detection.
3+
"""
4+
5+
from __future__ import annotations
6+
7+
import logging
8+
import re
9+
from datetime import UTC, datetime, timedelta
10+
11+
from sentry.models.project import Project
12+
from sentry.search.eap.types import SearchResolverConfig
13+
from sentry.search.events.types import SnubaParams
14+
from sentry.seer.sentry_data_models import EvidenceSpan, EvidenceTraceData
15+
from sentry.snuba.referrer import Referrer
16+
from sentry.snuba.spans_rpc import Spans
17+
logger = logging.getLogger(__name__)

# Matches a double quote that is not immediately preceded by a backslash,
# i.e. a quote that has not already been escaped.
# NOTE(review): only the single preceding character is inspected, so a quote
# that follows an escaped backslash (`\\"`) is also treated as already
# escaped - confirm that is acceptable for the values passed in.
UNESCAPED_QUOTE_RE = re.compile(r'(?<!\\)"')
22+
23+
def get_evidence_trace_for_llm_detection(
    transaction_name: str, project_id: int
) -> EvidenceTraceData | None:
    """
    Get trace data with performance metrics for LLM issue detection.

    Searches the last 24 hours of spans for the given transaction, takes the
    trace of the first row returned when ordered by ``precise.start_ts``, and
    then loads up to 1000 spans belonging to that trace.

    Args:
        transaction_name: The name of the transaction to find traces for
        project_id: The ID of the project

    Returns:
        EvidenceTraceData with spans including performance metrics, or None if
        the project does not exist or no trace was found for the transaction.
    """
    try:
        project = Project.objects.get(id=project_id)
    except Project.DoesNotExist:
        logger.exception(
            "Project does not exist; cannot fetch traces for LLM detection",
            extra={"project_id": project_id, "transaction_name": transaction_name},
        )
        return None

    end_time = datetime.now(UTC)
    start_time = end_time - timedelta(hours=24)

    snuba_params = SnubaParams(
        start=start_time,
        end=end_time,
        projects=[project],
        organization=project.organization,
    )
    config = SearchResolverConfig(
        auto_fields=True,
    )

    # The transaction name is embedded in a double-quoted filter value below,
    # so any bare double quote inside it is backslash-escaped first.
    escaped_transaction_name = UNESCAPED_QUOTE_RE.sub('\\"', transaction_name)
    traces_result = Spans.run_table_query(
        params=snuba_params,
        query_string=f'transaction:"{escaped_transaction_name}" project.id:{project_id}',
        selected_columns=[
            "trace",
            "precise.start_ts",
        ],
        orderby=["precise.start_ts"],
        offset=0,
        limit=1,
        referrer=Referrer.SEER_RPC,
        config=config,
        sampling_mode="NORMAL",
    )

    # First non-empty trace id among the returned rows, if any.
    trace_id = next(
        (row.get("trace") for row in traces_result.get("data", []) if row.get("trace")),
        None,
    )

    if not trace_id:
        logger.info(
            "No traces found for transaction (LLM detection)",
            extra={"transaction_name": transaction_name, "project_id": project_id},
        )
        return None

    spans_result = Spans.run_table_query(
        params=snuba_params,
        query_string=f"trace:{trace_id}",
        selected_columns=[
            "span_id",
            "parent_span",
            "span.op",
            "span.description",
            "precise.start_ts",
            "span.self_time",
            "span.duration",
            "span.status",
        ],
        orderby=["precise.start_ts"],
        offset=0,
        limit=1000,
        referrer=Referrer.SEER_RPC,
        config=config,
        sampling_mode="NORMAL",
    )

    # Rows without a span id are dropped; a missing description becomes "",
    # while every other missing column is passed through as None.
    evidence_spans: list[EvidenceSpan] = [
        EvidenceSpan(
            span_id=row.get("span_id"),
            parent_span_id=row.get("parent_span"),
            op=row.get("span.op"),
            description=row.get("span.description") or "",
            exclusive_time=row.get("span.self_time"),
            timestamp=row.get("precise.start_ts"),
            data={
                "duration": row.get("span.duration"),
                "status": row.get("span.status"),
            },
        )
        for row in spans_result.get("data", [])
        if row.get("span_id")
    ]

    return EvidenceTraceData(
        trace_id=trace_id,
        project_id=project_id,
        transaction_name=transaction_name,
        total_spans=len(evidence_spans),
        spans=evidence_spans,
    )

0 commit comments

Comments
 (0)