Skip to content

Commit f336a30

Browse files
authored
fix(replay): use unit-aware timestamps for summary log sorting (#97647)
Closes [REPLAY-601: Replay logs are not always sorted](https://linear.app/getsentry/issue/REPLAY-601/replay-logs-are-not-always-sorted) `get_timestamp_unit` was validated using real replay data. Also includes a rewrite of the parse_timestamp convenience fx
1 parent 9431805 commit f336a30

File tree

3 files changed

+93
-54
lines changed

3 files changed

+93
-54
lines changed

src/sentry/replays/lib/summarize.py

Lines changed: 31 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
import logging
22
from collections.abc import Generator, Iterator
33
from datetime import datetime
4-
from typing import Any, TypedDict
4+
from typing import Any, Literal, TypedDict
55
from urllib.parse import urlparse
66

77
import sentry_sdk
88

99
from sentry import nodestore
1010
from sentry.constants import ObjectStatus
1111
from sentry.models.project import Project
12+
from sentry.replays.usecases.ingest.event_parser import EventType
1213
from sentry.replays.usecases.ingest.event_parser import (
13-
EventType,
14-
parse_network_content_lengths,
15-
which,
14+
get_timestamp_ms as get_replay_event_timestamp_ms,
1615
)
16+
from sentry.replays.usecases.ingest.event_parser import parse_network_content_lengths, which
1717
from sentry.search.events.builder.discover import DiscoverQueryBuilder
1818
from sentry.search.events.types import QueryBuilderConfig, SnubaParams
1919
from sentry.services.eventstore.models import Event
@@ -59,21 +59,18 @@ def fetch_error_details(project_id: int, error_ids: list[str]) -> list[EventDict
5959
return []
6060

6161

62-
def parse_timestamp(timestamp_value: Any, unit: str) -> float:
63-
"""Parse a timestamp input to a float value.
64-
The argument timestamp value can be string, float, or None.
65-
The returned unit will be the same as the input unit.
62+
def _parse_snuba_timestamp_to_ms(timestamp: str | float, input_unit: Literal["s", "ms"]) -> float:
6663
"""
67-
if timestamp_value is not None:
68-
if isinstance(timestamp_value, str):
69-
try:
70-
dt = datetime.fromisoformat(timestamp_value.replace("Z", "+00:00"))
71-
return dt.timestamp() * 1000 if unit == "ms" else dt.timestamp()
72-
except (ValueError, AttributeError):
73-
return 0.0
74-
else:
75-
return float(timestamp_value)
76-
return 0.0
64+
Parse a numeric or ISO timestamp to float milliseconds. `input_unit` is only used for numeric inputs.
65+
"""
66+
if isinstance(timestamp, str):
67+
try:
68+
dt = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
69+
return dt.timestamp() * 1000
70+
except (ValueError, AttributeError):
71+
return 0.0
72+
73+
return float(timestamp) * 1000 if input_unit == "s" else float(timestamp)
7774

7875

7976
@sentry_sdk.trace
@@ -138,9 +135,11 @@ def fetch_trace_connected_errors(
138135
error_data = query.process_results(result)["data"]
139136

140137
for event in error_data:
141-
timestamp_ms = parse_timestamp(event.get("timestamp_ms"), "ms")
142-
timestamp_s = parse_timestamp(event.get("timestamp"), "s")
143-
timestamp = timestamp_ms or timestamp_s * 1000
138+
snuba_ts_ms = event.get("timestamp_ms", 0.0)
139+
snuba_ts_s = event.get("timestamp", 0.0)
140+
timestamp = _parse_snuba_timestamp_to_ms(
141+
snuba_ts_ms, "ms"
142+
) or _parse_snuba_timestamp_to_ms(snuba_ts_s, "s")
144143

145144
if timestamp:
146145
error_events.append(
@@ -227,16 +226,18 @@ def generate_summary_logs(
227226
for _, segment in segment_data:
228227
events = json.loads(segment.tobytes().decode("utf-8"))
229228
for event in events:
229+
event_type = which(event)
230+
timestamp = get_replay_event_timestamp_ms(event, event_type)
231+
230232
# Check if we need to yield any error messages that occurred before this event
231-
while error_idx < len(error_events) and error_events[error_idx][
232-
"timestamp"
233-
] < event.get("timestamp", 0):
233+
while (
234+
error_idx < len(error_events) and error_events[error_idx]["timestamp"] < timestamp
235+
):
234236
error = error_events[error_idx]
235237
yield generate_error_log_message(error)
236238
error_idx += 1
237239

238240
# Yield the current event's log message
239-
event_type = which(event)
240241
if event_type == EventType.FEEDBACK:
241242
feedback_id = event["data"]["payload"].get("data", {}).get("feedbackId")
242243
feedback = fetch_feedback_details(feedback_id, project_id)
@@ -262,7 +263,7 @@ def as_log_message(event: dict[str, Any]) -> str | None:
262263
should be forked.
263264
"""
264265
event_type = which(event)
265-
timestamp = event.get("timestamp", 0.0)
266+
timestamp = get_replay_event_timestamp_ms(event, event_type)
266267

267268
try:
268269
match event_type:
@@ -276,20 +277,16 @@ def as_log_message(event: dict[str, Any]) -> str | None:
276277
message = event["data"]["payload"]["message"]
277278
return f"User rage clicked on {message} but the triggered action was slow to complete at {timestamp}"
278279
case EventType.NAVIGATION_SPAN:
279-
timestamp_ms = timestamp * 1000
280280
to = event["data"]["payload"]["description"]
281-
return f"User navigated to: {to} at {timestamp_ms}"
281+
return f"User navigated to: {to} at {timestamp}"
282282
case EventType.CONSOLE:
283283
message = event["data"]["payload"]["message"]
284284
return f"Logged: {message} at {timestamp}"
285285
case EventType.UI_BLUR:
286-
# timestamp_ms = timestamp * 1000
287286
return None
288287
case EventType.UI_FOCUS:
289-
# timestamp_ms = timestamp * 1000
290288
return None
291289
case EventType.RESOURCE_FETCH:
292-
timestamp_ms = timestamp * 1000
293290
payload = event["data"]["payload"]
294291
method = payload["data"]["method"]
295292
status_code = payload["data"]["statusCode"]
@@ -311,14 +308,13 @@ def as_log_message(event: dict[str, Any]) -> str | None:
311308
return None
312309

313310
if response_size is None:
314-
return f'Application initiated request: "{method} {path} HTTP/2.0" with status code {status_code}; took {duration} milliseconds at {timestamp_ms}'
311+
return f'Application initiated request: "{method} {path} HTTP/2.0" with status code {status_code}; took {duration} milliseconds at {timestamp}'
315312
else:
316-
return f'Application initiated request: "{method} {path} HTTP/2.0" with status code {status_code} and response size {response_size}; took {duration} milliseconds at {timestamp_ms}'
313+
return f'Application initiated request: "{method} {path} HTTP/2.0" with status code {status_code} and response size {response_size}; took {duration} milliseconds at {timestamp}'
317314
case EventType.LCP:
318-
timestamp_ms = timestamp * 1000
319315
duration = event["data"]["payload"]["data"]["size"]
320316
rating = event["data"]["payload"]["data"]["rating"]
321-
return f"Application largest contentful paint: {duration} ms and has a {rating} rating at {timestamp_ms}"
317+
return f"Application largest contentful paint: {duration} ms and has a {rating} rating at {timestamp}"
322318
case EventType.HYDRATION_ERROR:
323319
return f"There was a hydration error on the page at {timestamp}"
324320
case EventType.RESOURCE_XHR:

src/sentry/replays/usecases/ingest/event_parser.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from dataclasses import dataclass
88
from datetime import datetime
99
from enum import Enum
10-
from typing import Any, TypedDict, TypeVar
10+
from typing import Any, Literal, TypedDict, TypeVar
1111

1212
import sentry_sdk
1313
from sentry_protos.snuba.v1.trace_item_pb2 import TraceItem
@@ -217,6 +217,51 @@ def which_iter(events: list[dict[str, Any]]) -> Iterator[tuple[EventType, dict[s
217217
yield (which(event), event)
218218

219219

220+
def get_timestamp_unit(event_type: EventType) -> Literal["s", "ms"]:
221+
"""
222+
Returns the time unit of event["timestamp"] for a replay event.
223+
This is not guaranteed to match event.data.payload.timestamp.
224+
225+
We do not allow wildcard or default cases. Please be explicit when adding new types.
226+
Beware that EventType.UNKNOWN returns "ms" but there's no way to know the actual unit.
227+
"""
228+
match event_type:
229+
case (
230+
EventType.CLS
231+
| EventType.LCP
232+
| EventType.FEEDBACK
233+
| EventType.MEMORY
234+
| EventType.MUTATIONS
235+
| EventType.NAVIGATION_SPAN
236+
| EventType.RESOURCE_FETCH
237+
| EventType.RESOURCE_IMAGE
238+
| EventType.RESOURCE_SCRIPT
239+
| EventType.RESOURCE_XHR
240+
| EventType.UI_BLUR
241+
| EventType.UI_FOCUS
242+
):
243+
return "s"
244+
case (
245+
EventType.CANVAS
246+
| EventType.CONSOLE
247+
| EventType.CLICK
248+
| EventType.DEAD_CLICK
249+
| EventType.RAGE_CLICK
250+
| EventType.SLOW_CLICK
251+
| EventType.HYDRATION_ERROR
252+
| EventType.NAVIGATION
253+
| EventType.OPTIONS
254+
| EventType.UNKNOWN
255+
):
256+
return "ms"
257+
258+
259+
def get_timestamp_ms(event: dict[str, Any], event_type: EventType) -> float:
260+
if get_timestamp_unit(event_type) == "s":
261+
return event.get("timestamp", 0) * 1000
262+
return event.get("timestamp", 0)
263+
264+
220265
#
221266
# EAP Trace Item Processor
222267
#

tests/sentry/replays/unit/lib/test_summarize.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
from sentry.models.project import Project
44
from sentry.replays.lib.summarize import (
55
EventDict,
6+
_parse_snuba_timestamp_to_ms,
67
as_log_message,
78
get_summary_logs,
8-
parse_timestamp,
99
)
1010
from sentry.testutils.pytest.fixtures import django_db_all
1111
from sentry.utils import json
@@ -255,29 +255,27 @@ def test_as_log_message() -> None:
255255
assert as_log_message({}) is None
256256

257257

258-
def test_parse_timestamp() -> None:
259-
# Test None input
260-
assert parse_timestamp(None, "ms") == 0.0
261-
assert parse_timestamp(None, "s") == 0.0
262-
258+
def test_parse_snuba_timestamp() -> None:
263259
# Test numeric input
264-
assert parse_timestamp(123.456, "ms") == 123.456
265-
assert parse_timestamp(123, "s") == 123.0
260+
assert _parse_snuba_timestamp_to_ms(123.456, "ms") == 123.456
261+
assert _parse_snuba_timestamp_to_ms(123, "s") == 123000
262+
263+
# Note input unit is ignored for string inputs.
266264

267265
# Test string input with ISO format without timezone
268-
assert parse_timestamp("2023-01-01T12:00:00", "ms") == 1672574400.0 * 1000
269-
assert parse_timestamp("2023-01-01T12:00:00", "s") == 1672574400.0
266+
assert _parse_snuba_timestamp_to_ms("2023-01-01T12:00:00", "ms") == 1672574400000
267+
assert _parse_snuba_timestamp_to_ms("2023-01-01T12:00:00", "s") == 1672574400000
270268

271269
# Test string input with ISO format with timezone offset
272-
assert parse_timestamp("2023-01-01T12:00:00+00:00", "ms") == 1672574400.0 * 1000
273-
assert parse_timestamp("2023-01-01T12:00:00.123+00:00", "ms") == 1672574400.123 * 1000
274-
assert parse_timestamp("2023-01-01T12:00:00+00:00", "s") == 1672574400.0
270+
assert _parse_snuba_timestamp_to_ms("2023-01-01T12:00:00+00:00", "ms") == 1672574400000
271+
assert _parse_snuba_timestamp_to_ms("2023-01-01T12:00:00.123+00:00", "ms") == 1672574400123
272+
assert _parse_snuba_timestamp_to_ms("2023-01-01T12:00:00+00:00", "s") == 1672574400000
275273

276274
# Test string input with ISO format with 'Z' timezone suffix
277-
assert parse_timestamp("2023-01-01T12:00:00Z", "s") == 1672574400.0
278-
assert parse_timestamp("2023-01-01T12:00:00.123Z", "ms") == 1672574400.123 * 1000
275+
assert _parse_snuba_timestamp_to_ms("2023-01-01T12:00:00Z", "s") == 1672574400000
276+
assert _parse_snuba_timestamp_to_ms("2023-01-01T12:00:00.123Z", "ms") == 1672574400123
279277

280278
# Test invalid input
281-
assert parse_timestamp("invalid timestamp", "ms") == 0.0
282-
assert parse_timestamp("", "ms") == 0.0
283-
assert parse_timestamp("2023-13-01T12:00:00Z", "ms") == 0.0
279+
assert _parse_snuba_timestamp_to_ms("invalid timestamp", "ms") == 0.0
280+
assert _parse_snuba_timestamp_to_ms("", "ms") == 0.0
281+
assert _parse_snuba_timestamp_to_ms("2023-13-01T12:00:00Z", "ms") == 0.0

0 commit comments

Comments
 (0)