-
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
perf(replays): Optimize memory usage and deserialization performance #101195
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
7f07fa4
86a999f
d3be983
5a9293c
57b253f
dc96213
529589f
ad56a84
072448c
6ba0ac3
a053f42
11e155b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |
| from datetime import datetime, timezone | ||
| from typing import Any, TypedDict | ||
|
|
||
| import msgspec | ||
| import sentry_sdk | ||
| from sentry_protos.snuba.v1.trace_item_pb2 import TraceItem | ||
|
|
||
|
|
@@ -38,6 +39,58 @@ | |
| logger.addFilter(SamplingFilter(LOG_SAMPLE_RATE)) | ||
|
|
||
|
|
||
| # Msgspec allows us to define a schema which we can deserialize the typed JSON into. We can also | ||
| # leverage this fact to opportunistically avoid deserialization. This is especially important | ||
| # because we have huge JSON types that we don't really care about. | ||
|
|
||
|
|
||
| class DomContentLoadedEvent(msgspec.Struct, gc=False, tag_field="type", tag=0): | ||
| pass | ||
|
|
||
|
|
||
| class LoadedEvent(msgspec.Struct, gc=False, tag_field="type", tag=1): | ||
| pass | ||
|
|
||
|
|
||
| class FullSnapshotEvent(msgspec.Struct, gc=False, tag_field="type", tag=2): | ||
| pass | ||
|
|
||
|
|
||
| class IncrementalSnapshotEvent(msgspec.Struct, gc=False, tag_field="type", tag=3): | ||
| pass | ||
|
|
||
|
|
||
| class MetaEvent(msgspec.Struct, gc=False, tag_field="type", tag=4): | ||
| pass | ||
|
|
||
|
|
||
| class PluginEvent(msgspec.Struct, gc=False, tag_field="type", tag=6): | ||
| pass | ||
|
|
||
|
|
||
| # These are the schema definitions we care about. | ||
|
|
||
|
|
||
| class CustomEventData(msgspec.Struct, gc=False): | ||
| tag: str | ||
| payload: Any | ||
|
|
||
|
|
||
| class CustomEvent(msgspec.Struct, gc=False, tag_field="type", tag=5): | ||
| data: CustomEventData | ||
|
|
||
|
|
||
| RRWebEvent = ( | ||
| DomContentLoadedEvent | ||
| | LoadedEvent | ||
| | FullSnapshotEvent | ||
| | IncrementalSnapshotEvent | ||
| | MetaEvent | ||
| | CustomEvent | ||
| | PluginEvent | ||
| ) | ||
|
|
||
|
|
||
| class DropEvent(Exception): | ||
| pass | ||
|
|
||
|
|
@@ -112,6 +165,21 @@ def process_recording_event(message: Event) -> ProcessedEvent: | |
|
|
||
| def parse_replay_events(message: Event): | ||
| try: | ||
| try: | ||
| # We're parsing with msgspec (if we can) and then transforming to the type that | ||
| # JSON.loads returns. | ||
| events = [ | ||
| {"data": {"tag": e.data.tag, "payload": e.data.payload}} | ||
| for e in msgspec.json.decode(message["payload"], type=list[RRWebEvent]) | ||
| if isinstance(e, CustomEvent) | ||
| ] | ||
| except Exception: | ||
| # We're emitting a metric instead of logging in case this thing really fails hard in | ||
| # prod. We don't want a huge volume of logs slowing throughput. If there's a | ||
| # significant volume of this metric we'll test against a broader cohort of data. | ||
| metrics.incr("replays.recording_consumer.msgspec_decode_error") | ||
| events = json.loads(message["payload"]) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bug: Replay Parsing InconsistencyThe replay event parsing has inconsistent behavior between the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is true. It is unpredictable (more specifically it is deterministic but dependent on user input which we may consider to be random) whether certain paths will be taken. However the affected paths are minor logging statements. |
||
|
|
||
| return parse_events( | ||
| { | ||
| "organization_id": message["context"]["org_id"], | ||
|
|
@@ -122,7 +190,7 @@ def parse_replay_events(message: Event): | |
| "segment_id": message["context"]["segment_id"], | ||
| "trace_id": extract_trace_id(message["replay_event"]), | ||
| }, | ||
| json.loads(message["payload"]), | ||
| events, | ||
cursor[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ) | ||
| except Exception: | ||
| logger.exception( | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.