Skip to content

Commit 1d5f016

Browse files
Swatinem authored and andrewshie-sentry committed
Load view hierarchies only once during processing (#98512)
This moves view hierarchy handling into its own file and changes the Java language processor so that it loads each view hierarchy only once. Previously, it loaded them twice: once to collect all the window class names, and a second time to actually deobfuscate them.
1 parent 0452cb7 commit 1d5f016

File tree

5 files changed

+86
-210
lines changed

5 files changed

+86
-210
lines changed

src/sentry/lang/dart/utils.py

Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -109,46 +109,3 @@ def replace_symbol(match: re.Match[str]) -> str:
109109
new_value = re.sub(INSTANCE_OF_VALUE_RE, replace_symbol, exception_value)
110110
if new_value != exception_value:
111111
exception["value"] = new_value
112-
113-
114-
# TODO(buenaflor): Add this back in when we decide to deobfuscate view hierarchies
115-
#
116-
# def _deobfuscate_view_hierarchy(event_data: dict[str, Any], project: Project, view_hierarchy):
117-
# """
118-
# Deobfuscates a view hierarchy in-place.
119-
120-
# If we're unable to fetch a dart symbols uuid, then the view hierarchy remains unmodified.
121-
# """
122-
# dart_symbols_uuids = get_debug_meta_image_ids(event_data)
123-
# if len(dart_symbols_uuids) == 0:
124-
# return
125-
126-
# with sentry_sdk.start_span(op="dartsymbolmap.deobfuscate_view_hierarchy_data"):
127-
# for dart_symbols_uuid in dart_symbols_uuids:
128-
# map = generate_dart_symbols_map(dart_symbols_uuid, project)
129-
# if map is None:
130-
# return
131-
132-
# windows_to_deobfuscate = [*view_hierarchy.get("windows")]
133-
# while windows_to_deobfuscate:
134-
# window = windows_to_deobfuscate.pop()
135-
136-
# if window.get("type") is None:
137-
# # If there is no type, then skip this window
138-
# continue
139-
140-
# matcher = re.match(VIEW_HIERARCHY_TYPE_REGEX, window.get("type"))
141-
# if not matcher:
142-
# continue
143-
# obfuscated_values = matcher.groups()
144-
# for obfuscated_value in obfuscated_values:
145-
# if obfuscated_value is not None and obfuscated_value in map:
146-
# window["type"] = window["type"].replace(
147-
# obfuscated_value, map[obfuscated_value]
148-
# )
149-
150-
# if children := window.get("children"):
151-
# windows_to_deobfuscate.extend(children)
152-
153-
# def deobfuscate_view_hierarchy(data):
154-
# return deobfuscation_template(data, "dartsymbolmap", _deobfuscate_view_hierarchy)

src/sentry/lang/java/processing.py

Lines changed: 4 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,15 @@
33
from collections.abc import Mapping
44
from typing import Any
55

6-
import orjson
7-
8-
from sentry.attachments import CachedAttachment, attachment_cache
9-
from sentry.ingest.consumer.processors import CACHE_TIMEOUT
106
from sentry.lang.java.utils import JAVA_PLATFORMS, get_jvm_images, get_proguard_images
7+
from sentry.lang.java.view_hierarchies import ViewHierarchies
118
from sentry.lang.native.error import SymbolicationFailed, write_error
129
from sentry.lang.native.symbolicator import Symbolicator
1310
from sentry.models.eventerror import EventError
1411
from sentry.models.project import Project
1512
from sentry.models.release import Release
1613
from sentry.stacktraces.processing import find_stacktraces_in_data
1714
from sentry.utils import metrics
18-
from sentry.utils.cache import cache_key_for_event
1915
from sentry.utils.safe import get_path
2016

2117
logger = logging.getLogger(__name__)
@@ -150,76 +146,6 @@ def _get_release_package(project: Project, release_name: str | None) -> str | No
150146
return release.package if release else None
151147

152148

153-
def _get_window_class_names(attachments: list[CachedAttachment]) -> list[str]:
154-
"""Returns the class names of all windows in all view hierarchies
155-
contained in `attachments`."""
156-
157-
class_names = []
158-
windows_to_deobfuscate = []
159-
160-
for attachment in attachments:
161-
if attachment.type == "event.view_hierarchy":
162-
view_hierarchy = orjson.loads(attachment_cache.get_data(attachment))
163-
windows_to_deobfuscate.extend(view_hierarchy.get("windows"))
164-
165-
while windows_to_deobfuscate:
166-
window = windows_to_deobfuscate.pop()
167-
if window.get("type") is not None:
168-
class_names.append(window["type"])
169-
if children := window.get("children"):
170-
windows_to_deobfuscate.extend(children)
171-
172-
return class_names
173-
174-
175-
def _deobfuscate_view_hierarchy(view_hierarchy: Any, class_names: dict[str, str]) -> None:
176-
"""Deobfuscates a view hierarchy in-place.
177-
178-
The `class_names` dict is used to resolve obfuscated to deobfuscated names. If
179-
an obfuscated class name isn't present in `class_names`, it is left unchanged."""
180-
181-
windows_to_deobfuscate = [*view_hierarchy.get("windows")]
182-
183-
while windows_to_deobfuscate:
184-
window = windows_to_deobfuscate.pop()
185-
if (
186-
window.get("type") is not None
187-
and (mapped_type := class_names.get(window["type"])) is not None
188-
):
189-
window["type"] = mapped_type
190-
if children := window.get("children"):
191-
windows_to_deobfuscate.extend(children)
192-
193-
194-
def _deobfuscate_view_hierarchies(
195-
attachments: list[CachedAttachment], class_names: dict[str, str]
196-
) -> list[CachedAttachment]:
197-
"""Deobfuscates all view hierarchies contained in `attachments`, returning a new list of attachments.
198-
199-
Non-view-hierarchy attachments are unchanged.
200-
"""
201-
new_attachments = []
202-
for attachment in attachments:
203-
if attachment.type == "event.view_hierarchy":
204-
view_hierarchy = orjson.loads(attachment_cache.get_data(attachment))
205-
_deobfuscate_view_hierarchy(view_hierarchy, class_names)
206-
# Reupload to cache as a unchunked data
207-
new_attachments.append(
208-
CachedAttachment(
209-
type=attachment.type,
210-
id=attachment.id,
211-
name=attachment.name,
212-
content_type=attachment.content_type,
213-
data=orjson.dumps(view_hierarchy),
214-
chunks=None,
215-
)
216-
)
217-
else:
218-
new_attachments.append(attachment)
219-
220-
return new_attachments
221-
222-
223149
def map_symbolicator_process_jvm_errors(
224150
errors: list[dict[str, Any]] | None,
225151
) -> list[dict[str, Any]]:
@@ -276,9 +202,8 @@ def process_jvm_stacktraces(symbolicator: Symbolicator, data: Any) -> Any:
276202
]
277203

278204
processable_exceptions = _get_exceptions_for_symbolication(data)
279-
cache_key = cache_key_for_event(data)
280-
attachments = [*attachment_cache.get(cache_key)]
281-
window_class_names = _get_window_class_names(attachments)
205+
view_hierarchies = ViewHierarchies(data)
206+
window_class_names = view_hierarchies.get_window_class_names()
282207

283208
metrics.incr("proguard.symbolicator.events")
284209

@@ -341,7 +266,6 @@ def process_jvm_stacktraces(symbolicator: Symbolicator, data: Any) -> Any:
341266
raw_exc["type"] = exc["type"]
342267

343268
classes = response.get("classes")
344-
new_attachments = _deobfuscate_view_hierarchies(attachments, classes)
345-
attachment_cache.set(cache_key, attachments=new_attachments, timeout=CACHE_TIMEOUT)
269+
view_hierarchies.deobfuscate_and_save(classes)
346270

347271
return data

src/sentry/lang/java/utils.py

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,7 @@
22

33
from typing import Any
44

5-
import orjson
6-
import sentry_sdk
7-
8-
from sentry.attachments import CachedAttachment, attachment_cache
9-
from sentry.ingest.consumer.processors import CACHE_TIMEOUT
10-
from sentry.models.project import Project
115
from sentry.stacktraces.processing import StacktraceInfo
12-
from sentry.utils.cache import cache_key_for_event
136
from sentry.utils.safe import get_path
147

158
# Platform values that should mark an event
@@ -53,44 +46,6 @@ def get_jvm_images(event: dict[str, Any]) -> set[str]:
5346
return images
5447

5548

56-
@sentry_sdk.trace
57-
def deobfuscation_template(data, map_type, deobfuscation_fn):
58-
"""
59-
Template for operations involved in deobfuscating view hierarchies.
60-
61-
The provided deobfuscation function is expected to modify the view hierarchy dict in-place.
62-
"""
63-
project = Project.objects.get_from_cache(id=data["project"])
64-
65-
cache_key = cache_key_for_event(data)
66-
attachments = [*attachment_cache.get(cache_key)]
67-
68-
if not any(attachment.type == "event.view_hierarchy" for attachment in attachments):
69-
return
70-
71-
new_attachments = []
72-
for attachment in attachments:
73-
if attachment.type == "event.view_hierarchy":
74-
view_hierarchy = orjson.loads(attachment_cache.get_data(attachment))
75-
deobfuscation_fn(data, project, view_hierarchy)
76-
77-
# Reupload to cache as a unchunked data
78-
new_attachments.append(
79-
CachedAttachment(
80-
type=attachment.type,
81-
id=attachment.id,
82-
name=attachment.name,
83-
content_type=attachment.content_type,
84-
data=orjson.dumps(view_hierarchy),
85-
chunks=None,
86-
)
87-
)
88-
else:
89-
new_attachments.append(attachment)
90-
91-
attachment_cache.set(cache_key, attachments=new_attachments, timeout=CACHE_TIMEOUT)
92-
93-
9449
def is_jvm_event(data: Any, stacktraces: list[StacktraceInfo]) -> bool:
9550
"""Returns whether `data` is a JVM event, based on its platform,
9651
the supplied stacktraces, and its images."""

src/sentry/lang/java/view_hierarchies.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
from typing import Any
2+
3+
import orjson
4+
5+
from sentry.attachments import CachedAttachment, attachment_cache
6+
from sentry.ingest.consumer.processors import CACHE_TIMEOUT
7+
from sentry.utils.cache import cache_key_for_event
8+
9+
10+
class ViewHierarchies:
11+
def __init__(self, data: Any):
12+
self._cache_key = cache_key_for_event(data)
13+
self._view_hierarchies: list[tuple[CachedAttachment, Any]] = []
14+
self._other_attachments: list[CachedAttachment] = []
15+
16+
for attachment in attachment_cache.get(self._cache_key):
17+
if attachment.type == "event.view_hierarchy":
18+
view_hierarchy = orjson.loads(attachment.data)
19+
self._view_hierarchies.append((attachment, view_hierarchy))
20+
else:
21+
self._other_attachments.append(attachment)
22+
23+
def get_window_class_names(self) -> list[str]:
24+
"""
25+
Returns the class names of all windows in all view hierarchies.
26+
"""
27+
windows_to_deobfuscate = []
28+
for _, view_hierarchy in self._view_hierarchies:
29+
windows_to_deobfuscate.extend(view_hierarchy.get("windows"))
30+
31+
class_names = []
32+
while windows_to_deobfuscate:
33+
window = windows_to_deobfuscate.pop()
34+
if window.get("type") is not None:
35+
class_names.append(window["type"])
36+
if children := window.get("children"):
37+
windows_to_deobfuscate.extend(children)
38+
39+
return class_names
40+
41+
def deobfuscate_and_save(self, class_names: dict[str, str]):
42+
"""
43+
Deobfuscates all view hierarchies in-place and persists any changes made.
44+
"""
45+
if not self._view_hierarchies:
46+
return
47+
48+
new_attachments: list[CachedAttachment] = []
49+
for attachment, view_hierarchy in self._view_hierarchies:
50+
_deobfuscate_view_hierarchy(view_hierarchy, class_names)
51+
new_attachments.append(
52+
CachedAttachment(
53+
type=attachment.type,
54+
id=attachment.id,
55+
name=attachment.name,
56+
content_type=attachment.content_type,
57+
data=orjson.dumps(view_hierarchy),
58+
chunks=None,
59+
)
60+
)
61+
62+
attachments = self._other_attachments + new_attachments
63+
attachment_cache.set(self._cache_key, attachments, timeout=CACHE_TIMEOUT)
64+
65+
66+
def _deobfuscate_view_hierarchy(view_hierarchy: Any, class_names: dict[str, str]) -> None:
    """
    Deobfuscates a view hierarchy in-place.

    The `class_names` dict is used to resolve obfuscated to deobfuscated names. If
    an obfuscated class name isn't present in `class_names`, it is left unchanged.

    A view hierarchy whose "windows" entry is missing or null is left untouched.
    """
    # Copy into a fresh list so the traversal stack never mutates the
    # hierarchy's own "windows" list; fall back to empty when it is absent.
    pending = list(view_hierarchy.get("windows") or [])
    while pending:
        window = pending.pop()
        window_type = window.get("type")
        if window_type is not None:
            mapped_type = class_names.get(window_type)
            if mapped_type is not None:
                window["type"] = mapped_type
        # Nested windows are processed the same way as top-level ones.
        if children := window.get("children"):
            pending.extend(children)

tests/sentry/lang/dart/test_utils.py

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -539,45 +539,3 @@ def test_deobfuscate_exception_value_without_type() -> None:
539539
# Third: no pattern; unchanged
540540
assert data["exception"]["values"][2]["type"] is None
541541
assert data["exception"]["values"][2]["value"] == "No pattern here"
542-
543-
544-
# @mock.patch("sentry.lang.dart.utils.generate_dart_symbols_map", return_value=MOCK_DEBUG_MAP)
545-
# @mock.patch("sentry.lang.dart.utils.get_debug_meta_image_ids", return_value=["test-uuid"])
546-
# def test_view_hierarchy_deobfuscation(
547-
# mock_images: mock.MagicMock, mock_map: mock.MagicMock
548-
# ) -> None:
549-
# test_view_hierarchy = {
550-
# "windows": [
551-
# {
552-
# "type": "mD",
553-
# "children": [
554-
# {
555-
# "type": "er",
556-
# "children": [
557-
# {"type": "_YMa<er>", "children": [{"type": "_NativeInteger"}]}
558-
# ],
559-
# },
560-
# ],
561-
# }
562-
# ]
563-
# }
564-
# _deobfuscate_view_hierarchy(mock.Mock(), mock.Mock(), test_view_hierarchy)
565-
566-
# assert test_view_hierarchy == {
567-
# "windows": [
568-
# {
569-
# "type": "ButtonTheme",
570-
# "children": [
571-
# {
572-
# "type": "SemanticsAction",
573-
# "children": [
574-
# {
575-
# "type": "_entry<SemanticsAction>",
576-
# "children": [{"type": "_NativeInteger"}],
577-
# }
578-
# ],
579-
# }
580-
# ],
581-
# }
582-
# ]
583-
# }

0 commit comments

Comments
 (0)