Skip to content

Commit e29c09d

Browse files
committed
dedupe with a fingerprint instead of exception object
1 parent 4329383 commit e29c09d

File tree

1 file changed

+89
-8
lines changed

1 file changed

+89
-8
lines changed

sentry_sdk/integrations/dedupe.py

Lines changed: 89 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,87 @@
1+
import hashlib
12
import sentry_sdk
2-
from sentry_sdk.utils import ContextVar, logger
3+
from sentry_sdk.utils import (
4+
ContextVar,
5+
logger,
6+
get_type_name,
7+
get_type_module,
8+
get_error_message,
9+
iter_stacks,
10+
)
311
from sentry_sdk.integrations import Integration
412
from sentry_sdk.scope import add_global_event_processor
513

614
from typing import TYPE_CHECKING
715

816
if TYPE_CHECKING:
9-
from typing import Optional
17+
from typing import Any, Optional
1018

1119
from sentry_sdk._types import Event, Hint
1220

1321

22+
def _create_exception_fingerprint(exc_info, max_frames=10):
    # type: (Any, int) -> str
    """
    Build a compact SHA-256 fingerprint for an exception.

    The digest covers the exception type (module and name), the exception
    message, and the location (file basename, function name, line number) of
    up to ``max_frames`` traceback entries. It lets a caller remember "the
    last exception seen" as a 64-character hex string instead of keeping a
    reference to the exception object itself.

    Two distinct exceptions with the same type, message and fingerprinted
    frames yield the same digest, so this only approximates object identity;
    the result is not guaranteed to be unique.

    :param exc_info: ``(exc_type, exc_value, traceback)`` triple, in the
        shape returned by ``sys.exc_info()``.
    :param max_frames: Upper bound on the number of traceback entries folded
        into the digest. Values below 1 disable the traceback component.
    :return: Hex-encoded SHA-256 digest, or ``""`` when ``exc_info`` carries
        no exception type or no exception value.
    """
    from collections import deque

    exc_type, exc_value, tb = exc_info

    if exc_type is None or exc_value is None:
        return ""

    type_module = get_type_module(exc_type) or ""
    type_name = get_type_name(exc_type) or ""
    message = get_error_message(exc_value)

    # Keep the *last* ``max_frames`` entries rather than the first ones.
    # NOTE(review): this assumes iter_stacks() walks the chain in
    # tb -> tb_next order, i.e. the frame that caught the exception comes
    # first and the raise site comes last -- confirm against sentry_sdk.utils.
    # Under that assumption the trailing entries are the ones that identify
    # where the error originated, and they stay the same when the exception
    # propagates further up and is captured a second time. The bounded deque
    # discards older entries as it fills, so at most ``max_frames`` traceback
    # entries are held, and only for the duration of this call.
    innermost = deque(iter_stacks(tb), maxlen=max(max_frames, 0))

    tb_parts = []
    for entry in innermost:
        code = entry.tb_frame.f_code
        filename = code.co_filename or ""
        # Basename only, not the full path. Normalise backslashes first so
        # Windows-style paths are reduced to their basename as well.
        basename = filename.replace("\\", "/").rsplit("/", 1)[-1]
        tb_parts.append(
            "{}:{}:{}".format(basename, code.co_name or "", entry.tb_lineno)
        )

    fingerprint_parts = [type_module, type_name, message, "|".join(tb_parts)]

    # errors="replace" keeps un-encodable characters (e.g. lone surrogates in
    # a message) from raising while the digest is being computed.
    fingerprint_data = "||".join(fingerprint_parts).encode("utf-8", errors="replace")
    return hashlib.sha256(fingerprint_data).hexdigest()
75+
76+
1477
class DedupeIntegration(Integration):
1578
identifier = "dedupe"
1679

1780
def __init__(self):
1881
# type: () -> None
19-
self._last_seen = ContextVar("last-seen")
82+
# Store fingerprint of the last seen exception instead of the exception object
83+
# This prevents memory leaks by not holding references to exception objects
84+
self._last_fingerprint = ContextVar("last-fingerprint", default=None)
2085

2186
@staticmethod
2287
def setup_once():
@@ -35,19 +100,35 @@ def processor(event, hint):
35100
if exc_info is None:
36101
return event
37102

38-
exc = exc_info[1]
39-
if integration._last_seen.get(None) is exc:
40-
logger.info("DedupeIntegration dropped duplicated error event %s", exc)
103+
# Create fingerprint from exception instead of storing the object
104+
fingerprint = _create_exception_fingerprint(exc_info)
105+
if not fingerprint:
106+
return event
107+
108+
# Check if this fingerprint matches the last seen one
109+
last_fingerprint = integration._last_fingerprint.get()
110+
if last_fingerprint == fingerprint:
111+
logger.info(
112+
"DedupeIntegration dropped duplicated error event with fingerprint %s",
113+
fingerprint[:16],
114+
)
41115
return None
42116

43-
integration._last_seen.set(exc)
117+
# Store this fingerprint as the last seen one
118+
integration._last_fingerprint.set(fingerprint)
44119
return event
45120

46121
@staticmethod
47122
def reset_last_seen():
48123
# type: () -> None
124+
"""
125+
Resets the deduplication state, clearing the last seen exception fingerprint.
126+
127+
This maintains the existing public API while working with the new
128+
fingerprint-based implementation.
129+
"""
49130
integration = sentry_sdk.get_client().get_integration(DedupeIntegration)
50131
if integration is None:
51132
return
52133

53-
integration._last_seen.set(None)
134+
integration._last_fingerprint.set(None)

0 commit comments

Comments
 (0)