Skip to content

Commit 373fa6c

Browse files
committed
Clean up after full attachments rollout
This cleans up the codebase after attachmens on objectstore has been fully rolled out. It assumes that attachments are now fully stored from within relay, and assumes that no attachment chunks will ever be emitted. Thus, there is no more need for the attachments cache, or the chunk message handlers. All feature flags related to objectstore rollout are removed and hardcoded.
1 parent 3d977e9 commit 373fa6c

File tree

22 files changed

+246
-1976
lines changed

22 files changed

+246
-1976
lines changed

src/sentry/api/endpoints/event_attachment_details.py

Lines changed: 4 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
1-
import contextlib
21
import posixpath
3-
from typing import IO, ContextManager
42

5-
import sentry_sdk
63
from django.http import StreamingHttpResponse
74
from rest_framework.request import Request
85
from rest_framework.response import Response
@@ -17,11 +14,10 @@
1714
from sentry.auth.system import is_system_auth
1815
from sentry.constants import ATTACHMENTS_ROLE_DEFAULT
1916
from sentry.models.activity import Activity
20-
from sentry.models.eventattachment import V1_PREFIX, V2_PREFIX, EventAttachment
17+
from sentry.models.eventattachment import EventAttachment
2118
from sentry.models.organizationmember import OrganizationMember
2219
from sentry.services import eventstore
2320
from sentry.types.activity import ActivityType
24-
from sentry.utils import metrics
2521

2622

2723
class EventAttachmentDetailsPermission(ProjectPermission):
@@ -65,35 +61,12 @@ def download(self, attachment: EventAttachment):
6561
name = posixpath.basename(" ".join(attachment.name.split()))
6662

6763
def stream_attachment():
68-
attachment_file = attachment.getfile()
69-
doublewrite_file: IO[bytes] | ContextManager[None] = contextlib.nullcontext()
70-
blob_path = attachment.blob_path or ""
71-
blob_path = blob_path.startswith(V1_PREFIX) and blob_path.removeprefix(V1_PREFIX) or ""
72-
if blob_path.startswith(V2_PREFIX):
73-
try:
74-
# We force the attachment model to use the objectstore backend
75-
# by changing its prefix. Its a big hack, but hey why not.
76-
attachment.blob_path = blob_path
77-
doublewrite_file = attachment.getfile()
78-
metrics.incr("storage.attachments.double_write.read")
79-
except Exception:
80-
sentry_sdk.capture_exception()
81-
8264
# TODO: We should pass along the `Accept-Encoding`, so we can avoid
8365
# decompressing on the API side, and just transfer the already
8466
# compressed bytes to the client as it indicated it can handle it.
85-
with attachment_file as af, doublewrite_file as df:
86-
while filestore_chunk := af.read(4096):
87-
if df:
88-
try:
89-
objectstore_chunk = df.read(4096)
90-
assert filestore_chunk == objectstore_chunk
91-
except Exception:
92-
# If we have encountered one error, clear the reference
93-
# to avoid spamming more errors for all the remaining chunks.
94-
df = None
95-
sentry_sdk.capture_exception()
96-
yield filestore_chunk
67+
with attachment.getfile() as attachment_file:
68+
while chunk := attachment_file.read(4096):
69+
yield chunk
9770

9871
response = StreamingHttpResponse(
9972
stream_attachment(),

src/sentry/attachments.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
from __future__ import annotations
2+
3+
__all__ = [
4+
"store_attachments_for_event",
5+
"get_attachments_for_event",
6+
"delete_cached_and_ratelimited_attachments",
7+
"CachedAttachment",
8+
]
9+
10+
from typing import TYPE_CHECKING, Any
11+
12+
import sentry_sdk
13+
14+
from sentry.objectstore import get_attachments_session
15+
from sentry.utils.json import prune_empty_keys
16+
17+
if TYPE_CHECKING:
18+
from sentry.models.project import Project
19+
20+
21+
UNINITIALIZED_DATA = object()
22+
23+
24+
class CachedAttachment:
25+
def __init__(
26+
self,
27+
name=None,
28+
content_type=None,
29+
type=None,
30+
data=UNINITIALIZED_DATA,
31+
stored_id: str | None = None,
32+
rate_limited=None,
33+
size=None,
34+
**kwargs,
35+
):
36+
self.name = name
37+
self.content_type = content_type
38+
self.type = type or "event.attachment"
39+
assert isinstance(self.type, str), self.type
40+
self.rate_limited = rate_limited
41+
42+
if size is not None:
43+
self.size = size
44+
elif data not in (None, UNINITIALIZED_DATA):
45+
self.size = len(data)
46+
else:
47+
self.size = 0
48+
49+
self.stored_id = stored_id
50+
self._data = data
51+
52+
def load_data(self, project: Project | None = None) -> bytes:
53+
if self.stored_id:
54+
assert project
55+
session = get_attachments_session(project.organization_id, project.id)
56+
return session.get(self.stored_id).payload.read()
57+
58+
assert self._data is not UNINITIALIZED_DATA
59+
return self._data
60+
61+
def meta(self) -> dict:
62+
return prune_empty_keys(
63+
{
64+
"name": self.name,
65+
"rate_limited": self.rate_limited,
66+
"content_type": self.content_type,
67+
"type": self.type,
68+
"size": self.size,
69+
"stored_id": self.stored_id,
70+
}
71+
)
72+
73+
74+
@sentry_sdk.trace
75+
def store_attachments_for_event(project: Project, event: Any, attachments: list[CachedAttachment]):
76+
"""
77+
Stores the given list of `attachments` belonging to `event` for processing.
78+
79+
The attachment metadata is stored within the `event`, and attachment payloads
80+
are stored in `objectstore`.
81+
"""
82+
83+
attachments_metadata: list[dict] = []
84+
for attachment in attachments:
85+
# if the attachment has non-empty data set, we want to store it, overwriting any existing data in case a `stored_id` is set.
86+
if attachment._data and attachment._data is not UNINITIALIZED_DATA:
87+
session = get_attachments_session(project.organization_id, project.id)
88+
attachment.stored_id = session.put(attachment._data, key=attachment.stored_id)
89+
90+
attachments_metadata.append(attachment.meta())
91+
92+
event["_attachments"] = attachments_metadata
93+
94+
95+
def get_attachments_for_event(event: Any) -> list[CachedAttachment]:
96+
"""
97+
Retrieves the attachments belonging to the given `event`.
98+
"""
99+
100+
return [CachedAttachment(**attachment) for attachment in event.get("_attachments", [])]
101+
102+
103+
@sentry_sdk.trace
104+
def delete_cached_and_ratelimited_attachments(
105+
project: Project, attachments: list[CachedAttachment]
106+
):
107+
"""
108+
This deletes all the `rate_limited` attachments from the `objectstore`.
109+
Non-ratelimited attachments which are already stored in `objectstore` will
110+
be retained there for long-term storage.
111+
"""
112+
for attachment in attachments:
113+
# deletes from objectstore if no long-term storage is desired
114+
if attachment.rate_limited and attachment.stored_id:
115+
get_attachments_session(project.organization_id, project.id).delete(
116+
attachment.stored_id
117+
)

src/sentry/attachments/__init__.py

Lines changed: 0 additions & 83 deletions
This file was deleted.

0 commit comments

Comments
 (0)