Skip to content

Commit 995ecac

Browse files
authored
feat(release-files): Store multiple release archives (#26529)
Instead of merging multiple archive uploads for one release, store all uploaded archives as-is and create an artifact index to keep track of the location of each individual file. In order to hide the archives and index file from the user, use the new `public_objects` manager when querying user-facing release files.
1 parent 0d9e60c commit 995ecac

File tree

14 files changed

+540
-296
lines changed

14 files changed

+540
-296
lines changed

src/sentry/api/endpoints/debug_files.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -429,13 +429,13 @@ def expose_release(release, count):
429429

430430
def serialize_results(results):
431431
file_counts = (
432-
Release.objects.filter(id__in=[r["id"] for r in results])
433-
.annotate(count=Count("releasefile"))
434-
.values("count", "id")
432+
ReleaseFile.public_objects.filter(release_id__in=[r["id"] for r in results])
433+
.values("release_id")
434+
.annotate(count=Count("id"))
435435
)
436-
file_count_map = {r["id"]: r["count"] for r in file_counts}
436+
file_count_map = {r["release_id"]: r["count"] for r in file_counts}
437437
return serialize(
438-
[expose_release(r, file_count_map[r["id"]]) for r in results], request.user
438+
[expose_release(r, file_count_map.get(r["id"], 0)) for r in results], request.user
439439
)
440440

441441
return self.paginate(

src/sentry/api/endpoints/organization_release_file_details.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def get(self, request, organization, version, file_id):
5252
raise ResourceDoesNotExist
5353

5454
try:
55-
releasefile = ReleaseFile.objects.get(release=release, id=file_id)
55+
releasefile = ReleaseFile.public_objects.get(release=release, id=file_id)
5656
except ReleaseFile.DoesNotExist:
5757
raise ResourceDoesNotExist
5858

@@ -88,7 +88,7 @@ def put(self, request, organization, version, file_id):
8888
raise ResourceDoesNotExist
8989

9090
try:
91-
releasefile = ReleaseFile.objects.get(release=release, id=file_id)
91+
releasefile = ReleaseFile.public_objects.get(release=release, id=file_id)
9292
except ReleaseFile.DoesNotExist:
9393
raise ResourceDoesNotExist
9494

@@ -127,7 +127,7 @@ def delete(self, request, organization, version, file_id):
127127
raise ResourceDoesNotExist
128128

129129
try:
130-
releasefile = ReleaseFile.objects.get(release=release, id=file_id)
130+
releasefile = ReleaseFile.public_objects.get(release=release, id=file_id)
131131
except ReleaseFile.DoesNotExist:
132132
raise ResourceDoesNotExist
133133

src/sentry/api/endpoints/organization_release_files.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ def get(self, request, organization, version):
5757
raise ResourceDoesNotExist
5858

5959
file_list = (
60-
ReleaseFile.objects.filter(release=release).select_related("file").order_by("name")
60+
ReleaseFile.public_objects.filter(release=release)
61+
.select_related("file")
62+
.order_by("name")
6163
)
6264

6365
return self.paginate(

src/sentry/api/endpoints/organization_release_meta.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def get(self, request, organization, version):
7777
for pr in project_releases
7878
]
7979

80-
release_file_count = ReleaseFile.objects.filter(release=release).count()
80+
release_file_count = ReleaseFile.public_objects.filter(release=release).count()
8181

8282
return Response(
8383
{

src/sentry/api/endpoints/project_release_file_details.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def get(self, request, project, version, file_id):
5656
raise ResourceDoesNotExist
5757

5858
try:
59-
releasefile = ReleaseFile.objects.get(release=release, id=file_id)
59+
releasefile = ReleaseFile.public_objects.get(release=release, id=file_id)
6060
except ReleaseFile.DoesNotExist:
6161
raise ResourceDoesNotExist
6262

@@ -92,7 +92,7 @@ def put(self, request, project, version, file_id):
9292
raise ResourceDoesNotExist
9393

9494
try:
95-
releasefile = ReleaseFile.objects.get(release=release, id=file_id)
95+
releasefile = ReleaseFile.public_objects.get(release=release, id=file_id)
9696
except ReleaseFile.DoesNotExist:
9797
raise ResourceDoesNotExist
9898

@@ -132,7 +132,7 @@ def delete(self, request, project, version, file_id):
132132
raise ResourceDoesNotExist
133133

134134
try:
135-
releasefile = ReleaseFile.objects.get(release=release, id=file_id)
135+
releasefile = ReleaseFile.public_objects.get(release=release, id=file_id)
136136
except ReleaseFile.DoesNotExist:
137137
raise ResourceDoesNotExist
138138

src/sentry/api/endpoints/project_release_files.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ def get(self, request, project, version):
4545
raise ResourceDoesNotExist
4646

4747
file_list = (
48-
ReleaseFile.objects.filter(release=release).select_related("file").order_by("name")
48+
ReleaseFile.public_objects.filter(release=release)
49+
.select_related("file")
50+
.order_by("name")
4951
)
5052

5153
if query:

src/sentry/lang/javascript/processor.py

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55

66
from django.utils.encoding import force_bytes, force_text
77

8-
from sentry.models.releasefile import ReleaseArchive
9-
from sentry.tasks.assemble import RELEASE_ARCHIVE_FILENAME
8+
from sentry.models.releasefile import ARTIFACT_INDEX_FILENAME, ReleaseArchive, read_artifact_index
9+
from sentry.utils import json
1010

1111
__all__ = ["JavaScriptStacktraceProcessor"]
1212

@@ -392,17 +392,58 @@ def get_from_archive(url: str, archive: ReleaseArchive) -> Tuple[bytes, dict]:
392392
raise KeyError(f"Not found in archive: '{url}'")
393393

394394

395+
@metrics.wraps("sourcemaps.load_artifact_index")
def get_artifact_index(release, dist):
    """Return the artifact index for ``release`` / ``dist``, going through the cache.

    The cache holds either the JSON-serialized index or the marker ``-1``,
    which is written when ``read_artifact_index`` returned ``None``. Any other
    falsy cache result is treated as a miss and triggers a fresh read.

    :param release: release object; its ``id`` is part of the cache key.
    :param dist: optional distribution; its ``name`` (if any) feeds the ident.
    :return: the index as returned by ``read_artifact_index`` (or decoded from
        the cache), or ``None`` when no index is available.
    """
    # An empty / missing dist name collapses to None, as does a missing dist.
    dist_name = (dist.name or None) if dist else None
    ident = ReleaseFile.get_ident(ARTIFACT_INDEX_FILENAME, dist_name)
    cache_key = f"artifact-index:v1:{release.id}:{ident}"

    cached = cache.get(cache_key)
    if cached == -1:
        # Negative cache entry: a previous lookup found no index.
        return None
    if cached:
        return json.loads(cached)

    index = read_artifact_index(release, dist)
    # Only cache for a short time to keep the manifest up-to-date
    cache.set(cache_key, -1 if index is None else json.dumps(index), timeout=60)

    return index
413+
414+
415+
def get_index_entry(release, dist, url) -> Optional[dict]:
    """Look up ``url`` in the artifact index of ``release`` / ``dist``.

    Every candidate produced by ``ReleaseFile.normalize(url)`` is tried in
    order against the index's ``"files"`` mapping; the first truthy entry wins.

    :return: the matching index entry, or ``None`` when there is no (non-empty)
        index or none of the candidates has an entry.
    """
    index = get_artifact_index(release, dist)
    if not index:
        return None

    # Lazy lookup so that candidates after the first hit are never evaluated.
    entries = (index.get("files", {}).get(candidate) for candidate in ReleaseFile.normalize(url))
    return next((entry for entry in entries if entry), None)
424+
425+
395426
@metrics.wraps("sourcemaps.fetch_release_archive")
396-
def fetch_release_archive(release, dist) -> Optional[IO]:
427+
def fetch_release_archive_for_url(release, dist, url) -> Optional[IO]:
397428
"""Fetch release archive and cache if possible.
398429
430+
Multiple archives might have been uploaded, so we need the URL
431+
to get the correct archive from the artifact index.
432+
399433
If return value is not empty, the caller is responsible for closing the stream.
400434
"""
401-
dist_name = dist and dist.name or None
402-
releasefile_ident = ReleaseFile.get_ident(RELEASE_ARCHIVE_FILENAME, dist_name)
403-
cache_key = get_release_file_cache_key(
404-
release_id=release.id, releasefile_ident=releasefile_ident
405-
)
435+
info = get_index_entry(release, dist, url)
436+
if info is None:
437+
# Cannot write negative cache entry here because ID of release archive
438+
# is not yet known
439+
return None
440+
441+
archive_ident = info["archive_ident"]
442+
443+
# TODO(jjbayer): Could already extract filename from info and return
444+
# it later
445+
446+
cache_key = get_release_file_cache_key(release_id=release.id, releasefile_ident=archive_ident)
406447

407448
result = cache.get(cache_key)
408449

@@ -412,11 +453,13 @@ def fetch_release_archive(release, dist) -> Optional[IO]:
412453
return BytesIO(result)
413454
else:
414455
qs = ReleaseFile.objects.filter(
415-
release=release, dist=dist, ident=releasefile_ident
456+
release=release, dist=dist, ident=archive_ident
416457
).select_related("file")
417458
try:
418459
releasefile = qs[0]
419460
except IndexError:
461+
# This should not happen when there is an archive_ident in the manifest
462+
logger.error("sourcemaps.missing_archive", exc_info=sys.exc_info())
420463
# Cache as nonexistent:
421464
cache.set(cache_key, -1, 60)
422465
return None
@@ -460,11 +503,10 @@ def fetch_release_artifact(url, release, dist):
460503
return result_from_cache(url, result)
461504

462505
start = time.monotonic()
463-
464-
release_file = fetch_release_archive(release, dist)
465-
if release_file is not None:
506+
archive_file = fetch_release_archive_for_url(release, dist, url)
507+
if archive_file is not None:
466508
try:
467-
archive = ReleaseArchive(release_file)
509+
archive = ReleaseArchive(archive_file)
468510
except BaseException as exc:
469511
logger.error("Failed to initialize archive for release %s", release.id, exc_info=exc)
470512
# TODO(jjbayer): cache error and return here
@@ -473,8 +515,10 @@ def fetch_release_artifact(url, release, dist):
473515
try:
474516
fp, headers = get_from_archive(url, archive)
475517
except KeyError:
476-
logger.debug(
477-
"Release artifact %r not found in archive (release_id=%s)", url, release.id
518+
# The manifest mapped the url to an archive, but the file
519+
# is not there.
520+
logger.error(
521+
"Release artifact %r not found in archive %s", url, archive_file.id
478522
)
479523
cache.set(cache_key, -1, 60)
480524
metrics.timing(

src/sentry/models/file.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,6 @@ def putfile(self, fileobj, blob_size=DEFAULT_BLOB_SIZE, commit=True, logger=noop
405405

406406
blob_fileobj = ContentFile(contents)
407407
blob = FileBlob.from_file(blob_fileobj, logger=logger)
408-
409408
results.append(FileBlobIndex.objects.create(file=self, blob=blob, offset=offset))
410409
offset += blob.size
411410
self.size = offset

0 commit comments

Comments
 (0)