Skip to content

Commit 48a66f6

Browse files
committed
🚚(backend) store classroom documents in S3 using filenames as keys
Refactored S3 key generation for classroom documents: instead of using timestamp-based keys, files are now stored with their original filenames to support proper download behavior via Scaleway Edge Service URLs (since `Response-Content-Disposition` headers cannot be set).
1 parent 4dc5430 commit 48a66f6

File tree

13 files changed

+245
-269
lines changed

13 files changed

+245
-269
lines changed

env.d/development.dist

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ DJANGO_AWS_ACCESS_KEY_ID=yourAwsAccesKey
1515
DJANGO_AWS_SECRET_ACCESS_KEY=YourAwsSecretAccessKeyId
1616
DJANGO_AWS_S3_REGION_NAME=eu-west-1
1717

18+
# Scaleway
19+
DJANGO_SCW_EDGE_SERVICE_DOMAIN=yourEdgeServiceDomain
20+
1821
# STORAGE_S3
1922
DJANGO_STORAGE_S3_ACCESS_KEY=yourS3AccesKey
2023
DJANGO_STORAGE_S3_SECRET_KEY=yourS3AccesKey

src/backend/marsha/bbb/api.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -622,10 +622,17 @@ def initiate_upload(self, request, pk=None, classroom_id=None):
622622
if serializer.is_valid() is not True:
623623
return Response(serializer.errors, status=400)
624624

625+
filename = serializer.validated_data["filename"]
626+
extension = serializer.validated_data["extension"]
627+
628+
if not filename.endswith(extension):
629+
filename = f"{filename}{extension}"
630+
625631
presigned_post = (
626632
storage.get_initiate_backend().initiate_classroom_document_storage_upload(
627633
request,
628634
classroom_document,
635+
filename,
629636
[
630637
["eq", "$Content-Type", serializer.validated_data["mimetype"]],
631638
[
@@ -674,10 +681,8 @@ def upload_ended(self, request, pk=None, classroom_id=None):
674681
)
675682
serializer.is_valid(raise_exception=True)
676683

677-
file_key = serializer.validated_data["file_key"]
678-
# The file_key have the "classroom/{classroom_pk}/classroomdocument/{stamp}"
679-
# format
680-
stamp = file_key.split("/")[-1]
684+
now = timezone.now()
685+
stamp = to_timestamp(now)
681686

682687
classroom_document.update_upload_state(READY, to_datetime(stamp))
683688

src/backend/marsha/bbb/models.py

Lines changed: 10 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -352,68 +352,33 @@ class Meta:
352352
verbose_name = _("Classroom document")
353353
verbose_name_plural = _("Classroom documents")
354354

355-
def get_source_s3_key(self, stamp=None, extension=None):
356-
"""Compute the S3 key in the source bucket.
357-
It is built from the classroom ID + ID of the classroom document + version stamp.
358-
Parameters
359-
----------
360-
stamp: Type[string]
361-
Passing a value for this argument will return the source S3 key for the classroom
362-
document assuming its active stamp is set to this value. This is useful to create an
363-
upload policy for this prospective version of the track, so that the client can
364-
upload the file to S3 and the confirmation lambda can set the `uploaded_on` field
365-
to this value only after the file upload and processing is successful.
366-
extension: Type[string]
367-
The extension used by the uploaded media. This extension is added at the end of the key
368-
to keep a record of the extension. We will use it in the update-state endpoint to
369-
record it in the database.
370-
Returns
371-
-------
372-
string
373-
The S3 key for the classroom document in the source bucket, where uploaded files are
374-
stored before they are converted and copied to the destination bucket.
375-
"""
376-
# We don't want to deal with None value, so we set it with an empty string
377-
extension = extension or ""
378-
379-
# We check if the extension starts with a leading dot or not. If it's not the case we add
380-
# it at the beginning of the string
381-
if extension and not extension.startswith("."):
382-
extension = "." + extension
383-
384-
stamp = stamp or to_timestamp(self.uploaded_on)
385-
return f"{self.classroom.pk}/classroomdocument/{self.pk}/{stamp}{extension}"
386-
387-
def get_storage_prefix(
355+
def get_storage_key(
388356
self,
389-
stamp=None,
357+
filename,
390358
base_dir: STORAGE_BASE_DIRECTORY = CLASSROOM_STORAGE_BASE_DIRECTORY,
391359
):
392-
"""Compute the storage prefix for the classroom document.
360+
"""Compute the storage key for the classroom document.
393361
394362
Parameters
395363
----------
396-
stamp: Type[string]
397-
Passing a value for this argument will return the storage prefix for the
398-
classroom document assuming its active stamp is set to this value. This is
399-
useful to create an upload policy for this prospective version of the
400-
classroom, so that the client can upload the file to S3.
401-
364+
filename: Type[string]
365+
The filename of the uploaded media. For classroom documents, the filename is
366+
directly set into the key.
402367
base: Type[STORAGE_BASE_DIRECTORY]
403368
The storage base directory. Defaults to Classroom. It will be used to
404-
compute the storage prefix.
369+
compute the storage key.
405370
406371
Returns
407372
-------
408373
string
409-
The storage prefix for the classroom document, depending on the base directory passed.
374+
The storage key for the classroom document, depending on the base directory
375+
passed.
410376
"""
411-
stamp = stamp or self.uploaded_on_stamp()
412377
base = base_dir
413378
if base == DELETED_STORAGE_BASE_DIRECTORY:
414379
base = f"{base}/{CLASSROOM_STORAGE_BASE_DIRECTORY}"
415380

416-
return f"{base}/{self.classroom.pk}/classroomdocument/{self.pk}/{stamp}"
381+
return f"{base}/{self.classroom.pk}/classroomdocument/{self.pk}/{filename}"
417382

418383

419384
class ClassroomRecording(BaseModel):

src/backend/marsha/bbb/serializers.py

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,15 @@
2222
ClassroomSession,
2323
)
2424
from marsha.bbb.utils.bbb_utils import get_recording_url, get_url as get_document_url
25-
from marsha.core.defaults import (
26-
CLASSROOM_RECORDINGS_KEY_CACHE,
27-
CLASSROOM_STORAGE_BASE_DIRECTORY,
28-
VOD_CONVERT,
29-
)
25+
from marsha.core.defaults import CLASSROOM_RECORDINGS_KEY_CACHE, SCW_S3, VOD_CONVERT
3026
from marsha.core.serializers import (
3127
BaseInitiateUploadSerializer,
3228
PlaylistLiteSerializer,
3329
ReadOnlyModelSerializer,
3430
UploadableFileWithExtensionSerializerMixin,
3531
VideoFromRecordingSerializer,
3632
)
37-
from marsha.core.utils import time_utils
33+
from marsha.core.storage.storage_class import file_storage
3834

3935

4036
class ClassroomRecordingSerializer(ReadOnlyModelSerializer):
@@ -409,16 +405,25 @@ def get_url(self, obj):
409405
Returns
410406
-------
411407
String or None
412-
the url to fetch the classroom document on CloudFront
408+
the url to fetch the classroom document on CloudFront/Storage
413409
None if the classroom document is still not uploaded to S3 with success
414410
415411
"""
416-
if url := get_document_url(obj):
417-
return (
418-
f"{url}?response-content-disposition="
419-
f"{quote_plus('attachment; filename=' + obj.filename)}"
420-
)
421-
return None
412+
if not obj.uploaded_on:
413+
return None
414+
415+
if obj.storage_location == SCW_S3:
416+
file_key = obj.get_storage_key(obj.filename)
417+
418+
return file_storage.url(file_key)
419+
420+
# Default AWS fallback
421+
url = get_document_url(obj)
422+
423+
return (
424+
f"{url}?response-content-disposition="
425+
f"{quote_plus('attachment; filename=' + obj.filename)}"
426+
)
422427

423428

424429
class ClassroomDocumentInitiateUploadSerializer(BaseInitiateUploadSerializer):
@@ -434,6 +439,19 @@ def max_upload_file_size(self):
434439
"""
435440
return settings.CLASSROOM_DOCUMENT_SOURCE_MAX_SIZE
436441

442+
def validate_filename(self, value):
443+
"""Check if the filename is valid."""
444+
if "/" in value or "\\" in value:
445+
raise serializers.ValidationError("Filename must not contain slashes.")
446+
447+
if value.startswith("."):
448+
raise serializers.ValidationError("Filename must not start with a dot.")
449+
450+
if len(value) > 255:
451+
raise serializers.ValidationError("Filename is too long.")
452+
453+
return value
454+
437455
def validate(self, attrs):
438456
"""Validate if the mimetype is allowed or not."""
439457
# mimetype is provided, we directly check it
@@ -467,19 +485,13 @@ class ClassroomDocumentUploadEndedSerializer(serializers.Serializer):
467485

468486
def validate_file_key(self, value):
469487
"""Check if the file_key is valid."""
488+
filename = value.split("/")[-1]
489+
_, extension = splitext(filename)
470490

471-
stamp = value.split("/")[-1]
491+
if not extension:
492+
raise serializers.ValidationError("Filename must include an extension.")
472493

473-
try:
474-
time_utils.to_datetime(stamp)
475-
except serializers.ValidationError as error:
476-
raise serializers.ValidationError("file_key is not valid") from error
477-
478-
if (
479-
self.context["obj"].get_storage_prefix(
480-
stamp, CLASSROOM_STORAGE_BASE_DIRECTORY
481-
)
482-
!= value
483-
):
494+
if self.context["obj"].get_storage_key(filename=filename) != value:
484495
raise serializers.ValidationError("file_key is not valid")
496+
485497
return value

0 commit comments

Comments
 (0)