Skip to content

Commit 6615500

Browse files
committed
🚚(back) serve legacy classroom documents from Scaleway S3
Classroom documents are now served from the `aws/` directory in Scaleway S3. They are served using the django-storage already in place. As content headers cannot be set on Scaleway Edge Services URLs, legacy classroom documents need to be renamed to their filenames. This commit adds a management command to do that.
1 parent b12bd55 commit 6615500

File tree

7 files changed

+220
-57
lines changed

7 files changed

+220
-57
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ Versioning](https://semver.org/spec/v2.0.0.html).
88

99
## [Unreleased]
1010

11+
### Changed
12+
13+
- Serve legacy classroom documents from Scaleway S3 after AWS migration
14+
1115
## [5.9.1] - 2025-06-25
1216

1317
### Fixed

src/backend/marsha/bbb/api.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -599,19 +599,19 @@ def initiate_upload(self, request, pk=None, classroom_id=None):
599599
"""Get an upload policy for a classroom document.
600600
601601
Calling the endpoint resets the upload state to `pending` and returns an upload policy to
602-
our AWS S3 source bucket.
602+
our S3 bucket.
603603
604604
Parameters
605605
----------
606606
request : Type[django.http.request.HttpRequest]
607607
The request on the API endpoint
608608
pk: string
609-
The primary key of the shared live media
609+
The primary key of the classroom document
610610
611611
Returns
612612
-------
613613
Type[rest_framework.response.Response]
614-
HttpResponse carrying the AWS S3 upload policy as a JSON object.
614+
HttpResponse carrying the S3 upload policy as a JSON object.
615615
616616
"""
617617
classroom_document = self.get_object() # check permissions first
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"""Rename classroom documents in Scaleway S3 for serving them with the right name."""
2+
3+
import logging
4+
from os.path import splitext
5+
6+
from django.conf import settings
7+
from django.core.management.base import BaseCommand
8+
9+
import boto3
10+
11+
from marsha.bbb.models import ClassroomDocument
12+
from marsha.core.defaults import AWS_STORAGE_BASE_DIRECTORY
13+
from marsha.core.storage.storage_class import file_storage
14+
from marsha.core.utils import time_utils
15+
16+
17+
logger = logging.getLogger(__name__)
18+
19+
scw_credentials = {
20+
"aws_access_key_id": settings.STORAGE_S3_ACCESS_KEY,
21+
"aws_secret_access_key": settings.STORAGE_S3_SECRET_KEY,
22+
"region_name": settings.STORAGE_S3_REGION_NAME,
23+
"endpoint_url": settings.STORAGE_S3_ENDPOINT_URL,
24+
}
25+
26+
# Configure S3 client
27+
s3_client = boto3.client("s3", **scw_credentials)
28+
29+
30+
class Command(BaseCommand):
    """Rename classroom documents in Scaleway S3 to their filename.

    Each legacy object stored under the `aws/` base directory with a
    ``<upload timestamp><extension>`` key is copied to a key ending with the
    document's sanitized filename. The source object is left in place.
    """

    help = "Rename classroom documents in Scaleway S3 to their filename."

    def validate_filename(self, value):
        """Transform filename to make it valid.

        Parameters
        ----------
        value : str
            The filename stored on the classroom document.

        Returns
        -------
        str
            The filename with slashes and backslashes replaced by underscores
            and any leading dots removed.

        """
        value = value.replace("/", "_")
        value = value.replace("\\", "_")
        value = value.lstrip(".")

        return value

    def handle(self, *args, **options):
        """Execute management command.

        Documents that were never uploaded, that have no filename, or whose
        filename is empty once sanitized are skipped instead of aborting the run.
        The sanitized filename is persisted only once the destination object
        exists, so the source key is always derived from the stored filename
        and a failed run can be replayed.
        """
        bucket = settings.STORAGE_S3_BUCKET_NAME

        for document in ClassroomDocument.objects.all():
            # Without an upload date or a filename no valid source key can be
            # built; presumably nothing was stored for such a document.
            if not document.uploaded_on or not document.filename:
                logger.info(
                    "Skipping document %s: no uploaded file to rename", document.pk
                )
                continue

            filename = self.validate_filename(document.filename)
            if not filename:
                # e.g. a filename made only of dots: the destination key would
                # end with "/" and the stored filename would become empty.
                logger.warning(
                    "Skipping document %s: filename is empty once sanitized",
                    document.pk,
                )
                continue

            # Get the file stored on Scaleway S3 under `aws/`. The extension is
            # taken from the filename as currently stored on the document.
            stamp = time_utils.to_timestamp(document.uploaded_on)
            extension = ""
            if "." in document.filename:
                extension = splitext(document.filename)[1]

            file_key_src = document.get_storage_key(
                filename=f"{stamp}{extension}", base_dir=AWS_STORAGE_BASE_DIRECTORY
            )

            # Compute file key destination which should be the document filename
            file_key_dest = document.get_storage_key(
                filename, base_dir=AWS_STORAGE_BASE_DIRECTORY
            )

            if file_storage.exists(file_key_dest):
                logger.info("Object %s already exists", file_key_dest)
            else:
                logger.info("Copying %s to %s", file_key_src, file_key_dest)
                s3_client.copy_object(
                    Bucket=bucket,
                    CopySource={"Bucket": bucket, "Key": file_key_src},
                    Key=file_key_dest,
                )

            # Override document filename with the validated S3-compatible filename.
            # Done after the copy so a failed copy never leaves the database
            # pointing at an object that does not exist.
            if filename != document.filename:
                document.filename = filename
                document.save()

        logger.info("Finished copying!")

src/backend/marsha/bbb/serializers.py

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from datetime import datetime
44
import mimetypes
55
from os.path import splitext
6-
from urllib.parse import quote_plus
76
from uuid import uuid4
87

98
from django.conf import settings
@@ -22,15 +21,14 @@
2221
ClassroomSession,
2322
)
2423
from marsha.bbb.utils.bbb_utils import get_recording_url, get_url as get_document_url
25-
from marsha.core.defaults import CLASSROOM_RECORDINGS_KEY_CACHE, SCW_S3, VOD_CONVERT
24+
from marsha.core.defaults import CLASSROOM_RECORDINGS_KEY_CACHE, VOD_CONVERT
2625
from marsha.core.serializers import (
2726
BaseInitiateUploadSerializer,
2827
PlaylistLiteSerializer,
2928
ReadOnlyModelSerializer,
3029
UploadableFileWithExtensionSerializerMixin,
3130
VideoFromRecordingSerializer,
3231
)
33-
from marsha.core.storage.storage_class import file_storage
3432

3533

3634
class ClassroomRecordingSerializer(ReadOnlyModelSerializer):
@@ -405,25 +403,11 @@ def get_url(self, obj):
405403
Returns
406404
-------
407405
String or None
408-
the url to fetch the classroom document on CloudFront/Storage
406+
the url to fetch the classroom document on Storage
409407
None if the classroom document is still not uploaded to S3 with success
410408
411409
"""
412-
if not obj.uploaded_on:
413-
return None
414-
415-
if obj.storage_location == SCW_S3:
416-
file_key = obj.get_storage_key(obj.filename)
417-
418-
return file_storage.url(file_key)
419-
420-
# Default AWS fallback
421-
url = get_document_url(obj)
422-
423-
return (
424-
f"{url}?response-content-disposition="
425-
f"{quote_plus('attachment; filename=' + obj.filename)}"
426-
)
410+
return get_document_url(obj)
427411

428412

429413
class ClassroomDocumentInitiateUploadSerializer(BaseInitiateUploadSerializer):

src/backend/marsha/bbb/tests/api/classroomdocument/test_list.py

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,7 @@ def test_api_list_classroom_documents_instructor(self):
112112
)
113113

114114
@override_settings(
115-
CLOUDFRONT_SIGNED_URLS_ACTIVE=True,
116-
CLOUDFRONT_SIGNED_PUBLIC_KEY_ID="cloudfront-access-key-id",
115+
MEDIA_URL="https://abc.svc.edge.scw.cloud/",
117116
)
118117
def test_api_list_classroom_documents_on_aws(self):
119118
"""Classroom documents should not be signed."""
@@ -156,10 +155,8 @@ def test_api_list_classroom_documents_on_aws(self):
156155
"upload_state": "pending",
157156
"uploaded_on": "2018-08-08T00:00:00Z",
158157
"url": (
159-
f"https://abc.cloudfront.net/{classroom.id}/classroomdocument/"
160-
f"{classroom_documents[3].id}/1533686400"
161-
f"?response-content-disposition"
162-
f"=attachment%3B+filename%3D{classroom_documents[3].filename}"
158+
f"https://abc.svc.edge.scw.cloud/aws/{classroom.id}/classroomdocument/"
159+
f"{classroom_documents[3].id}/{classroom_documents[3].filename}"
163160
),
164161
},
165162
{
@@ -170,11 +167,8 @@ def test_api_list_classroom_documents_on_aws(self):
170167
"upload_state": "pending",
171168
"uploaded_on": "2018-08-08T00:00:00Z",
172169
"url": (
173-
f"https://abc.cloudfront.net/{classroom.id}/classroomdocument/"
174-
f"{classroom_documents[2].id}/1533686400"
175-
f".{classroom_documents[2].filename.split('.')[-1]}"
176-
f"?response-content-disposition"
177-
f"=attachment%3B+filename%3D{classroom_documents[2].filename}"
170+
f"https://abc.svc.edge.scw.cloud/aws/{classroom.id}/classroomdocument/"
171+
f"{classroom_documents[2].id}/{classroom_documents[2].filename}"
178172
),
179173
},
180174
{
@@ -185,11 +179,8 @@ def test_api_list_classroom_documents_on_aws(self):
185179
"upload_state": "pending",
186180
"uploaded_on": "2018-08-08T00:00:00Z",
187181
"url": (
188-
f"https://abc.cloudfront.net/{classroom.id}/classroomdocument/"
189-
f"{classroom_documents[1].id}/1533686400"
190-
f".{classroom_documents[1].filename.split('.')[-1]}"
191-
f"?response-content-disposition"
192-
f"=attachment%3B+filename%3D{classroom_documents[1].filename}"
182+
f"https://abc.svc.edge.scw.cloud/aws/{classroom.id}/classroomdocument/"
183+
f"{classroom_documents[1].id}/{classroom_documents[1].filename}"
193184
),
194185
},
195186
{
@@ -200,11 +191,8 @@ def test_api_list_classroom_documents_on_aws(self):
200191
"upload_state": "pending",
201192
"uploaded_on": "2018-08-08T00:00:00Z",
202193
"url": (
203-
f"https://abc.cloudfront.net/{classroom.id}/classroomdocument/"
204-
f"{classroom_documents[0].id}/1533686400"
205-
f".{classroom_documents[0].filename.split('.')[-1]}"
206-
f"?response-content-disposition"
207-
f"=attachment%3B+filename%3D{classroom_documents[0].filename}"
194+
f"https://abc.svc.edge.scw.cloud/aws/{classroom.id}/classroomdocument/"
195+
f"{classroom_documents[0].id}/{classroom_documents[0].filename}"
208196
),
209197
},
210198
],
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""Test the ``rename_classroom_documents`` management command."""
2+
3+
from datetime import datetime, timezone
4+
from os.path import splitext
5+
from unittest import mock
6+
7+
from django.core.management import call_command
8+
from django.test import TestCase
9+
10+
from botocore.stub import Stubber
11+
12+
from marsha import settings
13+
from marsha.bbb.factories import ClassroomDocumentFactory, ClassroomFactory
14+
from marsha.bbb.management.commands import rename_classroom_documents
15+
from marsha.bbb.models import ClassroomDocument
16+
from marsha.core.utils import time_utils
17+
18+
19+
class RenameClassroomDocumentsTestCase(TestCase):
    """Exercise the ``rename_classroom_documents`` management command."""

    @mock.patch("marsha.core.storage.storage_class.file_storage.exists")
    def test_rename_classroom_documents(self, mock_exists):
        """Legacy S3 objects are copied to a key ending with the cleaned filename."""
        # No destination object exists yet, so every document must be copied.
        mock_exists.return_value = False
        uploaded_on = datetime(2018, 8, 8, tzinfo=timezone.utc)

        # (<original filename>, <expected and cleaned>)
        cases = [
            ("normal_filename.pdf", "normal_filename.pdf"),
            ("weird\\file/name.pdf", "weird_file_name.pdf"),
            (".hidden_file", "hidden_file"),
        ]

        with Stubber(rename_classroom_documents.s3_client) as stubber:
            created = [
                ClassroomDocumentFactory(
                    classroom=ClassroomFactory(),
                    filename=original,
                    uploaded_on=uploaded_on,
                )
                for original, _ in cases
            ]

            # Stubber replays responses strictly in registration order, so the
            # expectations are registered by walking objects.all() exactly as
            # the command does.
            bucket = settings.STORAGE_S3_BUCKET_NAME
            command = rename_classroom_documents.Command()
            for doc in ClassroomDocument.objects.all():
                timestamp = time_utils.to_timestamp(doc.uploaded_on)
                suffix = splitext(doc.filename)[1] if "." in doc.filename else ""
                key_prefix = f"aws/{doc.classroom.id}/classroomdocument/{doc.id}"
                cleaned = command.validate_filename(doc.filename)

                stubber.add_response(
                    "copy_object",
                    {},
                    {
                        "Bucket": bucket,
                        "CopySource": {
                            "Bucket": bucket,
                            "Key": f"{key_prefix}/{timestamp}{suffix}",
                        },
                        "Key": f"{key_prefix}/{cleaned}",
                    },
                )

            call_command("rename_classroom_documents")

            stubber.assert_no_pending_responses()

            # Every stored filename must now be the clean S3-compatible one.
            for doc, (_, expected) in zip(created, cases):
                doc.refresh_from_db()
                assert doc.filename == expected

    @mock.patch("marsha.core.storage.storage_class.file_storage.exists")
    def test_rename_classroom_documents_file_exists(self, mock_exists):
        """Nothing is copied when the destination object is already there."""
        mock_exists.return_value = True

        ClassroomDocumentFactory(
            classroom=ClassroomFactory(),
            filename="filename.pdf",
            uploaded_on=datetime(2018, 8, 8, tzinfo=timezone.utc),
        )

        # No response is registered: any copy_object call would make the stub fail.
        with Stubber(rename_classroom_documents.s3_client) as stubber:
            call_command("rename_classroom_documents")
            stubber.assert_no_pending_responses()

src/backend/marsha/bbb/utils/bbb_utils.py

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import json
66
from json import JSONDecodeError
77
import logging
8-
from os.path import splitext
98

109
from django.conf import settings
1110
from django.utils.timezone import now
@@ -16,7 +15,7 @@
1615
import xmltodict
1716

1817
from marsha.bbb.models import Classroom, ClassroomRecording, ClassroomSession
19-
from marsha.core.defaults import SCW_S3
18+
from marsha.core.defaults import AWS_STORAGE_BASE_DIRECTORY, SCW_S3
2019
from marsha.core.storage.storage_class import file_storage
2120
from marsha.core.utils import time_utils
2221

@@ -106,7 +105,7 @@ def get_url(obj):
106105
Returns
107106
-------
108107
String or None
109-
the url to fetch the classroom document on CloudFront or Edge Service
108+
the url to fetch the classroom document on Edge Service
110109
None if the classroom document is still not uploaded to S3 with success
111110
112111
"""
@@ -117,19 +116,12 @@ def get_url(obj):
117116
file_key = obj.get_storage_key(obj.filename)
118117
return file_storage.url(file_key)
119118

120-
extension = ""
121-
if "." in obj.filename:
122-
extension = splitext(obj.filename)[1]
123-
124-
stamp = time_utils.to_timestamp(obj.uploaded_on)
125-
126-
# Default AWS fallback
127-
url = (
128-
f"{settings.AWS_S3_URL_PROTOCOL}://{settings.CLOUDFRONT_DOMAIN}/"
129-
f"{obj.classroom.pk}/classroomdocument/{obj.pk}/{stamp}{extension}"
119+
# Default fallback to location under "aws" directory
120+
file_key = obj.get_storage_key(
121+
filename=f"{obj.filename}", base_dir=AWS_STORAGE_BASE_DIRECTORY
130122
)
131123

132-
return url
124+
return file_storage.url(file_key)
133125

134126

135127
def create(classroom: Classroom, recording_ready_callback_url: str, attempt=0):

0 commit comments

Comments
 (0)