Skip to content

Commit e8d95fa

Browse files
committed
✨(backend) allow uploading more types of attachments
We want to allow users to upload files to a document, not just images. We try to enforce coherence between the file extension and the real mime type of its content. If a file is deemed unsafe, it is still accepted during upload and the information is stored as metadata on the object for display to readers.
1 parent a9f08df commit e8d95fa

File tree

7 files changed

+228
-49
lines changed

7 files changed

+228
-49
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ and this project adheres to
3737

3838
## Added
3939

40+
- ✨(backend) allow uploading more types of attachments #309
4041
- ✨(backend) add name fields to the user synchronized with OIDC #301
4142
- ✨(ci) add security scan #291
4243
- ♻️(frontend) Add versions #277

Dockerfile

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,15 @@ ENV PYTHONUNBUFFERED=1
6565

6666
# Install required system libs
6767
RUN apk add \
68-
gettext \
6968
cairo \
70-
libffi-dev \
69+
file \
70+
font-noto \
71+
font-noto-emoji \
72+
gettext \
7173
gdk-pixbuf \
72-
pango \
74+
libffi-dev \
7375
pandoc \
74-
font-noto-emoji \
75-
font-noto \
76+
pango \
7677
shared-mime-info
7778

7879
# Copy entrypoint

src/backend/core/api/serializers.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from django.db.models import Q
77
from django.utils.translation import gettext_lazy as _
88

9+
import magic
910
from rest_framework import exceptions, serializers
1011

1112
from core import models
@@ -219,16 +220,42 @@ def validate_file(self, file):
219220
f"File size exceeds the maximum limit of {max_size:d} MB."
220221
)
221222

222-
# Validate file type
223-
mime_type, _ = mimetypes.guess_type(file.name)
224-
if mime_type not in settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES:
225-
mime_types = ", ".join(settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES)
226-
raise serializers.ValidationError(
227-
f"File type '{mime_type:s}' is not allowed. Allowed types are: {mime_types:s}"
228-
)
223+
extension = file.name.rpartition(".")[-1] if "." in file.name else None
224+
225+
# Read the first few bytes to determine the MIME type accurately
226+
mime = magic.Magic(mime=True)
227+
magic_mime_type = mime.from_buffer(file.read(1024))
228+
file.seek(0) # Reset file pointer to the beginning after reading
229+
230+
self.context["is_unsafe"] = (
231+
magic_mime_type in settings.DOCUMENT_UNSAFE_MIME_TYPES
232+
)
233+
234+
extension_mime_type, _ = mimetypes.guess_type(file.name)
235+
236+
# Try guessing a coherent extension from the mimetype
237+
if extension_mime_type != magic_mime_type:
238+
self.context["is_unsafe"] = True
239+
240+
guessed_ext = mimetypes.guess_extension(magic_mime_type)
241+
# Missing extensions or extensions longer than 5 characters (it's as long as an extension
242+
# can be) are replaced by the extension we eventually guessed from mimetype.
243+
if (extension is None or len(extension) > 5) and guessed_ext:
244+
extension = guessed_ext[1:]
245+
246+
if extension is None:
247+
raise serializers.ValidationError("Could not determine file extension.")
248+
249+
self.context["expected_extension"] = extension
229250

230251
return file
231252

253+
def validate(self, attrs):
254+
"""Override validate to add the computed extension to validated_data."""
255+
attrs["expected_extension"] = self.context["expected_extension"]
256+
attrs["is_unsafe"] = self.context["is_unsafe"]
257+
return attrs
258+
232259

233260
class TemplateSerializer(BaseResourceSerializer):
234261
"""Serialize templates."""

src/backend/core/api/viewsets.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""API endpoints"""
22

3-
import os
43
import re
54
import uuid
65
from urllib.parse import urlparse
@@ -476,15 +475,22 @@ def attachment_upload(self, request, *args, **kwargs):
476475
return drf_response.Response(
477476
serializer.errors, status=status.HTTP_400_BAD_REQUEST
478477
)
479-
# Extract the file extension from the original filename
480-
file = serializer.validated_data["file"]
481-
extension = os.path.splitext(file.name)[1]
482478

483479
# Generate a generic yet unique filename to store the image in object storage
484480
file_id = uuid.uuid4()
485-
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}{extension:s}"
481+
extension = serializer.validated_data["expected_extension"]
482+
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}.{extension:s}"
483+
484+
# Prepare metadata for storage
485+
metadata = {"Metadata": {"owner": str(request.user.id)}}
486+
if serializer.validated_data["is_unsafe"]:
487+
metadata["Metadata"]["is_unsafe"] = "true"
488+
489+
file = serializer.validated_data["file"]
490+
default_storage.connection.meta.client.upload_fileobj(
491+
file, default_storage.bucket_name, key, ExtraArgs=metadata
492+
)
486493

487-
default_storage.save(key, file)
488494
return drf_response.Response(
489495
{"file": f"{settings.MEDIA_URL:s}{key:s}"}, status=status.HTTP_201_CREATED
490496
)

src/backend/core/tests/documents/test_api_documents_attachment_upload.py

Lines changed: 106 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import re
66
import uuid
77

8-
from django.core.files.base import ContentFile
8+
from django.core.files.storage import default_storage
99
from django.core.files.uploadedfile import SimpleUploadedFile
1010

1111
import pytest
@@ -16,6 +16,12 @@
1616

1717
pytestmark = pytest.mark.django_db
1818

19+
PIXEL = (
20+
b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00"
21+
b"\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\xf8\xff\xff?\x00\x05\xfe\x02\xfe"
22+
b"\xa7V\xbd\xfa\x00\x00\x00\x00IEND\xaeB`\x82"
23+
)
24+
1925

2026
@pytest.mark.parametrize(
2127
"reach, role",
@@ -33,7 +39,7 @@ def test_api_documents_attachment_upload_anonymous_forbidden(reach, role):
3339
and role don't allow it.
3440
"""
3541
document = factories.DocumentFactory(link_reach=reach, link_role=role)
36-
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
42+
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
3743

3844
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
3945
response = APIClient().post(url, {"file": file}, format="multipart")
@@ -50,14 +56,14 @@ def test_api_documents_attachment_upload_anonymous_success():
5056
if the link reach and role permit it.
5157
"""
5258
document = factories.DocumentFactory(link_reach="public", link_role="editor")
53-
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
59+
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
5460

5561
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
5662
response = APIClient().post(url, {"file": file}, format="multipart")
5763

5864
assert response.status_code == 201
5965

60-
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg")
66+
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
6167
match = pattern.search(response.json()["file"])
6268
file_id = match.group(1)
6369

@@ -85,7 +91,7 @@ def test_api_documents_attachment_upload_authenticated_forbidden(reach, role):
8591
client.force_login(user)
8692

8793
document = factories.DocumentFactory(link_reach=reach, link_role=role)
88-
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
94+
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
8995

9096
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
9197
response = client.post(url, {"file": file}, format="multipart")
@@ -114,14 +120,14 @@ def test_api_documents_attachment_upload_authenticated_success(reach, role):
114120
client.force_login(user)
115121

116122
document = factories.DocumentFactory(link_reach=reach, link_role=role)
117-
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
123+
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
118124

119125
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
120126
response = client.post(url, {"file": file}, format="multipart")
121127

122128
assert response.status_code == 201
123129

124-
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg")
130+
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
125131
match = pattern.search(response.json()["file"])
126132
file_id = match.group(1)
127133

@@ -148,7 +154,7 @@ def test_api_documents_attachment_upload_reader(via, mock_user_teams):
148154
document=document, team="lasuite", role="reader"
149155
)
150156

151-
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
157+
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
152158

153159
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
154160
response = client.post(url, {"file": file}, format="multipart")
@@ -179,20 +185,28 @@ def test_api_documents_attachment_upload_success(via, role, mock_user_teams):
179185
document=document, team="lasuite", role=role
180186
)
181187

182-
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
188+
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
183189

184190
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
185191
response = client.post(url, {"file": file}, format="multipart")
186192

187193
assert response.status_code == 201
188194

189-
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg")
190-
match = pattern.search(response.json()["file"])
195+
file_path = response.json()["file"]
196+
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
197+
match = pattern.search(file_path)
191198
file_id = match.group(1)
192199

193200
# Validate that file_id is a valid UUID
194201
uuid.UUID(file_id)
195202

203+
# Now, check the metadata of the uploaded file
204+
key = file_path.replace("/media", "")
205+
file_head = default_storage.connection.meta.client.head_object(
206+
Bucket=default_storage.bucket_name, Key=key
207+
)
208+
assert file_head["Metadata"] == {"owner": str(user.id)}
209+
196210

197211
def test_api_documents_attachment_upload_invalid(client):
198212
"""Attempt to upload without a file should return an explicit error."""
@@ -222,34 +236,102 @@ def test_api_documents_attachment_upload_size_limit_exceeded(settings):
222236
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
223237

224238
# Create a temporary file larger than the allowed size
225-
content = b"a" * (1048576 + 1)
226-
file = ContentFile(content, name="test.jpg")
239+
file = SimpleUploadedFile(
240+
name="test.txt", content=b"a" * (1048576 + 1), content_type="text/plain"
241+
)
227242

228243
response = client.post(url, {"file": file}, format="multipart")
229244

230245
assert response.status_code == 400
231246
assert response.json() == {"file": ["File size exceeds the maximum limit of 1 MB."]}
232247

233248

234-
def test_api_documents_attachment_upload_type_not_allowed(settings):
235-
"""The uploaded file should be of a whitelisted type."""
236-
settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES = ["image/jpeg", "image/png"]
237-
249+
@pytest.mark.parametrize(
250+
"name,content,extension",
251+
[
252+
("test.exe", b"text", "exe"),
253+
("test", b"text", "txt"),
254+
("test.aaaaaa", b"test", "txt"),
255+
("test.txt", PIXEL, "txt"),
256+
("test.py", b"#!/usr/bin/python", "py"),
257+
],
258+
)
259+
def test_api_documents_attachment_upload_fix_extension(name, content, extension):
260+
"""
261+
A file with no extension or a wrong extension is accepted and the extension
262+
is corrected in storage.
263+
"""
238264
user = factories.UserFactory()
239265
client = APIClient()
240266
client.force_login(user)
241267

242268
document = factories.DocumentFactory(users=[(user, "owner")])
243269
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
244270

245-
# Create a temporary file with a not allowed type (e.g., text file)
246-
file = ContentFile(b"a" * 1048576, name="test.txt")
271+
file = SimpleUploadedFile(name=name, content=content)
272+
response = client.post(url, {"file": file}, format="multipart")
273+
274+
assert response.status_code == 201
275+
276+
file_path = response.json()["file"]
277+
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.{extension:s}")
278+
match = pattern.search(file_path)
279+
file_id = match.group(1)
280+
281+
# Validate that file_id is a valid UUID
282+
uuid.UUID(file_id)
283+
284+
# Now, check the metadata of the uploaded file
285+
key = file_path.replace("/media", "")
286+
file_head = default_storage.connection.meta.client.head_object(
287+
Bucket=default_storage.bucket_name, Key=key
288+
)
289+
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}
290+
291+
292+
def test_api_documents_attachment_upload_empty_file():
293+
"""An empty file should be rejected."""
294+
user = factories.UserFactory()
295+
client = APIClient()
296+
client.force_login(user)
297+
298+
document = factories.DocumentFactory(users=[(user, "owner")])
299+
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
247300

301+
file = SimpleUploadedFile(name="test.png", content=b"")
248302
response = client.post(url, {"file": file}, format="multipart")
249303

250304
assert response.status_code == 400
251-
assert response.json() == {
252-
"file": [
253-
"File type 'text/plain' is not allowed. Allowed types are: image/jpeg, image/png"
254-
]
255-
}
305+
assert response.json() == {"file": ["The submitted file is empty."]}
306+
307+
308+
def test_api_documents_attachment_upload_unsafe():
309+
"""A file with an unsafe mime type should be tagged as such."""
310+
user = factories.UserFactory()
311+
client = APIClient()
312+
client.force_login(user)
313+
314+
document = factories.DocumentFactory(users=[(user, "owner")])
315+
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
316+
317+
file = SimpleUploadedFile(
318+
name="script.exe", content=b"\x4d\x5a\x90\x00\x03\x00\x00\x00"
319+
)
320+
response = client.post(url, {"file": file}, format="multipart")
321+
322+
assert response.status_code == 201
323+
324+
file_path = response.json()["file"]
325+
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.exe")
326+
match = pattern.search(file_path)
327+
file_id = match.group(1)
328+
329+
# Validate that file_id is a valid UUID
330+
uuid.UUID(file_id)
331+
332+
# Now, check the metadata of the uploaded file
333+
key = file_path.replace("/media", "")
334+
file_head = default_storage.connection.meta.client.head_object(
335+
Bucket=default_storage.bucket_name, Key=key
336+
)
337+
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}

0 commit comments

Comments
 (0)