Skip to content

Commit 25abd96

Browse files
committed
✨(backend) manage uploaded file status and call to malware detection
In the attachment_upload method, the status in the file metadata is set to processing and the malware_detection backend is called. In media_auth, we check that the status is ready before accepting the request.
1 parent a070e1d commit 25abd96

File tree

6 files changed

+144
-20
lines changed

6 files changed

+144
-20
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ and this project adheres to
1212

1313
- ✨ Add a custom callout block to the editor #892
1414
- 🚩(frontend) version MIT only #911
15+
- ✨(backend) integrate malware_detection from django-lasuite #936
1516

1617
## Changed
1718

src/backend/core/api/viewsets.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import requests
2525
import rest_framework as drf
2626
from botocore.exceptions import ClientError
27+
from lasuite.malware_detection import malware_detection
2728
from rest_framework import filters, status, viewsets
2829
from rest_framework import response as drf_response
2930
from rest_framework.permissions import AllowAny
@@ -1156,7 +1157,10 @@ def attachment_upload(self, request, *args, **kwargs):
11561157

11571158
# Prepare metadata for storage
11581159
extra_args = {
1159-
"Metadata": {"owner": str(request.user.id)},
1160+
"Metadata": {
1161+
"owner": str(request.user.id),
1162+
"status": enums.DocumentAttachmentStatus.PROCESSING,
1163+
},
11601164
"ContentType": serializer.validated_data["content_type"],
11611165
}
11621166
file_unsafe = ""
@@ -1188,6 +1192,8 @@ def attachment_upload(self, request, *args, **kwargs):
11881192
document.attachments.append(key)
11891193
document.save()
11901194

1195+
malware_detection.analyse_file(key, document_id=document.id)
1196+
11911197
return drf.response.Response(
11921198
{"file": f"{settings.MEDIA_URL:s}{key:s}"},
11931199
status=drf.status.HTTP_201_CREATED,
@@ -1271,6 +1277,19 @@ def media_auth(self, request, *args, **kwargs):
12711277
logger.debug("User '%s' lacks permission for attachment", user)
12721278
raise drf.exceptions.PermissionDenied()
12731279

1280+
# Check if the attachment is ready
1281+
s3_client = default_storage.connection.meta.client
1282+
bucket_name = default_storage.bucket_name
1283+
head_resp = s3_client.head_object(Bucket=bucket_name, Key=key)
1284+
metadata = head_resp.get("Metadata", {})
1285+
# In order to be compatible with existing uploads without `status` metadata,
1286+
# we consider them as ready.
1287+
if (
1288+
metadata.get("status", enums.DocumentAttachmentStatus.READY)
1289+
!= enums.DocumentAttachmentStatus.READY
1290+
):
1291+
raise drf.exceptions.PermissionDenied()
1292+
12741293
# Generate S3 authorization headers using the extracted URL parameters
12751294
request = utils.generate_s3_authorization_headers(key)
12761295

src/backend/core/malware_detection.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from core.models import Document
1111

1212
logger = logging.getLogger(__name__)
13+
security_logger = logging.getLogger("docs.security")
1314

1415

1516
def malware_detection_callback(file_path, status, error_info, **kwargs):
@@ -35,7 +36,7 @@ def malware_detection_callback(file_path, status, error_info, **kwargs):
3536
return
3637

3738
document_id = kwargs.get("document_id")
38-
logger.error(
39+
security_logger.warning(
3940
"File %s for document %s is infected with malware. Error info: %s",
4041
file_path,
4142
document_id,

src/backend/core/tests/documents/test_api_documents_attachment_upload.py

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import re
66
import uuid
7+
from unittest import mock
78

89
from django.core.files.storage import default_storage
910
from django.core.files.uploadedfile import SimpleUploadedFile
@@ -12,6 +13,7 @@
1213
from rest_framework.test import APIClient
1314

1415
from core import factories
16+
from core.api.viewsets import malware_detection
1517
from core.tests.conftest import TEAM, USER, VIA
1618

1719
pytestmark = pytest.mark.django_db
@@ -59,7 +61,8 @@ def test_api_documents_attachment_upload_anonymous_success():
5961
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
6062

6163
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
62-
response = APIClient().post(url, {"file": file}, format="multipart")
64+
with mock.patch.object(malware_detection, "analyse_file") as mock_analyse_file:
65+
response = APIClient().post(url, {"file": file}, format="multipart")
6366

6467
assert response.status_code == 201
6568

@@ -74,12 +77,13 @@ def test_api_documents_attachment_upload_anonymous_success():
7477
assert document.attachments == [f"{document.id!s}/attachments/{file_id!s}.png"]
7578

7679
# Now, check the metadata of the uploaded file
77-
key = file_path.replace("/media", "")
80+
key = file_path.replace("/media/", "")
81+
mock_analyse_file.assert_called_once_with(key, document_id=document.id)
7882
file_head = default_storage.connection.meta.client.head_object(
7983
Bucket=default_storage.bucket_name, Key=key
8084
)
8185

82-
assert file_head["Metadata"] == {"owner": "None"}
86+
assert file_head["Metadata"] == {"owner": "None", "status": "processing"}
8387
assert file_head["ContentType"] == "image/png"
8488
assert file_head["ContentDisposition"] == 'inline; filename="test.png"'
8589

@@ -139,14 +143,19 @@ def test_api_documents_attachment_upload_authenticated_success(reach, role):
139143
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
140144

141145
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
142-
response = client.post(url, {"file": file}, format="multipart")
146+
with mock.patch.object(malware_detection, "analyse_file") as mock_analyse_file:
147+
response = client.post(url, {"file": file}, format="multipart")
143148

144149
assert response.status_code == 201
145150

146151
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
147152
match = pattern.search(response.json()["file"])
148153
file_id = match.group(1)
149154

155+
mock_analyse_file.assert_called_once_with(
156+
f"{document.id!s}/attachments/{file_id!s}.png", document_id=document.id
157+
)
158+
150159
# Validate that file_id is a valid UUID
151160
uuid.UUID(file_id)
152161

@@ -210,7 +219,8 @@ def test_api_documents_attachment_upload_success(via, role, mock_user_teams):
210219
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
211220

212221
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
213-
response = client.post(url, {"file": file}, format="multipart")
222+
with mock.patch.object(malware_detection, "analyse_file") as mock_analyse_file:
223+
response = client.post(url, {"file": file}, format="multipart")
214224

215225
assert response.status_code == 201
216226

@@ -226,11 +236,12 @@ def test_api_documents_attachment_upload_success(via, role, mock_user_teams):
226236
assert document.attachments == [f"{document.id!s}/attachments/{file_id!s}.png"]
227237

228238
# Now, check the metadata of the uploaded file
229-
key = file_path.replace("/media", "")
239+
key = file_path.replace("/media/", "")
240+
mock_analyse_file.assert_called_once_with(key, document_id=document.id)
230241
file_head = default_storage.connection.meta.client.head_object(
231242
Bucket=default_storage.bucket_name, Key=key
232243
)
233-
assert file_head["Metadata"] == {"owner": str(user.id)}
244+
assert file_head["Metadata"] == {"owner": str(user.id), "status": "processing"}
234245
assert file_head["ContentType"] == "image/png"
235246
assert file_head["ContentDisposition"] == 'inline; filename="test.png"'
236247

@@ -304,7 +315,8 @@ def test_api_documents_attachment_upload_fix_extension(
304315
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
305316

306317
file = SimpleUploadedFile(name=name, content=content)
307-
response = client.post(url, {"file": file}, format="multipart")
318+
with mock.patch.object(malware_detection, "analyse_file") as mock_analyse_file:
319+
response = client.post(url, {"file": file}, format="multipart")
308320

309321
assert response.status_code == 201
310322

@@ -324,11 +336,16 @@ def test_api_documents_attachment_upload_fix_extension(
324336
uuid.UUID(file_id)
325337

326338
# Now, check the metadata of the uploaded file
327-
key = file_path.replace("/media", "")
339+
key = file_path.replace("/media/", "")
340+
mock_analyse_file.assert_called_once_with(key, document_id=document.id)
328341
file_head = default_storage.connection.meta.client.head_object(
329342
Bucket=default_storage.bucket_name, Key=key
330343
)
331-
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}
344+
assert file_head["Metadata"] == {
345+
"owner": str(user.id),
346+
"is_unsafe": "true",
347+
"status": "processing",
348+
}
332349
assert file_head["ContentType"] == content_type
333350
assert file_head["ContentDisposition"] == f'attachment; filename="{name:s}"'
334351

@@ -364,7 +381,8 @@ def test_api_documents_attachment_upload_unsafe():
364381
file = SimpleUploadedFile(
365382
name="script.exe", content=b"\x4d\x5a\x90\x00\x03\x00\x00\x00"
366383
)
367-
response = client.post(url, {"file": file}, format="multipart")
384+
with mock.patch.object(malware_detection, "analyse_file") as mock_analyse_file:
385+
response = client.post(url, {"file": file}, format="multipart")
368386

369387
assert response.status_code == 201
370388

@@ -381,11 +399,16 @@ def test_api_documents_attachment_upload_unsafe():
381399
file_id = file_id.replace("-unsafe", "")
382400
uuid.UUID(file_id)
383401

402+
key = file_path.replace("/media/", "")
403+
mock_analyse_file.assert_called_once_with(key, document_id=document.id)
384404
# Now, check the metadata of the uploaded file
385-
key = file_path.replace("/media", "")
386405
file_head = default_storage.connection.meta.client.head_object(
387406
Bucket=default_storage.bucket_name, Key=key
388407
)
389-
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}
408+
assert file_head["Metadata"] == {
409+
"owner": str(user.id),
410+
"is_unsafe": "true",
411+
"status": "processing",
412+
}
390413
assert file_head["ContentType"] == "application/octet-stream"
391414
assert file_head["ContentDisposition"] == 'attachment; filename="script.exe"'

src/backend/core/tests/documents/test_api_documents_media_auth.py

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from rest_framework.test import APIClient
1616

1717
from core import factories, models
18+
from core.enums import DocumentAttachmentStatus
1819
from core.tests.conftest import TEAM, USER, VIA
1920

2021
pytestmark = pytest.mark.django_db
@@ -45,6 +46,7 @@ def test_api_documents_media_auth_anonymous_public():
4546
Key=key,
4647
Body=BytesIO(b"my prose"),
4748
ContentType="text/plain",
49+
Metadata={"status": DocumentAttachmentStatus.READY},
4850
)
4951

5052
factories.DocumentFactory(id=document_id, link_reach="public", attachments=[key])
@@ -93,7 +95,15 @@ def test_api_documents_media_auth_extensions():
9395
keys = []
9496
for ext in extensions:
9597
filename = f"{uuid4()!s}.{ext:s}"
96-
keys.append(f"{document_id!s}/attachments/{filename:s}")
98+
key = f"{document_id!s}/attachments/{filename:s}"
99+
default_storage.connection.meta.client.put_object(
100+
Bucket=default_storage.bucket_name,
101+
Key=key,
102+
Body=BytesIO(b"my prose"),
103+
ContentType="text/plain",
104+
Metadata={"status": DocumentAttachmentStatus.READY},
105+
)
106+
keys.append(key)
97107

98108
factories.DocumentFactory(link_reach="public", attachments=keys)
99109

@@ -142,6 +152,7 @@ def test_api_documents_media_auth_anonymous_attachments():
142152
Key=key,
143153
Body=BytesIO(b"my prose"),
144154
ContentType="text/plain",
155+
Metadata={"status": DocumentAttachmentStatus.READY},
145156
)
146157

147158
factories.DocumentFactory(id=document_id, link_reach="restricted")
@@ -205,6 +216,7 @@ def test_api_documents_media_auth_authenticated_public_or_authenticated(reach):
205216
Key=key,
206217
Body=BytesIO(b"my prose"),
207218
ContentType="text/plain",
219+
Metadata={"status": DocumentAttachmentStatus.READY},
208220
)
209221

210222
factories.DocumentFactory(id=document_id, link_reach=reach, attachments=[key])
@@ -283,6 +295,7 @@ def test_api_documents_media_auth_related(via, mock_user_teams):
283295
Key=key,
284296
Body=BytesIO(b"my prose"),
285297
ContentType="text/plain",
298+
Metadata={"status": DocumentAttachmentStatus.READY},
286299
)
287300

288301
document = factories.DocumentFactory(
@@ -321,3 +334,70 @@ def test_api_documents_media_auth_related(via, mock_user_teams):
321334
timeout=1,
322335
)
323336
assert response.content.decode("utf-8") == "my prose"
337+
338+
339+
def test_api_documents_media_auth_not_ready_status():
340+
"""Attachments with status not ready should not be accessible"""
341+
document_id = uuid4()
342+
filename = f"{uuid4()!s}.jpg"
343+
key = f"{document_id!s}/attachments/{filename:s}"
344+
default_storage.connection.meta.client.put_object(
345+
Bucket=default_storage.bucket_name,
346+
Key=key,
347+
Body=BytesIO(b"my prose"),
348+
ContentType="text/plain",
349+
Metadata={"status": DocumentAttachmentStatus.PROCESSING},
350+
)
351+
352+
factories.DocumentFactory(id=document_id, link_reach="public", attachments=[key])
353+
354+
original_url = f"http://localhost/media/{key:s}"
355+
response = APIClient().get(
356+
"/api/v1.0/documents/media-auth/", HTTP_X_ORIGINAL_URL=original_url
357+
)
358+
359+
assert response.status_code == 403
360+
361+
362+
def test_api_documents_media_auth_missing_status_metadata():
363+
"""Attachments without status metadata should be considered as ready"""
364+
document_id = uuid4()
365+
filename = f"{uuid4()!s}.jpg"
366+
key = f"{document_id!s}/attachments/{filename:s}"
367+
default_storage.connection.meta.client.put_object(
368+
Bucket=default_storage.bucket_name,
369+
Key=key,
370+
Body=BytesIO(b"my prose"),
371+
ContentType="text/plain",
372+
)
373+
374+
factories.DocumentFactory(id=document_id, link_reach="public", attachments=[key])
375+
376+
original_url = f"http://localhost/media/{key:s}"
377+
response = APIClient().get(
378+
"/api/v1.0/documents/media-auth/", HTTP_X_ORIGINAL_URL=original_url
379+
)
380+
381+
assert response.status_code == 200
382+
383+
authorization = response["Authorization"]
384+
assert "AWS4-HMAC-SHA256 Credential=" in authorization
385+
assert (
386+
"SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature="
387+
in authorization
388+
)
389+
assert response["X-Amz-Date"] == timezone.now().strftime("%Y%m%dT%H%M%SZ")
390+
391+
s3_url = urlparse(settings.AWS_S3_ENDPOINT_URL)
392+
file_url = f"{settings.AWS_S3_ENDPOINT_URL:s}/impress-media-storage/{key:s}"
393+
response = requests.get(
394+
file_url,
395+
headers={
396+
"authorization": authorization,
397+
"x-amz-date": response["x-amz-date"],
398+
"x-amz-content-sha256": response["x-amz-content-sha256"],
399+
"Host": f"{s3_url.hostname:s}:{s3_url.port:d}",
400+
},
401+
timeout=1,
402+
)
403+
assert response.content.decode("utf-8") == "my prose"

src/backend/core/tests/test_malware_detection.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,17 @@
1515
pytestmark = pytest.mark.django_db
1616

1717

18-
@pytest.fixture
19-
def safe_file():
18+
@pytest.fixture(name="safe_file")
19+
def fixture_safe_file():
2020
"""Create a safe file."""
2121
file_path = "test.txt"
2222
default_storage.save(file_path, ContentFile("test"))
2323
yield file_path
2424
default_storage.delete(file_path)
2525

2626

27-
@pytest.fixture
28-
def unsafe_file():
27+
@pytest.fixture(name="unsafe_file")
28+
def fixture_unsafe_file():
2929
"""Create an unsafe file."""
3030
file_path = "unsafe.txt"
3131
default_storage.save(file_path, ContentFile("test"))

0 commit comments

Comments
 (0)