class OutlineImportUploadView(drf.views.APIView):
    """Import an Outline export uploaded as a multipart ``file`` field.

    Only authenticated users may call the endpoint. The upload must be named
    ``*.zip`` (case-insensitive) and be a readable zip archive; every failure
    is reported as a validation error on the ``file`` field.
    """

    parser_classes = [drf.parsers.MultiPartParser]
    permission_classes = [drf.permissions.IsAuthenticated]

    def post(self, request):
        """Validate the uploaded archive and import it for the current user.

        Returns a 201 response whose ``created_document_ids`` lists the ids
        returned by ``process_outline_zip``.

        Raises:
            ValidationError: when the file is missing, is not named ``*.zip``,
                is not a valid zip archive, or the import service raises
                ``OutlineImportError``.
        """
        uploaded = request.FILES.get("file")
        if not uploaded:
            raise drf.exceptions.ValidationError({"file": "File is required"})

        # Compare the extension case-insensitively: "EXPORT.ZIP" is a valid name.
        name = getattr(uploaded, "name", "")
        if not name.lower().endswith(".zip"):
            raise drf.exceptions.ValidationError({"file": "Must be a .zip file"})

        # NOTE(review): the whole upload is held in memory; no size limit is
        # enforced in this view — confirm one exists upstream.
        content = uploaded.read()

        try:
            # Fail fast if the upload is not a valid zip archive
            with zipfile.ZipFile(io.BytesIO(content)):
                pass
            created_ids = process_outline_zip(request.user, content)
        except zipfile.BadZipFile as exc:
            raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc
        except OutlineImportError as exc:
            raise drf.exceptions.ValidationError({"file": str(exc)}) from exc

        return drf.response.Response(
            {"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED
        )
def _ensure_dir_documents(user, dir_path: str, dir_docs: dict[str, models.Document]) -> models.Document | None:
    """Return the container document for ``dir_path``, creating missing levels.

    ``dir_docs`` maps each already-known directory path to its document and is
    updated in place with every container created here. Empty path segments
    are ignored. Each new container is created with ``user`` as creator and
    gets an OWNER access for ``user``.

    Returns the document of the deepest segment, or None when ``dir_path`` is
    empty or holds no segment at all.
    """
    if not dir_path:
        return None

    deepest: models.Document | None = None
    walked: list[str] = []

    for segment in filter(None, dir_path.split("/")):
        walked.append(segment)
        cache_key = "/".join(walked)

        if cache_key not in dir_docs:
            # First level becomes a root document, deeper levels hang below
            # the container resolved on the previous iteration.
            if deepest is None:
                container = models.Document.add_root(
                    depth=1,
                    creator=user,
                    title=segment,
                    link_reach=models.LinkReachChoices.RESTRICTED,
                )
            else:
                container = deepest.add_child(creator=user, title=segment)

            models.DocumentAccess.objects.update_or_create(
                document=container,
                user=user,
                defaults={"role": models.RoleChoices.OWNER},
            )
            dir_docs[cache_key] = container

        deepest = dir_docs[cache_key]

    return deepest
def process_outline_zip(user, zip_bytes: bytes) -> list[str]:
    """Process an Outline export zip and create Docs documents.

    Every markdown file of the archive becomes a document created by ``user``,
    who also receives an OWNER access on it. Directories are mirrored as
    container documents; when ``Doc.md`` sits next to a ``Doc/`` directory the
    markdown document itself is used as the parent of that directory's
    entries. Local images referenced from the markdown are uploaded as
    attachments and their link targets rewritten to the media URL.

    Entries under ``__MACOSX/`` and entries with a path segment starting with
    ``.`` are ignored. A markdown file that cannot be read or converted still
    yields a document (without content) so the rest of the import continues.

    Args:
        user: Creator and owner of every document created by the import.
        zip_bytes: Raw content of the uploaded archive.

    Returns:
        The list of created document IDs (stringified UUIDs) corresponding to
        markdown-backed documents. Container folders used to rebuild the
        hierarchy are not listed.

    Raises:
        OutlineImportError: If an entry name is absolute, contains a backslash
            or has a ``..`` segment.
        zipfile.BadZipFile: If ``zip_bytes`` is not a readable zip archive.
    """
    import logging  # function-scope import: the module import block is left untouched

    logger = logging.getLogger(__name__)
    md_suffix = ".md"
    img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")
    title_pattern = re.compile(r"^#\s+(.+)$", flags=re.MULTILINE)

    # The context manager closes the archive even when the import stops midway.
    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as archive:
        names = archive.namelist()

        # Basic Zip Slip protection: refuse absolute or parent-traversal entries
        for name in names:
            if name.startswith("/") or "\\" in name or ".." in name.split("/"):
                raise OutlineImportError("Unsafe path in archive")

        md_files = sorted(
            name
            for name in names
            if name.lower().endswith(md_suffix)
            and not name.startswith("__MACOSX/")
            and not any(part.startswith(".") for part in name.split("/"))
        )

        # Every directory prefix present in the archive, computed once instead
        # of rescanning the whole name list for each markdown file.
        dir_prefixes: set[str] = set()
        for name in names:
            segments = name.split("/")[:-1]
            dir_prefixes.update(
                "/".join(segments[:depth]) for depth in range(1, len(segments) + 1)
            )

        # Markdown files that also exist as a directory (Outline nested docs),
        # e.g. "Doc.md" and "Doc/" both exist -> "Doc" is a parent document.
        # The suffix is sliced off rather than split on ".md" so the result
        # agrees with the case-insensitive filter above ("Doc.MD" -> "Doc").
        md_with_dirs = {
            md_path[: -len(md_suffix)]
            for md_path in md_files
            if md_path[: -len(md_suffix)] in dir_prefixes
        }

        converter = YdocConverter()
        created_ids: list[str] = []
        dir_docs: dict[str, models.Document] = {}

        # Sorted order puts "Doc.md" before anything under "Doc/" ("." sorts
        # before "/"), so a parent document is registered before its children.
        for md_path in md_files:
            dir_path, _, file_name = md_path.rpartition("/")
            parent_doc = _ensure_dir_documents(user, dir_path, dir_docs)

            try:
                raw_md = archive.read(md_path).decode("utf-8", errors="ignore")
            except Exception:  # noqa: BLE001
                # Best effort: an unreadable entry still yields an empty document.
                logger.warning("Outline import: cannot read %s", md_path, exc_info=True)
                raw_md = ""

            # Title: first level-1 heading, else the file name without extension.
            title_match = title_pattern.search(raw_md)
            title = title_match.group(1).strip() if title_match else file_name.rsplit(".", 1)[0]

            if parent_doc is None:
                doc = models.Document.add_root(
                    depth=1,
                    creator=user,
                    title=title,
                    link_reach=models.LinkReachChoices.RESTRICTED,
                )
            else:
                doc = parent_doc.add_child(creator=user, title=title)

            # If this md file has a corresponding directory, register it as a container
            # so nested children will use this doc as parent instead of creating a duplicate
            base_path = md_path[: -len(md_suffix)]
            if base_path in md_with_dirs:
                dir_docs[base_path] = doc

            models.DocumentAccess.objects.update_or_create(
                document=doc,
                user=user,
                defaults={"role": models.RoleChoices.OWNER},
            )

            # Media URL per archive path, so an image referenced several times
            # in the same document is uploaded only once.
            uploaded_urls: dict[str, str] = {}

            def replace_img_link(match: re.Match[str]) -> str:
                """Swap a local image target for its uploaded media URL."""
                whole = match.group(0)
                url = match.group(1)
                if url.startswith(("http://", "https://")):
                    return whole

                asset_rel = re.sub(r"/+", "/", f"{dir_path}/{url}" if dir_path else url)
                # sanitize computed asset path
                if asset_rel.startswith("/") or ".." in asset_rel.split("/"):
                    return whole

                media_url = uploaded_urls.get(asset_rel)
                if media_url is None:
                    try:
                        data = archive.read(asset_rel)
                    except KeyError:
                        # Target is not in the archive: leave the link untouched.
                        return whole
                    media_url = _upload_attachment(user, doc, arcname=url, data=data)
                    uploaded_urls[asset_rel] = media_url

                # Replace only the captured target; a plain str.replace would
                # also rewrite an alt text that happens to equal the URL.
                offset = match.start(0)
                return (
                    whole[: match.start(1) - offset]
                    + media_url
                    + whole[match.end(1) - offset :]
                )

            # The closure is consumed right here, within the same iteration.
            rewritten_md = img_pattern.sub(replace_img_link, raw_md)

            try:
                doc.content = converter.convert(
                    rewritten_md.encode("utf-8"),
                    content_type="text/markdown",
                    accept="application/vnd.yjs.doc",
                )
                doc.save()
            except Exception:  # noqa: BLE001
                # Keep doc without content on conversion error but continue import
                logger.warning(
                    "Outline import: conversion failed for %s", md_path, exc_info=True
                )

            created_ids.append(str(doc.id))

    return created_ids
@patch("core.services.converter_services.YdocConverter.convert", return_value="YmFzZTY0Y29udGVudA==")
def test_outline_import_upload_authenticated_success(mock_convert):
    """A logged-in user uploading an Outline export gets a 201 listing one document id."""
    # One markdown page pointing at an image stored next to it in the archive
    markdown = "# Imported Title\n\nSome text.\n\n![Alt](image.png)\n"
    png_bytes = (
        b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00"
        b"\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\xf8\xff\xff?\x00\x05\xfe\x02\xfe"
        b"\xa7V\xbd\xfa\x00\x00\x00\x00IEND\xaeB`\x82"
    )
    archive = make_zip_with_markdown_and_image(
        "Folder1/page.md", markdown, "Folder1/image.png", png_bytes
    )

    api_client = APIClient()
    api_client.force_login(factories.UserFactory())
    payload = {
        "file": SimpleUploadedFile(
            name="export.zip", content=archive, content_type="application/zip"
        )
    }

    with patch.object(malware_detection, "analyse_file") as mock_analyse_file:
        response = api_client.post(
            "/api/v1.0/imports/outline/upload", payload, format="multipart"
        )

    assert response.status_code == 201
    body = response.json()
    assert "created_document_ids" in body
    # Container folders are not reported, only the markdown-backed document
    assert len(body["created_document_ids"]) == 1

    # One conversion for the single markdown file
    mock_convert.assert_called_once()
    # The uploaded image went through the antivirus scan
    assert mock_analyse_file.called
@patch("core.api.imports.process_outline_zip", side_effect=OutlineImportError("boom"))
def test_outline_import_upload_outline_error_returns_validation_error(mock_process_outline):
    """Service-level Outline import errors are surfaced as validation errors."""
    user = factories.UserFactory()
    client = APIClient()
    client.force_login(user)

    # Build a well-formed archive holding only the markdown file. Going through
    # the markdown+image helper with an empty image path would add a zip member
    # with an empty name, which this test does not need.
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, mode="w") as zf:
        zf.writestr("doc.md", "# Title")
    upload = SimpleUploadedFile(
        name="export.zip", content=buf.getvalue(), content_type="application/zip"
    )

    response = client.post(
        "/api/v1.0/imports/outline/upload",
        {"file": upload},
        format="multipart",
    )

    # The message of the service error is reported on the "file" field
    assert response.status_code == 400
    assert response.json() == {"file": ["boom"]}
    mock_process_outline.assert_called_once()
@patch("core.services.converter_services.YdocConverter.convert", return_value="YmFzZTY0Y29udGVudA==")
@patch("core.services.outline_import.malware_detection.analyse_file")
@patch("django.core.files.storage.default_storage.connection.meta.client.upload_fileobj")
def test_process_outline_zip_happy_path(mock_upload, mock_av, mock_convert):
    """Only the visible markdown page yields a document; its image is uploaded and scanned."""
    entries = {
        "dir/page.md": b"# T1\n![img](image.png)",
        "dir/image.png": b"i-am-png",
        # Noise that must not produce any document
        "__MACOSX/._noise": b"",
        ".hidden/skip.md": b"# hidden",
    }
    owner = factories.UserFactory()

    document_ids = process_outline_zip(owner, make_zip(entries))

    assert len(document_ids) == 1
    mock_convert.assert_called_once()
    mock_upload.assert_called()
    mock_av.assert_called()
import { useRouter } from 'next/router'; import { useTranslation } from 'react-i18next'; -import { Icon } from '@/components'; +import { Box, DropdownMenu, Icon } from '@/components'; import { useCreateDoc } from '@/features/docs/doc-management'; import { useLeftPanelStore } from '../stores'; @@ -18,14 +18,28 @@ export const LeftPanelHeaderButton = () => { }, }); return ( - + + + void router.push('/import/outline'), + showSeparator: false, + }, + ]} + > + ); }; diff --git a/src/frontend/apps/impress/src/i18n/translations.json b/src/frontend/apps/impress/src/i18n/translations.json index 662694431c..0e2cafde51 100644 --- a/src/frontend/apps/impress/src/i18n/translations.json +++ b/src/frontend/apps/impress/src/i18n/translations.json @@ -434,7 +434,11 @@ "Share with {{count}} users_one": "Share with {{count}} user", "Shared with {{count}} users_many": "Shared with {{count}} users", "Shared with {{count}} users_one": "Shared with {{count}} user", - "Shared with {{count}} users_other": "Shared with {{count}} users" + "Shared with {{count}} users_other": "Shared with {{count}} users", + "Import from Outline": "Import from Outline", + "Import Outline archive": "Import Outline archive", + "Select a .zip file": "Select a .zip file", + "Import": "Import" } }, "es": { @@ -768,6 +772,10 @@ "Open invitation actions menu": "Menu des actions d'ouverture d'une invitation", "Open the document options": "Ouvrir les options du document", "Open the header menu": "Ouvrir le menu d'en-tête", + "Import from Outline": "Importer depuis Outline", + "Import Outline archive": "Importer une archive Outline", + "Select a .zip file": "Sélectionnez un fichier .zip", + "Import": "Importer", "Open the menu of actions for the document: {{title}}": "Ouvrir le menu des actions du document : {{title}}", "Organize": "Organiser", "Others are editing this document. Unfortunately your network blocks WebSockets, the technology enabling real-time co-editing.": "D'autres sont en train de modifier ce document. 
Malheureusement, votre réseau bloque les web sockets, la technologie permettant la coédition en temps réel.", diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx new file mode 100644 index 0000000000..bd0911e330 --- /dev/null +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -0,0 +1,97 @@ +import { Button, Loader } from '@openfun/cunningham-react'; +import { useRouter } from 'next/router'; +import { ReactElement, useState } from 'react'; +import { useTranslation } from 'react-i18next'; + +import { baseApiUrl, getCSRFToken } from '@/api'; +import { Box, Text } from '@/components'; +import { MainLayout } from '@/layouts'; +import { NextPageWithLayout } from '@/types/next'; + +const Page: NextPageWithLayout = () => { + const { t } = useTranslation(); + const router = useRouter(); + const [file, setFile] = useState(null); + const [isUploading, setIsUploading] = useState(false); + const [error, setError] = useState(null); + + const onSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + setError(null); + if (!file) { + setError(t('Please select a .zip file')); + return; + } + setIsUploading(true); + try { + const form = new FormData(); + form.append('file', file); + const csrfToken = getCSRFToken(); + const resp = await fetch(`${baseApiUrl('1.0')}imports/outline/upload`, { + method: 'POST', + body: form, + credentials: 'include', + headers: { + ...(csrfToken && { 'X-CSRFToken': csrfToken }), + }, + }); + if (!resp.ok) { + throw new Error(await resp.text()); + } + const data = (await resp.json()) as { created_document_ids: string[] }; + const first = data.created_document_ids?.[0]; + if (first) { + void router.replace(`/docs/${first}`); + } else { + void router.replace('/'); + } + } catch { + setError(t('Something bad happens, please retry.')); + } finally { + setIsUploading(false); + } + }; + + return ( + + + {t('Import Outline archive')} + +
+ setFile(e.target.files?.[0] ?? null)} + aria-label={t('Select a .zip file')} + /> + + + {isUploading && } + + {error && ( + + {error} + + )} +
+
+ ); +}; + +Page.getLayout = function getLayout(page: ReactElement) { + return {page}; +}; + +export default Page;