From 6dbd910af065df68d1f61fb64fb58ae3c3407ccf Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 07:27:00 +0200 Subject: [PATCH 01/16] outline-import: backend upload endpoint + frontend upload page\n\nBackend:\n- Add POST /api/v1.0/outline_import/upload (zip)\n- Parse .md files, create doc tree from folders, rewrite image links to attachments, convert to Yjs via Y-provider\nFrontend:\n- Add /import/outline page with zip file picker + POST\n- Add menu entry 'Import from Outline' in left panel header\n- Add minimal i18n keys (en, fr) --- src/backend/core/api/viewsets.py | 158 ++++++++++++++++++ src/backend/core/urls.py | 4 + .../components/LeftPanelHeaderButton.tsx | 29 +++- .../apps/impress/src/i18n/translations.json | 10 +- .../src/pages/import/outline/index.tsx | 84 ++++++++++ 5 files changed, 275 insertions(+), 10 deletions(-) create mode 100644 src/frontend/apps/impress/src/pages/import/outline/index.tsx diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 9b44be6897..dec2ee69fd 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -3,6 +3,10 @@ import base64 import json +import io +import mimetypes +import re +import zipfile import logging import uuid from collections import defaultdict @@ -2174,3 +2178,157 @@ def _load_theme_customization(self): ) return theme_customization + + +class OutlineImportUploadView(drf.views.APIView): + """Upload an Outline export (.zip) and import it as Docs documents. + + Expects a multipart/form-data with field name 'file' containing a .zip archive + produced by Outline export. + + Returns a JSON payload with a list of created document ids. 
+ """ + + parser_classes = [drf.parsers.MultiPartParser] + permission_classes = [permissions.IsAuthenticated] + + def post(self, request): + uploaded = request.FILES.get("file") + if not uploaded: + raise drf.exceptions.ValidationError({"file": "File is required"}) + + name = getattr(uploaded, "name", "") + if not name.endswith(".zip"): + raise drf.exceptions.ValidationError({"file": "Must be a .zip file"}) + + try: + content = uploaded.read() + archive = zipfile.ZipFile(io.BytesIO(content)) + except zipfile.BadZipFile as exc: + raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc + + created_ids: list[str] = [] + dir_docs: dict[str, models.Document] = {} + md_files = sorted([n for n in archive.namelist() if n.lower().endswith(".md")]) + + def ensure_dir_docs(dir_path: str) -> models.Document | None: + if not dir_path: + return None + parts = [p for p in dir_path.split("/") if p] + parent: models.Document | None = None + current = "" + for part in parts: + current = f"{current}/{part}" if current else part + if current in dir_docs: + parent = dir_docs[current] + continue + # create a container doc with the folder name + if parent is None: + doc = models.Document.add_root( + depth=1, + creator=request.user, + title=part, + link_reach=models.LinkReachChoices.RESTRICTED, + ) + else: + doc = parent.add_child(creator=request.user, title=part) + models.DocumentAccess.objects.update_or_create( + document=doc, + user=request.user, + defaults={"role": models.RoleChoices.OWNER}, + ) + dir_docs[current] = doc + parent = doc + return parent + + img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)") + + def upload_attachment(doc: models.Document, arcname: str, data: bytes) -> str: + content_type, _ = mimetypes.guess_type(arcname) + ext = (arcname.split(".")[-1] or "bin").lower() + file_id = uuid.uuid4() + key = f"{doc.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}.{ext}" + extra_args = { + "Metadata": { + "owner": str(request.user.id), + "status": 
enums.DocumentAttachmentStatus.READY, + }, + } + if content_type: + extra_args["ContentType"] = content_type + default_storage.connection.meta.client.upload_fileobj( + io.BytesIO(data), default_storage.bucket_name, key, ExtraArgs=extra_args + ) + doc.attachments.append(key) + doc.save(update_fields=["attachments", "updated_at"]) + return f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" + + def read_bytes(path_in_zip: str) -> bytes | None: + try: + with archive.open(path_in_zip, "r") as f: + return f.read() + except KeyError: + return None + + converter = YdocConverter() + + for md_path in md_files: + dir_path, file_name = ( + (md_path.rsplit("/", 1) + [""])[:2] if "/" in md_path else ("", md_path) + ) + parent_doc = ensure_dir_docs(dir_path) + + try: + raw_md = archive.read(md_path).decode("utf-8", errors="ignore") + except Exception: # noqa: BLE001 + raw_md = "" + + title_match = re.search(r"^#\s+(.+)$", raw_md, flags=re.MULTILINE) + title = title_match.group(1).strip() if title_match else file_name.rsplit(".", 1)[0] + + if parent_doc is None: + doc = models.Document.add_root( + depth=1, + creator=request.user, + title=title, + link_reach=models.LinkReachChoices.RESTRICTED, + ) + else: + doc = parent_doc.add_child(creator=request.user, title=title) + + models.DocumentAccess.objects.update_or_create( + document=doc, + user=request.user, + defaults={"role": models.RoleChoices.OWNER}, + ) + + def replace_img_link(match: re.Match[str]) -> str: + url = match.group(1) + if url.startswith("http://") or url.startswith("https://"): + return match.group(0) + asset_rel = f"{dir_path}/{url}" if dir_path else url + asset_rel = re.sub(r"/+", "/", asset_rel) + data = read_bytes(asset_rel) + if data is None: + return match.group(0) + media_url = upload_attachment(doc, arcname=url, data=data) + return match.group(0).replace(url, media_url) + + rewritten_md = img_pattern.sub(replace_img_link, raw_md) + + try: + ydoc_b64 = converter.convert( + rewritten_md.encode("utf-8"), + 
content_type="text/markdown", + accept="application/vnd.yjs.doc", + ) + doc.content = ydoc_b64 + doc.save(update_fields=["content", "updated_at"]) + except Exception as e: # noqa: BLE001 + logger.exception("Outline import failed for %s: %s", md_path, e) + + created_ids.append(str(doc.id)) + + return drf.response.Response( + {"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED + ) diff --git a/src/backend/core/urls.py b/src/backend/core/urls.py index 2ad8b00395..ef5ede4cfb 100644 --- a/src/backend/core/urls.py +++ b/src/backend/core/urls.py @@ -58,6 +58,10 @@ r"^templates/(?P[0-9a-z-]*)/", include(template_related_router.urls), ), + path( + "outline_import/upload", + viewsets.OutlineImportUploadView.as_view(), + ), ] ), ), diff --git a/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx b/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx index 5ca2315913..8d3b9594ea 100644 --- a/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx +++ b/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx @@ -2,7 +2,7 @@ import { Button } from '@openfun/cunningham-react'; import { useRouter } from 'next/router'; import { useTranslation } from 'react-i18next'; -import { Icon } from '@/components'; +import { DropdownMenu, Icon } from '@/components'; import { useCreateDoc } from '@/features/docs/doc-management'; import { useLeftPanelStore } from '../stores'; @@ -18,14 +18,25 @@ export const LeftPanelHeaderButton = () => { }, }); return ( - + + ); }; diff --git a/src/frontend/apps/impress/src/i18n/translations.json b/src/frontend/apps/impress/src/i18n/translations.json index 47fabc6b99..1cf5dc2b3a 100644 --- a/src/frontend/apps/impress/src/i18n/translations.json +++ b/src/frontend/apps/impress/src/i18n/translations.json @@ -482,7 +482,11 @@ "Share with {{count}} users_one": "Share with {{count}} user", "Shared with {{count}} 
users_many": "Shared with {{count}} users", "Shared with {{count}} users_one": "Shared with {{count}} user", - "Shared with {{count}} users_other": "Shared with {{count}} users" + "Shared with {{count}} users_other": "Shared with {{count}} users", + "Import from Outline": "Import from Outline", + "Import Outline archive": "Import Outline archive", + "Select a .zip file": "Select a .zip file", + "Import": "Import" } }, "es": { @@ -838,6 +842,10 @@ "Open Source": "Open Source", "Open the document options": "Ouvrir les options du document", "Open the header menu": "Ouvrir le menu d'en-tête", + "Import from Outline": "Importer depuis Outline", + "Import Outline archive": "Importer une archive Outline", + "Select a .zip file": "Sélectionnez un fichier .zip", + "Import": "Importer", "Open document actions menu": "Ouvrir le menu d'actions du document", "Open the menu of actions for the document: {{title}}": "Ouvrir le menu des actions du document : {{title}}", "Main content": "Contenu principal", diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx new file mode 100644 index 0000000000..04a9036dcd --- /dev/null +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -0,0 +1,84 @@ +import { Button, Loader } from '@openfun/cunningham-react'; +import { useRouter } from 'next/router'; +import { ReactElement, useState } from 'react'; +import { useTranslation } from 'react-i18next'; + +import { Box, Text } from '@/components'; +import { baseApiUrl } from '@/features/docs/doc-management'; +import { MainLayout } from '@/layouts'; +import { NextPageWithLayout } from '@/types/next'; + +const Page: NextPageWithLayout = () => { + const { t } = useTranslation(); + const router = useRouter(); + const [file, setFile] = useState(null); + const [isUploading, setIsUploading] = useState(false); + const [error, setError] = useState(null); + + const onSubmit = async (e: React.FormEvent) => { + 
e.preventDefault(); + setError(null); + if (!file) { + setError(t('Please select a .zip file')); + return; + } + setIsUploading(true); + try { + const form = new FormData(); + form.append('file', file); + const resp = await fetch(`${baseApiUrl('1.0')}outline_import/upload`, { + method: 'POST', + body: form, + credentials: 'include', + }); + if (!resp.ok) { + throw new Error(await resp.text()); + } + const data = (await resp.json()) as { created_document_ids: string[] }; + const first = data.created_document_ids?.[0]; + if (first) { + void router.replace(`/docs/${first}`); + } else { + void router.replace('/'); + } + } catch (e) { + setError(t('Something bad happens, please retry.')); + } finally { + setIsUploading(false); + } + }; + + return ( + + + {t('Import Outline archive')} + +
+ setFile(e.target.files?.[0] ?? null)} + aria-label={t('Select a .zip file')} + /> + + + {isUploading && } + + {error && ( + + {error} + + )} +
+
+ ); +}; + +Page.getLayout = function getLayout(page: ReactElement) { + return {page}; +}; + +export default Page; + From 1fd44066b2c4ca9045ea604821e0970ae93125b2 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 07:41:48 +0200 Subject: [PATCH 02/16] frontend(import-outline): fix baseApiUrl import path --- src/frontend/apps/impress/src/pages/import/outline/index.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx index 04a9036dcd..0a3ce09b09 100644 --- a/src/frontend/apps/impress/src/pages/import/outline/index.tsx +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -4,7 +4,7 @@ import { ReactElement, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { Box, Text } from '@/components'; -import { baseApiUrl } from '@/features/docs/doc-management'; +import { baseApiUrl } from '@/api'; import { MainLayout } from '@/layouts'; import { NextPageWithLayout } from '@/types/next'; @@ -81,4 +81,3 @@ Page.getLayout = function getLayout(page: ReactElement) { }; export default Page; - From becc51460d320139a6cd785d7128015d88c6ac44 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 07:42:28 +0200 Subject: [PATCH 03/16] outline-import: run malware scan on uploaded assets --- src/backend/core/api/viewsets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index dec2ee69fd..b6a2aaeb3f 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -2261,6 +2261,7 @@ def upload_attachment(doc: models.Document, arcname: str, data: bytes) -> str: ) doc.attachments.append(key) doc.save(update_fields=["attachments", "updated_at"]) + malware_detection.analyse_file(key, document_id=doc.id) return f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" def 
read_bytes(path_in_zip: str) -> bytes | None: From 9f4fb0692e19c47991a9c3d3ac591089eb05fa4e Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 08:46:42 +0200 Subject: [PATCH 04/16] tests(outline-import): add API tests for upload (.zip) flow\n- Anonymous forbidden\n- Authenticated happy path with local image and mocked conversion --- .../imports/test_api_outline_import_upload.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 src/backend/core/tests/imports/test_api_outline_import_upload.py diff --git a/src/backend/core/tests/imports/test_api_outline_import_upload.py b/src/backend/core/tests/imports/test_api_outline_import_upload.py new file mode 100644 index 0000000000..d0b88b920a --- /dev/null +++ b/src/backend/core/tests/imports/test_api_outline_import_upload.py @@ -0,0 +1,79 @@ +"""Tests for the Outline zip import API endpoint.""" + +import io +import zipfile +from unittest.mock import patch + +from django.core.files.uploadedfile import SimpleUploadedFile + +import pytest +from rest_framework.test import APIClient + +from core import factories +from core.api.viewsets import malware_detection + + +pytestmark = pytest.mark.django_db + + +def make_zip_with_markdown_and_image(md_path: str, md_content: str, img_path: str, img_bytes: bytes) -> bytes: + buf = io.BytesIO() + with zipfile.ZipFile(buf, mode="w") as zf: + zf.writestr(md_path, md_content) + zf.writestr(img_path, img_bytes) + return buf.getvalue() + + +def test_outline_import_upload_anonymous_forbidden(): + """Anonymous users must not be able to use the import endpoint.""" + client = APIClient() + + # Minimal empty zip + buf = io.BytesIO() + with zipfile.ZipFile(buf, mode="w"): + pass + upload = SimpleUploadedFile(name="export.zip", content=buf.getvalue(), content_type="application/zip") + + response = client.post("/api/v1.0/outline_import/upload", {"file": upload}, format="multipart") + + assert response.status_code == 401 + assert response.json()["detail"] == 
"Authentication credentials were not provided." + + +@patch("core.services.converter_services.YdocConverter.convert", return_value="YmFzZTY0Y29udGVudA==") +def test_outline_import_upload_authenticated_success(mock_convert): + """Authenticated users can upload an Outline export zip and create documents.""" + user = factories.UserFactory() + client = APIClient() + client.force_login(user) + + # Markdown referencing a local image in the same directory + md = "# Imported Title\n\nSome text.\n\n![Alt](image.png)\n" + img = ( + b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00" + b"\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\xf8\xff\xff?\x00\x05\xfe\x02\xfe" + b"\xa7V\xbd\xfa\x00\x00\x00\x00IEND\xaeB`\x82" + ) + zip_bytes = make_zip_with_markdown_and_image( + md_path="Folder1/page.md", + md_content=md, + img_path="Folder1/image.png", + img_bytes=img, + ) + + upload = SimpleUploadedFile(name="export.zip", content=zip_bytes, content_type="application/zip") + + with patch.object(malware_detection, "analyse_file") as mock_analyse_file: + response = client.post("/api/v1.0/outline_import/upload", {"file": upload}, format="multipart") + + assert response.status_code == 201 + data = response.json() + assert "created_document_ids" in data + # Only the markdown-backed document ids are returned (container folders are not listed) + assert len(data["created_document_ids"]) == 1 + + # The converter must have been called once per markdown file + mock_convert.assert_called_once() + # An antivirus scan is run for the uploaded image + assert mock_analyse_file.called + From 4f3b62db87443268d5542f3cbec24594e9f215d4 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 08:50:13 +0200 Subject: [PATCH 05/16] refactor(outline-import): move import logic to core/services/outline_import.py and call from view\n- Keep view thin; service handles zip, images, conversion, attachments\n- Fix imports accordingly --- src/backend/core/api/viewsets.py | 
133 +--------------- src/backend/core/services/outline_import.py | 167 ++++++++++++++++++++ 2 files changed, 171 insertions(+), 129 deletions(-) create mode 100644 src/backend/core/services/outline_import.py diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index b6a2aaeb3f..b18c7be901 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -4,8 +4,6 @@ import base64 import json import io -import mimetypes -import re import zipfile import logging import uuid @@ -2203,133 +2201,10 @@ def post(self, request): try: content = uploaded.read() - archive = zipfile.ZipFile(io.BytesIO(content)) + # Validate the archive format to fail fast on invalid uploads + zipfile.ZipFile(io.BytesIO(content)) except zipfile.BadZipFile as exc: raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc - created_ids: list[str] = [] - dir_docs: dict[str, models.Document] = {} - md_files = sorted([n for n in archive.namelist() if n.lower().endswith(".md")]) - - def ensure_dir_docs(dir_path: str) -> models.Document | None: - if not dir_path: - return None - parts = [p for p in dir_path.split("/") if p] - parent: models.Document | None = None - current = "" - for part in parts: - current = f"{current}/{part}" if current else part - if current in dir_docs: - parent = dir_docs[current] - continue - # create a container doc with the folder name - if parent is None: - doc = models.Document.add_root( - depth=1, - creator=request.user, - title=part, - link_reach=models.LinkReachChoices.RESTRICTED, - ) - else: - doc = parent.add_child(creator=request.user, title=part) - models.DocumentAccess.objects.update_or_create( - document=doc, - user=request.user, - defaults={"role": models.RoleChoices.OWNER}, - ) - dir_docs[current] = doc - parent = doc - return parent - - img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)") - - def upload_attachment(doc: models.Document, arcname: str, data: bytes) -> str: - content_type, _ = 
mimetypes.guess_type(arcname) - ext = (arcname.split(".")[-1] or "bin").lower() - file_id = uuid.uuid4() - key = f"{doc.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}.{ext}" - extra_args = { - "Metadata": { - "owner": str(request.user.id), - "status": enums.DocumentAttachmentStatus.READY, - }, - } - if content_type: - extra_args["ContentType"] = content_type - default_storage.connection.meta.client.upload_fileobj( - io.BytesIO(data), default_storage.bucket_name, key, ExtraArgs=extra_args - ) - doc.attachments.append(key) - doc.save(update_fields=["attachments", "updated_at"]) - malware_detection.analyse_file(key, document_id=doc.id) - return f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" - - def read_bytes(path_in_zip: str) -> bytes | None: - try: - with archive.open(path_in_zip, "r") as f: - return f.read() - except KeyError: - return None - - converter = YdocConverter() - - for md_path in md_files: - dir_path, file_name = ( - (md_path.rsplit("/", 1) + [""])[:2] if "/" in md_path else ("", md_path) - ) - parent_doc = ensure_dir_docs(dir_path) - - try: - raw_md = archive.read(md_path).decode("utf-8", errors="ignore") - except Exception: # noqa: BLE001 - raw_md = "" - - title_match = re.search(r"^#\s+(.+)$", raw_md, flags=re.MULTILINE) - title = title_match.group(1).strip() if title_match else file_name.rsplit(".", 1)[0] - - if parent_doc is None: - doc = models.Document.add_root( - depth=1, - creator=request.user, - title=title, - link_reach=models.LinkReachChoices.RESTRICTED, - ) - else: - doc = parent_doc.add_child(creator=request.user, title=title) - - models.DocumentAccess.objects.update_or_create( - document=doc, - user=request.user, - defaults={"role": models.RoleChoices.OWNER}, - ) - - def replace_img_link(match: re.Match[str]) -> str: - url = match.group(1) - if url.startswith("http://") or url.startswith("https://"): - return match.group(0) - asset_rel = f"{dir_path}/{url}" if dir_path else url - asset_rel = re.sub(r"/+", "/", asset_rel) - data 
= read_bytes(asset_rel) - if data is None: - return match.group(0) - media_url = upload_attachment(doc, arcname=url, data=data) - return match.group(0).replace(url, media_url) - - rewritten_md = img_pattern.sub(replace_img_link, raw_md) - - try: - ydoc_b64 = converter.convert( - rewritten_md.encode("utf-8"), - content_type="text/markdown", - accept="application/vnd.yjs.doc", - ) - doc.content = ydoc_b64 - doc.save(update_fields=["content", "updated_at"]) - except Exception as e: # noqa: BLE001 - logger.exception("Outline import failed for %s: %s", md_path, e) - - created_ids.append(str(doc.id)) - - return drf.response.Response( - {"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED - ) + created_ids = process_outline_zip(request.user, content) + return drf.response.Response({"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED) diff --git a/src/backend/core/services/outline_import.py b/src/backend/core/services/outline_import.py new file mode 100644 index 0000000000..b05f21872b --- /dev/null +++ b/src/backend/core/services/outline_import.py @@ -0,0 +1,167 @@ +"""Service to import an Outline export (.zip) into Docs documents.""" + +from __future__ import annotations + +import io +import mimetypes +import re +import uuid +import zipfile +from typing import Iterable + +from django.conf import settings +from django.core.files.storage import default_storage + +from lasuite.malware_detection import malware_detection + +from core import enums, models +from core.services.converter_services import YdocConverter + + +def _ensure_dir_documents(user, dir_path: str, dir_docs: dict[str, models.Document]) -> models.Document | None: + """Ensure each path segment in dir_path has a container document. + + Returns the deepest parent document or None when dir_path is empty. 
+ """ + if not dir_path: + return None + + parts = [p for p in dir_path.split("/") if p] + parent: models.Document | None = None + current = "" + for part in parts: + current = f"{current}/{part}" if current else part + if current in dir_docs: + parent = dir_docs[current] + continue + + if parent is None: + doc = models.Document.add_root( + depth=1, + creator=user, + title=part, + link_reach=models.LinkReachChoices.RESTRICTED, + ) + else: + doc = parent.add_child(creator=user, title=part) + + models.DocumentAccess.objects.update_or_create( + document=doc, + user=user, + defaults={"role": models.RoleChoices.OWNER}, + ) + dir_docs[current] = doc + parent = doc + + return parent + + +def _upload_attachment(user, doc: models.Document, arcname: str, data: bytes) -> str: + """Upload a binary asset into object storage and return its public media URL.""" + content_type, _ = mimetypes.guess_type(arcname) + ext = (arcname.split(".")[-1] or "bin").lower() + file_id = uuid.uuid4() + key = f"{doc.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}.{ext}" + extra_args = { + "Metadata": { + "owner": str(user.id), + "status": enums.DocumentAttachmentStatus.READY, + }, + } + if content_type: + extra_args["ContentType"] = content_type + + default_storage.connection.meta.client.upload_fileobj( + io.BytesIO(data), default_storage.bucket_name, key, ExtraArgs=extra_args + ) + doc.attachments.append(key) + doc.save(update_fields=["attachments", "updated_at"]) + malware_detection.analyse_file(key, document_id=doc.id) + return f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" + + +def process_outline_zip(user, zip_bytes: bytes) -> list[str]: + """Process an Outline export zip and create Docs documents. + + Returns the list of created document IDs (stringified UUIDs) corresponding to + markdown-backed documents. Container folders used to rebuild hierarchy are not listed. 
+ """ + archive = zipfile.ZipFile(io.BytesIO(zip_bytes)) + + created_ids: list[str] = [] + dir_docs: dict[str, models.Document] = {} + md_files: Iterable[str] = sorted( + [n for n in archive.namelist() if n.lower().endswith(".md")] + ) + + img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)") + + def read_bytes(path_in_zip: str) -> bytes | None: + try: + with archive.open(path_in_zip, "r") as f: + return f.read() + except KeyError: + return None + + converter = YdocConverter() + + for md_path in md_files: + dir_path, file_name = ( + (md_path.rsplit("/", 1) + [""])[:2] if "/" in md_path else ("", md_path) + ) + parent_doc = _ensure_dir_documents(user, dir_path, dir_docs) + + try: + raw_md = archive.read(md_path).decode("utf-8", errors="ignore") + except Exception: # noqa: BLE001 + raw_md = "" + + title_match = re.search(r"^#\s+(.+)$", raw_md, flags=re.MULTILINE) + title = title_match.group(1).strip() if title_match else file_name.rsplit(".", 1)[0] + + if parent_doc is None: + doc = models.Document.add_root( + depth=1, + creator=user, + title=title, + link_reach=models.LinkReachChoices.RESTRICTED, + ) + else: + doc = parent_doc.add_child(creator=user, title=title) + + models.DocumentAccess.objects.update_or_create( + document=doc, + user=user, + defaults={"role": models.RoleChoices.OWNER}, + ) + + def replace_img_link(match: re.Match[str]) -> str: + url = match.group(1) + if url.startswith("http://") or url.startswith("https://"): + return match.group(0) + asset_rel = f"{dir_path}/{url}" if dir_path else url + asset_rel = re.sub(r"/+", "/", asset_rel) + data = read_bytes(asset_rel) + if data is None: + return match.group(0) + media_url = _upload_attachment(user, doc, arcname=url, data=data) + return match.group(0).replace(url, media_url) + + rewritten_md = img_pattern.sub(replace_img_link, raw_md) + + try: + ydoc_b64 = converter.convert( + rewritten_md.encode("utf-8"), + content_type="text/markdown", + accept="application/vnd.yjs.doc", + ) + doc.content = ydoc_b64 + 
doc.save(update_fields=["content", "updated_at"]) + except Exception: # noqa: BLE001 + # Keep doc without content on conversion error but continue import + pass + + created_ids.append(str(doc.id)) + + return created_ids + From fa65c45e275b7f8a3df876d39f8ca320d01ce10c Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 09:36:21 +0200 Subject: [PATCH 06/16] outline-import: reinforce safety and tests\n- Zip Slip protection (reject unsafe paths)\n- Ignore __MACOSX and hidden entries\n- Service unit tests (happy path + zip slip)\n- Change API path to /imports/outline/upload and update front + tests --- src/backend/core/api/viewsets.py | 6 ++- src/backend/core/services/outline_import.py | 26 +++++++++- .../imports/test_api_outline_import_upload.py | 5 +- .../services/test_outline_import_service.py | 52 +++++++++++++++++++ src/backend/core/urls.py | 2 +- .../src/pages/import/outline/index.tsx | 2 +- 6 files changed, 84 insertions(+), 9 deletions(-) create mode 100644 src/backend/core/tests/services/test_outline_import_service.py diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index b18c7be901..d0ba8686af 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -50,6 +50,7 @@ YdocConverter, ) from core.tasks.mail import send_ask_for_access_mail +from core.services.outline_import import OutlineImportError, process_outline_zip from core.utils import extract_attachments, filter_descendants from . 
import permissions, serializers, utils @@ -2203,8 +2204,9 @@ def post(self, request): content = uploaded.read() # Validate the archive format to fail fast on invalid uploads zipfile.ZipFile(io.BytesIO(content)) + created_ids = process_outline_zip(request.user, content) except zipfile.BadZipFile as exc: raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc - - created_ids = process_outline_zip(request.user, content) + except OutlineImportError as exc: + raise drf.exceptions.ValidationError({"file": str(exc)}) from exc return drf.response.Response({"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED) diff --git a/src/backend/core/services/outline_import.py b/src/backend/core/services/outline_import.py index b05f21872b..0c9d98575e 100644 --- a/src/backend/core/services/outline_import.py +++ b/src/backend/core/services/outline_import.py @@ -8,6 +8,7 @@ import uuid import zipfile from typing import Iterable +import posixpath from django.conf import settings from django.core.files.storage import default_storage @@ -18,6 +19,10 @@ from core.services.converter_services import YdocConverter +class OutlineImportError(Exception): + """Raised when the Outline archive is invalid or unsafe.""" + + def _ensure_dir_documents(user, dir_path: str, dir_docs: dict[str, models.Document]) -> models.Document | None: """Ensure each path segment in dir_path has a container document. @@ -88,10 +93,25 @@ def process_outline_zip(user, zip_bytes: bytes) -> list[str]: """ archive = zipfile.ZipFile(io.BytesIO(zip_bytes)) + # Basic Zip Slip protection: refuse absolute or parent-traversal entries + for name in archive.namelist(): + # Normalize to posix separators and check traversal + if name.startswith("/") or "\\" in name: + raise OutlineImportError("Unsafe path in archive") + parts = [p for p in name.split("/") if p] + if any(part == ".." 
for part in parts): + raise OutlineImportError("Unsafe path in archive") + created_ids: list[str] = [] dir_docs: dict[str, models.Document] = {} md_files: Iterable[str] = sorted( - [n for n in archive.namelist() if n.lower().endswith(".md")] + [ + n + for n in archive.namelist() + if n.lower().endswith(".md") + and not n.startswith("__MACOSX/") + and not any(part.startswith(".") for part in n.split("/")) + ] ) img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)") @@ -141,6 +161,9 @@ def replace_img_link(match: re.Match[str]) -> str: return match.group(0) asset_rel = f"{dir_path}/{url}" if dir_path else url asset_rel = re.sub(r"/+", "/", asset_rel) + # sanitize computed asset path + if asset_rel.startswith("/") or any(part == ".." for part in asset_rel.split("/")): + return match.group(0) data = read_bytes(asset_rel) if data is None: return match.group(0) @@ -164,4 +187,3 @@ def replace_img_link(match: re.Match[str]) -> str: created_ids.append(str(doc.id)) return created_ids - diff --git a/src/backend/core/tests/imports/test_api_outline_import_upload.py b/src/backend/core/tests/imports/test_api_outline_import_upload.py index d0b88b920a..aee401aa3b 100644 --- a/src/backend/core/tests/imports/test_api_outline_import_upload.py +++ b/src/backend/core/tests/imports/test_api_outline_import_upload.py @@ -34,7 +34,7 @@ def test_outline_import_upload_anonymous_forbidden(): pass upload = SimpleUploadedFile(name="export.zip", content=buf.getvalue(), content_type="application/zip") - response = client.post("/api/v1.0/outline_import/upload", {"file": upload}, format="multipart") + response = client.post("/api/v1.0/imports/outline/upload", {"file": upload}, format="multipart") assert response.status_code == 401 assert response.json()["detail"] == "Authentication credentials were not provided." 
def make_zip(entries: dict[str, bytes]) -> bytes:
    """Build an in-memory zip archive and return its raw bytes.

    Each key of ``entries`` becomes an archive member name and the matching
    value becomes that member's content. Members are written in the mapping's
    iteration order.
    """
    stream = io.BytesIO()
    archive = zipfile.ZipFile(stream, mode="w")
    try:
        for arcname in entries:
            archive.writestr(arcname, entries[arcname])
    finally:
        # Closing writes the central directory; without it the bytes are not a valid zip.
        archive.close()
    return stream.getvalue()
"dir/image.png": img, + "__MACOSX/._noise": b"", + ".hidden/skip.md": b"# hidden", + }) + + created = process_outline_zip(user, zip_bytes) + assert len(created) == 1 + mock_convert.assert_called_once() + mock_upload.assert_called() + mock_av.assert_called() + + +def test_process_outline_zip_zip_slip_rejected(): + user = factories.UserFactory() + zip_bytes = make_zip({ + "../evil.md": b"# E", + }) + with pytest.raises(OutlineImportError): + process_outline_zip(user, zip_bytes) + diff --git a/src/backend/core/urls.py b/src/backend/core/urls.py index ef5ede4cfb..61a46bce86 100644 --- a/src/backend/core/urls.py +++ b/src/backend/core/urls.py @@ -59,7 +59,7 @@ include(template_related_router.urls), ), path( - "outline_import/upload", + "imports/outline/upload", viewsets.OutlineImportUploadView.as_view(), ), ] diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx index 0a3ce09b09..f47f427288 100644 --- a/src/frontend/apps/impress/src/pages/import/outline/index.tsx +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -26,7 +26,7 @@ const Page: NextPageWithLayout = () => { try { const form = new FormData(); form.append('file', file); - const resp = await fetch(`${baseApiUrl('1.0')}outline_import/upload`, { + const resp = await fetch(`${baseApiUrl('1.0')}imports/outline/upload`, { method: 'POST', body: form, credentials: 'include', From cce6c966482c0855dc6f1a9c0b4e1fd720c1ff8b Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Tue, 16 Sep 2025 20:51:20 +0200 Subject: [PATCH 07/16] Add Outline import API view --- src/backend/core/api/imports.py | 34 +++++++++++++++++++++++++++++++++ src/backend/core/urls.py | 3 ++- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 src/backend/core/api/imports.py diff --git a/src/backend/core/api/imports.py b/src/backend/core/api/imports.py new file mode 100644 index 0000000000..fecd427d42 --- /dev/null +++ 
class OutlineImportUploadView(drf.views.APIView):
    """Upload an Outline export (.zip) and import it as documents.

    Expects a multipart/form-data request whose ``file`` field holds the zip
    archive. On success the response is HTTP 201 with a JSON payload
    ``{"created_document_ids": [...]}``. A missing file, a file name that does
    not end in ``.zip``, an unreadable archive and any ``OutlineImportError``
    are all reported as validation errors on the ``file`` field.
    """

    parser_classes = [drf.parsers.MultiPartParser]
    permission_classes = [drf.permissions.IsAuthenticated]

    def post(self, request):
        """Validate the uploaded archive and hand it to ``process_outline_zip``.

        Raises:
            rest_framework.exceptions.ValidationError: when the upload is
                missing, is not named ``*.zip``, is not a readable zip
                archive, or is rejected by the import service.
        """
        # Imported locally so this handler is self-contained with respect to
        # the exception type it needs to catch.
        import zipfile  # pylint: disable=import-outside-toplevel

        uploaded = request.FILES.get("file")
        if not uploaded:
            raise drf.exceptions.ValidationError({"file": "File is required"})

        # Compare case-insensitively so "EXPORT.ZIP" is accepted, and tolerate
        # an upload object whose name is None.
        name = getattr(uploaded, "name", "") or ""
        if not name.lower().endswith(".zip"):
            raise drf.exceptions.ValidationError({"file": "Must be a .zip file"})

        content = uploaded.read()
        try:
            created_ids = process_outline_zip(request.user, content)
        except zipfile.BadZipFile as exc:
            # The service opens the archive itself; a corrupt upload is a
            # client error, not a server failure.
            raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc
        except OutlineImportError as exc:
            raise drf.exceptions.ValidationError({"file": str(exc)}) from exc

        return drf.response.Response(
            {"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED
        )
d0ba8686af..b5c212473c 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -2176,37 +2176,4 @@ def _load_theme_customization(self): settings.THEME_CUSTOMIZATION_CACHE_TIMEOUT, ) - return theme_customization - - -class OutlineImportUploadView(drf.views.APIView): - """Upload an Outline export (.zip) and import it as Docs documents. - - Expects a multipart/form-data with field name 'file' containing a .zip archive - produced by Outline export. - - Returns a JSON payload with a list of created document ids. - """ - - parser_classes = [drf.parsers.MultiPartParser] - permission_classes = [permissions.IsAuthenticated] - - def post(self, request): - uploaded = request.FILES.get("file") - if not uploaded: - raise drf.exceptions.ValidationError({"file": "File is required"}) - - name = getattr(uploaded, "name", "") - if not name.endswith(".zip"): - raise drf.exceptions.ValidationError({"file": "Must be a .zip file"}) - - try: - content = uploaded.read() - # Validate the archive format to fail fast on invalid uploads - zipfile.ZipFile(io.BytesIO(content)) - created_ids = process_outline_zip(request.user, content) - except zipfile.BadZipFile as exc: - raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc - except OutlineImportError as exc: - raise drf.exceptions.ValidationError({"file": str(exc)}) from exc - return drf.response.Response({"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED) + return theme_customization \ No newline at end of file From 453b153506df128a0a2b2e4a2eb474950ea790db Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Wed, 17 Sep 2025 05:28:57 +0200 Subject: [PATCH 09/16] Improve Outline import validation and UI --- src/backend/core/api/imports.py | 8 +++ .../imports/test_api_outline_import_upload.py | 49 +++++++++++++++++++ .../components/LeftPanelHeaderButton.tsx | 27 +++++----- 3 files changed, 72 insertions(+), 12 deletions(-) diff --git 
a/src/backend/core/api/imports.py b/src/backend/core/api/imports.py index fecd427d42..174c9b8f95 100644 --- a/src/backend/core/api/imports.py +++ b/src/backend/core/api/imports.py @@ -2,6 +2,9 @@ from __future__ import annotations +import io +import zipfile + import rest_framework as drf from core.services.outline_import import OutlineImportError, process_outline_zip @@ -25,7 +28,12 @@ def post(self, request): try: content = uploaded.read() + # Fail fast if the upload is not a valid zip archive + with zipfile.ZipFile(io.BytesIO(content)): + pass created_ids = process_outline_zip(request.user, content) + except zipfile.BadZipFile as exc: + raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc except OutlineImportError as exc: raise drf.exceptions.ValidationError({"file": str(exc)}) from exc diff --git a/src/backend/core/tests/imports/test_api_outline_import_upload.py b/src/backend/core/tests/imports/test_api_outline_import_upload.py index aee401aa3b..110df86103 100644 --- a/src/backend/core/tests/imports/test_api_outline_import_upload.py +++ b/src/backend/core/tests/imports/test_api_outline_import_upload.py @@ -11,6 +11,7 @@ from core import factories from core.api.viewsets import malware_detection +from core.services.outline_import import OutlineImportError pytestmark = pytest.mark.django_db @@ -76,3 +77,51 @@ def test_outline_import_upload_authenticated_success(mock_convert): mock_convert.assert_called_once() # An antivirus scan is run for the uploaded image assert mock_analyse_file.called + + +def test_outline_import_upload_invalid_zip_returns_validation_error(): + """Invalid archives are rejected with a validation error instead of crashing.""" + user = factories.UserFactory() + client = APIClient() + client.force_login(user) + + upload = SimpleUploadedFile( + name="export.zip", + content=b"not-a-zip", + content_type="application/zip", + ) + + response = client.post( + "/api/v1.0/imports/outline/upload", + {"file": upload}, + 
@patch("core.api.imports.process_outline_zip", side_effect=OutlineImportError("boom"))
def test_outline_import_upload_outline_error_returns_validation_error(mock_process_outline):
    """An ``OutlineImportError`` raised by the import service yields a 400 on ``file``."""
    importer = factories.UserFactory()
    api_client = APIClient()
    api_client.force_login(importer)

    # A well-formed archive, so the request reaches the (mocked) import service.
    archive_bytes = make_zip_with_markdown_and_image(
        md_path="doc.md",
        md_content="# Title",
        img_path="",
        img_bytes=b"",
    )
    archive = SimpleUploadedFile(
        name="export.zip",
        content=archive_bytes,
        content_type="application/zip",
    )

    response = api_client.post(
        "/api/v1.0/imports/outline/upload",
        {"file": archive},
        format="multipart",
    )

    assert response.status_code == 400
    assert response.json() == {"file": ["boom"]}
    mock_process_outline.assert_called_once()
# Opening/closing line of a fenced code block: optional indent, then 3+ backticks or tildes.
_FENCE_RE = re.compile(r"^[ \t]*(`{3,}|~{3,})(.*)$")
# Horizontal rule: three or more identical -, * or _ characters, optionally spaced.
_RULE_RE = re.compile(r"([-*_])(?:[ \t]*\1){2,}")
# (markdown prefix, replacement opening, replacement closing) for H6, H5 and H4.
_HEADING_REWRITES = (
    ("###### ", "▪ ", ""),
    ("##### ", "**▸ ", "**"),
    ("#### ", "### ", " [H4]"),
)


def _rewrite_outline_line(line: str) -> str:
    """Rewrite one markdown line that is known to be outside a fenced code block."""
    for prefix, opening, closing in _HEADING_REWRITES:
        if line.startswith(prefix):
            return f"{opening}{line[len(prefix):].strip()}{closing}"
    if _RULE_RE.fullmatch(line.strip()):
        return "[DIVIDER_BLOCK]"
    return line


def _preprocess_outline_markdown(markdown: str) -> str:
    """Rewrite Outline markdown constructs that BlockNote.js cannot represent.

    Conversions, applied line by line:
    - H4 (``#### ``) becomes an H3 suffixed with `` [H4]``
    - H5 (``##### ``) becomes a bold paragraph prefixed with ``▸``
    - H6 (``###### ``) becomes a paragraph prefixed with ``▪``
    - Horizontal rules (three or more ``-``, ``*`` or ``_``, optionally
      separated by spaces) become a ``[DIVIDER_BLOCK]`` marker line

    Task-list items (``- [ ]`` / ``- [x]``) and every other line are returned
    unchanged. Lines inside fenced code blocks (``` or ~~~) are never
    rewritten, so code samples containing ``#### …`` or ``---`` stay intact.

    Args:
        markdown: The markdown source, with ``\\n`` line separators.

    Returns:
        The rewritten markdown with the same number of lines as the input.
    """
    processed_lines: list[str] = []
    open_fence = ""  # marker that opened the current code fence, "" when outside

    for line in markdown.split("\n"):
        fence_match = _FENCE_RE.match(line)
        marker = fence_match.group(1) if fence_match else ""
        rest = fence_match.group(2).strip() if fence_match else ""

        if open_fence:
            # Inside a fence: copy verbatim. The fence closes on a bare marker
            # of the same character that is at least as long as the opener.
            processed_lines.append(line)
            if (
                marker
                and marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not rest
            ):
                open_fence = ""
            continue

        # A backtick fence cannot carry backticks in its info string; such a
        # line (e.g. ```inline```) is inline code, not a fence opener.
        if marker and not (marker[0] == "`" and "`" in rest):
            open_fence = marker
            processed_lines.append(line)
            continue

        processed_lines.append(_rewrite_outline_line(line))

    return "\n".join(processed_lines)
a/src/backend/core/tests/services/test_outline_import_service.py +++ b/src/backend/core/tests/services/test_outline_import_service.py @@ -7,7 +7,7 @@ import pytest from core import factories -from core.services.outline_import import OutlineImportError, process_outline_zip +from core.services.outline_import import OutlineImportError, process_outline_zip, _preprocess_outline_markdown pytestmark = pytest.mark.django_db @@ -50,3 +50,80 @@ def test_process_outline_zip_zip_slip_rejected(): with pytest.raises(OutlineImportError): process_outline_zip(user, zip_bytes) + +def test_preprocess_outline_markdown_heading_conversions(): + """Test that H4, H5, H6 are properly converted.""" + markdown = """# H1 Title +## H2 Section +### H3 Subsection +#### H4 Content +##### H5 Detail +###### H6 Note +""" + result = _preprocess_outline_markdown(markdown) + + assert "# H1 Title" in result + assert "## H2 Section" in result + assert "### H3 Subsection" in result + assert "### H4 Content [H4]" in result # H4 converted to H3 with marker + assert "**▸ H5 Detail**" in result # H5 converted to bold with arrow + assert "▪ H6 Note" in result # H6 converted to paragraph with bullet + + +def test_preprocess_outline_markdown_horizontal_rules(): + """Test that horizontal rules are converted to divider blocks.""" + markdown = """Content before +--- +Content after +*** +More content +___ +Final content""" + result = _preprocess_outline_markdown(markdown) + + assert result.count("[DIVIDER_BLOCK]") == 3 + assert "---" not in result + assert "***" not in result + assert "___" not in result + + +def test_preprocess_outline_markdown_task_lists(): + """Test that task lists are properly handled.""" + markdown = """- [ ] Unchecked task +- [x] Checked task +- Regular list item + - [ ] Nested unchecked + - [x] Nested checked""" + result = _preprocess_outline_markdown(markdown) + + assert "- [ ] Unchecked task" in result + assert "- [x] Checked task" in result + assert "- Regular list item" in result + assert 
" - [ ] Nested unchecked" in result + assert " - [x] Nested checked" in result + + +def test_preprocess_outline_markdown_combined(): + """Test combined conversions in a realistic document.""" + markdown = """# Main Title +## Section 1 +### Subsection +#### Deep Section +Some content here. +--- +##### Important Note +This is important. +###### Small detail +- [ ] Task to do +- [x] Completed task +""" + result = _preprocess_outline_markdown(markdown) + + assert "# Main Title" in result + assert "### Deep Section [H4]" in result + assert "[DIVIDER_BLOCK]" in result + assert "**▸ Important Note**" in result + assert "▪ Small detail" in result + assert "- [ ] Task to do" in result + assert "- [x] Completed task" in result + From 06d9c2b228a5695a2410c21a1227bddcc77700b3 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Fri, 26 Sep 2025 14:40:11 +0200 Subject: [PATCH 11/16] Revert "feat(outline-import): Add markdown preprocessing for unsupported BlockNote elements" This reverts commit b7a7663e1bf89caa921acabf5f579eecc5cff5af. --- src/backend/core/services/outline_import.py | 46 +---------- .../services/test_outline_import_service.py | 79 +------------------ 2 files changed, 2 insertions(+), 123 deletions(-) diff --git a/src/backend/core/services/outline_import.py b/src/backend/core/services/outline_import.py index 16f5ecc00f..0c9d98575e 100644 --- a/src/backend/core/services/outline_import.py +++ b/src/backend/core/services/outline_import.py @@ -4,11 +4,11 @@ import io import mimetypes -import posixpath import re import uuid import zipfile from typing import Iterable +import posixpath from django.conf import settings from django.core.files.storage import default_storage @@ -19,47 +19,6 @@ from core.services.converter_services import YdocConverter -def _preprocess_outline_markdown(markdown: str) -> str: - """Pre-process Outline markdown to handle unsupported BlockNote.js elements. 
- - Conversions: - - H4 (####) → H3 with marker - - H5 (#####) → Bold paragraph with ▸ prefix - - H6 (######) → Paragraph with ▪ prefix - - Horizontal rules (---) → [DIVIDER] marker for post-processing - - Task lists (- [ ], - [x]) → Standard checkbox format - """ - lines = markdown.split('\n') - processed_lines = [] - - for line in lines: - # Convert H6 (######) to paragraph with prefix - if line.startswith('###### '): - processed_lines.append('▪ ' + line[7:].strip()) - # Convert H5 (#####) to bold paragraph with prefix - elif line.startswith('##### '): - processed_lines.append('**▸ ' + line[6:].strip() + '**') - # Convert H4 (####) to H3 with marker - elif line.startswith('#### '): - # Add a subtle marker to indicate this was H4 - processed_lines.append('### ' + line[5:].strip() + ' [H4]') - # Convert horizontal rules to divider marker - elif line.strip() in ['---', '***', '___'] and len(line.strip()) >= 3: - # Use a special marker that won't conflict with content - processed_lines.append('[DIVIDER_BLOCK]') - # Convert task lists to checkbox format - elif re.match(r'^\s*- \[ \]', line): - # Unchecked task - processed_lines.append(re.sub(r'^(\s*)- \[ \]', r'\1- [ ]', line)) - elif re.match(r'^\s*- \[x\]', line): - # Checked task - processed_lines.append(re.sub(r'^(\s*)- \[x\]', r'\1- [x]', line)) - else: - processed_lines.append(line) - - return '\n'.join(processed_lines) - - class OutlineImportError(Exception): """Raised when the Outline archive is invalid or unsafe.""" @@ -213,9 +172,6 @@ def replace_img_link(match: re.Match[str]) -> str: rewritten_md = img_pattern.sub(replace_img_link, raw_md) - # Pre-process markdown to handle Outline-specific content - rewritten_md = _preprocess_outline_markdown(rewritten_md) - try: ydoc_b64 = converter.convert( rewritten_md.encode("utf-8"), diff --git a/src/backend/core/tests/services/test_outline_import_service.py b/src/backend/core/tests/services/test_outline_import_service.py index c5904f0295..7f94e63a7b 100644 --- 
a/src/backend/core/tests/services/test_outline_import_service.py +++ b/src/backend/core/tests/services/test_outline_import_service.py @@ -7,7 +7,7 @@ import pytest from core import factories -from core.services.outline_import import OutlineImportError, process_outline_zip, _preprocess_outline_markdown +from core.services.outline_import import OutlineImportError, process_outline_zip pytestmark = pytest.mark.django_db @@ -50,80 +50,3 @@ def test_process_outline_zip_zip_slip_rejected(): with pytest.raises(OutlineImportError): process_outline_zip(user, zip_bytes) - -def test_preprocess_outline_markdown_heading_conversions(): - """Test that H4, H5, H6 are properly converted.""" - markdown = """# H1 Title -## H2 Section -### H3 Subsection -#### H4 Content -##### H5 Detail -###### H6 Note -""" - result = _preprocess_outline_markdown(markdown) - - assert "# H1 Title" in result - assert "## H2 Section" in result - assert "### H3 Subsection" in result - assert "### H4 Content [H4]" in result # H4 converted to H3 with marker - assert "**▸ H5 Detail**" in result # H5 converted to bold with arrow - assert "▪ H6 Note" in result # H6 converted to paragraph with bullet - - -def test_preprocess_outline_markdown_horizontal_rules(): - """Test that horizontal rules are converted to divider blocks.""" - markdown = """Content before ---- -Content after -*** -More content -___ -Final content""" - result = _preprocess_outline_markdown(markdown) - - assert result.count("[DIVIDER_BLOCK]") == 3 - assert "---" not in result - assert "***" not in result - assert "___" not in result - - -def test_preprocess_outline_markdown_task_lists(): - """Test that task lists are properly handled.""" - markdown = """- [ ] Unchecked task -- [x] Checked task -- Regular list item - - [ ] Nested unchecked - - [x] Nested checked""" - result = _preprocess_outline_markdown(markdown) - - assert "- [ ] Unchecked task" in result - assert "- [x] Checked task" in result - assert "- Regular list item" in result - assert 
" - [ ] Nested unchecked" in result - assert " - [x] Nested checked" in result - - -def test_preprocess_outline_markdown_combined(): - """Test combined conversions in a realistic document.""" - markdown = """# Main Title -## Section 1 -### Subsection -#### Deep Section -Some content here. ---- -##### Important Note -This is important. -###### Small detail -- [ ] Task to do -- [x] Completed task -""" - result = _preprocess_outline_markdown(markdown) - - assert "# Main Title" in result - assert "### Deep Section [H4]" in result - assert "[DIVIDER_BLOCK]" in result - assert "**▸ Important Note**" in result - assert "▪ Small detail" in result - assert "- [ ] Task to do" in result - assert "- [x] Completed task" in result - From 68e58b24c249ee20c78ed98225bf27711a155197 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sun, 12 Oct 2025 17:01:33 +0200 Subject: [PATCH 12/16] fix(outline-import): Fix CSRF token and nested documents handling - Add CSRF token to Outline import upload request - Fix content save by removing invalid update_fields parameter - Handle nested documents properly to avoid duplicates when a document has child documents (e.g., Doc.md with Doc/ directory) --- src/backend/core/services/outline_import.py | 18 +++++++++++++++++- .../impress/src/pages/import/outline/index.tsx | 6 +++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/backend/core/services/outline_import.py b/src/backend/core/services/outline_import.py index 0c9d98575e..39aec3b652 100644 --- a/src/backend/core/services/outline_import.py +++ b/src/backend/core/services/outline_import.py @@ -114,6 +114,16 @@ def process_outline_zip(user, zip_bytes: bytes) -> list[str]: ] ) + # Build a set of md files that have corresponding directories (Outline nested docs) + # e.g., "Doc.md" and "Doc/" both exist -> "Doc" is a parent with nested children + md_with_dirs: set[str] = set() + for md_path in md_files: + # Remove .md extension to get potential directory name + base_path = 
md_path.rsplit(".md", 1)[0] + # Check if there's a directory with the same name + if any(n.startswith(f"{base_path}/") for n in archive.namelist()): + md_with_dirs.add(base_path) + img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)") def read_bytes(path_in_zip: str) -> bytes | None: @@ -149,6 +159,12 @@ def read_bytes(path_in_zip: str) -> bytes | None: else: doc = parent_doc.add_child(creator=user, title=title) + # If this md file has a corresponding directory, register it as a container + # so nested children will use this doc as parent instead of creating a duplicate + base_path = md_path.rsplit(".md", 1)[0] + if base_path in md_with_dirs: + dir_docs[base_path] = doc + models.DocumentAccess.objects.update_or_create( document=doc, user=user, @@ -179,7 +195,7 @@ def replace_img_link(match: re.Match[str]) -> str: accept="application/vnd.yjs.doc", ) doc.content = ydoc_b64 - doc.save(update_fields=["content", "updated_at"]) + doc.save() except Exception: # noqa: BLE001 # Keep doc without content on conversion error but continue import pass diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx index f47f427288..eaac62d029 100644 --- a/src/frontend/apps/impress/src/pages/import/outline/index.tsx +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -4,7 +4,7 @@ import { ReactElement, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { Box, Text } from '@/components'; -import { baseApiUrl } from '@/api'; +import { baseApiUrl, getCSRFToken } from '@/api'; import { MainLayout } from '@/layouts'; import { NextPageWithLayout } from '@/types/next'; @@ -26,10 +26,14 @@ const Page: NextPageWithLayout = () => { try { const form = new FormData(); form.append('file', file); + const csrfToken = getCSRFToken(); const resp = await fetch(`${baseApiUrl('1.0')}imports/outline/upload`, { method: 'POST', body: form, credentials: 'include', + headers: { + 
...(csrfToken && { 'X-CSRFToken': csrfToken }), + }, }); if (!resp.ok) { throw new Error(await resp.text()); From 538c6413e9690c3580a328b50263ad21a9f47b9c Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sun, 12 Oct 2025 17:18:35 +0200 Subject: [PATCH 13/16] Cleanup imports --- src/backend/core/api/viewsets.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 1fe636fc53..685ab091a5 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -3,8 +3,6 @@ import base64 import json -import io -import zipfile import logging import uuid from collections import defaultdict @@ -50,7 +48,6 @@ YdocConverter, ) from core.tasks.mail import send_ask_for_access_mail -from core.services.outline_import import OutlineImportError, process_outline_zip from core.utils import extract_attachments, filter_descendants from . import permissions, serializers, utils @@ -2202,5 +2199,3 @@ def _load_theme_customization(self): theme_customization, settings.THEME_CUSTOMIZATION_CACHE_TIMEOUT, ) - - return theme_customization \ No newline at end of file From 619b6243211d8d1276942a36c8dc5cc12048085a Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sun, 12 Oct 2025 17:19:44 +0200 Subject: [PATCH 14/16] Fix import outline --- src/backend/core/api/viewsets.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 685ab091a5..f5ee2ab3a7 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -2199,3 +2199,5 @@ def _load_theme_customization(self): theme_customization, settings.THEME_CUSTOMIZATION_CACHE_TIMEOUT, ) + + return theme_customization \ No newline at end of file From e1f5a13a3bf40cea119eaae78e901e8f23cab075 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sun, 12 Oct 2025 17:20:40 +0200 Subject: [PATCH 15/16] add new line --- src/backend/core/api/viewsets.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index f5ee2ab3a7..1fb95c4eb6 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -2200,4 +2200,4 @@ def _load_theme_customization(self): settings.THEME_CUSTOMIZATION_CACHE_TIMEOUT, ) - return theme_customization \ No newline at end of file + return theme_customization From 7d6f0559b5c1b1b9ad5491012fec98cca9fca601 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Mon, 13 Oct 2025 10:17:31 +0200 Subject: [PATCH 16/16] es-lint fixes --- .../impress/src/pages/import/outline/index.tsx | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx index eaac62d029..bd0911e330 100644 --- a/src/frontend/apps/impress/src/pages/import/outline/index.tsx +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -3,8 +3,8 @@ import { useRouter } from 'next/router'; import { ReactElement, useState } from 'react'; import { useTranslation } from 'react-i18next'; -import { Box, Text } from '@/components'; import { baseApiUrl, getCSRFToken } from '@/api'; +import { Box, Text } from '@/components'; import { MainLayout } from '@/layouts'; import { NextPageWithLayout } from '@/types/next'; @@ -45,7 +45,7 @@ const Page: NextPageWithLayout = () => { } else { void router.replace('/'); } - } catch (e) { + } catch { setError(t('Something bad happens, please retry.')); } finally { setIsUploading(false); @@ -53,7 +53,12 @@ const Page: NextPageWithLayout = () => { }; return ( - + {t('Import Outline archive')} @@ -64,7 +69,12 @@ const Page: NextPageWithLayout = () => { onChange={(e) => setFile(e.target.files?.[0] ?? null)} aria-label={t('Select a .zip file')} /> - +