Skip to content

Commit 1ef25cb

Browse files
committed
✨(backend) handle uploaded images in Notion import
1 parent a48eabf commit 1ef25cb

File tree

3 files changed

+87
-22
lines changed

3 files changed

+87
-22
lines changed

src/backend/core/api/viewsets.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2111,13 +2111,33 @@ def notion_import_callback(request):
21112111
return redirect(f"{settings.FRONTEND_URL}/import-notion/")
21122112

21132113

2114-
def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_page_id):
2115-
document_content = YdocConverter().convert_blocks(imported_doc.blocks)
2114+
def _import_notion_doc_content(imported_doc, obj, user):
    """Copy a page's Notion-hosted attachments to storage and save its content.

    For every entry of ``imported_doc.attachments`` the file is streamed from
    its Notion URL into the default storage bucket under a fresh key, the key
    is recorded on ``obj.attachments`` and the placeholder URL of the
    associated block is replaced by the media URL of the uploaded object.
    The (now patched) blocks are then converted and stored as ``obj.content``.

    Args:
        imported_doc: the imported page; its ``attachments`` and ``blocks``
            are read, and the attachment blocks are mutated in place.
        obj: the document receiving the attachments and the content; it is
            saved before returning.
        user: the importing user, recorded as owner in the object metadata.

    Raises:
        requests.HTTPError: if Notion answers the download with an error
            status (raised before anything is uploaded for that attachment).
        requests.Timeout: if Notion does not answer within the timeout.
    """
    s3_client = default_storage.connection.meta.client
    # (connect, read) timeouts in seconds: without them a stalled download
    # would block the import request indefinitely.
    download_timeout = (10, 60)

    for att in imported_doc.attachments:
        extra_args = {
            "Metadata": {
                "owner": str(user.id),
                "status": enums.DocumentAttachmentStatus.READY,  # TODO
            },
        }
        file_id = uuid.uuid4()
        key = f"{obj.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}.raw"

        with requests.get(
            att.file.file["url"], stream=True, timeout=download_timeout
        ) as resp:
            # Do not store an HTTP error body as if it were the attachment.
            resp.raise_for_status()
            # The raw stream is not content-decoded by default; make sure a
            # gzip/deflate-encoded response is stored as the actual file bytes.
            resp.raw.decode_content = True
            # ExtraArgs carries the owner/status metadata onto the object;
            # without it the metadata built above is silently dropped.
            s3_client.upload_fileobj(
                resp.raw,
                default_storage.bucket_name,
                key,
                ExtraArgs=extra_args,
            )

        obj.attachments.append(key)
        # Point the block (still holding a placeholder URL) at the upload.
        att.block["props"]["url"] = (
            f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}"
        )

    # Convert only after all attachment URLs have been patched into the blocks.
    obj.content = YdocConverter().convert_blocks(imported_doc.blocks)
    obj.save()
21162135

2136+
2137+
def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_page_id):
21172138
obj = parent_doc.add_child(
21182139
creator=user,
21192140
title=imported_doc.page.get_title() or "Child page",
2120-
content=document_content,
21212141
)
21222142

21232143
models.DocumentAccess.objects.create(
@@ -2126,21 +2146,20 @@ def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_p
21262146
role=models.RoleChoices.OWNER,
21272147
)
21282148

2149+
_import_notion_doc_content(imported_doc, obj, user)
2150+
21292151
imported_docs_by_page_id[imported_doc.page.id] = obj
21302152

21312153
for child in imported_doc.children:
21322154
_import_notion_child_page(child, obj, user, imported_docs_by_page_id)
21332155

21342156

21352157
def _import_notion_root_page(imported_doc, user, imported_docs_by_page_id):
2136-
document_content = YdocConverter().convert_blocks(imported_doc.blocks)
2137-
21382158
obj = models.Document.add_root(
21392159
depth=1,
21402160
creator=user,
21412161
title=imported_doc.page.get_title() or "Imported Notion page",
21422162
link_reach=models.LinkReachChoices.RESTRICTED,
2143-
content=document_content,
21442163
)
21452164

21462165
models.DocumentAccess.objects.create(
@@ -2149,13 +2168,15 @@ def _import_notion_root_page(imported_doc, user, imported_docs_by_page_id):
21492168
role=models.RoleChoices.OWNER,
21502169
)
21512170

2171+
_import_notion_doc_content(imported_doc, obj, user)
2172+
21522173
imported_docs_by_page_id[imported_doc.page.id] = obj
21532174

21542175
for child in imported_doc.children:
21552176
_import_notion_child_page(child, obj, user, imported_docs_by_page_id)
21562177

21572178

2158-
@drf.decorators.api_view(["POST"])
2179+
@drf.decorators.api_view(["GET", "POST"]) # TODO: drop GET (used for testing)
21592180
def notion_import_run(request):
21602181
if "notion_token" not in request.session:
21612182
raise drf.exceptions.PermissionDenied()

src/backend/core/notion_schemas/notion_block.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -166,20 +166,12 @@ class NotionEmbed(BaseModel):
166166
url: str
167167

168168

169-
class NotionFileType(StrEnum):
170-
FILE = "file"
171-
EXTERNAL = "external"
172-
FILE_UPLOAD = "file_upload"
173-
174-
175-
class NotionFile(BaseModel):
169+
class NotionBlockFile(BaseModel):
176170
# FIXME: this is actually another occurrence of type discriminating
177171
"""https://developers.notion.com/reference/block#file"""
178172

179173
block_type: Literal[NotionBlockType.FILE] = NotionBlockType.FILE
180-
caption: list[NotionRichText]
181-
type: NotionFileType
182-
...
174+
# TODO: NotionFile
183175

184176

185177
class NotionImage(BaseModel):
@@ -188,6 +180,13 @@ class NotionImage(BaseModel):
188180
block_type: Literal[NotionBlockType.IMAGE] = NotionBlockType.IMAGE
189181
file: NotionFile
190182

183+
@model_validator(mode="before")
184+
@classmethod
185+
def move_file_type_inward_and_rename(cls, data: Any) -> Any:
186+
if not isinstance(data, dict):
187+
return data
188+
return {"block_type": "image", "file": data}
189+
191190

192191
class NotionVideo(BaseModel):
193192
"""https://developers.notion.com/reference/block#video"""
@@ -280,7 +279,7 @@ def put_all_in_raw(cls, data: Any) -> Any:
280279
| NotionColumnList
281280
| NotionDivider
282281
| NotionEmbed
283-
| NotionFile
282+
| NotionBlockFile
284283
| NotionImage
285284
| NotionVideo
286285
| NotionLinkPreview

src/backend/core/services/notion_import.py

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818
NotionHeading1,
1919
NotionHeading2,
2020
NotionHeading3,
21+
NotionImage,
2122
NotionNumberedListItem,
2223
NotionParagraph,
2324
NotionTable,
2425
NotionTableRow,
2526
NotionToDo,
2627
NotionUnsupported,
2728
)
29+
from ..notion_schemas.notion_file import NotionFileExternal, NotionFileHosted
2830
from ..notion_schemas.notion_page import (
2931
NotionPage,
3032
NotionParentBlock,
@@ -149,7 +151,42 @@ def convert_rich_text(rich_text: NotionRichText) -> dict[str, Any]:
149151
}
150152

151153

152-
def convert_block(block: NotionBlock) -> list[dict[str, Any]]:
154+
class ImportedAttachment(BaseModel):
    """A Notion-hosted file paired with the converted block that displays it."""

    # The converted block dict itself. It is typed ``Any`` and is the same
    # object that convert_image() returns in the block list: the importer
    # later rewrites block["props"]["url"] in place once the file is uploaded.
    # NOTE(review): this relies on the dict not being copied on validation —
    # confirm before narrowing the annotation.
    block: Any
    # The Notion-hosted file to download.
    file: NotionFileHosted
157+
158+
159+
def convert_image(
160+
image: NotionImage, attachments: list[ImportedAttachment]
161+
) -> list[dict[str, Any]]:
162+
# TODO: NotionFileUpload
163+
match image.file:
164+
case NotionFileExternal():
165+
return [
166+
{
167+
"type": "image",
168+
"props": {
169+
"url": image.file.external["url"],
170+
},
171+
}
172+
]
173+
case NotionFileHosted():
174+
block = {
175+
"type": "image",
176+
"props": {
177+
"url": "about:blank", # populated later on
178+
},
179+
}
180+
attachments.append(ImportedAttachment(block=block, file=image.file))
181+
182+
return [block]
183+
case _:
184+
return [{"paragraph": {"content": "Unsupported image type"}}]
185+
186+
187+
def convert_block(
188+
block: NotionBlock, attachments: list[ImportedAttachment]
189+
) -> list[dict[str, Any]]:
153190
match block.specific:
154191
case NotionColumnList():
155192
columns_content = []
@@ -170,6 +207,8 @@ def convert_block(block: NotionBlock) -> list[dict[str, Any]]:
170207
"content": content,
171208
}
172209
]
210+
case NotionImage():
211+
return convert_image(block.specific, attachments)
173212
case NotionHeading1() | NotionHeading2() | NotionHeading3():
174213
return [
175214
{
@@ -328,17 +367,20 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]
328367
return res
329368

330369

331-
def convert_block_list(
    blocks: list[NotionBlock], attachments: list[ImportedAttachment]
) -> list[dict[str, Any]]:
    """Convert Notion blocks, in order, into one flat list of editor blocks.

    Each Notion block may yield several converted blocks; they are
    concatenated in input order. ``attachments`` is passed through to
    ``convert_block`` for every block.
    """
    return [
        converted
        for notion_block in blocks
        for converted in convert_block(notion_block, attachments)
    ]
336377

337378

338379
class ImportedDocument(BaseModel):
339380
page: NotionPage
340381
blocks: list[dict[str, Any]] = []
341382
children: list["ImportedDocument"] = []
383+
attachments: list[ImportedAttachment] = []
342384

343385

344386
def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
@@ -386,10 +428,13 @@ def import_page(
386428
blocks = fetch_block_children(session, page.id)
387429
logger.info(f"Page {page.get_title()} (id {page.id})")
388430
logger.info(blocks)
431+
attachments = []
432+
converted_blocks = convert_block_list(blocks, attachments)
389433
return ImportedDocument(
390434
page=page,
391-
blocks=convert_block_list(blocks),
435+
blocks=converted_blocks,
392436
children=convert_child_pages(session, page, blocks, all_pages),
437+
attachments=attachments,
393438
)
394439

395440

0 commit comments

Comments
 (0)