Skip to content

Commit 99d6526

Browse files
Castavo, emersion
authored and committed
✨(backend) handle child page blocks in Notion import
1 parent 1ef25cb commit 99d6526

File tree

1 file changed

+96
-51
lines changed

1 file changed

+96
-51
lines changed

src/backend/core/services/notion_import.py

Lines changed: 96 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import logging
33
from typing import Any
44

5-
from pydantic import BaseModel, TypeAdapter
5+
from pydantic import BaseModel, Field, TypeAdapter
66
from requests import Session
77

88
from ..notion_schemas.notion_block import (
@@ -135,7 +135,7 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]]
135135
{
136136
"type": "link",
137137
"content": [convert_rich_text(rich_text)],
138-
"href": rich_text.href,
138+
"href": rich_text.href, # FIXME: if it was a notion link, we should convert it to a link to the document
139139
}
140140
)
141141
else:
@@ -156,6 +156,11 @@ class ImportedAttachment(BaseModel):
156156
file: NotionFileHosted
157157

158158

159+
class ImportedChildPage(BaseModel):
    """Pairs a Notion child-page block with the converted block that stands in for it.

    Instances are created while converting a ``NotionChildPage`` block: the
    converter emits a placeholder paragraph containing a link and records it
    here so the link can be filled in later.
    """

    # The original Notion block describing the child page.
    child_page_block: NotionBlock
    # The converted placeholder block (a plain dict) to be updated later on.
    # NOTE(review): typed ``Any`` -- presumably so the dict is kept by
    # reference rather than re-validated; confirm the later update really
    # reaches the dict stored in the owning document's ``blocks``.
    block_to_update: Any
162+
163+
159164
def convert_image(
160165
image: NotionImage, attachments: list[ImportedAttachment]
161166
) -> list[dict[str, Any]]:
@@ -185,17 +190,21 @@ def convert_image(
185190

186191

187192
def convert_block(
188-
block: NotionBlock, attachments: list[ImportedAttachment]
193+
block: NotionBlock,
194+
attachments: list[ImportedAttachment],
195+
child_page_blocks: list[ImportedChildPage],
189196
) -> list[dict[str, Any]]:
190197
match block.specific:
191198
case NotionColumnList():
192199
columns_content = []
193200
for column in block.children:
194-
columns_content.extend(convert_block(column, attachments))
201+
columns_content.extend(
202+
convert_block(column, attachments, child_page_blocks)
203+
)
195204
return columns_content
196205
case NotionColumn():
197206
return [
198-
convert_block(child_content, attachments)[0]
207+
convert_block(child_content, attachments, child_page_blocks)[0]
199208
for child_content in block.children
200209
]
201210

@@ -222,7 +231,7 @@ def convert_block(
222231
}
223232
]
224233
# case NotionDivider():
225-
# return {"type": "divider", "properties": {}}
234+
# return [{"type": "divider"}]
226235
case NotionCallout():
227236
return [
228237
{
@@ -289,15 +298,23 @@ def convert_block(
289298
{
290299
"type": "bulletListItem",
291300
"content": convert_rich_texts(block.specific.rich_text),
292-
"children": convert_block_list(block.children, attachments),
301+
"children": convert_block_list(
302+
block.children,
303+
attachments,
304+
child_page_blocks,
305+
),
293306
}
294307
]
295308
case NotionNumberedListItem():
296309
return [
297310
{
298311
"type": "numberedListItem",
299312
"content": convert_rich_texts(block.specific.rich_text),
300-
"children": convert_block_list(block.children, attachments),
313+
"children": convert_block_list(
314+
block.children,
315+
attachments,
316+
child_page_blocks,
317+
),
301318
}
302319
]
303320
case NotionToDo():
@@ -306,7 +323,11 @@ def convert_block(
306323
"type": "checkListItem",
307324
"content": convert_rich_texts(block.specific.rich_text),
308325
"checked": block.specific.checked,
309-
"children": convert_block_list(block.children, attachments),
326+
"children": convert_block_list(
327+
block.children,
328+
attachments,
329+
child_page_blocks,
330+
),
310331
}
311332
]
312333
case NotionCode():
@@ -333,6 +354,22 @@ def convert_block(
333354
],
334355
}
335356
]
357+
case NotionChildPage():
358+
# TODO: convert to a link
359+
res = {
360+
"type": "paragraph",
361+
"content": [
362+
{
363+
"type": "link",
364+
"content": f"Child page: {block.specific.title}",
365+
"href": "about:blank", # populated later on
366+
},
367+
],
368+
}
369+
child_page_blocks.append(
370+
ImportedChildPage(child_page_block=block, block_to_update=res)
371+
)
372+
return [res]
336373
case NotionUnsupported():
337374
return [
338375
{
@@ -368,19 +405,22 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]
368405

369406

370407
def convert_block_list(
    blocks: list[NotionBlock],
    attachments: list[ImportedAttachment],
    child_page_blocks: list[ImportedChildPage],
) -> list[dict[str, Any]]:
    """Convert a sequence of Notion blocks into one flat list of converted blocks.

    Args:
        blocks: Notion blocks to convert, processed in order.
        attachments: Accumulator list forwarded unchanged to ``convert_block``.
        child_page_blocks: Accumulator list forwarded unchanged to
            ``convert_block``.

    Returns:
        The concatenation, in input order, of the lists returned by
        ``convert_block`` for each block.
    """
    # ``convert_block`` yields a list per input block; flatten them in order.
    return [
        converted
        for notion_block in blocks
        for converted in convert_block(notion_block, attachments, child_page_blocks)
    ]
377416

378417

379418
class ImportedDocument(BaseModel):
    """Result of importing a single Notion page.

    Every list field defaults to a fresh empty list via ``default_factory``.
    """

    # The Notion page this document was built from.
    page: NotionPage
    # Converted content blocks of the page.
    blocks: list[dict[str, Any]] = Field(default_factory=list)
    # Documents imported from the pages nested under this one.
    children: list["ImportedDocument"] = Field(default_factory=list)
    # Hosted files referenced by the page's blocks.
    attachments: list[ImportedAttachment] = Field(default_factory=list)
    # Placeholders for child-page blocks whose link is populated later on.
    # NOTE(review): pydantic may rebuild the ``blocks`` containers during
    # validation; confirm ``block_to_update`` still aliases what ends up in
    # ``blocks`` before relying on in-place updates.
    child_page_blocks: list[ImportedChildPage] = Field(default_factory=list)
384424

385425

386426
def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
@@ -393,57 +433,62 @@ def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
393433
return None
394434

395435

396-
def convert_child_pages(
397-
session: Session,
398-
parent: NotionPage,
399-
blocks: list[NotionBlock],
400-
all_pages: list[NotionPage],
401-
) -> list[ImportedDocument]:
402-
children = []
403-
404-
for page in all_pages:
405-
if (
406-
isinstance(page.parent, NotionParentPage)
407-
and page.parent.page_id == parent.id
408-
):
409-
children.append(import_page(session, page, all_pages))
410-
411-
for block in blocks:
412-
if not isinstance(block.specific, NotionChildPage):
413-
continue
414-
415-
# TODO: doesn't work, never finds the child
416-
child_page = find_block_child_page(block.id, all_pages)
417-
if child_page == None:
418-
logger.warning(f"Cannot find child page of block {block.id}")
419-
continue
420-
children.append(import_page(session, child_page, all_pages))
421-
422-
return children
423-
424-
425436
def import_page(
    session: Session,
    page: NotionPage,
    child_page_blocs_ids_to_parent_page_ids: dict[str, str],
) -> ImportedDocument:
    """Fetch the blocks of one Notion page and convert them into a document.

    Args:
        session: Session handed to ``fetch_block_children``.
        page: The Notion page to import.
        child_page_blocs_ids_to_parent_page_ids: Mapping updated in place. For
            every child-page block found while converting this page, the
            block's id is mapped to ``page.id``.

    Returns:
        An ``ImportedDocument`` carrying the converted blocks, the collected
        attachments and the child-page placeholders. ``children`` is not set
        here.
    """
    notion_blocks = fetch_block_children(session, page.id)
    logger.info(f"Page {page.get_title()} (id {page.id})")
    logger.info(notion_blocks)

    # Both lists are filled as a side effect of the conversion.
    found_attachments: list[ImportedAttachment] = []
    found_child_pages: list[ImportedChildPage] = []
    document_blocks = convert_block_list(
        notion_blocks, found_attachments, found_child_pages
    )

    # Record which page each child-page block was found on.
    child_page_blocs_ids_to_parent_page_ids.update(
        {entry.child_page_block.id: page.id for entry in found_child_pages}
    )

    return ImportedDocument(
        page=page,
        blocks=document_blocks,
        attachments=found_attachments,
        child_page_blocks=found_child_pages,
    )
439461

440462

441463
def import_notion(token: str) -> list[ImportedDocument]:
    """Import all Notion pages accessible with the given token as a page tree.

    Every page returned by ``fetch_all_pages`` is imported once, then the
    documents are linked according to each page's ``parent``:

    * page parent: attached to the parent page's document;
    * block parent: attached to the page on which the matching child-page
      block was found during conversion;
    * workspace parent: returned as a root document.

    A page whose parent cannot be resolved is logged with a warning and left
    out of the tree. Pages with any other parent kind are not attached.

    Args:
        token: Token passed to ``build_notion_session``.

    Returns:
        The root documents, with descendants reachable through ``children``.
    """
    session = build_notion_session(token)
    all_pages = fetch_all_pages(session)

    # First pass: import every page once and record, for each child-page
    # block, the id of the page it was found on.
    docs_by_page_id: dict[str, ImportedDocument] = {}
    child_page_blocs_ids_to_parent_page_ids: dict[str, str] = {}
    for page in all_pages:
        docs_by_page_id[page.id] = import_page(
            session, page, child_page_blocs_ids_to_parent_page_ids
        )

    # Second pass: link the documents together.
    root_pages: list[ImportedDocument] = []
    for page in all_pages:
        doc = docs_by_page_id[page.id]
        if isinstance(page.parent, NotionParentPage):
            # The parent may be missing from ``all_pages``; a plain index
            # lookup would abort the whole import with a KeyError.
            parent_doc = docs_by_page_id.get(page.parent.page_id)
            if parent_doc is None:
                logger.warning(
                    "Page %s has parent page %s, which was not imported.",
                    page.id,
                    page.parent.page_id,
                )
            else:
                parent_doc.children.append(doc)
        elif isinstance(page.parent, NotionParentBlock):
            # Looked up by the page's own id: the mapping is keyed by the id
            # of the child-page block seen on the parent page.
            parent_page_id = child_page_blocs_ids_to_parent_page_ids.get(page.id)
            if parent_page_id:
                docs_by_page_id[parent_page_id].children.append(doc)
            else:
                logger.warning(
                    "Page %s has a parent block, but no parent page found.",
                    page.id,
                )
        elif isinstance(page.parent, NotionParentWorkspace):
            # This is a root page, not a child of another page.
            root_pages.append(doc)

    return root_pages

0 commit comments

Comments
 (0)