Skip to content

Commit 1bfac39

Browse files
Castavo, emersion
authored and committed
✨(backend) handle child page blocks in Notion import
1 parent 6e5b765 commit 1bfac39

File tree

1 file changed

+96
-51
lines changed

1 file changed

+96
-51
lines changed

src/backend/core/services/notion_import.py

Lines changed: 96 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import logging
33
from typing import Any
44

5-
from pydantic import BaseModel, TypeAdapter
5+
from pydantic import BaseModel, Field, TypeAdapter
66
from requests import Session
77

88
from ..notion_schemas.notion_block import (
@@ -138,7 +138,7 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]]
138138
{
139139
"type": "link",
140140
"content": [convert_rich_text(rich_text)],
141-
"href": rich_text.href,
141+
"href": rich_text.href, # FIXME: if it was a notion link, we should convert it to a link to the document
142142
}
143143
)
144144
else:
@@ -159,6 +159,11 @@ class ImportedAttachment(BaseModel):
159159
file: NotionFileHosted
160160

161161

162+
class ImportedChildPage(BaseModel):
163+
child_page_block: NotionBlock
164+
block_to_update: Any
165+
166+
162167
def convert_image(
163168
image: NotionImage, attachments: list[ImportedAttachment]
164169
) -> list[dict[str, Any]]:
@@ -188,17 +193,21 @@ def convert_image(
188193

189194

190195
def convert_block(
191-
block: NotionBlock, attachments: list[ImportedAttachment]
196+
block: NotionBlock,
197+
attachments: list[ImportedAttachment],
198+
child_page_blocks: list[ImportedChildPage],
192199
) -> list[dict[str, Any]]:
193200
match block.specific:
194201
case NotionColumnList():
195202
columns_content = []
196203
for column in block.children:
197-
columns_content.extend(convert_block(column, attachments))
204+
columns_content.extend(
205+
convert_block(column, attachments, child_page_blocks)
206+
)
198207
return columns_content
199208
case NotionColumn():
200209
return [
201-
convert_block(child_content, attachments)[0]
210+
convert_block(child_content, attachments, child_page_blocks)[0]
202211
for child_content in block.children
203212
]
204213

@@ -225,7 +234,7 @@ def convert_block(
225234
}
226235
]
227236
# case NotionDivider():
228-
# return {"type": "divider", "properties": {}}
237+
# return [{"type": "divider"}]
229238
case NotionCallout():
230239
return [
231240
{
@@ -292,15 +301,23 @@ def convert_block(
292301
{
293302
"type": "bulletListItem",
294303
"content": convert_rich_texts(block.specific.rich_text),
295-
"children": convert_block_list(block.children, attachments),
304+
"children": convert_block_list(
305+
block.children,
306+
attachments,
307+
child_page_blocks,
308+
),
296309
}
297310
]
298311
case NotionNumberedListItem():
299312
return [
300313
{
301314
"type": "numberedListItem",
302315
"content": convert_rich_texts(block.specific.rich_text),
303-
"children": convert_block_list(block.children, attachments),
316+
"children": convert_block_list(
317+
block.children,
318+
attachments,
319+
child_page_blocks,
320+
),
304321
}
305322
]
306323
case NotionToDo():
@@ -309,7 +326,11 @@ def convert_block(
309326
"type": "checkListItem",
310327
"content": convert_rich_texts(block.specific.rich_text),
311328
"checked": block.specific.checked,
312-
"children": convert_block_list(block.children, attachments),
329+
"children": convert_block_list(
330+
block.children,
331+
attachments,
332+
child_page_blocks,
333+
),
313334
}
314335
]
315336
case NotionCode():
@@ -336,6 +357,22 @@ def convert_block(
336357
],
337358
}
338359
]
360+
case NotionChildPage():
361+
# TODO: replace the placeholder href below with a link to the imported child document
362+
res = {
363+
"type": "paragraph",
364+
"content": [
365+
{
366+
"type": "link",
367+
"content": f"Child page: {block.specific.title}",
368+
"href": "about:blank", # populated later on
369+
},
370+
],
371+
}
372+
child_page_blocks.append(
373+
ImportedChildPage(child_page_block=block, block_to_update=res)
374+
)
375+
return [res]
339376
case NotionUnsupported():
340377
return [
341378
{
@@ -371,19 +408,22 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]
371408

372409

373410
def convert_block_list(
374-
blocks: list[NotionBlock], attachments: list[ImportedAttachment]
411+
blocks: list[NotionBlock],
412+
attachments: list[ImportedAttachment],
413+
child_page_blocks: list[ImportedChildPage],
375414
) -> list[dict[str, Any]]:
376415
converted_blocks = []
377416
for block in blocks:
378-
converted_blocks.extend(convert_block(block, attachments))
417+
converted_blocks.extend(convert_block(block, attachments, child_page_blocks))
379418
return converted_blocks
380419

381420

382421
class ImportedDocument(BaseModel):
383422
page: NotionPage
384-
blocks: list[dict[str, Any]] = []
385-
children: list["ImportedDocument"] = []
386-
attachments: list[ImportedAttachment] = []
423+
blocks: list[dict[str, Any]] = Field(default_factory=list)
424+
children: list["ImportedDocument"] = Field(default_factory=list)
425+
attachments: list[ImportedAttachment] = Field(default_factory=list)
426+
child_page_blocks: list[ImportedChildPage] = Field(default_factory=list)
387427

388428

389429
def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
@@ -396,57 +436,62 @@ def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
396436
return None
397437

398438

399-
def convert_child_pages(
400-
session: Session,
401-
parent: NotionPage,
402-
blocks: list[NotionBlock],
403-
all_pages: list[NotionPage],
404-
) -> list[ImportedDocument]:
405-
children = []
406-
407-
for page in all_pages:
408-
if (
409-
isinstance(page.parent, NotionParentPage)
410-
and page.parent.page_id == parent.id
411-
):
412-
children.append(import_page(session, page, all_pages))
413-
414-
for block in blocks:
415-
if not isinstance(block.specific, NotionChildPage):
416-
continue
417-
418-
# TODO: doesn't work, never finds the child
419-
child_page = find_block_child_page(block.id, all_pages)
420-
if child_page == None:
421-
logger.warning(f"Cannot find child page of block {block.id}")
422-
continue
423-
children.append(import_page(session, child_page, all_pages))
424-
425-
return children
426-
427-
428439
def import_page(
429-
session: Session, page: NotionPage, all_pages: list[NotionPage]
440+
session: Session,
441+
page: NotionPage,
442+
child_page_blocs_ids_to_parent_page_ids: dict[str, str],
430443
) -> ImportedDocument:
431444
blocks = fetch_block_children(session, page.id)
432445
logger.info(f"Page {page.get_title()} (id {page.id})")
433446
logger.info(blocks)
434-
attachments = []
435-
converted_blocks = convert_block_list(blocks, attachments)
447+
attachments: list[ImportedAttachment] = []
448+
449+
child_page_blocks: list[ImportedChildPage] = []
450+
451+
converted_blocks = convert_block_list(blocks, attachments, child_page_blocks)
452+
453+
for child_page_block in child_page_blocks:
454+
child_page_blocs_ids_to_parent_page_ids[
455+
child_page_block.child_page_block.id
456+
] = page.id
457+
436458
return ImportedDocument(
437459
page=page,
438460
blocks=converted_blocks,
439-
children=convert_child_pages(session, page, blocks, all_pages),
440461
attachments=attachments,
462+
child_page_blocks=child_page_blocks,
441463
)
442464

443465

444466
def import_notion(token: str) -> list[ImportedDocument]:
445467
"""Recursively imports all Notion pages and blocks accessible using the given token."""
446468
session = build_notion_session(token)
447469
all_pages = fetch_all_pages(session)
448-
docs = []
470+
docs_by_page_id: dict[str, ImportedDocument] = {}
471+
child_page_blocs_ids_to_parent_page_ids: dict[str, str] = {}
449472
for page in all_pages:
450-
if isinstance(page.parent, NotionParentWorkspace):
451-
docs.append(import_page(session, page, all_pages))
452-
return docs
473+
docs_by_page_id[page.id] = import_page(
474+
session, page, child_page_blocs_ids_to_parent_page_ids
475+
)
476+
477+
root_pages = []
478+
for page in all_pages:
479+
if isinstance(page.parent, NotionParentPage):
480+
docs_by_page_id[page.parent.page_id].children.append(
481+
docs_by_page_id[page.id]
482+
)
483+
elif isinstance(page.parent, NotionParentBlock):
484+
parent_page_id = child_page_blocs_ids_to_parent_page_ids.get(page.id)
485+
if parent_page_id:
486+
docs_by_page_id[parent_page_id].children.append(
487+
docs_by_page_id[page.id]
488+
)
489+
else:
490+
logger.warning(
491+
f"Page {page.id} has a parent block, but no parent page found."
492+
)
493+
elif isinstance(page.parent, NotionParentWorkspace):
494+
# This is a root page, not a child of another page
495+
root_pages.append(docs_by_page_id[page.id])
496+
497+
return root_pages

0 commit comments

Comments
 (0)