Skip to content

Commit 29ffdb4

Browse files
Castavo authored and emersion committed
✨(backend) handle child page blocks in Notion import
1 parent 62642ad commit 29ffdb4

File tree

1 file changed

+96
-51
lines changed

1 file changed

+96
-51
lines changed

src/backend/core/services/notion_import.py

Lines changed: 96 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import logging
33
from typing import Any
44

5-
from pydantic import BaseModel, TypeAdapter
5+
from pydantic import BaseModel, Field, TypeAdapter
66
from requests import Session
77

88
from ..notion_schemas.notion_block import (
@@ -138,7 +138,7 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]]
138138
{
139139
"type": "link",
140140
"content": [convert_rich_text(rich_text)],
141-
"href": rich_text.href,
141+
"href": rich_text.href, # FIXME: if it was a notion link, we should convert it to a link to the document
142142
}
143143
)
144144
else:
@@ -159,6 +159,11 @@ class ImportedAttachment(BaseModel):
159159
file: NotionFileHosted
160160

161161

162+
class ImportedChildPage(BaseModel):
163+
child_page_block: NotionBlock
164+
block_to_update: Any
165+
166+
162167
def convert_image(
163168
image: NotionImage, attachments: list[ImportedAttachment]
164169
) -> list[dict[str, Any]]:
@@ -188,17 +193,21 @@ def convert_image(
188193

189194

190195
def convert_block(
191-
block: NotionBlock, attachments: list[ImportedAttachment]
196+
block: NotionBlock,
197+
attachments: list[ImportedAttachment],
198+
child_page_blocks: list[ImportedChildPage],
192199
) -> list[dict[str, Any]]:
193200
match block.specific:
194201
case NotionColumnList():
195202
columns_content = []
196203
for column in block.children:
197-
columns_content.extend(convert_block(column, attachments))
204+
columns_content.extend(
205+
convert_block(column, attachments, child_page_blocks)
206+
)
198207
return columns_content
199208
case NotionColumn():
200209
return [
201-
convert_block(child_content, attachments)[0]
210+
convert_block(child_content, attachments, child_page_blocks)[0]
202211
for child_content in block.children
203212
]
204213

@@ -225,7 +234,7 @@ def convert_block(
225234
}
226235
]
227236
# case NotionDivider():
228-
# return {"type": "divider", "properties": {}}
237+
# return [{"type": "divider"}]
229238
case NotionCallout():
230239
return [
231240
{
@@ -292,15 +301,23 @@ def convert_block(
292301
{
293302
"type": "bulletListItem",
294303
"content": convert_rich_texts(block.specific.rich_text),
295-
"children": convert_block_list(block.children, attachments),
304+
"children": convert_block_list(
305+
block.children,
306+
attachments,
307+
child_page_blocks,
308+
),
296309
}
297310
]
298311
case NotionNumberedListItem():
299312
return [
300313
{
301314
"type": "numberedListItem",
302315
"content": convert_rich_texts(block.specific.rich_text),
303-
"children": convert_block_list(block.children, attachments),
316+
"children": convert_block_list(
317+
block.children,
318+
attachments,
319+
child_page_blocks,
320+
),
304321
}
305322
]
306323
case NotionToDo():
@@ -309,7 +326,11 @@ def convert_block(
309326
"type": "checkListItem",
310327
"content": convert_rich_texts(block.specific.rich_text),
311328
"checked": block.specific.checked,
312-
"children": convert_block_list(block.children, attachments),
329+
"children": convert_block_list(
330+
block.children,
331+
attachments,
332+
child_page_blocks,
333+
),
313334
}
314335
]
315336
case NotionCode():
@@ -336,6 +357,22 @@ def convert_block(
336357
],
337358
}
338359
]
360+
case NotionChildPage():
361+
# TODO: convert to a link
362+
res = {
363+
"type": "paragraph",
364+
"content": [
365+
{
366+
"type": "link",
367+
"content": f"Child page: {block.specific.title}",
368+
"href": "about:blank", # populated later on
369+
},
370+
],
371+
}
372+
child_page_blocks.append(
373+
ImportedChildPage(child_page_block=block, block_to_update=res)
374+
)
375+
return [res]
339376
case NotionUnsupported():
340377
return [
341378
{
@@ -375,19 +412,22 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]
375412

376413

377414
def convert_block_list(
378-
blocks: list[NotionBlock], attachments: list[ImportedAttachment]
415+
blocks: list[NotionBlock],
416+
attachments: list[ImportedAttachment],
417+
child_page_blocks: list[ImportedChildPage],
379418
) -> list[dict[str, Any]]:
380419
converted_blocks = []
381420
for block in blocks:
382-
converted_blocks.extend(convert_block(block, attachments))
421+
converted_blocks.extend(convert_block(block, attachments, child_page_blocks))
383422
return converted_blocks
384423

385424

386425
class ImportedDocument(BaseModel):
387426
page: NotionPage
388-
blocks: list[dict[str, Any]] = []
389-
children: list["ImportedDocument"] = []
390-
attachments: list[ImportedAttachment] = []
427+
blocks: list[dict[str, Any]] = Field(default_factory=list)
428+
children: list["ImportedDocument"] = Field(default_factory=list)
429+
attachments: list[ImportedAttachment] = Field(default_factory=list)
430+
child_page_blocks: list[ImportedChildPage] = Field(default_factory=list)
391431

392432

393433
def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
@@ -400,57 +440,62 @@ def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
400440
return None
401441

402442

403-
def convert_child_pages(
404-
session: Session,
405-
parent: NotionPage,
406-
blocks: list[NotionBlock],
407-
all_pages: list[NotionPage],
408-
) -> list[ImportedDocument]:
409-
children = []
410-
411-
for page in all_pages:
412-
if (
413-
isinstance(page.parent, NotionParentPage)
414-
and page.parent.page_id == parent.id
415-
):
416-
children.append(import_page(session, page, all_pages))
417-
418-
for block in blocks:
419-
if not isinstance(block.specific, NotionChildPage):
420-
continue
421-
422-
# TODO: doesn't work, never finds the child
423-
child_page = find_block_child_page(block.id, all_pages)
424-
if child_page == None:
425-
logger.warning(f"Cannot find child page of block {block.id}")
426-
continue
427-
children.append(import_page(session, child_page, all_pages))
428-
429-
return children
430-
431-
432443
def import_page(
433-
session: Session, page: NotionPage, all_pages: list[NotionPage]
444+
session: Session,
445+
page: NotionPage,
446+
child_page_blocs_ids_to_parent_page_ids: dict[str, str],
434447
) -> ImportedDocument:
435448
blocks = fetch_block_children(session, page.id)
436449
logger.info(f"Page {page.get_title()} (id {page.id})")
437450
logger.info(blocks)
438-
attachments = []
439-
converted_blocks = convert_block_list(blocks, attachments)
451+
attachments: list[ImportedAttachment] = []
452+
453+
child_page_blocks: list[ImportedChildPage] = []
454+
455+
converted_blocks = convert_block_list(blocks, attachments, child_page_blocks)
456+
457+
for child_page_block in child_page_blocks:
458+
child_page_blocs_ids_to_parent_page_ids[
459+
child_page_block.child_page_block.id
460+
] = page.id
461+
440462
return ImportedDocument(
441463
page=page,
442464
blocks=converted_blocks,
443-
children=convert_child_pages(session, page, blocks, all_pages),
444465
attachments=attachments,
466+
child_page_blocks=child_page_blocks,
445467
)
446468

447469

448470
def import_notion(token: str) -> list[ImportedDocument]:
449471
"""Recursively imports all Notion pages and blocks accessible using the given token."""
450472
session = build_notion_session(token)
451473
all_pages = fetch_all_pages(session)
452-
docs = []
474+
docs_by_page_id: dict[str, ImportedDocument] = {}
475+
child_page_blocs_ids_to_parent_page_ids: dict[str, str] = {}
453476
for page in all_pages:
454-
if isinstance(page.parent, NotionParentWorkspace):
455-
docs.append(import_page(session, page, all_pages))
456-
return docs
477+
docs_by_page_id[page.id] = import_page(
478+
session, page, child_page_blocs_ids_to_parent_page_ids
479+
)
480+
481+
root_pages = []
482+
for page in all_pages:
483+
if isinstance(page.parent, NotionParentPage):
484+
docs_by_page_id[page.parent.page_id].children.append(
485+
docs_by_page_id[page.id]
486+
)
487+
elif isinstance(page.parent, NotionParentBlock):
488+
parent_page_id = child_page_blocs_ids_to_parent_page_ids.get(page.id)
489+
if parent_page_id:
490+
docs_by_page_id[parent_page_id].children.append(
491+
docs_by_page_id[page.id]
492+
)
493+
else:
494+
logger.warning(
495+
f"Page {page.id} has a parent block, but no parent page found."
496+
)
497+
elif isinstance(page.parent, NotionParentWorkspace):
498+
# This is a root page, not a child of another page
499+
root_pages.append(docs_by_page_id[page.id])
500+
501+
return root_pages

0 commit comments

Comments
 (0)