2
2
import logging
3
3
from typing import Any
4
4
5
- from pydantic import BaseModel , TypeAdapter
5
+ from pydantic import BaseModel , Field , TypeAdapter
6
6
from requests import Session
7
7
8
8
from ..notion_schemas .notion_block import (
@@ -138,7 +138,7 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]]
138
138
{
139
139
"type" : "link" ,
140
140
"content" : [convert_rich_text (rich_text )],
141
- "href" : rich_text .href ,
141
+ "href" : rich_text .href , # FIXME: if it was a notion link, we should convert it to a link to the document
142
142
}
143
143
)
144
144
else :
@@ -159,6 +159,11 @@ class ImportedAttachment(BaseModel):
159
159
file : NotionFileHosted
160
160
161
161
162
class ImportedChildPage(BaseModel):
    """Pairs a Notion child-page block with the converted block that stands in for it.

    ``convert_block`` creates one of these for every ``NotionChildPage`` block
    it meets, so the placeholder link it emitted can be filled in afterwards.
    """

    # The Notion block whose ``specific`` payload is a ``NotionChildPage``.
    child_page_block: NotionBlock
    # The converted paragraph dict emitted for that block; its link ``href`` is
    # a placeholder ("about:blank") meant to be populated later.
    # NOTE(review): typed ``Any`` presumably so the dict is stored as-is and can
    # be mutated in place -- confirm against the pydantic validation behaviour.
    block_to_update: Any
165
+
166
+
162
167
def convert_image (
163
168
image : NotionImage , attachments : list [ImportedAttachment ]
164
169
) -> list [dict [str , Any ]]:
@@ -188,17 +193,21 @@ def convert_image(
188
193
189
194
190
195
def convert_block (
191
- block : NotionBlock , attachments : list [ImportedAttachment ]
196
+ block : NotionBlock ,
197
+ attachments : list [ImportedAttachment ],
198
+ child_page_blocks : list [ImportedChildPage ],
192
199
) -> list [dict [str , Any ]]:
193
200
match block .specific :
194
201
case NotionColumnList ():
195
202
columns_content = []
196
203
for column in block .children :
197
- columns_content .extend (convert_block (column , attachments ))
204
+ columns_content .extend (
205
+ convert_block (column , attachments , child_page_blocks )
206
+ )
198
207
return columns_content
199
208
case NotionColumn ():
200
209
return [
201
- convert_block (child_content , attachments )[0 ]
210
+ convert_block (child_content , attachments , child_page_blocks )[0 ]
202
211
for child_content in block .children
203
212
]
204
213
@@ -225,7 +234,7 @@ def convert_block(
225
234
}
226
235
]
227
236
# case NotionDivider():
228
- # return {"type": "divider", "properties": {}}
237
+ # return [ {"type": "divider"}]
229
238
case NotionCallout ():
230
239
return [
231
240
{
@@ -292,15 +301,23 @@ def convert_block(
292
301
{
293
302
"type" : "bulletListItem" ,
294
303
"content" : convert_rich_texts (block .specific .rich_text ),
295
- "children" : convert_block_list (block .children , attachments ),
304
+ "children" : convert_block_list (
305
+ block .children ,
306
+ attachments ,
307
+ child_page_blocks ,
308
+ ),
296
309
}
297
310
]
298
311
case NotionNumberedListItem ():
299
312
return [
300
313
{
301
314
"type" : "numberedListItem" ,
302
315
"content" : convert_rich_texts (block .specific .rich_text ),
303
- "children" : convert_block_list (block .children , attachments ),
316
+ "children" : convert_block_list (
317
+ block .children ,
318
+ attachments ,
319
+ child_page_blocks ,
320
+ ),
304
321
}
305
322
]
306
323
case NotionToDo ():
@@ -309,7 +326,11 @@ def convert_block(
309
326
"type" : "checkListItem" ,
310
327
"content" : convert_rich_texts (block .specific .rich_text ),
311
328
"checked" : block .specific .checked ,
312
- "children" : convert_block_list (block .children , attachments ),
329
+ "children" : convert_block_list (
330
+ block .children ,
331
+ attachments ,
332
+ child_page_blocks ,
333
+ ),
313
334
}
314
335
]
315
336
case NotionCode ():
@@ -336,6 +357,22 @@ def convert_block(
336
357
],
337
358
}
338
359
]
360
+ case NotionChildPage ():
361
+ # TODO: convert to a link
362
+ res = {
363
+ "type" : "paragraph" ,
364
+ "content" : [
365
+ {
366
+ "type" : "link" ,
367
+ "content" : f"Child page: { block .specific .title } " ,
368
+ "href" : "about:blank" , # populated later on
369
+ },
370
+ ],
371
+ }
372
+ child_page_blocks .append (
373
+ ImportedChildPage (child_page_block = block , block_to_update = res )
374
+ )
375
+ return [res ]
339
376
case NotionUnsupported ():
340
377
return [
341
378
{
@@ -371,19 +408,22 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]
371
408
372
409
373
410
def convert_block_list(
    blocks: list[NotionBlock],
    attachments: list[ImportedAttachment],
    child_page_blocks: list[ImportedChildPage],
) -> list[dict[str, Any]]:
    """Convert a sequence of Notion blocks into one flat list of converted blocks.

    Each block is handed to ``convert_block`` in order; because a single Notion
    block may yield several converted blocks, the per-block results are
    concatenated rather than nested.

    Args:
        blocks: Notion blocks to convert, in document order.
        attachments: Accumulator forwarded unchanged to ``convert_block``.
        child_page_blocks: Accumulator forwarded unchanged to ``convert_block``,
            which appends an entry for every child-page block it converts.

    Returns:
        The converted blocks, in the same order as ``blocks``.
    """
    return [
        converted
        for notion_block in blocks
        for converted in convert_block(notion_block, attachments, child_page_blocks)
    ]
380
419
381
420
382
421
class ImportedDocument(BaseModel):
    """Result of importing one Notion page.

    Only ``page`` is required; every list field defaults to a fresh empty list.
    """

    # The Notion page this document was built from.
    page: NotionPage
    # Converted content blocks of the page.
    blocks: list[dict[str, Any]] = Field(default_factory=list)
    # Documents of the pages nested under this one.
    children: list["ImportedDocument"] = Field(default_factory=list)
    # Attachments collected while converting the page's blocks.
    attachments: list[ImportedAttachment] = Field(default_factory=list)
    # Child-page blocks met during conversion, each paired with the converted
    # block that still carries a placeholder link.
    child_page_blocks: list[ImportedChildPage] = Field(default_factory=list)
387
427
388
428
389
429
def find_block_child_page (block_id : str , all_pages : list [NotionPage ]):
@@ -396,57 +436,62 @@ def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
396
436
return None
397
437
398
438
399
- def convert_child_pages (
400
- session : Session ,
401
- parent : NotionPage ,
402
- blocks : list [NotionBlock ],
403
- all_pages : list [NotionPage ],
404
- ) -> list [ImportedDocument ]:
405
- children = []
406
-
407
- for page in all_pages :
408
- if (
409
- isinstance (page .parent , NotionParentPage )
410
- and page .parent .page_id == parent .id
411
- ):
412
- children .append (import_page (session , page , all_pages ))
413
-
414
- for block in blocks :
415
- if not isinstance (block .specific , NotionChildPage ):
416
- continue
417
-
418
- # TODO: doesn't work, never finds the child
419
- child_page = find_block_child_page (block .id , all_pages )
420
- if child_page == None :
421
- logger .warning (f"Cannot find child page of block { block .id } " )
422
- continue
423
- children .append (import_page (session , child_page , all_pages ))
424
-
425
- return children
426
-
427
-
428
439
def import_page(
    session: Session,
    page: NotionPage,
    child_page_blocs_ids_to_parent_page_ids: dict[str, str],
) -> ImportedDocument:
    """Fetch and convert the blocks of a single Notion page.

    Args:
        session: Session passed to ``fetch_block_children``.
        page: The page to import.
        child_page_blocs_ids_to_parent_page_ids: Shared registry, updated in
            place: the id of every child-page block found while converting this
            page is mapped to ``page.id``.

    Returns:
        An ``ImportedDocument`` holding the page, its converted blocks, its
        attachments and its child-page blocks. ``children`` is left at its
        default here.
    """
    blocks = fetch_block_children(session, page.id)
    logger.info(f"Page {page.get_title()} (id {page.id})")
    logger.info(blocks)

    # Accumulators handed to the conversion pass; ``convert_block`` appends to
    # the child-page list whenever it meets a child-page block.
    collected_attachments: list[ImportedAttachment] = []
    collected_child_pages: list[ImportedChildPage] = []
    document_blocks = convert_block_list(
        blocks, collected_attachments, collected_child_pages
    )

    # Record which page contains each child-page block so the caller can
    # resolve page parents afterwards.
    child_page_blocs_ids_to_parent_page_ids.update(
        {entry.child_page_block.id: page.id for entry in collected_child_pages}
    )

    return ImportedDocument(
        page=page,
        blocks=document_blocks,
        attachments=collected_attachments,
        child_page_blocks=collected_child_pages,
    )
442
464
443
465
444
466
def import_notion(token: str) -> list[ImportedDocument]:
    """Import all Notion pages accessible with the given token as a document tree.

    Works in two passes. First every page returned by ``fetch_all_pages`` is
    imported on its own, and the child-page blocks met along the way are
    recorded (block id -> id of the page containing it). Then each imported
    document is attached to its parent document according to ``page.parent``.

    Args:
        token: Notion token used to build the API session.

    Returns:
        The root documents; nested pages are reachable through ``children``.
        A page whose parent page was not among the fetched pages is returned as
        a root (with a warning) instead of aborting the import.
    """
    session = build_notion_session(token)
    all_pages = fetch_all_pages(session)

    # Pass 1: import every page independently.
    docs_by_page_id: dict[str, ImportedDocument] = {}
    child_page_blocs_ids_to_parent_page_ids: dict[str, str] = {}
    for page in all_pages:
        docs_by_page_id[page.id] = import_page(
            session, page, child_page_blocs_ids_to_parent_page_ids
        )

    # Pass 2: link each document to its parent.
    root_pages: list[ImportedDocument] = []
    for page in all_pages:
        doc = docs_by_page_id[page.id]
        if isinstance(page.parent, NotionParentPage):
            parent_doc = docs_by_page_id.get(page.parent.page_id)
            if parent_doc is None:
                # The parent page was not fetched, so a plain lookup would
                # raise KeyError; keep the page instead of losing the import.
                logger.warning(
                    f"Page {page.id} has parent page {page.parent.page_id}, "
                    "which was not imported; keeping it as a root page."
                )
                root_pages.append(doc)
            else:
                parent_doc.children.append(doc)
        elif isinstance(page.parent, NotionParentBlock):
            # The registry is keyed by child-page block id and is looked up
            # with the page id here.
            parent_page_id = child_page_blocs_ids_to_parent_page_ids.get(page.id)
            if parent_page_id:
                docs_by_page_id[parent_page_id].children.append(doc)
            else:
                logger.warning(
                    f"Page {page.id} has a parent block, but no parent page found."
                )
        elif isinstance(page.parent, NotionParentWorkspace):
            # This is a root page, not a child of another page
            root_pages.append(doc)
        else:
            # Previously such pages vanished silently; still skipped, but logged.
            logger.warning(
                f"Page {page.id} has an unsupported parent type "
                f"{type(page.parent).__name__}; skipping it."
            )

    return root_pages
0 commit comments