2
2
import logging
3
3
from typing import Any
4
4
5
- from pydantic import BaseModel , TypeAdapter
5
+ from pydantic import BaseModel , Field , TypeAdapter
6
6
from requests import Session
7
7
8
8
from ..notion_schemas .notion_block import (
@@ -138,7 +138,7 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]]
138
138
{
139
139
"type" : "link" ,
140
140
"content" : [convert_rich_text (rich_text )],
141
- "href" : rich_text .href ,
141
+ "href" : rich_text .href , # FIXME: if it was a notion link, we should convert it to a link to the document
142
142
}
143
143
)
144
144
else :
@@ -159,6 +159,11 @@ class ImportedAttachment(BaseModel):
159
159
file : NotionFileHosted
160
160
161
161
162
class ImportedChildPage(BaseModel):
    """Pairs a Notion child-page block with the converted block emitted for it.

    Instances are collected while blocks are being converted, so the
    placeholder emitted for a child page can be found again afterwards.
    """

    # The Notion block (whose `specific` is a NotionChildPage) as fetched.
    child_page_block: NotionBlock
    # The converted block produced in place of the child page. Kept as `Any`;
    # presumably so the very same object is stored and can be patched later
    # (e.g. its placeholder href) -- TODO confirm.
    block_to_update: Any
165
+
166
+
162
167
def convert_image (
163
168
image : NotionImage , attachments : list [ImportedAttachment ]
164
169
) -> list [dict [str , Any ]]:
@@ -188,17 +193,21 @@ def convert_image(
188
193
189
194
190
195
def convert_block (
191
- block : NotionBlock , attachments : list [ImportedAttachment ]
196
+ block : NotionBlock ,
197
+ attachments : list [ImportedAttachment ],
198
+ child_page_blocks : list [ImportedChildPage ],
192
199
) -> list [dict [str , Any ]]:
193
200
match block .specific :
194
201
case NotionColumnList ():
195
202
columns_content = []
196
203
for column in block .children :
197
- columns_content .extend (convert_block (column , attachments ))
204
+ columns_content .extend (
205
+ convert_block (column , attachments , child_page_blocks )
206
+ )
198
207
return columns_content
199
208
case NotionColumn ():
200
209
return [
201
- convert_block (child_content , attachments )[0 ]
210
+ convert_block (child_content , attachments , child_page_blocks )[0 ]
202
211
for child_content in block .children
203
212
]
204
213
@@ -225,7 +234,7 @@ def convert_block(
225
234
}
226
235
]
227
236
# case NotionDivider():
228
- # return {"type": "divider", "properties": {}}
237
+ # return [ {"type": "divider"}]
229
238
case NotionCallout ():
230
239
return [
231
240
{
@@ -292,15 +301,23 @@ def convert_block(
292
301
{
293
302
"type" : "bulletListItem" ,
294
303
"content" : convert_rich_texts (block .specific .rich_text ),
295
- "children" : convert_block_list (block .children , attachments ),
304
+ "children" : convert_block_list (
305
+ block .children ,
306
+ attachments ,
307
+ child_page_blocks ,
308
+ ),
296
309
}
297
310
]
298
311
case NotionNumberedListItem ():
299
312
return [
300
313
{
301
314
"type" : "numberedListItem" ,
302
315
"content" : convert_rich_texts (block .specific .rich_text ),
303
- "children" : convert_block_list (block .children , attachments ),
316
+ "children" : convert_block_list (
317
+ block .children ,
318
+ attachments ,
319
+ child_page_blocks ,
320
+ ),
304
321
}
305
322
]
306
323
case NotionToDo ():
@@ -309,7 +326,11 @@ def convert_block(
309
326
"type" : "checkListItem" ,
310
327
"content" : convert_rich_texts (block .specific .rich_text ),
311
328
"checked" : block .specific .checked ,
312
- "children" : convert_block_list (block .children , attachments ),
329
+ "children" : convert_block_list (
330
+ block .children ,
331
+ attachments ,
332
+ child_page_blocks ,
333
+ ),
313
334
}
314
335
]
315
336
case NotionCode ():
@@ -336,6 +357,22 @@ def convert_block(
336
357
],
337
358
}
338
359
]
360
+ case NotionChildPage ():
361
+ # TODO: convert to a link
362
+ res = {
363
+ "type" : "paragraph" ,
364
+ "content" : [
365
+ {
366
+ "type" : "link" ,
367
+ "content" : f"Child page: { block .specific .title } " ,
368
+ "href" : "about:blank" , # populated later on
369
+ },
370
+ ],
371
+ }
372
+ child_page_blocks .append (
373
+ ImportedChildPage (child_page_block = block , block_to_update = res )
374
+ )
375
+ return [res ]
339
376
case NotionUnsupported ():
340
377
return [
341
378
{
@@ -375,19 +412,22 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]
375
412
376
413
377
414
def convert_block_list(
    blocks: list[NotionBlock],
    attachments: list[ImportedAttachment],
    child_page_blocks: list[ImportedChildPage],
) -> list[dict[str, Any]]:
    """Convert a sequence of Notion blocks into one flat list of converted blocks.

    Each block is handed to ``convert_block`` in order; since that call
    returns a list, the per-block results are concatenated.

    Args:
        blocks: Notion blocks to convert, in document order.
        attachments: Accumulator passed through to ``convert_block``.
        child_page_blocks: Accumulator passed through to ``convert_block``;
            child-page blocks met during conversion are appended to it.

    Returns:
        The converted blocks, in the same order as ``blocks``.
    """
    return [
        converted
        for block in blocks
        for converted in convert_block(block, attachments, child_page_blocks)
    ]
384
423
385
424
386
425
class ImportedDocument(BaseModel):
    """Result of importing a single Notion page.

    The list fields use ``default_factory`` so every instance starts with its
    own empty list.
    """

    # The Notion page this document was built from.
    page: NotionPage
    # Converted content blocks of the page.
    blocks: list[dict[str, Any]] = Field(default_factory=list)
    # Documents of the pages nested under this one.
    children: list["ImportedDocument"] = Field(default_factory=list)
    # Attachments collected while converting the page's blocks.
    attachments: list[ImportedAttachment] = Field(default_factory=list)
    # Child-page blocks met while converting the page's blocks.
    child_page_blocks: list[ImportedChildPage] = Field(default_factory=list)
391
431
392
432
393
433
def find_block_child_page (block_id : str , all_pages : list [NotionPage ]):
@@ -400,57 +440,62 @@ def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
400
440
return None
401
441
402
442
403
- def convert_child_pages (
404
- session : Session ,
405
- parent : NotionPage ,
406
- blocks : list [NotionBlock ],
407
- all_pages : list [NotionPage ],
408
- ) -> list [ImportedDocument ]:
409
- children = []
410
-
411
- for page in all_pages :
412
- if (
413
- isinstance (page .parent , NotionParentPage )
414
- and page .parent .page_id == parent .id
415
- ):
416
- children .append (import_page (session , page , all_pages ))
417
-
418
- for block in blocks :
419
- if not isinstance (block .specific , NotionChildPage ):
420
- continue
421
-
422
- # TODO: doesn't work, never finds the child
423
- child_page = find_block_child_page (block .id , all_pages )
424
- if child_page == None :
425
- logger .warning (f"Cannot find child page of block { block .id } " )
426
- continue
427
- children .append (import_page (session , child_page , all_pages ))
428
-
429
- return children
430
-
431
-
432
443
def import_page(
    session: Session,
    page: NotionPage,
    child_page_blocs_ids_to_parent_page_ids: dict[str, str],
) -> ImportedDocument:
    """Fetch and convert the blocks of one Notion page.

    Args:
        session: HTTP session used to fetch the page's blocks.
        page: The Notion page to import.
        child_page_blocs_ids_to_parent_page_ids: Shared registry, mutated in
            place: every child-page block found in this page is recorded as
            ``block id -> page.id``.

    Returns:
        The converted document. Its ``children`` are left empty here.
    """
    blocks = fetch_block_children(session, page.id)
    logger.info(f"Page {page.get_title()} (id {page.id})")
    logger.info(blocks)

    # Both accumulators are filled as a side effect of the conversion.
    attachments: list[ImportedAttachment] = []
    child_page_blocks: list[ImportedChildPage] = []
    converted_blocks = convert_block_list(blocks, attachments, child_page_blocks)

    # Remember which page each child-page block was found in.
    child_page_blocs_ids_to_parent_page_ids.update(
        (entry.child_page_block.id, page.id) for entry in child_page_blocks
    )

    # NOTE(review): pydantic may copy the `blocks` dicts during validation, in
    # which case `block_to_update` would no longer alias the dicts stored on
    # the document -- confirm before relying on in-place patching.
    return ImportedDocument(
        page=page,
        blocks=converted_blocks,
        attachments=attachments,
        child_page_blocks=child_page_blocks,
    )
446
468
447
469
448
470
def import_notion(token: str) -> list[ImportedDocument]:
    """Import all Notion pages accessible with ``token`` and return the root documents.

    Works in two passes: every page is first converted on its own with
    ``import_page``; the hierarchy is then rebuilt by appending each document
    to the ``children`` of its parent document.

    Args:
        token: Notion API token used to build the HTTP session.

    Returns:
        The documents whose Notion parent is the workspace. Nested documents
        are reachable through ``ImportedDocument.children``. Pages whose
        parent document cannot be resolved are logged and left out.
    """
    session = build_notion_session(token)
    all_pages = fetch_all_pages(session)

    docs_by_page_id: dict[str, ImportedDocument] = {}
    # Filled by import_page: child-page block id -> id of the page holding it.
    parent_page_id_by_child_block_id: dict[str, str] = {}
    for page in all_pages:
        docs_by_page_id[page.id] = import_page(
            session, page, parent_page_id_by_child_block_id
        )

    root_pages: list[ImportedDocument] = []
    for page in all_pages:
        doc = docs_by_page_id[page.id]
        if isinstance(page.parent, NotionParentPage):
            # The parent page may be missing from the fetched pages; a plain
            # index lookup here would raise KeyError and abort the import.
            parent_doc = docs_by_page_id.get(page.parent.page_id)
            if parent_doc is None:
                logger.warning(
                    f"Page {page.id} has parent page {page.parent.page_id}, "
                    "but that page was not imported."
                )
                continue
            parent_doc.children.append(doc)
        elif isinstance(page.parent, NotionParentBlock):
            # The registry is keyed by child-page block id but looked up with
            # the page's own id -- presumably a child-page block shares its id
            # with the page it represents (TODO confirm against the Notion API).
            parent_page_id = parent_page_id_by_child_block_id.get(page.id)
            parent_doc = (
                docs_by_page_id.get(parent_page_id) if parent_page_id else None
            )
            if parent_doc is None:
                logger.warning(
                    f"Page {page.id} has a parent block, but no parent page found."
                )
                continue
            parent_doc.children.append(doc)
        elif isinstance(page.parent, NotionParentWorkspace):
            # This is a root page, not a child of another page.
            root_pages.append(doc)
        # Pages with any other kind of parent are converted but not attached.

    return root_pages
0 commit comments