2
2
import logging
3
3
from typing import Any
4
4
5
- from pydantic import BaseModel , TypeAdapter
5
+ from pydantic import BaseModel , Field , TypeAdapter
6
6
from requests import Session
7
7
8
8
from ..notion_schemas .notion_block import (
@@ -135,7 +135,7 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]]
135
135
{
136
136
"type" : "link" ,
137
137
"content" : [convert_rich_text (rich_text )],
138
- "href" : rich_text .href ,
138
+ "href" : rich_text .href , # FIXME: if it was a notion link, we should convert it to a link to the document
139
139
}
140
140
)
141
141
else :
@@ -156,6 +156,11 @@ class ImportedAttachment(BaseModel):
156
156
file : NotionFileHosted
157
157
158
158
159
class ImportedChildPage(BaseModel):
    """Pairs a Notion child-page block with the converted block that stands in for it.

    Instances are created by ``convert_block`` when it meets a child-page block.
    """

    # The Notion block whose specific payload is a child page.
    child_page_block: NotionBlock

    # The converted placeholder block emitted for that child page.
    # NOTE(review): typed as ``Any``, presumably so the very same object can be
    # patched in place once the real link target is known - confirm before
    # narrowing the type, since validation of a stricter type may copy it.
    block_to_update: Any
162
+
163
+
159
164
def convert_image (
160
165
image : NotionImage , attachments : list [ImportedAttachment ]
161
166
) -> list [dict [str , Any ]]:
@@ -185,17 +190,21 @@ def convert_image(
185
190
186
191
187
192
def convert_block (
188
- block : NotionBlock , attachments : list [ImportedAttachment ]
193
+ block : NotionBlock ,
194
+ attachments : list [ImportedAttachment ],
195
+ child_page_blocks : list [ImportedChildPage ],
189
196
) -> list [dict [str , Any ]]:
190
197
match block .specific :
191
198
case NotionColumnList ():
192
199
columns_content = []
193
200
for column in block .children :
194
- columns_content .extend (convert_block (column , attachments ))
201
+ columns_content .extend (
202
+ convert_block (column , attachments , child_page_blocks )
203
+ )
195
204
return columns_content
196
205
case NotionColumn ():
197
206
return [
198
- convert_block (child_content , attachments )[0 ]
207
+ convert_block (child_content , attachments , child_page_blocks )[0 ]
199
208
for child_content in block .children
200
209
]
201
210
@@ -222,7 +231,7 @@ def convert_block(
222
231
}
223
232
]
224
233
# case NotionDivider():
225
- # return {"type": "divider", "properties": {}}
234
+ # return [ {"type": "divider"}]
226
235
case NotionCallout ():
227
236
return [
228
237
{
@@ -289,15 +298,23 @@ def convert_block(
289
298
{
290
299
"type" : "bulletListItem" ,
291
300
"content" : convert_rich_texts (block .specific .rich_text ),
292
- "children" : convert_block_list (block .children , attachments ),
301
+ "children" : convert_block_list (
302
+ block .children ,
303
+ attachments ,
304
+ child_page_blocks ,
305
+ ),
293
306
}
294
307
]
295
308
case NotionNumberedListItem ():
296
309
return [
297
310
{
298
311
"type" : "numberedListItem" ,
299
312
"content" : convert_rich_texts (block .specific .rich_text ),
300
- "children" : convert_block_list (block .children , attachments ),
313
+ "children" : convert_block_list (
314
+ block .children ,
315
+ attachments ,
316
+ child_page_blocks ,
317
+ ),
301
318
}
302
319
]
303
320
case NotionToDo ():
@@ -306,7 +323,11 @@ def convert_block(
306
323
"type" : "checkListItem" ,
307
324
"content" : convert_rich_texts (block .specific .rich_text ),
308
325
"checked" : block .specific .checked ,
309
- "children" : convert_block_list (block .children , attachments ),
326
+ "children" : convert_block_list (
327
+ block .children ,
328
+ attachments ,
329
+ child_page_blocks ,
330
+ ),
310
331
}
311
332
]
312
333
case NotionCode ():
@@ -333,6 +354,22 @@ def convert_block(
333
354
],
334
355
}
335
356
]
357
+ case NotionChildPage ():
358
+ # TODO: convert to a link
359
+ res = {
360
+ "type" : "paragraph" ,
361
+ "content" : [
362
+ {
363
+ "type" : "link" ,
364
+ "content" : f"Child page: { block .specific .title } " ,
365
+ "href" : "about:blank" , # populated later on
366
+ },
367
+ ],
368
+ }
369
+ child_page_blocks .append (
370
+ ImportedChildPage (child_page_block = block , block_to_update = res )
371
+ )
372
+ return [res ]
336
373
case NotionUnsupported ():
337
374
return [
338
375
{
@@ -368,19 +405,22 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]
368
405
369
406
370
407
def convert_block_list(
    blocks: list[NotionBlock],
    attachments: list[ImportedAttachment],
    child_page_blocks: list[ImportedChildPage],
) -> list[dict[str, Any]]:
    """Convert every Notion block in order and flatten the results into one list.

    ``attachments`` and ``child_page_blocks`` are handed unchanged to
    ``convert_block`` for each block, so whatever that function appends to
    them is visible to the caller afterwards.
    """
    # One Notion block may convert to several output blocks, hence the
    # nested iteration rather than a one-to-one mapping.
    return [
        converted
        for notion_block in blocks
        for converted in convert_block(notion_block, attachments, child_page_blocks)
    ]
377
416
378
417
379
418
class ImportedDocument(BaseModel):
    """Outcome of importing a single Notion page.

    Every list field uses ``default_factory=list`` so each instance starts
    with its own empty list.
    """

    # The Notion page this document was built from.
    page: NotionPage

    # Converted content blocks of the page.
    blocks: list[dict[str, Any]] = Field(default_factory=list)

    # Documents nested under this one.
    children: list["ImportedDocument"] = Field(default_factory=list)

    # Attachments collected while converting the page's blocks.
    attachments: list[ImportedAttachment] = Field(default_factory=list)

    # Child-page blocks collected while converting the page's blocks.
    child_page_blocks: list[ImportedChildPage] = Field(default_factory=list)
384
424
385
425
386
426
def find_block_child_page (block_id : str , all_pages : list [NotionPage ]):
@@ -393,57 +433,62 @@ def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
393
433
return None
394
434
395
435
396
- def convert_child_pages (
397
- session : Session ,
398
- parent : NotionPage ,
399
- blocks : list [NotionBlock ],
400
- all_pages : list [NotionPage ],
401
- ) -> list [ImportedDocument ]:
402
- children = []
403
-
404
- for page in all_pages :
405
- if (
406
- isinstance (page .parent , NotionParentPage )
407
- and page .parent .page_id == parent .id
408
- ):
409
- children .append (import_page (session , page , all_pages ))
410
-
411
- for block in blocks :
412
- if not isinstance (block .specific , NotionChildPage ):
413
- continue
414
-
415
- # TODO: doesn't work, never finds the child
416
- child_page = find_block_child_page (block .id , all_pages )
417
- if child_page == None :
418
- logger .warning (f"Cannot find child page of block { block .id } " )
419
- continue
420
- children .append (import_page (session , child_page , all_pages ))
421
-
422
- return children
423
-
424
-
425
436
def import_page(
    session: Session,
    page: NotionPage,
    child_page_blocs_ids_to_parent_page_ids: dict[str, str],
) -> ImportedDocument:
    """Fetch the blocks of ``page`` and convert them into an ``ImportedDocument``.

    Args:
        session: Session handed to ``fetch_block_children``.
        page: The Notion page to import.
        child_page_blocs_ids_to_parent_page_ids: Mapping updated in place; each
            child-page block met during conversion is recorded as
            ``block id -> page.id``.

    Returns:
        A document holding the converted blocks, the collected attachments and
        the collected child-page blocks. ``children`` is not set here and
        keeps its default.
    """
    notion_blocks = fetch_block_children(session, page.id)
    logger.info(f"Page {page.get_title()} (id {page.id})")
    logger.info(notion_blocks)

    # Both collectors are filled as a side effect of the conversion below.
    collected_attachments: list[ImportedAttachment] = []
    collected_child_pages: list[ImportedChildPage] = []
    document_blocks = convert_block_list(
        notion_blocks, collected_attachments, collected_child_pages
    )

    # Remember which page each child-page block was found on.
    child_page_blocs_ids_to_parent_page_ids.update(
        {entry.child_page_block.id: page.id for entry in collected_child_pages}
    )

    return ImportedDocument(
        page=page,
        blocks=document_blocks,
        attachments=collected_attachments,
        child_page_blocks=collected_child_pages,
    )
439
461
440
462
441
463
def import_notion(token: str) -> list[ImportedDocument]:
    """Recursively imports all Notion pages and blocks accessible using the given token.

    Works in two passes: every fetched page is first imported on its own, then
    the resulting documents are linked into a tree according to each page's
    parent.

    Args:
        token: Token used to build the Notion session.

    Returns:
        The documents whose page has the workspace as parent, with their
        descendants attached through ``children``. Pages whose parent cannot
        be resolved to an imported page are logged and left out of the tree.
    """
    session = build_notion_session(token)
    all_pages = fetch_all_pages(session)

    # Pass 1: import every page and record, for each child-page block met
    # along the way, the id of the page that contains it.
    docs_by_page_id: dict[str, ImportedDocument] = {}
    child_page_blocs_ids_to_parent_page_ids: dict[str, str] = {}
    for page in all_pages:
        docs_by_page_id[page.id] = import_page(
            session, page, child_page_blocs_ids_to_parent_page_ids
        )

    # Pass 2: attach each document to its parent document.
    root_pages: list[ImportedDocument] = []
    for page in all_pages:
        doc = docs_by_page_id[page.id]
        parent = page.parent

        if isinstance(parent, NotionParentPage):
            parent_page_id = parent.page_id
        elif isinstance(parent, NotionParentBlock):
            # The containing page was recorded under the page's own id by
            # import_page (keyed by the child-page block id).
            parent_page_id = child_page_blocs_ids_to_parent_page_ids.get(page.id)
            if not parent_page_id:
                logger.warning(
                    "Page %s has a parent block, but no parent page found.",
                    page.id,
                )
                continue
        elif isinstance(parent, NotionParentWorkspace):
            # This is a root page, not a child of another page.
            root_pages.append(doc)
            continue
        else:
            # Any other parent kind is not part of the page tree.
            continue

        # The parent page may be missing from the fetched pages; a plain
        # subscript would raise KeyError and abort the whole import.
        parent_doc = docs_by_page_id.get(parent_page_id)
        if parent_doc is None:
            logger.warning(
                "Page %s has parent page %s, which was not imported.",
                page.id,
                parent_page_id,
            )
            continue
        parent_doc.children.append(doc)

    return root_pages
0 commit comments