@@ -318,11 +318,13 @@ def _to_segmented_page(
318318 self , page : dict , create_words : bool , create_textlines : bool
319319 ) -> SegmentedPdfPage :
320320
321+ char_cells = self ._to_cells (page ["cells" ])
321322 segmented_page = SegmentedPdfPage (
322323 dimension = self ._to_page_geometry (page ["dimension" ]),
323- char_cells = self . _to_cells ( page [ "cells" ]) ,
324+ char_cells = char_cells ,
324325 word_cells = [],
325326 textline_cells = [],
327+ has_chars = len (char_cells ) > 0 ,
326328 bitmap_resources = self ._to_bitmap_resources (page ["images" ]),
327329 lines = self ._to_lines (page ["lines" ]),
328330 )
@@ -360,6 +362,8 @@ def _create_word_cells(
360362 cell = PdfTextCell .model_validate (item )
361363 segmented_page .word_cells .append (cell )
362364
365+ segmented_page .has_words = len (segmented_page .word_cells ) > 0
366+
363367 def _create_textline_cells (
364368 self , segmented_page : SegmentedPdfPage , _loglevel : str = "fatal"
365369 ):
@@ -390,6 +394,8 @@ def _create_textline_cells(
390394 cell = PdfTextCell .model_validate (item )
391395 segmented_page .textline_cells .append (cell )
392396
397+ segmented_page .has_lines = len (segmented_page .textline_cells ) > 0
398+
393399 def _to_parsed_document (
394400 self ,
395401 doc_dict : dict ,
0 commit comments