@@ -442,7 +442,9 @@ def _to_segmented_page(
442442 segmented_page .word_cells = self ._to_cells (page ["word_cells" ])
443443 segmented_page .has_words = len (segmented_page .word_cells ) > 0
444444 elif keep_chars :
445- logging .warning ("`words` will be created for segmented_page in an inefficient way!" )
445+ logging .warning (
446+ "`words` will be created for segmented_page in an inefficient way!"
447+ )
446448 self ._create_word_cells (segmented_page , enforce_same_font = enforce_same_font )
447449 else :
448450 logging .warning ("No `words` will be created for segmented_page" )
@@ -451,7 +453,9 @@ def _to_segmented_page(
451453 segmented_page .textline_cells = self ._to_cells (page ["line_cells" ])
452454 segmented_page .has_lines = len (segmented_page .textline_cells ) > 0
453455 elif keep_chars :
454- logging .warning ("`text_lines` will be created for segmented_page in an inefficient way!" )
456+ logging .warning (
457+ "`text_lines` will be created for segmented_page in an inefficient way!"
458+ )
455459 self ._create_textline_cells (
456460 segmented_page , enforce_same_font = enforce_same_font
457461 )
@@ -464,6 +468,7 @@ def _create_word_cells(
464468 self ,
465469 segmented_page : SegmentedPdfPage ,
466470 * ,
471+ horizontal_cell_tolerance : float = 1.0 ,
467472 space_width_factor_for_merge : float = 0.33 ,
468473 enforce_same_font : bool = True ,
469474 _loglevel : str = "fatal" ,
@@ -485,6 +490,7 @@ def _create_word_cells(
485490
486491 # data = sanitizer.create_word_cells(space_width_factor_for_merge=0.33)
487492 data = sanitizer .create_word_cells (
493+ horizontal_cell_tolerance = horizontal_cell_tolerance ,
488494 space_width_factor_for_merge = space_width_factor_for_merge ,
489495 enforce_same_font = enforce_same_font ,
490496 )
@@ -500,6 +506,7 @@ def _create_textline_cells(
500506 self ,
501507 segmented_page : SegmentedPdfPage ,
502508 * ,
509+ horizontal_cell_tolerance : float = 1.0 ,
503510 space_width_factor_for_merge : float = 1.0 ,
504511 space_width_factor_for_merge_with_space : float = 0.33 ,
505512 enforce_same_font : bool = True ,
@@ -526,6 +533,7 @@ def _create_textline_cells(
526533
527534 # data = sanitizer.create_line_cells()
528535 data = sanitizer .create_line_cells (
536+ horizontal_cell_tolerance = horizontal_cell_tolerance ,
529537 space_width_factor_for_merge = space_width_factor_for_merge ,
530538 space_width_factor_for_merge_with_space = space_width_factor_for_merge_with_space ,
531539 enforce_same_font = enforce_same_font ,
0 commit comments