Skip to content

Commit 8e0e9b7

Browse files
maxmnemonic and Maksym Lysak authored
fix: Automatic transformation of output cells bbox coord origin defined by input in get_cells_in_bbox (#219)
* Added automatic transformation of page cell_bbox coord origin to the same as input bbox.coord_origin when requesting get_cells_in_bbox

  Signed-off-by: Maksym Lysak <[email protected]>

* corrected page_cell rect coord origin switch logic

  Signed-off-by: Maksym Lysak <[email protected]>

* avoid modifying original page cells in get_cells_in_bbox, instead make a copy

  Signed-off-by: Maksym Lysak <[email protected]>

---------

Signed-off-by: Maksym Lysak <[email protected]>
Co-authored-by: Maksym Lysak <[email protected]>
1 parent ba0fbde commit 8e0e9b7

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

docling_core/types/doc/page.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
"""Datastructures for PaginatedDocument."""
22

3+
import copy
34
import json
45
import logging
56
import math
@@ -530,10 +531,16 @@ def get_cells_in_bbox(
530531
"""
531532
cells = []
532533
for page_cell in self.iterate_cells(cell_unit):
533-
cell_bbox = page_cell.to_bounding_box()
534+
pc = copy.deepcopy(page_cell)
535+
# Bring cell_bbox coord origin to the same as input bbox.coord_origin:
536+
if page_cell.rect.coord_origin != bbox.coord_origin:
537+
if bbox.coord_origin == CoordOrigin.TOPLEFT:
538+
pc.rect = pc.rect.to_top_left_origin(self.dimension.height)
539+
elif bbox.coord_origin == CoordOrigin.BOTTOMLEFT:
540+
pc.rect = pc.rect.to_bottom_left_origin(self.dimension.height)
541+
cell_bbox = pc.to_bounding_box()
534542
if cell_bbox.intersection_over_self(bbox) > ios:
535-
cells.append(page_cell)
536-
543+
cells.append(pc)
537544
return cells
538545

539546
def export_to_dict(self) -> Dict:

0 commit comments

Comments
 (0)