docling-project
diff --git a/‎docling/models/layout_model.py‎
Lines changed: 8 additions & 13 deletions b/‎docling/models/layout_model.py‎
Lines changed: 8 additions & 13 deletions
diff --git a/‎docling/models/table_structure_model.py‎
Lines changed: 54 additions & 21 deletions b/‎docling/models/table_structure_model.py‎
Lines changed: 54 additions & 21 deletions
diff --git a/‎docling/models/tesseract_ocr_cli_model.py‎
Lines changed: 0 additions & 1 deletion b/‎docling/models/tesseract_ocr_cli_model.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎docling/utils/ocr_utils.py‎
Lines changed: 6 additions & 4 deletions b/‎docling/utils/ocr_utils.py‎
Lines changed: 6 additions & 4 deletions
diff --git a/‎docling/utils/orientation.py‎
Lines changed: 27 additions & 37 deletions b/‎docling/utils/orientation.py‎
Lines changed: 27 additions & 37 deletions
diff --git a/‎tests/data_scanned/groundtruth/docling_v1/ocr_test.doctags.txt‎
Lines changed: 7 additions & 1 deletion b/‎tests/data_scanned/groundtruth/docling_v1/ocr_test.doctags.txt‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎tests/data_scanned/groundtruth/docling_v1/ocr_test.md‎
Lines changed: 5 additions & 1 deletion b/‎tests/data_scanned/groundtruth/docling_v1/ocr_test.md‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.doctags.txt‎
Lines changed: 0 additions & 3 deletions b/‎tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.doctags.txt‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.json‎
Lines changed: 0 additions & 1 deletion b/‎tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.json‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.md‎
Lines changed: 0 additions & 1 deletion b/‎tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.md‎
Lines changed: 0 additions & 1 deletion
@@ -1,8 +1,8 @@
 import copy
 import logging
 import warnings
-from copy import deepcopy
 from collections.abc import Iterable
+from copy import deepcopy
 from pathlib import Path
 from typing import Optional
 
@@ -18,7 +18,7 @@
 from docling.models.utils.hf_model_download import download_hf_model
 from docling.utils.accelerator_utils import decide_device
 from docling.utils.layout_postprocessor import LayoutPostprocessor
-from docling.utils.orientation import detect_orientation
+from docling.utils.orientation import detect_orientation, rotate_bounding_box
 from docling.utils.profiling import TimeRecorder
 from docling.utils.visualization import draw_clusters
 
@@ -99,7 +99,6 @@ def draw_clusters_and_cells_side_by_side(
         self,
         conv_res,
         page,
-        page_orientation: int,
         clusters,
         mode_prefix: str,
         show: bool = False,
@@ -113,10 +112,6 @@ def draw_clusters_and_cells_side_by_side(
         page_image = deepcopy(page.image)
         scale_x = page_image.width / page.size.width
         scale_y = page_image.height / page.size.height
-        if page_orientation:
-            page_image = page_image.rotate(-page_orientation, expand=True)
-            if abs(page_orientation) in [90, 270]:
-                scale_x, scale_y = scale_y, scale_x
         # Filter clusters for left and right images
         exclude_labels = {
             DocItemLabel.FORM,
@@ -132,9 +127,6 @@ def draw_clusters_and_cells_side_by_side(
         # Draw clusters on both images
         draw_clusters(left_image, left_clusters, scale_x, scale_y)
         draw_clusters(right_image, right_clusters, scale_x, scale_y)
-        if page_orientation:
-            left_image = left_image.rotate(page_orientation, expand=True)
-            right_image = right_image.rotate(page_orientation, expand=True)
         # Combine the images side by side
         combined_width = left_image.width * 2
         combined_height = left_image.height
@@ -177,11 +169,16 @@ def __call__(
                             .replace(" ", "_")
                             .replace("-", "_")
                         )  # Temporary, until docling-ibm-model uses docling-core types
+                        bbox = BoundingBox.model_validate(pred_item)
+                        if page_orientation:
+                            bbox = rotate_bounding_box(
+                                bbox, page_orientation, page_image.size
+                            ).to_bounding_box()
                         cluster = Cluster(
                             id=ix,
                             label=label,
                             confidence=pred_item["confidence"],
-                            bbox=BoundingBox.model_validate(pred_item),
+                            bbox=bbox,
                             cells=[],
                         )
                         clusters.append(cluster)
@@ -190,7 +187,6 @@ def __call__(
                         self.draw_clusters_and_cells_side_by_side(
                             conv_res,
                             page,
-                            page_orientation,
                             clusters,
                             mode_prefix="raw",
                         )
@@ -228,7 +224,6 @@ def __call__(
                     self.draw_clusters_and_cells_side_by_side(
                         conv_res,
                         page,
-                        page_orientation,
                         processed_clusters,
                         mode_prefix="postprocessed",
                     )
 
@@ -1,8 +1,7 @@
 import copy
 import warnings
-from collections.abc import Iterable
 from pathlib import Path
-from typing import Optional
+from typing import Iterable, Optional, Tuple, cast
 
 import numpy
 from docling_core.types.doc import BoundingBox, DocItemLabel, TableCell
@@ -11,6 +10,7 @@
     TextCellUnit,
 )
 from PIL import ImageDraw
+from PIL.Image import Image
 
 from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
 from docling.datamodel.base_models import Page, Table, TableStructurePrediction
@@ -23,13 +23,16 @@
 from docling.models.base_model import BasePageModel
 from docling.models.utils.hf_model_download import download_hf_model
 from docling.utils.accelerator_utils import decide_device
+from docling.utils.orientation import detect_orientation, rotate_bounding_box
 from docling.utils.profiling import TimeRecorder
 
 
 class TableStructureModel(BasePageModel):
     _model_repo_folder = "ds4sd--docling-models"
     _model_path = "model_artifacts/tableformer"
 
+    _table_labels = {DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX}
+
     def __init__(
         self,
         enabled: bool,
@@ -186,31 +189,48 @@ def __call__(
                     page.predictions.tablestructure = (
                         TableStructurePrediction()
                     )  # dummy
+                    cells_orientation = detect_orientation(page.cells)
+                    # Keep only table bboxes
+                    in_tables_clusters = [
+                        cluster
+                        for cluster in page.predictions.layout.clusters
+                        if cluster.label in self._table_labels
+                    ]
 
+                    if not len(in_tables_clusters):
+                        yield page
+                        continue
+                    # Rotate and scale the page image
+                    page_im = cast(Image, page.get_image())
+                    scaled_page_im: Image = cast(
+                        Image, page.get_image(scale=self.scale)
+                    )
+                    if cells_orientation:
+                        scaled_page_im = scaled_page_im.rotate(
+                            -cells_orientation, expand=True
+                        )
+                    page_input = {
+                        "width": scaled_page_im.size[0],
+                        "height": scaled_page_im.size[1],
+                        "image": numpy.asarray(scaled_page_im),
+                    }
+                    # Rotate and scale the table bounding boxes
                     in_tables = [
                         (
-                            cluster,
+                            c,
                             [
-                                round(cluster.bbox.l) * self.scale,
-                                round(cluster.bbox.t) * self.scale,
-                                round(cluster.bbox.r) * self.scale,
-                                round(cluster.bbox.b) * self.scale,
+                                round(x) * self.scale
+                                for x in _rotate_bbox(
+                                    c.bbox,
+                                    orientation=-cells_orientation,
+                                    im_size=page_im.size,
+                                )
+                                .to_top_left_origin(page_im.size[1])
+                                .as_tuple()
                             ],
                         )
-                        for cluster in page.predictions.layout.clusters
-                        if cluster.label
-                        in [DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX]
+                        for c in in_tables_clusters
                     ]
-                    if not len(in_tables):
-                        yield page
-                        continue
-
-                    page_input = {
-                        "width": page.size.width * self.scale,
-                        "height": page.size.height * self.scale,
-                        "image": numpy.asarray(page.get_image(scale=self.scale)),
-                    }
-
                     table_clusters, table_bboxes = zip(*in_tables)
 
                     if len(table_bboxes):
@@ -238,11 +258,16 @@ def __call__(
                                             scale=self.scale
                                         )
                                     )
+                                    new_bbox = _rotate_bbox(
+                                        new_cell.to_bounding_box(),
+                                        orientation=-cells_orientation,
+                                        im_size=scaled_page_im.size,
+                                    ).model_dump()
                                     tokens.append(
                                         {
                                             "id": new_cell.index,
                                             "text": new_cell.text,
-                                            "bbox": new_cell.rect.to_bounding_box().model_dump(),
+                                            "bbox": new_bbox,
                                         }
                                     )
                             page_input["tokens"] = tokens
@@ -302,3 +327,11 @@ def __call__(
                         )
 
                 yield page
+
+
+def _rotate_bbox(
+    bbox: BoundingBox, *, orientation: int, im_size: Tuple[int, int]
+) -> BoundingBox:
+    """Rotate ``bbox`` by ``orientation``; a falsy orientation is a no-op.
+
+    For a non-zero orientation the box goes through ``rotate_bounding_box`` and
+    the resulting rectangle is converted back with ``to_bounding_box()``.
+    """
+    if not orientation:
+        return bbox
+    rotated_rect = rotate_bounding_box(bbox, orientation, im_size)
+    return rotated_rect.to_bounding_box()
@@ -27,7 +27,6 @@
     parse_tesseract_orientation,
     tesseract_box_to_bounding_rectangle,
 )
-from docling.utils.orientation import Box
 from docling.utils.profiling import TimeRecorder
 
 _log = logging.getLogger(__name__)
 
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Tuple
 
 from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling_core.types.doc.page import BoundingRectangle
@@ -43,8 +43,10 @@ def tesseract_box_to_bounding_rectangle(
     orientation: int,
     im_size: Tuple[int, int],
 ) -> BoundingRectangle:
-    # box is in the top, left, height, width format, top left coordinates
-    rect = rotate_bounding_box(bbox, angle=-orientation, im_size=im_size)
+    # bbox is given as top, left, height, width, with a top-left coordinate origin.
+    # Tesseract ran on the page rotated by minus the orientation, so we have to
+    # apply the orientation angle to the detected box.
+    rect = rotate_bounding_box(bbox, angle=orientation, im_size=im_size)
     rect = BoundingRectangle(
         r_x0=rect.r_x0 / scale,
         r_y0=rect.r_y0 / scale,
@@ -54,7 +56,7 @@ def tesseract_box_to_bounding_rectangle(
         r_y2=rect.r_y2 / scale,
         r_x3=rect.r_x3 / scale,
         r_y3=rect.r_y3 / scale,
-        coord_origin=CoordOrigin.TOPLEFT,
+        coord_origin=rect.coord_origin,
     )
     if original_offset is not None:
         if original_offset.coord_origin is not CoordOrigin.TOPLEFT:
 
@@ -1,26 +1,22 @@
 from collections import Counter
 from operator import itemgetter
+from typing import Tuple
 
-from docling_core.types.doc.page import TextCell
+from docling_core.types.doc import BoundingBox, CoordOrigin
+from docling_core.types.doc.page import BoundingRectangle, TextCell
 
-_ORIENTATIONS = [0, 90, 180, 270]
+CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
 
 
 def _clipped_orientation(angle: float) -> int:
-    return min((abs(angle - o) % 360, o) for o in _ORIENTATIONS)[1]
+    """Snap ``angle`` (degrees) to the nearest value in CLIPPED_ORIENTATIONS.
+
+    The distance is measured around the circle, so angles close to 360 clip
+    to 0 (e.g. 350 -> 0) instead of 270. Ties go to the smaller orientation.
+    """
+
+    def circular_distance(target: int) -> float:
+        # Signed difference folded into [-180, 180), then made absolute.
+        return abs((angle - target + 180) % 360 - 180)
+
+    return min((circular_distance(o), o) for o in CLIPPED_ORIENTATIONS)[1]
 
 
 def detect_orientation(cells: list[TextCell]) -> int:
+    """Return the most common clipped orientation among ``cells``.
+
+    Each cell's ``rect.angle_360`` is passed through ``_clipped_orientation``
+    and the value with the highest count is returned; an empty list yields 0.
+    """
     if not cells:
         return 0
     orientation_counter = Counter(_clipped_orientation(c.rect.angle_360) for c in cells)
     return max(orientation_counter.items(), key=itemgetter(1))[0]
-from typing import Tuple
-
-from docling_core.types.doc import BoundingBox, CoordOrigin
-from docling_core.types.doc.page import BoundingRectangle
-
-CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
 
 
 def rotate_bounding_box(
@@ -31,51 +27,44 @@ def rotate_bounding_box(
     # coordinate system. Then other corners are found rotating counterclockwise
     bbox = bbox.to_top_left_origin(im_size[1])
     left, top, width, height = bbox.l, bbox.t, bbox.width, bbox.height
-    im_h, im_w = im_size
+    im_w, im_h = im_size
     angle = angle % 360
     if angle == 0:
-        r_x0 = left
-        r_y0 = top + height
-        r_x1 = r_x0 + width
-        r_y1 = r_y0
-        r_x2 = r_x0 + width
-        r_y2 = r_y0 - height
-        r_x3 = r_x0
-        r_y3 = r_y0 - height
+        return BoundingRectangle.from_bounding_box(bbox)
     elif angle == 90:
-        r_x0 = im_w - (top + height)
-        r_y0 = left
+        r_x0 = top + height
+        r_y0 = im_w - left
         r_x1 = r_x0
-        r_y1 = r_y0 + width
-        r_x2 = r_x0 + height
-        r_y2 = r_y0 + width
-        r_x3 = r_x0
-        r_y3 = r_y0 + width
+        r_y1 = r_y0 - width
+        r_x2 = r_x1 - height
+        r_y2 = r_y1
+        r_x3 = r_x2
+        r_y3 = r_y0
     elif angle == 180:
-        r_x0 = im_h - left
-        r_y0 = im_w - (top + height)
+        r_x0 = width + left
+        r_y0 = im_h - (top + height)
         r_x1 = r_x0 - width
         r_y1 = r_y0
-        r_x2 = r_x0 - width
-        r_y2 = r_y0 + height
+        r_x2 = r_x1
+        r_y2 = r_x2 + height
         r_x3 = r_x0
-        r_y3 = r_y0 + height
+        r_y3 = r_y2
     elif angle == 270:
-        r_x0 = top + height
-        r_y0 = im_h - left
+        r_x0 = im_h - (top + height)
+        r_y0 = left
         r_x1 = r_x0
-        r_y1 = r_y0 - width
-        r_x2 = r_x0 - height
-        r_y2 = r_y0 - width
-        r_x3 = r_x0 - height
+        r_y1 = r_y0 + width
+        r_x2 = r_x1 + height
+        r_y2 = r_y1
+        r_x3 = r_x2
         r_y3 = r_y0
     else:
         msg = (
             f"invalid orientation {angle}, expected values in:"
             f" {sorted(CLIPPED_ORIENTATIONS)}"
         )
         raise ValueError(msg)
-    return BoundingRectangle(
+    rectangle = BoundingRectangle(
         r_x0=r_x0,
         r_y0=r_y0,
         r_x1=r_x1,
@@ -86,3 +75,4 @@ def rotate_bounding_box(
         r_y3=r_y3,
         coord_origin=CoordOrigin.TOPLEFT,
     )
+    return rectangle
@@ -1,3 +1,9 @@
 <document>
-<paragraph><location><page_1><loc_12><loc_82><loc_85><loc_91></location>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</paragraph>
+<table>
+<location><page_1><loc_12><loc_39><loc_67><loc_87></location>
+<row_0><col_0><body></col_0><col_1><col_header>Column 0</col_1><col_2><col_header>Column 1</col_2><col_3><col_header>Column 2</col_3></row_0>
+<row_1><col_0><row_header>this is row 0</col_0><col_1><body>some cells</col_1><col_2><body>have content</col_2><col_3><body>and</col_3></row_1>
+<row_2><col_0><row_header>and row 1</col_0><col_1><body></col_1><col_2><body>other</col_2><col_3><body>have</col_3></row_2>
+<row_3><col_0><row_header>and last row 2</col_0><col_1><body>nothing</col_1><col_2><body></col_2><col_3><body>inside</col_3></row_3>
+</table>
 </document>
@@ -1 +1,5 @@
-Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package
+|                | Column 0   | Column 1     | Column 2   |
+|----------------|------------|--------------|------------|
+| this is row 0  | some cells | have content | and        |
+| and row 1      |            | other        | have       |
+| and last row 2 | nothing    |              | inside     |
Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,6 @@`
`27`	`27`	`parse_tesseract_orientation,`
`28`	`28`	`tesseract_box_to_bounding_rectangle,`
`29`	`29`	`)`
`30`		`-from docling.utils.orientation import Box`
`31`	`30`	`from docling.utils.profiling import TimeRecorder`
`32`	`31`
`33`	`32`	`_log = logging.getLogger(__name__)`