Skip to content

Commit b7d4715

Browse files
committed
fix(layout,table): use default v4 backend for e2e OCR test and fix table structure detection
Signed-off-by: Clément Doumouro <[email protected]>
1 parent 527d63b commit b7d4715

36 files changed

+24637
-4183
lines changed

docling/models/layout_model.py

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
from collections.abc import Iterable
55
from copy import deepcopy
66
from pathlib import Path
7-
from typing import List, Optional, Union
7+
from typing import List, Optional, Union, cast
88

99
import numpy as np
1010
from docling_core.types.doc import DocItemLabel
@@ -195,7 +195,7 @@ def __call__(
195195
continue
196196

197197
page_predictions = batch_predictions[valid_page_idx]
198-
page_image = valid_page_images[valid_page_idx]
198+
page_image = valid_page_images[valid_page_idx] # type: ignore[assignment]
199199
page_orientation = valid_page_orientations[valid_page_idx]
200200
valid_page_idx += 1
201201

@@ -207,23 +207,25 @@ def __call__(
207207
bbox = BoundingBox.model_validate(pred_item)
208208
if page_orientation:
209209
bbox = rotate_bounding_box(
210-
bbox, page_orientation, page_image.size
210+
bbox,
211+
page_orientation,
212+
page_image.size, # type: ignore[union-attr]
211213
).to_bounding_box()
212214
cluster = Cluster(
213215
id=ix,
214-
label=label,
215-
confidence=pred_item["confidence"],
216-
bbox=bbox,
216+
label=label,
217+
confidence=pred_item["confidence"],
218+
bbox=bbox,
217219
cells=[],
218220
)
219221
clusters.append(cluster)
220222

221223
if settings.debug.visualize_raw_layout:
222224
self.draw_clusters_and_cells_side_by_side(
223225
conv_res,
224-
page,
225-
clusters,
226-
mode_prefix="raw",
226+
page,
227+
clusters,
228+
mode_prefix="raw",
227229
)
228230

229231
# Apply postprocessing
@@ -253,9 +255,9 @@ def __call__(
253255
if settings.debug.visualize_layout:
254256
self.draw_clusters_and_cells_side_by_side(
255257
conv_res,
256-
page,
257-
processed_clusters,
258-
mode_prefix="postprocessed",
258+
page,
259+
processed_clusters,
260+
mode_prefix="postprocessed",
259261
)
260262

261263
yield page

docling/models/table_structure_model.py

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,10 @@ def __call__(
202202
continue
203203
# Rotate and scale table image
204204
page_im = cast(Image, page.get_image())
205+
original_scaled_page_size = (
206+
int(page_im.size[0] * self.scale),
207+
int(page_im.size[1] * self.scale),
208+
)
205209
scaled_page_im: Image = cast(
206210
Image, page.get_image(scale=self.scale)
207211
)
@@ -258,10 +262,12 @@ def __call__(
258262
scale=self.scale
259263
)
260264
)
265+
# _rotate_bbox expects the size of the image in
266+
# which the bbox was found
261267
new_bbox = _rotate_bbox(
262268
new_cell.to_bounding_box(),
263-
orientation=cells_orientation,
264-
im_size=scaled_page_im.size,
269+
orientation=-cells_orientation,
270+
im_size=original_scaled_page_size,
265271
).model_dump()
266272
tokens.append(
267273
{
@@ -286,7 +292,11 @@ def __call__(
286292
the_bbox
287293
)
288294
element["bbox"]["token"] = text_piece
289-
295+
element["bbox"] = _rotate_bbox(
296+
BoundingBox.model_validate(element["bbox"]),
297+
orientation=cells_orientation,
298+
im_size=scaled_page_im.size,
299+
).model_dump()
290300
tc = TableCell.model_validate(element)
291301
if tc.bbox is not None:
292302
tc.bbox = tc.bbox.scaled(1 / self.scale)

0 commit comments

Comments
 (0)