Skip to content

Commit f9da84b

Browse files
committed
fix(layout,table): use default v4 backend for e2e OCR test and fix table structure detection
Signed-off-by: Clément Doumouro <[email protected]>
1 parent 4e9c4b4 commit f9da84b

File tree

12 files changed: 155 additions and 215 deletions

docling/models/layout_model.py

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
from collections.abc import Iterable
55
from copy import deepcopy
66
from pathlib import Path
7-
from typing import List, Optional, Union
7+
from typing import List, Optional, Union, cast
88

99
import numpy as np
1010
from docling_core.types.doc import DocItemLabel
@@ -195,7 +195,7 @@ def __call__(
195195
continue
196196

197197
page_predictions = batch_predictions[valid_page_idx]
198-
page_image = valid_page_images[valid_page_idx]
198+
page_image = valid_page_images[valid_page_idx] # type: ignore[assignment]
199199
page_orientation = valid_page_orientations[valid_page_idx]
200200
valid_page_idx += 1
201201

@@ -207,23 +207,25 @@ def __call__(
207207
bbox = BoundingBox.model_validate(pred_item)
208208
if page_orientation:
209209
bbox = rotate_bounding_box(
210-
bbox, page_orientation, page_image.size
210+
bbox,
211+
page_orientation,
212+
page_image.size, # type: ignore[union-attr]
211213
).to_bounding_box()
212214
cluster = Cluster(
213215
id=ix,
214-
label=label,
215-
confidence=pred_item["confidence"],
216-
bbox=bbox,
216+
label=label,
217+
confidence=pred_item["confidence"],
218+
bbox=bbox,
217219
cells=[],
218220
)
219221
clusters.append(cluster)
220222

221223
if settings.debug.visualize_raw_layout:
222224
self.draw_clusters_and_cells_side_by_side(
223225
conv_res,
224-
page,
225-
clusters,
226-
mode_prefix="raw",
226+
page,
227+
clusters,
228+
mode_prefix="raw",
227229
)
228230

229231
# Apply postprocessing
@@ -253,9 +255,9 @@ def __call__(
253255
if settings.debug.visualize_layout:
254256
self.draw_clusters_and_cells_side_by_side(
255257
conv_res,
256-
page,
257-
processed_clusters,
258-
mode_prefix="postprocessed",
258+
page,
259+
processed_clusters,
260+
mode_prefix="postprocessed",
259261
)
260262

261263
yield page

docling/models/table_structure_model.py

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,10 @@ def __call__(
202202
continue
203203
# Rotate and scale table image
204204
page_im = cast(Image, page.get_image())
205+
original_scaled_page_size = (
206+
int(page_im.size[0] * self.scale),
207+
int(page_im.size[1] * self.scale),
208+
)
205209
scaled_page_im: Image = cast(
206210
Image, page.get_image(scale=self.scale)
207211
)
@@ -258,10 +262,12 @@ def __call__(
258262
scale=self.scale
259263
)
260264
)
265+
# _rotate_bbox expects the size of the image in
266+
# which the bbox was found
261267
new_bbox = _rotate_bbox(
262268
new_cell.to_bounding_box(),
263-
orientation=cells_orientation,
264-
im_size=scaled_page_im.size,
269+
orientation=-cells_orientation,
270+
im_size=original_scaled_page_size,
265271
).model_dump()
266272
tokens.append(
267273
{
@@ -286,7 +292,11 @@ def __call__(
286292
the_bbox
287293
)
288294
element["bbox"]["token"] = text_piece
289-
295+
element["bbox"] = _rotate_bbox(
296+
BoundingBox.model_validate(element["bbox"]),
297+
orientation=cells_orientation,
298+
im_size=scaled_page_im.size,
299+
).model_dump()
290300
tc = TableCell.model_validate(element)
291301
if tc.bbox is not None:
292302
tc.bbox = tc.bbox.scaled(1 / self.scale)

tests/data/groundtruth/docling_v2/textbox.docx.itxt

Lines changed: 14 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -66,25 +66,22 @@ Collabo ... vention measures at all school levels.
6666
item-60 at level 2: paragraph: Whether the epidemic has eased.
6767
item-61 at level 2: paragraph:
6868
item-62 at level 1: paragraph:
69-
item-63 at level 1: section: group textbox
70-
item-64 at level 2: paragraph: Whether the test results are pos ... legally designated infectious disease.
71-
item-65 at level 2: paragraph: No
72-
item-66 at level 1: paragraph:
69+
item-63 at level 1: paragraph:
70+
item-64 at level 1: paragraph:
71+
item-65 at level 1: section: group textbox
72+
item-66 at level 2: paragraph: Yes
7373
item-67 at level 1: paragraph:
7474
item-68 at level 1: section: group textbox
7575
item-69 at level 2: paragraph: Yes
7676
item-70 at level 1: paragraph:
77-
item-71 at level 1: section: group textbox
78-
item-72 at level 2: paragraph: Yes
79-
item-73 at level 1: paragraph:
80-
item-74 at level 1: paragraph:
81-
item-75 at level 1: section: group textbox
82-
item-76 at level 2: paragraph: Case closed.
83-
item-77 at level 2: paragraph:
84-
item-78 at level 2: paragraph: The Health Bureau will carry out ... ters for Disease Control if necessary.
77+
item-71 at level 1: paragraph:
78+
item-72 at level 1: section: group textbox
79+
item-73 at level 2: paragraph: Case closed.
80+
item-74 at level 2: paragraph:
81+
item-75 at level 2: paragraph: The Health Bureau will carry out ... ters for Disease Control if necessary.
82+
item-76 at level 1: paragraph:
83+
item-77 at level 1: section: group textbox
84+
item-78 at level 2: paragraph: No
8585
item-79 at level 1: paragraph:
86-
item-80 at level 1: section: group textbox
87-
item-81 at level 2: paragraph: No
88-
item-82 at level 1: paragraph:
89-
item-83 at level 1: paragraph:
90-
item-84 at level 1: paragraph:
86+
item-80 at level 1: paragraph:
87+
item-81 at level 1: paragraph:

0 commit comments

Comments
 (0)