Skip to content

Commit c62a141

Browse files
committed
feat(cvat): support rotated CVAT boxes via enclosing axis-aligned bbox
- Store CVAT rotation on elements (rotation_deg, bbox_unrotated)
- Compute enclosing bbox for rotated rectangles and apply during parsing
- Add tests for rotation math and parser integration

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
1 parent 541634f commit c62a141

File tree

3 files changed

+71
-1
lines changed

3 files changed

+71
-1
lines changed

docling_eval/cvat_tools/geometry.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import math
56
from typing import Iterable, Iterator, Optional, Protocol, Sequence, TypeVar
67

78
from docling_core.types.doc.base import BoundingBox, CoordOrigin
@@ -87,3 +88,56 @@ def iter_unique_by_bbox(
8788
if all(bbox_iou(element.bbox, kept.bbox) < iou_threshold for kept in seen):
8889
seen.append(element)
8990
yield element
91+
92+
93+
def bbox_enclosing_rotated_rect(
    unrotated_bbox: BoundingBox, *, rotation_deg: float
) -> BoundingBox:
    """Return the smallest axis-aligned BoundingBox enclosing a rotated rectangle.

    CVAT represents rotated boxes as an unrotated rectangle (xtl/ytl/xbr/ybr) plus a
    `rotation` attribute. The rotation is applied around the center of that rectangle.

    The enclosing box is computed in closed form: for a rectangle with half-extents
    (hw, hh) rotated by theta about its center, the axis-aligned half-extents are
    ``hw*|cos(theta)| + hh*|sin(theta)|`` and ``hw*|sin(theta)| + hh*|cos(theta)|``.
    This is what taking the min/max over the four rotated corners yields.

    Args:
        unrotated_bbox: The rectangle before rotation, in TOPLEFT coordinates.
        rotation_deg: Rotation angle in degrees. The direction of rotation does not
            affect the result, and any finite value (negative, > 360) is accepted.

    Returns:
        An axis-aligned BoundingBox with the same coord origin as the input. When
        the rotation is a multiple of 180 degrees the input object itself is
        returned, since the rectangle maps onto itself.

    Raises:
        ValueError: If the bbox is not in TOPLEFT coordinates, or if
            ``rotation_deg`` is NaN or infinite.
    """
    if unrotated_bbox.coord_origin != CoordOrigin.TOPLEFT:
        raise ValueError(
            "bbox_enclosing_rotated_rect currently expects CoordOrigin.TOPLEFT"
        )
    # NaN/inf would otherwise propagate through the trigonometry and silently
    # yield a bounding box made of NaNs.
    if not math.isfinite(rotation_deg):
        raise ValueError(
            f"rotation_deg must be a finite number of degrees, got {rotation_deg!r}"
        )

    # A rectangle rotated about its center by theta and by theta + 180 degrees
    # covers the same area, so the angle only matters modulo 180.
    normalized = rotation_deg % 180.0
    if normalized == 0.0:
        return unrotated_bbox

    center_x = (unrotated_bbox.l + unrotated_bbox.r) / 2.0
    center_y = (unrotated_bbox.t + unrotated_bbox.b) / 2.0
    half_w = abs(unrotated_bbox.r - unrotated_bbox.l) / 2.0
    half_h = abs(unrotated_bbox.b - unrotated_bbox.t) / 2.0

    if normalized == 90.0:
        # Exact quarter turn: width and height swap. Handled separately because
        # cos(pi/2) is not exactly zero in floating point.
        extent_x, extent_y = half_h, half_w
    else:
        theta = math.radians(normalized)
        abs_cos = abs(math.cos(theta))
        abs_sin = abs(math.sin(theta))
        extent_x = (half_w * abs_cos) + (half_h * abs_sin)
        extent_y = (half_w * abs_sin) + (half_h * abs_cos)

    return BoundingBox(
        l=center_x - extent_x,
        t=center_y - extent_y,
        r=center_x + extent_x,
        b=center_y + extent_y,
        coord_origin=unrotated_bbox.coord_origin,
    )

docling_eval/cvat_tools/models.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,15 @@ class CVATElement(BaseModel):
3232
id: int
3333
label: Union[DocItemLabel, GraphCellLabel, TableStructLabel]
3434
bbox: BoundingBox
35+
# CVAT supports rotated rectangles via the `rotation` attribute on <box>.
36+
# In CVAT, xtl/ytl/xbr/ybr describe the unrotated rectangle and the rotation is applied
37+
# around the box center. Docling's BoundingBox is axis-aligned only, so we store both:
38+
# - bbox: the axis-aligned bbox used by our pipeline (potentially expanded to enclose rotation)
39+
# - bbox_unrotated: the raw, unrotated CVAT bbox as authored in the XML (TOPLEFT origin)
40+
# - rotation_deg: the rotation in degrees as provided by CVAT (clockwise/counterclockwise is
41+
# irrelevant for the enclosing axis-aligned bbox of a rectangle).
42+
rotation_deg: float = 0.0
43+
bbox_unrotated: Optional[BoundingBox] = None
3544
content_layer: ContentLayer
3645
type: Optional[str] = None
3746
level: Optional[int] = None

docling_eval/cvat_tools/parser.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from docling_core.types.doc.document import ContentLayer
1111
from docling_core.types.doc.labels import DocItemLabel, GraphCellLabel
1212

13+
from docling_eval.cvat_tools.geometry import bbox_enclosing_rotated_rect
1314
from docling_eval.cvat_tools.models import (
1415
CVATAnnotationPath,
1516
CVATElement,
@@ -183,7 +184,11 @@ def _parse_image_element(
183184
ytl = float(box.attrib["ytl"])
184185
xbr = float(box.attrib["xbr"])
185186
ybr = float(box.attrib["ybr"])
186-
bbox = cvat_box_to_bbox(xtl, ytl, xbr, ybr) # -> BoundingBox(l,t,r,b) TOPLEFT
187+
bbox_unrotated = cvat_box_to_bbox(
188+
xtl, ytl, xbr, ybr
189+
) # -> BoundingBox(l,t,r,b) TOPLEFT
190+
rotation_deg = float(box.attrib.get("rotation", "0.0"))
191+
bbox = bbox_enclosing_rotated_rect(bbox_unrotated, rotation_deg=rotation_deg)
187192

188193
# Parse child <attribute> tags; default content_layer to BODY
189194
attributes: dict[str, str | None] = {}
@@ -216,6 +221,8 @@ def _parse_image_element(
216221
id=box_id,
217222
label=label_obj, # Union[DocItemLabel, GraphCellLabel, TableStructLabel]
218223
bbox=bbox,
224+
bbox_unrotated=bbox_unrotated,
225+
rotation_deg=rotation_deg,
219226
content_layer=content_layer,
220227
type=type_,
221228
level=level,

0 commit comments

Comments
 (0)