Skip to content

Commit f02bbae

Browse files
feat: added the geometric operations to BoundingBox (#136)
* added the geometric operations
  Signed-off-by: Peter Staar <[email protected]>
* updated the geometric operations and added deprecation warnings to `scaled` and `normalized`
  Signed-off-by: Peter Staar <[email protected]>
* finalised the geometric operations
  Signed-off-by: Peter Staar <[email protected]>
* updated tests and functionality
  Signed-off-by: Peter Staar <[email protected]>
---------
Signed-off-by: Peter Staar <[email protected]>
1 parent b787d53 commit f02bbae

File tree

3 files changed

+353
-43
lines changed

3 files changed

+353
-43
lines changed

docling_core/types/doc/base.py

Lines changed: 228 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""Models for the base data types."""
22

3-
import copy
43
from enum import Enum
54
from typing import Tuple
65

@@ -53,33 +52,53 @@ def height(self):
5352
"""height."""
5453
return abs(self.t - self.b)
5554

56-
def scaled(self, scale: float) -> "BoundingBox":
57-
"""scaled.
58-
59-
:param scale: float:
60-
61-
"""
62-
out_bbox = copy.deepcopy(self)
63-
out_bbox.l *= scale
64-
out_bbox.r *= scale
65-
out_bbox.t *= scale
66-
out_bbox.b *= scale
67-
68-
return out_bbox
69-
70-
def normalized(self, page_size: Size) -> "BoundingBox":
71-
"""normalized.
72-
73-
:param page_size: Size:
74-
75-
"""
76-
out_bbox = copy.deepcopy(self)
77-
out_bbox.l /= page_size.width
78-
out_bbox.r /= page_size.width
79-
out_bbox.t /= page_size.height
80-
out_bbox.b /= page_size.height
81-
82-
return out_bbox
55+
def resize_by_scale(self, x_scale: float, y_scale: float):
    """Build a new box whose edges are multiplied by per-axis factors.

    :param x_scale: factor applied to the horizontal edges ``l`` and ``r``.
    :param y_scale: factor applied to the vertical edges ``t`` and ``b``.
    :returns: a freshly constructed ``BoundingBox`` carrying the same
        ``coord_origin`` as this one.
    """
    scaled_edges = {
        "l": self.l * x_scale,
        "r": self.r * x_scale,
        "t": self.t * y_scale,
        "b": self.b * y_scale,
    }
    return BoundingBox(coord_origin=self.coord_origin, **scaled_edges)
64+
65+
def scale_to_size(self, old_size: Size, new_size: Size):
    """Rescale the box from one reference size to another.

    The horizontal factor is ``new_size.width / old_size.width`` and the
    vertical factor is ``new_size.height / old_size.height``; both are
    forwarded to ``resize_by_scale``.

    :param old_size: size the current coordinates refer to.
    :param new_size: size the returned coordinates should refer to.
    :returns: whatever ``resize_by_scale`` returns for those two factors.
    """
    width_ratio = new_size.width / old_size.width
    height_ratio = new_size.height / old_size.height
    return self.resize_by_scale(x_scale=width_ratio, y_scale=height_ratio)
71+
72+
# Kept for backward compatibility: same result as before this change, now delegating to resize_by_scale.
73+
def scaled(self, scale: float):
    """Scale the box by one factor used for both axes.

    :param scale: factor passed to ``resize_by_scale`` as both ``x_scale``
        and ``y_scale``.
    :returns: the result of ``resize_by_scale``.
    """
    uniform_factors = {"x_scale": scale, "y_scale": scale}
    return self.resize_by_scale(**uniform_factors)
76+
77+
# Kept for backward compatibility: divides by the page size as before, now delegating to scale_to_size.
78+
def normalized(self, page_size: Size):
    """Rescale the box from ``page_size`` to a 1.0 x 1.0 reference size.

    :param page_size: size the current coordinates refer to.
    :returns: the result of ``scale_to_size`` with a unit target size.
    """
    unit_size = Size(height=1.0, width=1.0)
    return self.scale_to_size(old_size=page_size, new_size=unit_size)
83+
84+
def expand_by_scale(self, x_scale: float, y_scale: float) -> "BoundingBox":
    """Return a new box grown outward on all four sides.

    The left and right edges each move outward by ``self.width * x_scale``
    and the top and bottom edges each move outward by
    ``self.height * y_scale``. Negative scales therefore shrink the box.

    :param x_scale: fraction of the current width added on each horizontal side.
    :param y_scale: fraction of the current height added on each vertical side.
    :returns: a new ``BoundingBox`` with the same ``coord_origin``.
    :raises ValueError: if ``coord_origin`` is neither ``TOPLEFT`` nor
        ``BOTTOMLEFT`` (previously this case silently returned ``None``).
    """
    dx = self.width * x_scale
    dy = self.height * y_scale

    # "Outward" flips sign with the origin: with TOPLEFT the top edge has the
    # smaller y value, with BOTTOMLEFT it has the larger one.
    if self.coord_origin == CoordOrigin.TOPLEFT:
        new_top, new_bottom = self.t - dy, self.b + dy
    elif self.coord_origin == CoordOrigin.BOTTOMLEFT:
        new_top, new_bottom = self.t + dy, self.b - dy
    else:
        raise ValueError(f"Unsupported CoordOrigin: {self.coord_origin}")

    return BoundingBox(
        l=self.l - dx,
        r=self.r + dx,
        t=new_top,
        b=new_bottom,
        coord_origin=self.coord_origin,
    )
83102

84103
def as_tuple(self) -> Tuple[float, float, float, float]:
85104
"""as_tuple."""
@@ -116,33 +135,55 @@ def from_tuple(cls, coord: Tuple[float, ...], origin: CoordOrigin):
116135

117136
def area(self) -> float:
    """Return the box area as a non-negative number.

    Absolute differences are used on both axes, so the result does not
    depend on which of ``t``/``b`` (or ``l``/``r``) is numerically larger.
    """
    horizontal_extent = abs(self.r - self.l)
    vertical_extent = abs(self.b - self.t)
    return horizontal_extent * vertical_extent
123139

124140
def intersection_area_with(self, other: "BoundingBox") -> float:
    """Return the area shared by this box and ``other``.

    :param other: box to intersect with; must use the same ``coord_origin``.
    :returns: overlap width times overlap height, or ``0.0`` when either
        overlap extent is zero or negative (boxes apart or merely touching).
    :raises ValueError: if the two boxes have different ``coord_origin``.
    """
    if self.coord_origin != other.coord_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    # Horizontal overlap is the same for both origins.
    overlap_w = min(self.r, other.r) - max(self.l, other.l)

    # Vertical overlap: smallest "far" edge minus largest "near" edge, where
    # the edge with the larger y value depends on the origin.
    if self.coord_origin == CoordOrigin.TOPLEFT:
        overlap_h = min(self.b, other.b) - max(self.t, other.t)
    elif self.coord_origin == CoordOrigin.BOTTOMLEFT:
        overlap_h = min(self.t, other.t) - max(self.b, other.b)

    # A non-positive extent on either axis means there is no shared area.
    if overlap_w <= 0 or overlap_h <= 0:
        return 0.0

    return overlap_w * overlap_h
145165

166+
def intersection_over_union(
    self, other: "BoundingBox", eps: float = 1.0e-6
) -> float:
    """Return intersection area divided by union area (IoU).

    :param other: box to compare against.
    :param eps: small constant added to the union area so the division is
        defined even when both boxes have zero area.
    :returns: ``intersection / (union + eps)``.
    """
    shared = self.intersection_area_with(other=other)

    own_area = abs(self.l - self.r) * abs(self.t - self.b)
    other_area = abs(other.l - other.r) * abs(other.t - other.b)

    # Union = sum of both areas minus the part counted twice.
    combined = own_area + other_area - shared

    return shared / (combined + eps)
179+
180+
def intersection_over_self(
    self, other: "BoundingBox", eps: float = 1.0e-6
) -> float:
    """Return the fraction of this box's area that is covered by ``other``.

    :param other: box to intersect with.
    :param eps: lower bound for the denominator. Boxes whose area is at
        least ``eps`` get the exact ratio ``intersection / area``; smaller
        (including zero-area) boxes are divided by ``eps`` instead, which
        avoids the ``ZeroDivisionError`` a degenerate box used to trigger.
    :returns: the ratio described above, or ``0.0`` if the denominator is
        still not positive (zero area with ``eps <= 0``).
    """
    shared = self.intersection_area_with(other=other)

    denominator = max(self.area(), eps)
    if denominator <= 0.0:
        # Only reachable with a zero-area box and a non-positive eps.
        return 0.0

    return shared / denominator
186+
146187
def to_bottom_left_origin(self, page_height: float) -> "BoundingBox":
147188
"""to_bottom_left_origin.
148189
@@ -176,3 +217,151 @@ def to_top_left_origin(self, page_height: float) -> "BoundingBox":
176217
b=page_height - self.b, # self.t
177218
coord_origin=CoordOrigin.TOPLEFT,
178219
)
220+
221+
def overlaps(self, other: "BoundingBox") -> bool:
    """Tell whether the two boxes overlap on both axes.

    The vertical check is only evaluated when the horizontal check passes.
    """
    if not self.overlaps_horizontally(other=other):
        return False
    return self.overlaps_vertically(other=other)
226+
227+
def overlaps_horizontally(self, other: "BoundingBox") -> bool:
    """Tell whether the x-ranges of the two boxes share more than an edge.

    Boxes that only touch (``self.r == other.l`` or ``other.r == self.l``)
    do not count as overlapping.
    """
    if self.r <= other.l:
        # self ends before (or exactly where) other begins
        return False
    if other.r <= self.l:
        # other ends before (or exactly where) self begins
        return False
    return True
230+
231+
def overlaps_vertically(self, other: "BoundingBox") -> bool:
    """Tell whether the y-ranges of the two boxes share more than an edge.

    :param other: box to compare against; must use the same ``coord_origin``.
    :returns: ``True`` when the vertical ranges overlap, ``False`` when they
        are apart or only touching. No value is returned explicitly for an
        origin other than ``BOTTOMLEFT``/``TOPLEFT``.
    :raises ValueError: if the two boxes have different ``coord_origin``.
    """
    if self.coord_origin != other.coord_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    origin = self.coord_origin
    if origin == CoordOrigin.BOTTOMLEFT:
        # Here t is compared as the far edge and b as the near edge.
        apart = self.t <= other.b or other.t <= self.b
        return not apart
    elif origin == CoordOrigin.TOPLEFT:
        # Same test with the roles of t and b swapped.
        apart = self.b <= other.t or other.b <= self.t
        return not apart
241+
242+
def overlaps_vertically_with_iou(self, other: "BoundingBox", iou: float) -> bool:
    """Tell whether the 1-D vertical IoU of two boxes exceeds a threshold.

    The vertical IoU is the length of the shared y-range divided by the
    length of the y-range spanned by both boxes together.

    :param other: box to compare against; must use the same ``coord_origin``.
    :param iou: threshold; the result is ``True`` only for a strictly larger
        vertical IoU.
    :returns: ``False`` when ``overlaps_vertically`` reports no overlap,
        otherwise whether the vertical IoU is greater than ``iou``.
    :raises ValueError: if the boxes have different ``coord_origin`` or the
        origin is neither ``BOTTOMLEFT`` nor ``TOPLEFT``.
    """
    if self.coord_origin != other.coord_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    # "starts" holds the edge used as the beginning of each y-range and
    # "ends" the edge used as its end; which attribute plays which role
    # depends on the origin.
    if self.coord_origin == CoordOrigin.BOTTOMLEFT:
        starts, ends = (self.b, other.b), (self.t, other.t)
    elif self.coord_origin == CoordOrigin.TOPLEFT:
        starts, ends = (self.t, other.t), (self.b, other.b)
    else:
        # Same exception and message the original raised for this case.
        raise ValueError("BoundingBoxes have different CoordOrigin")

    if not self.overlaps_vertically(other=other):
        return False

    union_length = max(ends) - min(starts)
    shared_length = min(ends) - max(starts)

    return float(shared_length) / float(union_length) > iou
281+
282+
def is_left_of(self, other: "BoundingBox") -> bool:
    """Tell whether this box's left edge lies left of ``other``'s left edge.

    Only the ``l`` coordinates are compared; the boxes may still overlap.
    """
    own_left, other_left = self.l, other.l
    return own_left < other_left
285+
286+
def is_strictly_left_of(self, other: "BoundingBox", eps: float = 0.001) -> bool:
    """Tell whether this box ends left of where ``other`` begins.

    :param other: box to compare against.
    :param eps: margin added to this box's right edge before comparing, so
        the gap between the boxes must be larger than ``eps``.
    :returns: ``True`` when ``self.r + eps`` is less than ``other.l``.
    """
    padded_right = self.r + eps
    return padded_right < other.l
289+
290+
def is_above(self, other: "BoundingBox") -> bool:
    """Tell whether this box's top edge lies above ``other``'s top edge.

    Only the ``t`` coordinates are compared, so the boxes may still overlap.
    With ``BOTTOMLEFT`` the test is ``self.t > other.t``; with ``TOPLEFT``
    it is ``self.t < other.t``.

    :param other: box to compare against; must use the same ``coord_origin``.
    :raises ValueError: if the boxes have different ``coord_origin`` or the
        origin is neither ``BOTTOMLEFT`` nor ``TOPLEFT``.
    """
    if self.coord_origin != other.coord_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    if self.coord_origin == CoordOrigin.BOTTOMLEFT:
        return self.t > other.t

    if self.coord_origin == CoordOrigin.TOPLEFT:
        return self.t < other.t

    # Same exception and message the original raised for this case.
    raise ValueError("BoundingBoxes have different CoordOrigin")
308+
309+
def is_strictly_above(self, other: "BoundingBox", eps: float = 1.0e-3) -> bool:
    """Tell whether this box's bottom edge lies above ``other``'s top edge.

    With ``BOTTOMLEFT`` the test is ``self.b + eps > other.t``; with
    ``TOPLEFT`` it is ``self.b + eps < other.t``.

    :param other: box to compare against; must use the same ``coord_origin``.
    :param eps: offset added to this box's bottom edge before comparing.
    :raises ValueError: if the boxes have different ``coord_origin`` or the
        origin is neither ``BOTTOMLEFT`` nor ``TOPLEFT``.
    """
    if self.coord_origin != other.coord_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    # NOTE(review): eps acts as a tolerance for BOTTOMLEFT (boxes overlapping
    # by less than eps still count) but as a required gap for TOPLEFT, so the
    # same geometry can give different answers per origin. Behaviour is kept
    # as-is; confirm which of the two is intended.
    shifted_bottom = self.b + eps

    if self.coord_origin == CoordOrigin.BOTTOMLEFT:
        return shifted_bottom > other.t

    if self.coord_origin == CoordOrigin.TOPLEFT:
        return shifted_bottom < other.t

    # Same exception and message the original raised for this case.
    raise ValueError("BoundingBoxes have different CoordOrigin")
327+
328+
def is_horizontally_connected(
    self, elem_i: "BoundingBox", elem_j: "BoundingBox"
) -> bool:
    """Evaluate this box against the pair ``elem_i`` / ``elem_j``.

    The result is ``False`` when this box's y-range overlaps the combined
    y-range of ``elem_i`` and ``elem_j``. Otherwise it is ``True`` exactly
    when ``self.l < elem_i.r`` and ``elem_j.l < self.r``.

    NOTE(review): returning ``False`` on vertical overlap is the original
    behaviour and is preserved here; confirm against the caller that this
    is the intended meaning of "horizontally connected".

    :param elem_i: first box of the pair; its right edge is compared with
        this box's left edge.
    :param elem_j: second box of the pair; its left edge is compared with
        this box's right edge.
    :raises ValueError: if the three boxes do not share one ``coord_origin``
        or the origin is neither ``BOTTOMLEFT`` nor ``TOPLEFT``.
    """
    if not (self.coord_origin == elem_i.coord_origin == elem_j.coord_origin):
        raise ValueError("BoundingBoxes have different CoordOrigin")

    # Pick which attribute begins and which ends a y-range for this origin.
    if self.coord_origin == CoordOrigin.BOTTOMLEFT:
        pair_start = min(elem_i.b, elem_j.b)
        pair_end = max(elem_i.t, elem_j.t)
        own_start, own_end = self.b, self.t
    elif self.coord_origin == CoordOrigin.TOPLEFT:
        pair_start = min(elem_i.t, elem_j.t)
        pair_end = max(elem_i.b, elem_j.b)
        own_start, own_end = self.t, self.b
    else:
        # Same exception and message the original raised for this case.
        raise ValueError("BoundingBoxes have different CoordOrigin")

    # Vertical overlap with the pair's combined range rules the box out.
    if own_start < pair_end and pair_start < own_end:
        return False

    return self.l < elem_i.r and elem_j.l < self.r

docling_core/types/doc/document.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -585,7 +585,8 @@ def get_image(self, doc: "DoclingDocument") -> Optional[PILImage.Image]:
585585
crop_bbox = (
586586
self.prov[0]
587587
.bbox.to_top_left_origin(page_height=page.size.height)
588-
.scaled(scale=page_image.height / page.size.height)
588+
.scale_to_size(old_size=page.size, new_size=page.image.size)
589+
# .scaled(scale=page_image.height / page.size.height)
589590
)
590591
return page_image.crop(crop_bbox.as_tuple())
591592

0 commit comments

Comments
 (0)