Skip to content

Commit c521766

Browse files
feat: add BoundingBox methods for overlap and union calculations (#311)
* feat(BoundingBox): add methods for overlap and union calculations Signed-off-by: samiullahchattha <[email protected]> * format files Signed-off-by: samiullahchattha <[email protected]> --------- Signed-off-by: samiullahchattha <[email protected]> Co-authored-by: samiullahchattha <[email protected]>
1 parent 8415969 commit c521766

File tree

2 files changed

+235
-0
lines changed

2 files changed

+235
-0
lines changed

docling_core/types/doc/base.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,41 @@ def enclosing_bbox(cls, boxes: List["BoundingBox"]) -> "BoundingBox":
395395
raise ValueError("BoundingBoxes have different CoordOrigin")
396396

397397
return cls(l=left, t=top, r=right, b=bottom, coord_origin=origin)
398+
399+
def x_overlap_with(self, other: "BoundingBox") -> float:
    """Return the length of the horizontal span shared with ``other``.

    The shared span runs from the larger of the two ``l`` values to the
    smaller of the two ``r`` values; a negative length is clamped to zero.

    Raises:
        ValueError: If the two boxes do not have the same ``coord_origin``.
    """
    same_origin = self.coord_origin == other.coord_origin
    if not same_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    shared_right = min(self.r, other.r)
    shared_left = max(self.l, other.l)
    return max(0.0, shared_right - shared_left)
404+
405+
def y_overlap_with(self, other: "BoundingBox") -> float:
    """Return the length of the vertical span shared with ``other``.

    Which edge pair bounds the span depends on the coordinate origin:
    for ``CoordOrigin.TOPLEFT`` the span is ``min(b) - max(t)``, for
    ``CoordOrigin.BOTTOMLEFT`` it is ``min(t) - max(b)``. A negative
    length is clamped to zero.

    Raises:
        ValueError: If the two boxes do not have the same ``coord_origin``,
            or if that origin is neither TOPLEFT nor BOTTOMLEFT.
    """
    origin = self.coord_origin
    if origin != other.coord_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    if origin == CoordOrigin.TOPLEFT:
        span_end = min(self.b, other.b)
        span_start = max(self.t, other.t)
    elif origin == CoordOrigin.BOTTOMLEFT:
        span_end = min(self.t, other.t)
        span_start = max(self.b, other.b)
    else:
        raise ValueError("Unsupported CoordOrigin")

    return max(0.0, span_end - span_start)
414+
415+
def union_area_with(self, other: "BoundingBox") -> float:
    """Return the area covered by this box and ``other`` combined.

    Computed by inclusion-exclusion: the two individual areas added
    together, minus the area of their intersection.

    Raises:
        ValueError: If the two boxes do not have the same ``coord_origin``.
    """
    same_origin = self.coord_origin == other.coord_origin
    if not same_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    own_area = self.area()
    other_area = other.area()
    shared_area = self.intersection_area_with(other)
    return own_area + other_area - shared_area
420+
421+
def x_union_with(self, other: "BoundingBox") -> float:
    """Return the horizontal extent spanned by this box and ``other``.

    The extent runs from the smaller of the two ``l`` values to the larger
    of the two ``r`` values, so any gap between disjoint boxes is included.
    A negative length is clamped to zero.

    Raises:
        ValueError: If the two boxes do not have the same ``coord_origin``.
    """
    same_origin = self.coord_origin == other.coord_origin
    if not same_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    outer_right = max(self.r, other.r)
    outer_left = min(self.l, other.l)
    return max(0.0, outer_right - outer_left)
426+
427+
def y_union_with(self, other: "BoundingBox") -> float:
    """Return the vertical extent spanned by this box and ``other``.

    Which edge pair bounds the extent depends on the coordinate origin:
    for ``CoordOrigin.TOPLEFT`` it is ``max(b) - min(t)``, for
    ``CoordOrigin.BOTTOMLEFT`` it is ``max(t) - min(b)``. Any gap between
    disjoint boxes is included. A negative length is clamped to zero.

    Raises:
        ValueError: If the two boxes do not have the same ``coord_origin``,
            or if that origin is neither TOPLEFT nor BOTTOMLEFT.
    """
    origin = self.coord_origin
    if origin != other.coord_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    if origin == CoordOrigin.TOPLEFT:
        extent_end = max(self.b, other.b)
        extent_start = min(self.t, other.t)
    elif origin == CoordOrigin.BOTTOMLEFT:
        extent_end = max(self.t, other.t)
        extent_start = min(self.b, other.b)
    else:
        raise ValueError("Unsupported CoordOrigin")

    return max(0.0, extent_end - extent_start)

test/test_docling_doc.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,203 @@ def test_intersection_area_with():
155155
bbox1.intersection_area_with(bbox6)
156156

157157

158+
def test_x_overlap_with():
    """Exercise ``BoundingBox.x_overlap_with`` against a fixed 10x10 reference box."""
    top_left = CoordOrigin.TOPLEFT
    reference = BoundingBox(l=0, t=0, r=10, b=10, coord_origin=top_left)

    # (l, r, expected horizontal overlap with ``reference``)
    scenarios = [
        (5, 15, 5.0),  # partial overlap
        (11, 20, 0.0),  # disjoint, to the right
        (-10, -1, 0.0),  # disjoint, to the left
        (10, 20, 0.0),  # touching edges
        (2, 8, 6.0),  # fully contained
        (0, 10, 10.0),  # identical box
    ]
    for left, right, expected in scenarios:
        candidate = BoundingBox(l=left, t=0, r=right, b=10, coord_origin=top_left)
        assert abs(reference.x_overlap_with(candidate) - expected) < 1.0e-3

    # Boxes expressed in different coordinate origins must be rejected.
    bottom_left_box = BoundingBox(
        l=0, t=10, r=10, b=0, coord_origin=CoordOrigin.BOTTOMLEFT
    )
    with pytest.raises(ValueError):
        reference.x_overlap_with(bottom_left_box)
187+
188+
189+
def test_y_overlap_with():
    """Exercise ``BoundingBox.y_overlap_with`` for both coordinate origins."""

    def make_box(origin, low, high):
        # TOPLEFT boxes take (t=low, b=high); BOTTOMLEFT boxes take (b=low, t=high).
        if origin == CoordOrigin.TOPLEFT:
            return BoundingBox(l=0, t=low, r=10, b=high, coord_origin=origin)
        return BoundingBox(l=0, b=low, r=10, t=high, coord_origin=origin)

    # (low, high, expected vertical overlap with the 0..10 reference box)
    scenarios = [
        (5, 15, 5.0),  # partial overlap
        (11, 20, 0.0),  # disjoint
        (10, 20, 0.0),  # touching edges
        (2, 8, 6.0),  # fully contained
    ]

    references = {}
    for origin in (CoordOrigin.TOPLEFT, CoordOrigin.BOTTOMLEFT):
        reference = make_box(origin, 0, 10)
        references[origin] = reference
        for low, high, expected in scenarios:
            candidate = make_box(origin, low, high)
            assert abs(reference.y_overlap_with(candidate) - expected) < 1.0e-3

    # Boxes expressed in different coordinate origins must be rejected.
    with pytest.raises(ValueError):
        references[CoordOrigin.TOPLEFT].y_overlap_with(
            references[CoordOrigin.BOTTOMLEFT]
        )
227+
228+
229+
def test_union_area_with():
    """Exercise ``BoundingBox.union_area_with`` (area A + area B - intersection)."""
    top_left = CoordOrigin.TOPLEFT
    bottom_left = CoordOrigin.BOTTOMLEFT

    # Reference box, area 100.
    reference = BoundingBox(l=0, t=0, r=10, b=10, coord_origin=top_left)

    # (l, t, r, b, expected union area with ``reference``)
    scenarios = [
        (5, 5, 15, 15, 175.0),  # overlapping: 100 + 100 - 25
        (20, 0, 30, 10, 200.0),  # non-overlapping: 100 + 100 - 0
        (10, 0, 20, 10, 200.0),  # touching edges: 100 + 100 - 0
        (2, 2, 8, 8, 100.0),  # full containment: 100 + 36 - 36
    ]
    for left, top, right, bottom, expected in scenarios:
        candidate = BoundingBox(
            l=left, t=top, r=right, b=bottom, coord_origin=top_left
        )
        assert abs(reference.union_area_with(candidate) - expected) < 1.0e-3

    # Overlapping pair with BOTTOMLEFT origin: 100 + 100 - 25 = 175.
    reference_bl = BoundingBox(l=0, b=0, r=10, t=10, coord_origin=bottom_left)
    shifted_bl = BoundingBox(l=5, b=5, r=15, t=15, coord_origin=bottom_left)
    assert abs(reference_bl.union_area_with(shifted_bl) - 175.0) < 1.0e-3

    # Boxes expressed in different coordinate origins must be rejected.
    with pytest.raises(ValueError):
        reference.union_area_with(reference_bl)
274+
275+
276+
def test_x_union_with():
    """Exercise ``BoundingBox.x_union_with`` against a fixed 10x10 reference box."""
    top_left = CoordOrigin.TOPLEFT
    reference = BoundingBox(l=0, t=0, r=10, b=10, coord_origin=top_left)

    # (l, r, expected x_union) where x_union = max(r values) - min(l values)
    scenarios = [
        (5, 15, 15.0),  # partial overlap: 15 - 0
        (20, 30, 30.0),  # disjoint: 30 - 0
        (10, 20, 20.0),  # touching edges: 20 - 0
        (2, 8, 10.0),  # fully contained: 10 - 0
        (0, 10, 10.0),  # identical box
    ]
    for left, right, expected in scenarios:
        candidate = BoundingBox(l=left, t=0, r=right, b=10, coord_origin=top_left)
        assert abs(reference.x_union_with(candidate) - expected) < 1.0e-3

    # Boxes expressed in different coordinate origins must be rejected.
    bottom_left_box = BoundingBox(
        l=0, t=10, r=10, b=0, coord_origin=CoordOrigin.BOTTOMLEFT
    )
    with pytest.raises(ValueError):
        reference.x_union_with(bottom_left_box)
305+
306+
307+
def test_y_union_with():
    """Exercise ``BoundingBox.y_union_with`` for both coordinate origins."""

    def make_box(origin, low, high):
        # TOPLEFT boxes take (t=low, b=high); BOTTOMLEFT boxes take (b=low, t=high).
        if origin == CoordOrigin.TOPLEFT:
            return BoundingBox(l=0, t=low, r=10, b=high, coord_origin=origin)
        return BoundingBox(l=0, b=low, r=10, t=high, coord_origin=origin)

    # (low, high, expected y_union) where y_union = max(high values) - min(low values)
    scenarios = [
        (5, 15, 15.0),  # partial overlap: 15 - 0
        (20, 30, 30.0),  # disjoint: 30 - 0
        (10, 20, 20.0),  # touching edges: 20 - 0
        (2, 8, 10.0),  # fully contained: 10 - 0
    ]

    references = {}
    for origin in (CoordOrigin.TOPLEFT, CoordOrigin.BOTTOMLEFT):
        reference = make_box(origin, 0, 10)
        references[origin] = reference
        for low, high, expected in scenarios:
            candidate = make_box(origin, low, high)
            assert abs(reference.y_union_with(candidate) - expected) < 1.0e-3

    # Boxes expressed in different coordinate origins must be rejected.
    with pytest.raises(ValueError):
        references[CoordOrigin.TOPLEFT].y_union_with(
            references[CoordOrigin.BOTTOMLEFT]
        )
353+
354+
158355
def test_orientation():
159356

160357
page_height = 300

0 commit comments

Comments
 (0)