Skip to content

Commit 9d7e831

Browse files
sh-gupta and Shubham Gupta authored
feat: add get_image for all DocItem (#67)
* Moved image attribute from FloatingItem to DocItem

  Signed-off-by: Shubham Gupta <[email protected]>

* Revert "Moved image attribute from FloatingItem to DocItem"

  This reverts commit e48cd47.

  Signed-off-by: Shubham Gupta <[email protected]>

* Added get_image to DocItem and FloatingItem

  Signed-off-by: Shubham Gupta <[email protected]>

* Added tests for get_image in DocItem and FloatingItem

  Signed-off-by: Shubham Gupta <[email protected]>

* Updated get_image docstring in DocItem and FloatingItem

  Signed-off-by: Shubham Gupta <[email protected]>

---------

Signed-off-by: Shubham Gupta <[email protected]>
Co-authored-by: Shubham Gupta <[email protected]>
1 parent 1a201bc commit 9d7e831

File tree

2 files changed

+164
-0
lines changed

2 files changed

+164
-0
lines changed

docling_core/types/doc/document.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,28 @@ def get_location_tokens(
551551

552552
return location
553553

554+
def get_image(self, doc: "DoclingDocument") -> Optional[PILImage.Image]:
    """Return the region of the page image covered by this DocItem.

    Only the first provenance entry is consulted: its page number selects
    the page in ``doc.pages`` and its bounding box selects the crop region.

    The result is None when this DocItem has no provenance, when the
    referenced page is not present in ``doc``, or when that page lacks
    either a size or an image.

    NOTE(review): the bounding box is scaled by a single factor derived
    from the image height / page height ratio, so the horizontal axis
    presumes the page image keeps the page's aspect ratio -- confirm.
    """
    if len(self.prov) == 0:
        return None

    first_prov = self.prov[0]
    page = doc.pages.get(first_prov.page_no)
    if page is None:
        return None
    if page.size is None or page.image is None:
        return None

    full_page = page.image.pil_image
    # Express the box with a top-left origin before converting page units
    # to pixels, then hand the resulting tuple to the image's crop().
    region = first_prov.bbox.to_top_left_origin(page_height=page.size.height)
    pixels_per_unit = full_page.height / page.size.height
    region = region.scaled(scale=pixels_per_unit)
    return full_page.crop(region.as_tuple())
575+
554576

555577
class TextItem(DocItem):
556578
"""TextItem."""
@@ -633,6 +655,20 @@ def caption_text(self, doc: "DoclingDocument") -> str:
633655
text += cap.resolve(doc).text
634656
return text
635657

658+
def get_image(self, doc: "DoclingDocument") -> Optional[PILImage.Image]:
    """Return the image for this FloatingItem.

    An image stored directly on the item (``self.image``) takes precedence
    and its PIL image is returned as-is. When no such image is set, the
    lookup is delegated to the parent class's ``get_image`` with the same
    ``doc``, and whatever that call returns (possibly None) is passed
    through unchanged.
    """
    own_image = self.image
    if own_image is None:
        # No explicit image: fall back to the parent implementation.
        return super().get_image(doc=doc)
    return own_image.pil_image
671+
636672

637673
class PictureItem(FloatingItem):
638674
"""PictureItem."""

test/test_docling_doc.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import deque
2+
from unittest.mock import Mock
23

34
import pytest
45
import yaml
@@ -7,6 +8,7 @@
78

89
from docling_core.types.doc.document import (
910
CURRENT_VERSION,
11+
BoundingBox,
1012
DocItem,
1113
DoclingDocument,
1214
DocumentOrigin,
@@ -15,7 +17,9 @@
1517
KeyValueItem,
1618
ListItem,
1719
PictureItem,
20+
ProvenanceItem,
1821
SectionHeaderItem,
22+
Size,
1923
TableCell,
2024
TableData,
2125
TableItem,
@@ -407,3 +411,127 @@ def test_version_doc():
407411
comp_version = f"{major_split[0]}.{minor_split[0]}.{int(patch_split[0]) + 1}"
408412
doc = DoclingDocument(name="Untitled 1", version=comp_version)
409413
assert doc.version == CURRENT_VERSION
414+
415+
416+
def test_docitem_get_image():
    """Exercise DocItem.get_image on missing and valid provenance/page data."""

    def make_item(prov):
        # All cases share the same reference and label; only prov varies.
        return DocItem(self_ref="#", label=DocItemLabel.TEXT, prov=prov)

    # A 200x400 black page image with a 20x40 red patch pasted at (20, 40).
    expected_crop = PILImage.new(mode="RGB", size=(20, 40), color=(255, 0, 0))
    page_image = PILImage.new(mode="RGB", size=(200, 400), color=(0, 0, 0))
    page_image.paste(expected_crop, box=(20, 40))

    # Page 1 has an image, page 2 has none, page 3 does not exist.
    doc = DoclingDocument(name="Dummy")
    doc.add_page(
        page_no=1,
        size=Size(width=20, height=40),
        image=ImageRef.from_pil(page_image, dpi=72),
    )
    doc.add_page(page_no=2, size=Size(width=20, height=40), image=None)

    # No provenance at all.
    assert make_item([]).get_image(doc=doc) is None

    # Provenance pointing at a missing page (3), then at a page without an
    # image (2); the bounding box is never meant to be used in these cases.
    for page_no in (3, 2):
        prov = ProvenanceItem(
            page_no=page_no, bbox=Mock(spec=BoundingBox), charspan=(1, 2)
        )
        assert make_item([prov]).get_image(doc=doc) is None

    # Provenance on the page with an image: the crop is expected to be
    # exactly the red patch that was pasted into the page image.
    located = make_item(
        [
            ProvenanceItem(
                page_no=1, bbox=BoundingBox(l=2, t=4, r=4, b=8), charspan=(1, 2)
            )
        ]
    )
    cropped = located.get_image(doc=doc)
    assert cropped is not None
    assert cropped.tobytes() == expected_crop.tobytes()
466+
467+
468+
def test_floatingitem_get_image():
    """Exercise FloatingItem.get_image with and without an explicit image."""

    def make_floating(prov, image):
        # All cases share the same reference and label.
        return FloatingItem(
            self_ref="#", label=DocItemLabel.PICTURE, prov=prov, image=image
        )

    # A 200x400 black page image with a 20x40 red patch pasted at (20, 40).
    expected_crop = PILImage.new(mode="RGB", size=(20, 40), color=(255, 0, 0))
    page_image = PILImage.new(mode="RGB", size=(200, 400), color=(0, 0, 0))
    page_image.paste(expected_crop, box=(20, 40))

    # Page 1 has an image, page 2 has none, page 3 does not exist.
    doc = DoclingDocument(name="Dummy")
    doc.add_page(
        page_no=1,
        size=Size(width=20, height=40),
        image=ImageRef.from_pil(page_image, dpi=72),
    )
    doc.add_page(page_no=2, size=Size(width=20, height=40), image=None)

    # An explicit image must be returned even though the provenance points
    # at a region of page 1 with different content.
    explicit_image = PILImage.new(mode="RGB", size=(40, 80), color=(0, 255, 0))
    with_explicit = make_floating(
        prov=[
            ProvenanceItem(
                page_no=1, bbox=BoundingBox(l=2, t=4, r=6, b=12), charspan=(1, 2)
            )
        ],
        image=ImageRef.from_pil(image=explicit_image, dpi=72),
    )
    returned_image = with_explicit.get_image(doc=doc)
    assert returned_image is not None
    assert returned_image.tobytes() == explicit_image.tobytes()

    # No explicit image and no provenance.
    assert make_floating(prov=[], image=None).get_image(doc=doc) is None

    # No explicit image, provenance on a missing page (3), then on a page
    # without a page image (2).
    for page_no in (3, 2):
        prov = ProvenanceItem(
            page_no=page_no, bbox=Mock(spec=BoundingBox), charspan=(1, 2)
        )
        assert make_floating(prov=[prov], image=None).get_image(doc=doc) is None

    # No explicit image, provenance on the page with an image: the crop is
    # expected to be exactly the red patch pasted into the page image.
    from_page = make_floating(
        prov=[
            ProvenanceItem(
                page_no=1, bbox=BoundingBox(l=2, t=4, r=4, b=8), charspan=(1, 2)
            )
        ],
        image=None,
    )
    returned_image = from_page.get_image(doc=doc)
    assert returned_image is not None
    assert returned_image.tobytes() == expected_crop.tobytes()

0 commit comments

Comments
 (0)