docling-project
diff --git a/‎docling_core/transforms/visualizer/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎docling_core/transforms/visualizer/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docling_core/transforms/visualizer/base.py‎
Lines changed: 23 additions & 0 deletions b/‎docling_core/transforms/visualizer/base.py‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎docling_core/transforms/visualizer/layout_visualizer.py‎
Lines changed: 201 additions & 0 deletions b/‎docling_core/transforms/visualizer/layout_visualizer.py‎
Lines changed: 201 additions & 0 deletions
diff --git a/‎docling_core/transforms/visualizer/reading_order_visualizer.py‎
Lines changed: 149 additions & 0 deletions b/‎docling_core/transforms/visualizer/reading_order_visualizer.py‎
Lines changed: 149 additions & 0 deletions
@@ -0,0 +1 @@
+"""Define the visualizer types."""
@@ -0,0 +1,23 @@
+"""Define base classes for visualization."""
+
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from PIL.Image import Image
+from pydantic import BaseModel
+
+from docling_core.types.doc import DoclingDocument
+
+
+class BaseVisualizer(BaseModel, ABC):
+    """Visualize base class."""
+
+    # Abstract pydantic model: concrete visualizers subclass it and implement
+    # get_visualization().
+
+    @abstractmethod
+    def get_visualization(
+        self,
+        *,
+        doc: DoclingDocument,
+        **kwargs,
+    ) -> dict[Optional[int], Image]:
+        """Get visualization of the document as images by page.
+
+        Args:
+            doc: Document to visualize (keyword-only).
+            **kwargs: Extra options; their meaning is left to the implementation.
+
+        Returns:
+            A dict mapping a page key (``int`` or ``None``) to a PIL image.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError()
@@ -0,0 +1,201 @@
+"""Define classes for layout visualization."""
+
+from copy import deepcopy
+from typing import Literal, Optional, Union
+
+from PIL import ImageDraw, ImageFont
+from PIL.Image import Image
+from PIL.ImageFont import FreeTypeFont
+from pydantic import BaseModel
+from typing_extensions import override
+
+from docling_core.transforms.visualizer.base import BaseVisualizer
+from docling_core.types.doc import DocItemLabel
+from docling_core.types.doc.base import CoordOrigin
+from docling_core.types.doc.document import ContentLayer, DocItem, DoclingDocument
+from docling_core.types.doc.page import BoundingRectangle, TextCell
+
+
+class _TLBoundingRectangle(BoundingRectangle):
+    # Bounding rectangle whose coordinate origin is restricted to TOPLEFT: the
+    # Literal type admits no other value and TOPLEFT is also the default.
+    coord_origin: Literal[CoordOrigin.TOPLEFT] = CoordOrigin.TOPLEFT
+
+
+class _TLTextCell(TextCell):
+    # Text cell whose rectangle is narrowed to the top-left-origin variant.
+    rect: _TLBoundingRectangle
+
+
+class _TLCluster(BaseModel):
+    # One labelled layout box, expressed in top-left-origin coordinates.
+    id: int  # identifier chosen by whoever builds the cluster
+    label: DocItemLabel  # document item label attached to the box
+    brec: _TLBoundingRectangle  # outline of the box
+    confidence: float = 1.0  # defaults to 1.0 when not supplied
+    cells: list[_TLTextCell] = []  # text cells attached to the box
+    children: list["_TLCluster"] = []  # nested clusters of this cluster
+
+
+class LayoutVisualizer(BaseVisualizer):
+    """Layout visualizer."""
+
+    # Draws one colour-coded (and optionally labelled) rectangle per document
+    # item onto the page images. When `base_visualizer` is set, its output images
+    # are used as the canvas, so visualizers can be stacked.
+
+    class Params(BaseModel):
+        """Layout visualization parameters."""
+
+        # When True, every box is annotated with "<LABEL> (<confidence>)".
+        show_label: bool = True
+
+    base_visualizer: Optional[BaseVisualizer] = None
+    params: Params = Params()
+
+    def _draw_clusters(
+        self, image: Image, clusters: list[_TLCluster], scale_x: float, scale_y: float
+    ) -> None:
+        """Draw clusters on an image.
+
+        Args:
+            image: Image to draw on; it is modified in place.
+            clusters: Top-level clusters to draw. Each one is drawn together
+                with its direct children.
+            scale_x: Factor applied to x coordinates to obtain image pixels.
+            scale_y: Factor applied to y coordinates to obtain image pixels.
+        """
+        # The fills below carry an alpha value, hence the "RGBA" drawing mode
+        draw = ImageDraw.Draw(image, "RGBA")
+        # Create a smaller font for the labels
+        font: Union[ImageFont.ImageFont, FreeTypeFont]
+        try:
+            font = ImageFont.truetype("arial.ttf", 12)
+        except OSError:
+            # Fallback to default font if arial is not available
+            font = ImageFont.load_default()
+
+        cell_color = (0, 0, 0, 40)  # Transparent black for cells
+        text_bg_padding = 2
+        for c_tl in clusters:
+            for c in (c_tl, *c_tl.children):
+                # Draw cells first (underneath)
+                for tc in c.cells:
+                    cx0, cy0, cx1, cy1 = tc.rect.to_bounding_box().as_tuple()
+                    draw.rectangle(
+                        [
+                            (cx0 * scale_x, cy0 * scale_y),
+                            (cx1 * scale_x, cy1 * scale_y),
+                        ],
+                        outline=None,
+                        fill=cell_color,
+                    )
+
+                # Draw cluster rectangle
+                x0, y0, x1, y1 = c.brec.to_bounding_box().as_tuple()
+                x0 *= scale_x
+                x1 *= scale_x
+                y0 *= scale_y
+                y1 *= scale_y
+
+                # Same label colour for both: translucent fill, opaque outline
+                rgb = DocItemLabel.get_color(c.label)
+                draw.rectangle(
+                    [(x0, y0), (x1, y1)],
+                    outline=(*rgb, 255),
+                    fill=(*rgb, 70),
+                )
+
+                if not self.params.show_label:
+                    continue
+
+                # Add label name and confidence
+                label_text = f"{c.label.name} ({c.confidence:.2f})"
+                # Create semi-transparent background for text
+                text_bbox = draw.textbbox((x0, y0), label_text, font=font)
+                draw.rectangle(
+                    [
+                        (
+                            text_bbox[0] - text_bg_padding,
+                            text_bbox[1] - text_bg_padding,
+                        ),
+                        (
+                            text_bbox[2] + text_bg_padding,
+                            text_bbox[3] + text_bg_padding,
+                        ),
+                    ],
+                    fill=(255, 255, 255, 180),  # Semi-transparent white
+                )
+                # Draw text
+                draw.text(
+                    (x0, y0),
+                    label_text,
+                    fill=(0, 0, 0, 255),  # Solid black
+                    font=font,
+                )
+
+    def _draw_doc_layout(
+        self, doc: DoclingDocument, images: Optional[dict[Optional[int], Image]] = None
+    ):
+        """Draw the layout clusters of the document onto its page images.
+
+        Only the first provenance of each item is visualized.
+
+        Args:
+            doc: Document whose body and furniture items are visualized.
+            images: Optional images by page number to draw on (they are
+                modified in place). Pages without an entry use a copy of the
+                page image stored in ``doc``.
+
+        Returns:
+            The images by page number: the entries passed in via ``images``
+            plus one entry for every other page that has a drawable item.
+
+        Raises:
+            RuntimeError: If a needed page has neither a supplied image nor an
+                image stored in ``doc``.
+        """
+        my_images = images or {}
+        # Group the clusters per page before drawing, so the result does not
+        # depend on the items being iterated in ascending page order.
+        clusters_by_page: dict[int, list[_TLCluster]] = {}
+        for idx, (elem, _) in enumerate(
+            doc.iterate_items(
+                included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE}
+            )
+        ):
+            if not isinstance(elem, DocItem) or not elem.prov:
+                continue  # Skip non-items and elements without provenances
+            prov = elem.prov[0]
+            page_nr = prov.page_no
+
+            if my_images.get(page_nr) is None:
+                page_image = doc.pages[page_nr].image
+                if page_image is None or (pil_img := page_image.pil_image) is None:
+                    raise RuntimeError("Cannot visualize document without images")
+                # Draw on a copy so the image stored in the document is untouched
+                my_images[page_nr] = deepcopy(pil_img)
+
+            tlo_bbox = prov.bbox.to_top_left_origin(
+                page_height=doc.pages[page_nr].size.height
+            )
+            clusters_by_page.setdefault(page_nr, []).append(
+                _TLCluster(
+                    id=idx,
+                    label=elem.label,
+                    brec=_TLBoundingRectangle.from_bounding_box(bbox=tlo_bbox),
+                    cells=[],
+                )
+            )
+
+        for page_nr, clusters in clusters_by_page.items():
+            image = my_images[page_nr]
+            page_size = doc.pages[page_nr].size
+            # Boxes are in page units; scale them to the pixel size of the image
+            self._draw_clusters(
+                image=image,
+                clusters=clusters,
+                scale_x=image.width / page_size.width,
+                scale_y=image.height / page_size.height,
+            )
+
+        return my_images
+
+    @override
+    def get_visualization(
+        self,
+        *,
+        doc: DoclingDocument,
+        **kwargs,
+    ) -> dict[Optional[int], Image]:
+        """Get visualization of the document as images by page.
+
+        Args:
+            doc: Document to visualize.
+            **kwargs: Forwarded unchanged to ``base_visualizer`` when one is set;
+                otherwise unused.
+
+        Returns:
+            The page images with the layout boxes drawn on them, keyed by page
+            number.
+        """
+        base_images = (
+            self.base_visualizer.get_visualization(doc=doc, **kwargs)
+            if self.base_visualizer
+            else None
+        )
+        return self._draw_doc_layout(
+            doc=doc,
+            images=base_images,
+        )
@@ -0,0 +1,149 @@
+"""Define classes for reading order visualization."""
+
+from copy import deepcopy
+from typing import Optional
+
+from PIL import ImageDraw
+from PIL.Image import Image
+from typing_extensions import override
+
+from docling_core.transforms.visualizer.base import BaseVisualizer
+from docling_core.types.doc.document import ContentLayer, DocItem, DoclingDocument
+
+
+class ReadingOrderVisualizer(BaseVisualizer):
+    """Reading order visualizer."""
+
+    # Connects the centres of consecutive document items on a page with red
+    # arrows. When `base_visualizer` is set, its output images are used as the
+    # canvas, so visualizers can be stacked.
+
+    base_visualizer: Optional[BaseVisualizer] = None
+
+    def _draw_arrow(
+        self,
+        draw: ImageDraw.ImageDraw,
+        arrow_coords: tuple[float, float, float, float],
+        line_width: int = 2,
+        color: str = "red",
+    ):
+        """Draw an arrow inside the given draw object.
+
+        Args:
+            draw: Drawing context to draw on.
+            arrow_coords: ``(x0, y0, x1, y1)``; the arrow runs from ``(x0, y0)``
+                to ``(x1, y1)`` and the head sits at ``(x1, y1)``.
+            line_width: Width of the shaft line.
+            color: Colour of shaft and head.
+
+        Returns:
+            The ``draw`` object that was passed in.
+        """
+        x0, y0, x1, y1 = arrow_coords
+        start_point = (x0, y0)
+        end_point = (x1, y1)
+        arrowhead_length = 20  # Distance from the tip back to the head's base
+        arrowhead_half_width = 10  # Offset of each base corner from the shaft
+
+        # Draw the arrow shaft (line)
+        draw.line([start_point, end_point], fill=color, width=line_width)
+
+        dx = x1 - x0
+        dy = y1 - y0
+        # Shaft length; the small offset avoids a division by zero when start
+        # and end coincide
+        shaft_length = (dx**2 + dy**2) ** 0.5 + 0.01
+
+        # Normalized direction vector for the arrow shaft
+        ux, uy = dx / shaft_length, dy / shaft_length
+
+        # Base of the arrowhead
+        base_x = x1 - ux * arrowhead_length
+        base_y = y1 - uy * arrowhead_length
+
+        # Base corners: offset from the base along the shaft's normal (-uy, ux)
+        left = (base_x - uy * arrowhead_half_width, base_y + ux * arrowhead_half_width)
+        right = (base_x + uy * arrowhead_half_width, base_y - ux * arrowhead_half_width)
+
+        # Draw the arrowhead (triangle)
+        draw.polygon([end_point, left, right], fill=color)
+        return draw
+
+    def _draw_doc_reading_order(
+        self,
+        doc: DoclingDocument,
+        images: Optional[dict[Optional[int], Image]] = None,
+    ):
+        """Draw the reading order.
+
+        Only the first provenance of each item is used. An arrow is drawn from
+        the centre of each item to the centre of the next item as long as both
+        are on the same page; the chain restarts whenever the page changes.
+
+        Args:
+            doc: Document whose body and furniture items are visualized.
+            images: Optional images by page number to draw on (they are
+                modified in place). Pages without an entry use a copy of the
+                page image stored in ``doc``.
+
+        Returns:
+            The images by page number: the entries passed in via ``images``
+            plus one entry for every other page that has a visited item.
+
+        Raises:
+            RuntimeError: If a needed page has neither a supplied image nor an
+                image stored in ``doc``.
+        """
+        my_images: dict[Optional[int], Image] = images or {}
+        prev_page: Optional[int] = None
+        prev_center: Optional[tuple[float, float]] = None
+        for elem, _ in doc.iterate_items(
+            included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE},
+        ):
+            if not isinstance(elem, DocItem) or not elem.prov:
+                continue  # Skip non-items and elements without provenances
+            prov = elem.prov[0]
+            page_no = prov.page_no
+            image = my_images.get(page_no)
+
+            # Any page change starts a new arrow chain, including a return to an
+            # earlier page; otherwise an arrow would start from coordinates that
+            # belong to a different page.
+            if image is None or page_no != prev_page:
+                prev_page = page_no
+                prev_center = None
+
+                if image is None:
+                    page_image = doc.pages[page_no].image
+                    if page_image is None or (pil_img := page_image.pil_image) is None:
+                        raise RuntimeError("Cannot visualize document without images")
+                    # Draw on a copy so the image stored in the document is
+                    # untouched
+                    image = deepcopy(pil_img)
+                    my_images[page_no] = image
+
+            # Bring the box into top-left-origin pixel coordinates of the image
+            page_size = doc.pages[page_no].size
+            tlo_bbox = prov.bbox.to_top_left_origin(page_height=page_size.height)
+            ro_bbox = tlo_bbox.normalized(page_size)
+            left = round(ro_bbox.l * image.width)
+            right = round(ro_bbox.r * image.width)
+            top = round(ro_bbox.t * image.height)
+            bottom = round(ro_bbox.b * image.height)
+            center = ((left + right) / 2.0, (top + bottom) / 2.0)
+
+            if prev_center is not None:
+                self._draw_arrow(
+                    draw=ImageDraw.Draw(image),
+                    arrow_coords=(prev_center[0], prev_center[1], center[0], center[1]),
+                    line_width=2,
+                    color="red",
+                )
+            prev_center = center
+        return my_images
+
+    @override
+    def get_visualization(
+        self,
+        *,
+        doc: DoclingDocument,
+        **kwargs,
+    ) -> dict[Optional[int], Image]:
+        """Get visualization of the document as images by page.
+
+        Args:
+            doc: Document to visualize.
+            **kwargs: Forwarded unchanged to ``base_visualizer`` when one is set;
+                otherwise unused.
+
+        Returns:
+            The page images with the reading-order arrows drawn on them, keyed
+            by page number.
+        """
+        base_images = (
+            self.base_visualizer.get_visualization(doc=doc, **kwargs)
+            if self.base_visualizer
+            else None
+        )
+        return self._draw_doc_reading_order(
+            doc=doc,
+            images=base_images,
+        )