20 changes: 14 additions & 6 deletions docling_eval/cli/main.py
@@ -39,7 +39,7 @@
 from docling.datamodel.vlm_model_specs import (
     SMOLDOCLING_TRANSFORMERS as smoldocling_vlm_conversion_options,
 )
-from docling.document_converter import FormatOption, PdfFormatOption
+from docling.document_converter import FormatOption, ImageFormatOption, PdfFormatOption
 from docling.models.factories import get_ocr_factory
 from docling.pipeline.vlm_pipeline import VlmPipeline
 from PyPDF2 import PdfReader, PdfWriter
@@ -414,7 +414,7 @@ def get_prediction_provider(
         return DoclingPredictionProvider(
             format_options={
                 InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
-                InputFormat.IMAGE: PdfFormatOption(pipeline_options=pipeline_options),
+                InputFormat.IMAGE: ImageFormatOption(pipeline_options=pipeline_options),
             },
             do_visualization=do_visualization,
             ignore_missing_predictions=True,
@@ -444,7 +444,7 @@ def get_prediction_provider(
         return DoclingPredictionProvider(
             format_options={
                 InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
-                InputFormat.IMAGE: PdfFormatOption(pipeline_options=pipeline_options),
+                InputFormat.IMAGE: ImageFormatOption(pipeline_options=pipeline_options),
             },
             do_visualization=do_visualization,
             ignore_missing_predictions=True,
@@ -493,7 +493,7 @@ def get_prediction_provider(
         return DoclingPredictionProvider(
             format_options={
                 InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_pipeline_options),
-                InputFormat.IMAGE: PdfFormatOption(
+                InputFormat.IMAGE: ImageFormatOption(
                     pipeline_options=ocr_pipeline_options
                 ),
             },
@@ -528,10 +528,14 @@ def get_prediction_provider(
             pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
         )
 
+        image_format_option = ImageFormatOption(
+            pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
+        )
+
         return DoclingPredictionProvider(
             format_options={
                 InputFormat.PDF: pdf_format_option,
-                InputFormat.IMAGE: pdf_format_option,
+                InputFormat.IMAGE: image_format_option,
             },
             do_visualization=do_visualization,
             ignore_missing_predictions=True,
@@ -575,10 +579,14 @@ def get_prediction_provider(
             pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
         )
 
+        image_format_option = ImageFormatOption(
+            pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
+        )
+
         return DoclingPredictionProvider(
             format_options={
                 InputFormat.PDF: pdf_format_option,
-                InputFormat.IMAGE: pdf_format_option,
+                InputFormat.IMAGE: image_format_option,
             },
             do_visualization=do_visualization,
             ignore_missing_predictions=True,
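For context, a minimal sketch (not part of this PR) of why the InputFormat.IMAGE entry should map to ImageFormatOption rather than PdfFormatOption. It uses docling's DocumentConverter rather than the DoclingPredictionProvider shown above, and the pipeline option values and file name are illustrative assumptions.

# Hedged sketch: image inputs routed through ImageFormatOption, PDFs through PdfFormatOption.
# The pipeline options and the input file below are assumptions for illustration only.
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, ImageFormatOption, PdfFormatOption

pipeline_options = PdfPipelineOptions(do_ocr=True)  # assumed example options

converter = DocumentConverter(
    format_options={
        # PDFs keep the PDF-specific format option ...
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
        # ... while standalone images get the image-specific one,
        # mirroring the InputFormat.IMAGE fix in the diff above.
        InputFormat.IMAGE: ImageFormatOption(pipeline_options=pipeline_options),
    }
)

result = converter.convert("sample_page.png")  # hypothetical input file
print(result.document.export_to_markdown())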
45 changes: 39 additions & 6 deletions docling_eval/dataset_builders/file_dataset_builder.py
@@ -205,11 +205,44 @@ def iterate(self) -> Iterable[DatasetRecord]:
                 page_images_column=BenchMarkColumns.GROUNDTRUTH_PAGE_IMAGES.value,
             )
 
-            # Get source as binary data
-            source_bytes = get_binary(filename)
-            source_stream = DocumentStream(
-                name=filename.name, stream=BytesIO(source_bytes)
-            )
+            # Prepare source binary: for JSON inputs prefer image streams when page images exist
+            source_bytes: bytes
+            source_stream: DocumentStream
+            effective_mime_type = mime_type
+
+            if mime_type == "application/json" and len(true_page_images) > 0:
+                images_rgb = [
+                    img.convert("RGB") if img.mode != "RGB" else img
+                    for img in true_page_images
+                ]
+
+                if len(images_rgb) == 1:
+                    buffer = BytesIO()
+                    images_rgb[0].save(buffer, format="PNG")
+                    source_bytes = buffer.getvalue()
+                    source_stream = DocumentStream(
+                        name=f"{filename.stem}.png", stream=BytesIO(source_bytes)
+                    )
+                    effective_mime_type = "image/png"
+                else:
+                    buffer = BytesIO()
+                    images_rgb[0].save(
+                        buffer,
+                        format="TIFF",
+                        save_all=True,
+                        append_images=images_rgb[1:],
+                        compression="tiff_lzw",
+                    )
+                    source_bytes = buffer.getvalue()
+                    source_stream = DocumentStream(
+                        name=f"{filename.stem}.tiff", stream=BytesIO(source_bytes)
+                    )
+                    effective_mime_type = "image/tiff"
+            else:
+                source_bytes = get_binary(filename)
+                source_stream = DocumentStream(
+                    name=filename.name, stream=BytesIO(source_bytes)
+                )
 
             # Create dataset record
             record = DatasetRecord(
@@ -219,7 +252,7 @@ def iterate(self) -> Iterable[DatasetRecord]:
                 ground_truth_pictures=true_pictures,
                 ground_truth_page_images=true_page_images,
                 original=source_stream,
-                mime_type=mime_type,
+                mime_type=effective_mime_type,
             )
 
             yield record
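The added builder logic packages ground-truth page images in memory: a single page is encoded as PNG, multiple pages as a multi-page LZW-compressed TIFF. A standalone sketch of that idea, under the assumption that the inputs are PIL images; the helper name and signature are hypothetical, not part of this PR.

# Hedged sketch of the page-image packaging used above: one page -> PNG,
# several pages -> multi-page LZW-compressed TIFF, both kept in memory.
from io import BytesIO
from typing import List, Tuple

from PIL import Image


def pack_page_images(page_images: List[Image.Image], stem: str) -> Tuple[str, bytes]:
    """Return (file name, encoded bytes) for a list of ground-truth page images."""
    images_rgb = [img if img.mode == "RGB" else img.convert("RGB") for img in page_images]
    buffer = BytesIO()
    if len(images_rgb) == 1:
        images_rgb[0].save(buffer, format="PNG")
        return f"{stem}.png", buffer.getvalue()
    # Pillow writes a multi-page TIFF when save_all=True and append_images is given.
    images_rgb[0].save(
        buffer,
        format="TIFF",
        save_all=True,
        append_images=images_rgb[1:],
        compression="tiff_lzw",
    )
    return f"{stem}.tiff", buffer.getvalue()

A caller could then wrap the returned bytes in a DocumentStream(name=..., stream=BytesIO(data)) and set the matching image/png or image/tiff MIME type, as the builder does above.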