Skip to content

Commit 5473e07

Browse files
fix(cli): avoid generating images for non-image exports (#3127)
* fix(cli): avoid generating images for non-image exports
  Signed-off-by: Hassan Raza <raihassanraza10@gmail.com>
* test(cli): cover output image export policy
  Signed-off-by: Hassan Raza <raihassanraza10@gmail.com>
* fix(cli): use denylist for image export gating
  Signed-off-by: Hassan Raza <raihassanraza10@gmail.com>
* fix(cli): clarify image export mode help text
  Signed-off-by: Hassan Raza <raihassanraza10@gmail.com>
---------
Signed-off-by: Hassan Raza <raihassanraza10@gmail.com>
1 parent 9abf0fd commit 5473e07

File tree

2 files changed

+57
-3
lines changed

2 files changed

+57
-3
lines changed

docling/cli/main.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,25 @@ def _split_list(raw: str | None) -> list[str] | None:
370370
return re.split(r"[;,]", raw)
371371

372372

373+
# Denylist of output formats that do not export images (plain text, DocTags
# and WebVTT). Any format absent from this set is treated as image-capable.
_OUTPUT_FORMATS_NOT_SUPPORTING_IMAGE_EMBEDDING = frozenset(
    (OutputFormat.TEXT, OutputFormat.DOCTAGS, OutputFormat.VTT)
)
380+
381+
382+
def _should_generate_export_images(
    image_export_mode: ImageRefMode,
    to_formats: list[OutputFormat],
) -> bool:
    """Decide whether images need to be generated for the requested export.

    Args:
        image_export_mode: How images are to be represented in the output.
        to_formats: The output formats requested for this conversion.

    Returns:
        True when the mode is not ``PLACEHOLDER`` and at least one requested
        format is absent from the non-image denylist; False otherwise,
        including when ``to_formats`` is empty.
    """
    # Placeholder mode only marks image positions, so no images are needed.
    if image_export_mode == ImageRefMode.PLACEHOLDER:
        return False

    # A single image-capable target is enough to require image generation.
    for requested_format in to_formats:
        if requested_format not in _OUTPUT_FORMATS_NOT_SUPPORTING_IMAGE_EMBEDDING:
            return True
    return False
390+
391+
373392
@app.command(no_args_is_help=True)
374393
def convert( # noqa: C901
375394
input_sources: Annotated[
@@ -404,7 +423,7 @@ def convert( # noqa: C901
404423
ImageRefMode,
405424
typer.Option(
406425
...,
407-
help="Image export mode for the document (only in case of JSON, Markdown or HTML). With `placeholder`, only the position of the image is marked in the output. In `embedded` mode, the image is embedded as base64 encoded string. In `referenced` mode, the image is exported in PNG format and referenced from the main exported document.",
426+
help="Image export mode for image-capable document outputs (JSON, YAML, HTML, HTML split-page, and Markdown). Text, DocTags, and WebVTT outputs do not export images. With `placeholder`, only the position of the image is marked in the output. In `embedded` mode, the image is embedded as base64 encoded string. In `referenced` mode, the image is exported in PNG format and referenced from the main exported document.",
408427
),
409428
] = ImageRefMode.EMBEDDED,
410429
pipeline: Annotated[
@@ -750,7 +769,10 @@ def convert( # noqa: C901
750769
)
751770
pipeline_options.table_structure_options.mode = table_mode
752771

753-
if image_export_mode != ImageRefMode.PLACEHOLDER:
772+
if _should_generate_export_images(
773+
image_export_mode,
774+
to_formats,
775+
):
754776
pipeline_options.generate_page_images = True
755777
pipeline_options.generate_picture_images = (
756778
True # FIXME: to be deprecated in version 3

tests/test_cli.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
from pathlib import Path
22

3+
import pytest
4+
from docling_core.types.doc import ImageRefMode
35
from typer.testing import CliRunner
46

5-
from docling.cli.main import app
7+
from docling.cli.main import _should_generate_export_images, app
8+
from docling.datamodel.base_models import OutputFormat
69

710
runner = CliRunner()
811

@@ -27,6 +30,35 @@ def test_cli_convert(tmp_path):
2730
assert converted.exists()
2831

2932

33+
@pytest.mark.parametrize(
    "image_export_mode, to_formats, expected",
    [
        # Placeholder mode never needs images, even for an image-capable format.
        (ImageRefMode.PLACEHOLDER, [OutputFormat.JSON], False),
        # Only denylisted formats requested: nothing can carry an image.
        (ImageRefMode.EMBEDDED, [OutputFormat.TEXT, OutputFormat.DOCTAGS], False),
        # A single image-capable format.
        (ImageRefMode.EMBEDDED, [OutputFormat.MARKDOWN], True),
        # Mixed request: one image-capable format is sufficient.
        (ImageRefMode.EMBEDDED, [OutputFormat.TEXT, OutputFormat.MARKDOWN], True),
    ],
)
def test_should_generate_export_images(image_export_mode, to_formats, expected):
    """Check the image-generation decision for each mode/format combination."""
    decision = _should_generate_export_images(image_export_mode, to_formats)
    assert decision is expected
48+
49+
50+
def test_image_export_policy_covers_all_output_formats():
    """Pin the image-export policy for every member of ``OutputFormat``.

    The expected denylist is spelled out here and compared against the one
    used by the CLI, so changing the production set requires a deliberate
    update to this test. Each output format is then checked individually
    against ``_should_generate_export_images``.
    """
    # Imported locally so this test stands alone without touching the
    # module-level import block.
    from docling.cli.main import _OUTPUT_FORMATS_NOT_SUPPORTING_IMAGE_EMBEDDING

    non_image_export_formats = {
        OutputFormat.TEXT,
        OutputFormat.DOCTAGS,
        OutputFormat.VTT,
    }

    # Compare against the real denylist; asserting only on a locally built
    # set and its complement would pass regardless of the production value.
    assert _OUTPUT_FORMATS_NOT_SUPPORTING_IMAGE_EMBEDDING == non_image_export_formats
    assert non_image_export_formats <= set(OutputFormat)

    # Every format must fall on the expected side of the policy.
    for output_format in OutputFormat:
        expected = output_format not in non_image_export_formats
        decision = _should_generate_export_images(
            ImageRefMode.EMBEDDED,
            [output_format],
        )
        assert decision is expected, output_format
60+
61+
3062
def test_cli_audio_auto_detection(tmp_path):
3163
"""Test that CLI automatically detects audio files and sets ASR pipeline."""
3264
from docling.datamodel.base_models import FormatToExtensions, InputFormat

0 commit comments

Comments
 (0)