Skip to content

Commit 72fb382

Browse files
authored
feat: Expose classification filters for picture description (#105)
Preserve legacy picture description filters.

Signed-off-by: drk <drukpa1455@gmail.com>
1 parent a242a66 commit 72fb382

File tree

2 files changed

+85
-2
lines changed

2 files changed

+85
-2
lines changed

docling_jobkit/datamodel/convert.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
DEFAULT_PAGE_RANGE,
4040
PageRange,
4141
)
42-
from docling_core.types.doc import ImageRefMode
42+
from docling_core.types.doc import ImageRefMode, PictureClassificationLabel
4343

4444

4545
class PictureDescriptionLocal(BaseModel):
@@ -70,6 +70,26 @@ class PictureDescriptionLocal(BaseModel):
7070
examples=[{"max_new_tokens": 200, "do_sample": False}],
7171
),
7272
] = {"max_new_tokens": 200, "do_sample": False}
73+
classification_allow: Annotated[
74+
Optional[list[PictureClassificationLabel]],
75+
Field(
76+
description="Only describe pictures whose predicted class is in this allow-list."
77+
),
78+
] = None
79+
classification_deny: Annotated[
80+
Optional[list[PictureClassificationLabel]],
81+
Field(
82+
description="Do not describe pictures whose predicted class is in this deny-list."
83+
),
84+
] = None
85+
classification_min_confidence: Annotated[
86+
float,
87+
Field(
88+
description="Minimum classification confidence required before a picture can be described.",
89+
ge=0.0,
90+
le=1.0,
91+
),
92+
] = 0.0
7393

7494

7595
class PictureDescriptionApi(BaseModel):
@@ -133,6 +153,26 @@ class PictureDescriptionApi(BaseModel):
133153
],
134154
),
135155
] = "Describe this image in a few sentences."
156+
classification_allow: Annotated[
157+
Optional[list[PictureClassificationLabel]],
158+
Field(
159+
description="Only describe pictures whose predicted class is in this allow-list."
160+
),
161+
] = None
162+
classification_deny: Annotated[
163+
Optional[list[PictureClassificationLabel]],
164+
Field(
165+
description="Do not describe pictures whose predicted class is in this deny-list."
166+
),
167+
] = None
168+
classification_min_confidence: Annotated[
169+
float,
170+
Field(
171+
description="Minimum classification confidence required before a picture can be described.",
172+
ge=0.0,
173+
le=1.0,
174+
),
175+
] = 0.0
136176

137177

138178
class VlmModelLocal(BaseModel):

tests/test_options.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
VlmPipelineOptions,
1414
)
1515
from docling.pipeline.vlm_pipeline import VlmPipeline
16-
from docling_core.types.doc import ImageRefMode
16+
from docling_core.types.doc import ImageRefMode, PictureClassificationLabel
1717

1818
from docling_jobkit.convert.manager import (
1919
DoclingConverterManager,
@@ -24,6 +24,7 @@
2424
from docling_jobkit.datamodel.convert import (
2525
ConvertDocumentsOptions,
2626
PictureDescriptionApi,
27+
PictureDescriptionLocal,
2728
)
2829

2930

@@ -281,6 +282,48 @@ def test_options_cache_key_with_presets():
281282
hashes.add(hash)
282283

283284

285+
def test_legacy_picture_description_filters_reach_pipeline_options():
286+
m = DoclingConverterManager(config=DoclingConverterManagerConfig())
287+
288+
with pytest.warns(DeprecationWarning):
289+
opts = ConvertDocumentsOptions(
290+
do_picture_description=True,
291+
picture_description_local=PictureDescriptionLocal(
292+
repo_id="HuggingFaceTB/SmolVLM-256M-Instruct",
293+
classification_allow=[PictureClassificationLabel.BAR_CHART],
294+
classification_deny=[PictureClassificationLabel.LOGO],
295+
classification_min_confidence=0.55,
296+
),
297+
)
298+
299+
pipeline_opts = m.get_pdf_pipeline_opts(opts)
300+
picture_opts = pipeline_opts.pipeline_options.picture_description_options
301+
302+
assert picture_opts.classification_allow == [PictureClassificationLabel.BAR_CHART]
303+
assert picture_opts.classification_deny == [PictureClassificationLabel.LOGO]
304+
assert picture_opts.classification_min_confidence == 0.55
305+
306+
307+
def test_legacy_picture_description_api_filters_reach_pipeline_options():
308+
m = DoclingConverterManager(config=DoclingConverterManagerConfig())
309+
310+
with pytest.warns(DeprecationWarning):
311+
opts = ConvertDocumentsOptions(
312+
do_picture_description=True,
313+
picture_description_api={
314+
"url": "http://localhost",
315+
"classification_allow": ["map"],
316+
"classification_min_confidence": 0.8,
317+
},
318+
)
319+
320+
pipeline_opts = m.get_pdf_pipeline_opts(opts)
321+
picture_opts = pipeline_opts.pipeline_options.picture_description_options
322+
323+
assert [label.value for label in picture_opts.classification_allow] == ["map"]
324+
assert picture_opts.classification_min_confidence == 0.8
325+
326+
284327
def test_image_pipeline_uses_vlm_pipeline_when_requested():
285328
m = DoclingConverterManager(config=DoclingConverterManagerConfig())
286329
opts = ConvertDocumentsOptions(pipeline=ProcessingPipeline.VLM)

0 commit comments

Comments
 (0)