
Commit a066e1d

Authored by Christoph Auer
fix: Several fixes for visualization and cleanup. Add proper info() for DoclingPredictionProvider (#57)
* Lots of cleanup and fixes

Signed-off-by: Christoph Auer <[email protected]>

* Add predictor info for Docling

Signed-off-by: Christoph Auer <[email protected]>

---------

Signed-off-by: Christoph Auer <[email protected]>
1 parent aae6246 commit a066e1d

File tree

21 files changed: +63 -3754 lines

docling_eval/cli/main.py

Lines changed: 4 additions & 61 deletions
@@ -224,6 +224,10 @@ def get_prediction_provider(
     elif provider_type == PredictionProviderType.SMOLDOCLING:
         pipeline_options = VlmPipelineOptions()
 
+        pipeline_options.images_scale = 2.0
+        pipeline_options.generate_page_images = True
+        pipeline_options.generate_picture_images = True
+
         pipeline_options.vlm_options = smoldocling_vlm_conversion_options
         if sys.platform == "darwin":
             try:
@@ -276,67 +280,6 @@ def get_prediction_provider(
     raise ValueError(f"Unsupported prediction provider: {provider_type}")
 
 
-def create_datasets(
-    modality: EvaluationModality,
-    benchmark: BenchMarkNames,
-    output_dir: Path,
-    dataset_source: Optional[Path] = None,
-    split: str = "test",
-    begin_index: int = 0,
-    end_index: int = -1,
-    prediction_provider: Optional[PredictionProviderType] = None,
-    file_prediction_format: Optional[PredictionFormats] = None,
-    file_source_path: Optional[Path] = None,
-    debug: bool = False,
-):
-    """Create datasets using dataset builders and prediction providers."""
-    # Set up ground truth directory - matching test_dataset_builder.py layout
-    gt_dir = output_dir / "gt_dataset"
-
-    # Create dataset builder
-    try:
-        dataset_builder = get_dataset_builder(
-            benchmark=benchmark,
-            target=gt_dir,
-            split=split,
-            begin_index=begin_index,
-            end_index=end_index,
-            dataset_source=dataset_source,
-        )
-    except ValueError as e:
-        _log.error(f"Error creating dataset builder: {str(e)}")
-        return
-
-    # Retrieve and save the dataset
-    dataset_builder.retrieve_input_dataset()
-    dataset_builder.save_to_disk(chunk_size=80)
-
-    # If prediction provider is specified, create predictions
-    if prediction_provider:
-        # Set up eval dataset directory - matching test_dataset_builder.py layout
-        pred_dir = output_dir / "eval_dataset"
-
-        try:
-            # Create the appropriate prediction provider
-            provider = get_prediction_provider(
-                provider_type=prediction_provider,
-                file_source_path=file_source_path,
-                file_prediction_format=file_prediction_format,
-            )
-
-            # Create predictions
-            provider.create_prediction_dataset(
-                name=dataset_builder.name,
-                gt_dataset_dir=gt_dir,
-                target_dataset_dir=pred_dir,
-                split=split,
-                begin_index=begin_index,
-                end_index=end_index,
-            )
-        except ValueError as e:
-            _log.error(f"Error creating prediction provider: {str(e)}")
-
-
 def evaluate(
     modality: EvaluationModality,
     benchmark: BenchMarkNames,
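
Note on the removed create_datasets helper: callers that relied on it can reproduce the same two-step flow (build the ground-truth dataset, then optionally run a prediction provider) with the helpers that remain in the CLI module. The sketch below is assembled only from the calls visible in the deleted code; the wrapper name build_and_predict is hypothetical, and the import location is inferred from the file path in this diff, so adjust it to your docling_eval version.

from pathlib import Path
from typing import Optional

# Import location inferred from the file path shown above; these helpers may
# live elsewhere (or be re-exported) depending on the docling_eval version.
from docling_eval.cli.main import get_dataset_builder, get_prediction_provider


def build_and_predict(  # hypothetical wrapper name, not part of docling_eval
    benchmark,
    output_dir: Path,
    dataset_source: Optional[Path] = None,
    prediction_provider=None,
    split: str = "test",
    begin_index: int = 0,
    end_index: int = -1,
):
    # Ground-truth dataset, same on-disk layout as the removed helper used.
    gt_dir = output_dir / "gt_dataset"
    dataset_builder = get_dataset_builder(
        benchmark=benchmark,
        target=gt_dir,
        split=split,
        begin_index=begin_index,
        end_index=end_index,
        dataset_source=dataset_source,
    )
    dataset_builder.retrieve_input_dataset()
    dataset_builder.save_to_disk(chunk_size=80)

    # Optionally run a prediction provider over the ground truth.
    if prediction_provider is not None:
        provider = get_prediction_provider(provider_type=prediction_provider)
        provider.create_prediction_dataset(
            name=dataset_builder.name,
            gt_dataset_dir=gt_dir,
            target_dataset_dir=output_dir / "eval_dataset",
            split=split,
            begin_index=begin_index,
            end_index=end_index,
        )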

docling_eval/legacy/doclaynet_v1/__init__.py

Whitespace-only changes.
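
For context on the visualization part of this fix: the options added to the SMOLDOCLING branch of get_prediction_provider make converted documents carry rendered page and picture images (at 2x scale), which the visualization code can then draw on. Below is a minimal stand-alone sketch of those same options in a plain docling conversion; the converter wiring, the import paths, and the input file name are assumptions based on docling's public VLM pipeline API (which varies across docling versions) and are not part of this commit.

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    VlmPipelineOptions,
    smoldocling_vlm_conversion_options,
)
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# Same options the SMOLDOCLING provider now sets (see the hunk at line 224):
# generate page and picture images at 2x so visualizations have images to draw on.
pipeline_options = VlmPipelineOptions()
pipeline_options.images_scale = 2.0
pipeline_options.generate_page_images = True
pipeline_options.generate_picture_images = True
pipeline_options.vlm_options = smoldocling_vlm_conversion_options

# Illustrative wiring only; docling_eval builds this converter inside its
# prediction provider rather than exposing it directly.
converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)
result = converter.convert("sample.pdf")  # "sample.pdf" is a placeholder input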
