@@ -224,6 +224,10 @@ def get_prediction_provider(
224224 elif provider_type == PredictionProviderType .SMOLDOCLING :
225225 pipeline_options = VlmPipelineOptions ()
226226
227+ pipeline_options .images_scale = 2.0
228+ pipeline_options .generate_page_images = True
229+ pipeline_options .generate_picture_images = True
230+
227231 pipeline_options .vlm_options = smoldocling_vlm_conversion_options
228232 if sys .platform == "darwin" :
229233 try :
@@ -276,67 +280,6 @@ def get_prediction_provider(
276280 raise ValueError (f"Unsupported prediction provider: { provider_type } " )
277281
278282
279- def create_datasets (
280- modality : EvaluationModality ,
281- benchmark : BenchMarkNames ,
282- output_dir : Path ,
283- dataset_source : Optional [Path ] = None ,
284- split : str = "test" ,
285- begin_index : int = 0 ,
286- end_index : int = - 1 ,
287- prediction_provider : Optional [PredictionProviderType ] = None ,
288- file_prediction_format : Optional [PredictionFormats ] = None ,
289- file_source_path : Optional [Path ] = None ,
290- debug : bool = False ,
291- ):
292- """Create datasets using dataset builders and prediction providers."""
293- # Set up ground truth directory - matching test_dataset_builder.py layout
294- gt_dir = output_dir / "gt_dataset"
295-
296- # Create dataset builder
297- try :
298- dataset_builder = get_dataset_builder (
299- benchmark = benchmark ,
300- target = gt_dir ,
301- split = split ,
302- begin_index = begin_index ,
303- end_index = end_index ,
304- dataset_source = dataset_source ,
305- )
306- except ValueError as e :
307- _log .error (f"Error creating dataset builder: { str (e )} " )
308- return
309-
310- # Retrieve and save the dataset
311- dataset_builder .retrieve_input_dataset ()
312- dataset_builder .save_to_disk (chunk_size = 80 )
313-
314- # If prediction provider is specified, create predictions
315- if prediction_provider :
316- # Set up eval dataset directory - matching test_dataset_builder.py layout
317- pred_dir = output_dir / "eval_dataset"
318-
319- try :
320- # Create the appropriate prediction provider
321- provider = get_prediction_provider (
322- provider_type = prediction_provider ,
323- file_source_path = file_source_path ,
324- file_prediction_format = file_prediction_format ,
325- )
326-
327- # Create predictions
328- provider .create_prediction_dataset (
329- name = dataset_builder .name ,
330- gt_dataset_dir = gt_dir ,
331- target_dataset_dir = pred_dir ,
332- split = split ,
333- begin_index = begin_index ,
334- end_index = end_index ,
335- )
336- except ValueError as e :
337- _log .error (f"Error creating prediction provider: { str (e )} " )
338-
339-
340283def evaluate (
341284 modality : EvaluationModality ,
342285 benchmark : BenchMarkNames ,
0 commit comments