2828from dotenv import load_dotenv
2929from PIL import Image
3030
31- from pipelines .batch_pipeline import run_ocr_batch_pipeline
31+ from pipelines .batch_pipeline import run_batch_ocr_pipeline
3232from pipelines .evaluation_pipeline import run_ocr_evaluation_pipeline
3333from utils .config import (
3434 get_image_paths ,
3535 list_available_ground_truth_files ,
3636 load_config ,
37+ override_batch_config ,
38+ override_evaluation_config ,
3739 print_config_summary ,
40+ select_config_path ,
41+ validate_batch_config ,
42+ validate_evaluation_config ,
3843)
3944from utils .model_configs import DEFAULT_MODEL , MODEL_CONFIGS
4045from utils .ocr_processing import run_ocr
@@ -265,7 +270,7 @@ def run_ui_mode(args, parser):
265270
266271def run_pipeline_mode (args , parser ):
267272 """Run the application in full pipeline mode with ZenML tracking."""
268- # List available ground truth files
273+ # List available ground truth files if requested
269274 if args .list_ground_truth_files :
270275 gt_files = list_available_ground_truth_files (directory = args .ground_truth_dir )
271276 if gt_files :
@@ -276,65 +281,57 @@ def run_pipeline_mode(args, parser):
276281 print (f"No ground truth files found in '{ args .ground_truth_dir } '" )
277282 return
278283
279- # Load configuration
284+ # Determine pipeline mode and select config path
285+ evaluation_mode = args .eval
286+
280287 if args .config :
281- config = load_config ( args .config )
288+ config_path = args .config
282289 else :
283- parser .error ("Please provide a configuration file with --config" )
290+ config_path = select_config_path (evaluation_mode )
291+ print (f"Auto-selecting config file: { config_path } " )
292+
293+ if not os .path .exists (config_path ):
294+ parser .error (f"Config file not found: { config_path } " )
284295 return
285296
286- # Override config with CLI arguments if provided
287- if args .image_paths or args .image_folder or args .custom_prompt :
288- # Create parameters section if it doesn't exist
289- if "parameters" not in config :
290- config ["parameters" ] = {}
291-
292- # Update parameters with CLI arguments
293- if args .image_paths :
294- config ["parameters" ]["input_image_paths" ] = args .image_paths
295- if args .image_folder :
296- config ["parameters" ]["input_image_folder" ] = args .image_folder
297- if args .custom_prompt :
298- # Create steps section if needed
299- if "steps" not in config :
300- config ["steps" ] = {}
301- if "ocr_processor" not in config ["steps" ]:
302- config ["steps" ]["ocr_processor" ] = {"parameters" : {}}
303-
304- # Set custom prompt
305- config ["steps" ]["ocr_processor" ]["parameters" ]["custom_prompt" ] = args .custom_prompt
306-
307- print_config_summary (config )
308-
309- # Create output directories if needed
310- if (
311- "steps" in config
312- and "save_ocr_results" in config ["steps" ]
313- and config ["steps" ]["save_ocr_results" ].get ("parameters" , {}).get ("save_results" , False )
314- ):
315- results_dir = config ["steps" ]["save_ocr_results" ]["parameters" ].get (
316- "results_directory" , "ocr_results"
317- )
318- os .makedirs (results_dir , exist_ok = True )
319-
320- if (
321- "steps" in config
322- and "save_visualization" in config ["steps" ]
323- and config ["steps" ]["save_visualization" ].get ("parameters" , {}).get ("save_locally" , False )
324- ):
325- viz_dir = config ["steps" ]["save_visualization" ]["parameters" ].get (
326- "visualization_directory" , "visualizations"
327- )
328- os .makedirs (viz_dir , exist_ok = True )
297+ # Load the configuration
298+ try :
299+ config = load_config (config_path )
300+ except (ValueError , FileNotFoundError ) as e :
301+ parser .error (f"Error loading configuration: { str (e )} " )
302+ return
303+
304+ cli_args = {
305+ "image_paths" : args .image_paths ,
306+ "image_folder" : args .image_folder ,
307+ "custom_prompt" : args .custom_prompt ,
308+ "ground_truth_dir" : args .ground_truth_dir ,
309+ }
310+
311+ # Override configuration with CLI arguments if provided
312+ try :
313+ if evaluation_mode :
314+ config = override_evaluation_config (config , cli_args )
315+ validate_evaluation_config (config )
316+ else :
317+ config = override_batch_config (config , cli_args )
318+ validate_batch_config (config )
319+ except ValueError as e :
320+ parser .error (f"Configuration error: { str (e )} " )
321+ return
329322
330- # Run pipeline in specified mode
331- mode = config .get ("parameters" , {}).get ("mode" , "evaluation" )
332- if mode == "batch" :
333- print ("Running OCR Batch Pipeline..." )
334- run_ocr_batch_pipeline (config )
335- else : # Default to evaluation mode
336- print ("Running OCR Evaluation Pipeline..." )
337- run_ocr_evaluation_pipeline (config )
323+ print_config_summary (config , is_evaluation_config = evaluation_mode )
324+
325+ try :
326+ if evaluation_mode :
327+ print ("Running OCR Evaluation Pipeline..." )
328+ run_ocr_evaluation_pipeline (config )
329+ else :
330+ print ("Running OCR Batch Pipeline..." )
331+ run_batch_ocr_pipeline (config )
332+ except Exception as e :
333+ print (f"Error running pipeline: { str (e )} " )
334+ return
338335
339336
340337def main ():
@@ -355,14 +352,12 @@ def main():
355352 config_group .add_argument (
356353 "--config" ,
357354 type = str ,
358- default = "configs/config.yaml" ,
359355 help = "Path to YAML configuration file (for pipeline mode)" ,
360356 )
361357 config_group .add_argument (
362- "--create-default-config" ,
363- type = str ,
364- metavar = "PATH" ,
365- help = "Create a default configuration file at the specified path and exit" ,
358+ "--eval" ,
359+ action = "store_true" ,
360+ help = "Run in evaluation pipeline mode (defaults to batch pipeline if not specified)" ,
366361 )
367362
368363 # Ground truth utilities (pipeline mode)
0 commit comments