
 @step(enable_cache=False)
 def evaluate_models(
-    model_results: Dict[str, pl.DataFrame],
+    model_results: pl.DataFrame,
     ground_truth_df: pl.DataFrame,
 ) -> Annotated[HTMLString, "ocr_visualization"]:
     """Compare the performance of multiple configurable models with visualization.
@@ -46,7 +46,7 @@ def evaluate_models(
     Returns:
         HTML visualization of the evaluation results
     """
-    if model_results is None or len(model_results.keys()) == 0:
+    if model_results is None or len(model_results.columns) == 0:
         raise ValueError("At least one model is required for evaluation")

     if ground_truth_df is None or ground_truth_df.is_empty():
@@ -55,7 +55,7 @@ def evaluate_models(
     gt_df = ground_truth_df

     # --- 2. Build model info for evaluation models ---
-    model_keys = list(model_results.keys())
+    model_keys = list(model_results.columns)
     model_info = {}
     model_displays = []
     model_prefixes = {}
@@ -210,7 +210,12 @@ def evaluate_models(
     time_comparison["time_difference"] = abs(all_model_times[tk1] - all_model_times[tk2])

     # Log metadata (customize the metadata_dict as needed)
-    log_metadata(metadata={"fastest_model": fastest_display, "model_count": len(model_keys)})
+    log_metadata(
+        metadata={
+            "fastest_model": fastest_display,
+            "model_count": len(model_keys),
+        }
+    )

     summary_html = create_summary_visualization(
         model_metrics=model_metric_averages,
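
The practical effect of the signature change is that the step now receives a single Polars DataFrame whose column names identify the models, rather than a Dict[str, pl.DataFrame] keyed by model name. Below is a minimal sketch of that input contract; the column names and sample values are hypothetical and not taken from the pipeline.

import polars as pl

# Hypothetical per-model OCR outputs stored as columns of one wide DataFrame;
# "model_a" and "model_b" are placeholder names, not models from the pipeline.
model_results = pl.DataFrame(
    {
        "model_a": ["invoice total: 42.10", "order no. 991"],
        "model_b": ["invoice total: 42.1O", "order no. 991"],
    }
)

# Under the old Dict[str, pl.DataFrame] signature the model identifiers came
# from the dict keys; with a single DataFrame they come from the column names.
model_keys = list(model_results.columns)  # ["model_a", "model_b"]

# The step's empty-input guard therefore checks the column list instead of keys().
if len(model_results.columns) == 0:
    raise ValueError("At least one model is required for evaluation")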