@@ -60,9 +60,7 @@ def spatialviz_doc_to_text(doc: Dict[str, Any]) -> str:
6060 return text
6161
6262
63- def spatialviz_process_results (
64- doc : Dict [str , Any ], results : List [str ]
65- ) -> Dict [str , Dict [str , Any ]]:
63+ def spatialviz_process_results (doc : Dict [str , Any ], results : List [str ]) -> Dict [str , Dict [str , Any ]]:
6664 key_name = "spatialviz_score"
6765 grounded_output = doc ["Answer" ]
6866 response = results [0 ]
@@ -149,16 +147,9 @@ def spatialviz_aggregate_results(results: List[Dict[str, Any]]) -> float:
149147 key_to_eval_samples [key ].append (0 )
150148
151149 accuracy = total_correct / total_samples if total_samples > 0 else 0
152- task_accuracies = {
153- task : sum (scores ) / len (scores ) for task , scores in task_to_eval_samples .items ()
154- }
155- category_accuracies = {
156- category : sum (scores ) / len (scores )
157- for category , scores in category_to_eval_samples .items ()
158- }
159- key_accuracies = {
160- key : sum (scores ) / len (scores ) for key , scores in key_to_eval_samples .items ()
161- }
150+ task_accuracies = {task : sum (scores ) / len (scores ) for task , scores in task_to_eval_samples .items ()}
151+ category_accuracies = {category : sum (scores ) / len (scores ) for category , scores in category_to_eval_samples .items ()}
152+ key_accuracies = {key : sum (scores ) / len (scores ) for key , scores in key_to_eval_samples .items ()}
162153
163154 eval_logger .info (f"{ 'Total Samples' :<20} : { total_samples } " )
164155 eval_logger .info (f"{ 'Total Correct' :<20} : { total_correct } " )
0 commit comments