@@ -2305,11 +2305,14 @@ def _calculate_aoai_evaluation_summary(aoai_results: list, logger: logging.Logge
23052305 logger .info (
23062306 f"Processing aoai_result with id: { getattr (aoai_result , 'id' , 'unknown' )} , row keys: { aoai_result .keys () if hasattr (aoai_result , 'keys' ) else 'N/A' } "
23072307 )
2308+ result_counts ["total" ] += 1
2309+ passed_count = 0
2310+ failed_count = 0
2311+ error_count = 0
23082312 if isinstance (aoai_result , dict ) and "results" in aoai_result :
23092313 logger .info (
23102314 f"Processing aoai_result with id: { getattr (aoai_result , 'id' , 'unknown' )} , results count: { len (aoai_result ['results' ])} "
23112315 )
2312- result_counts ["total" ] += len (aoai_result ["results" ])
23132316 for result_item in aoai_result ["results" ]:
23142317 if isinstance (result_item , dict ):
23152318 # Check if the result has a 'passed' field
@@ -2322,23 +2325,35 @@ def _calculate_aoai_evaluation_summary(aoai_results: list, logger: logging.Logge
23222325 "passed" : 0 ,
23232326 }
23242327 if result_item ["passed" ] is True :
2325- result_counts [ "passed" ] += 1
2328+ passed_count += 1
23262329 result_counts_stats [testing_criteria ]["passed" ] += 1
23272330
23282331 elif result_item ["passed" ] is False :
2329- result_counts [ "failed" ] += 1
2332+ failed_count += 1
23302333 result_counts_stats [testing_criteria ]["failed" ] += 1
23312334 # Check if the result indicates an error status
23322335 elif ("status" in result_item and result_item ["status" ] in ["error" , "errored" ]) or (
23332336 "sample" in result_item
23342337 and isinstance (result_item ["sample" ], dict )
23352338 and result_item ["sample" ].get ("error" , None ) is not None
23362339 ):
2337- result_counts [ "errored" ] += 1
2340+ error_count += 1
23382341 elif hasattr (aoai_result , "status" ) and aoai_result .status == "error" :
2339- result_counts [ "errored" ] += 1
2342+ error_count += 1
23402343 elif isinstance (aoai_result , dict ) and aoai_result .get ("status" ) == "error" :
2344+ error_count += 1
2345+
2346+ if error_count > 0 :
23412347 result_counts ["errored" ] += 1
2348+ elif failed_count > 0 :
2349+ result_counts ["failed" ] += 1
2350+ elif (
2351+ error_count == 0
2352+ and failed_count == 0
2353+ and passed_count > 0
2354+ and passed_count == len (aoai_result .get ("results" , []))
2355+ ):
2356+ result_counts ["passed" ] += 1
23422357
23432358 # Extract usage statistics from aoai_result.sample
23442359 sample_data_list = []
0 commit comments