Skip to content

Commit c2e9904

Browse files
authored
Jessli/update result convert to fix result_counts (#43694)
* update
* rename
* run black
* fix result counts
* update
1 parent 732bfba commit c2e9904

File tree

1 file changed

+20
-5
lines changed
  • sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate

1 file changed

+20
-5
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2305,11 +2305,14 @@ def _calculate_aoai_evaluation_summary(aoai_results: list, logger: logging.Logge
23052305
logger.info(
23062306
f"Processing aoai_result with id: {getattr(aoai_result, 'id', 'unknown')}, row keys: {aoai_result.keys() if hasattr(aoai_result, 'keys') else 'N/A'}"
23072307
)
2308+
result_counts["total"] += 1
2309+
passed_count = 0
2310+
failed_count = 0
2311+
error_count = 0
23082312
if isinstance(aoai_result, dict) and "results" in aoai_result:
23092313
logger.info(
23102314
f"Processing aoai_result with id: {getattr(aoai_result, 'id', 'unknown')}, results count: {len(aoai_result['results'])}"
23112315
)
2312-
result_counts["total"] += len(aoai_result["results"])
23132316
for result_item in aoai_result["results"]:
23142317
if isinstance(result_item, dict):
23152318
# Check if the result has a 'passed' field
@@ -2322,23 +2325,35 @@ def _calculate_aoai_evaluation_summary(aoai_results: list, logger: logging.Logge
23222325
"passed": 0,
23232326
}
23242327
if result_item["passed"] is True:
2325-
result_counts["passed"] += 1
2328+
passed_count += 1
23262329
result_counts_stats[testing_criteria]["passed"] += 1
23272330

23282331
elif result_item["passed"] is False:
2329-
result_counts["failed"] += 1
2332+
failed_count += 1
23302333
result_counts_stats[testing_criteria]["failed"] += 1
23312334
# Check if the result indicates an error status
23322335
elif ("status" in result_item and result_item["status"] in ["error", "errored"]) or (
23332336
"sample" in result_item
23342337
and isinstance(result_item["sample"], dict)
23352338
and result_item["sample"].get("error", None) is not None
23362339
):
2337-
result_counts["errored"] += 1
2340+
error_count += 1
23382341
elif hasattr(aoai_result, "status") and aoai_result.status == "error":
2339-
result_counts["errored"] += 1
2342+
error_count += 1
23402343
elif isinstance(aoai_result, dict) and aoai_result.get("status") == "error":
2344+
error_count += 1
2345+
2346+
if error_count > 0:
23412347
result_counts["errored"] += 1
2348+
elif failed_count > 0:
2349+
result_counts["failed"] += 1
2350+
elif (
2351+
error_count == 0
2352+
and failed_count == 0
2353+
and passed_count > 0
2354+
and passed_count == len(aoai_result.get("results", []))
2355+
):
2356+
result_counts["passed"] += 1
23422357

23432358
# Extract usage statistics from aoai_result.sample
23442359
sample_data_list = []

0 commit comments

Comments
 (0)