Skip to content

Commit 47a0d8d

Browse files
committed
Update counts to ignore non-primary metrics when an evaluator reports multiple metrics
1 parent b778e1a commit 47a0d8d

File tree

2 files changed

+33
-2
lines changed

2 files changed

+33
-2
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_constants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ class _EvaluatorMetricMapping:
9797
"""
9898
Static mapping of evaluator names to their metric names, based on assets.json.
9999
The 'builtin.' prefix is removed from the evaluator name keys.
100+
If an evaluator maps to multiple metrics, all metric names are included in the list, and the first one is considered the primary metric.
100101
"""
101102

102103
EVALUATOR_NAME_METRICS_MAPPINGS = {
@@ -119,7 +120,7 @@ class _EvaluatorMetricMapping:
119120
"meteor_score": ["meteor"],
120121
"relevance": ["relevance"],
121122
"response_completeness": ["response_completeness"],
122-
"rouge_score": ["rouge_precision", "rouge_recall", "rouge_f1_score"],
123+
"rouge_score": ["rouge_f1_score", "rouge_precision", "rouge_recall"],
123124
"groundedness_pro": ["groundedness_pro"],
124125
"similarity": ["similarity"],
125126
"intent_resolution": ["intent_resolution"],

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2314,6 +2314,29 @@ def _get_metric_from_criteria(testing_criteria_name: str, metric_key: str, metri
23142314
return metric
23152315

23162316

2317+
def _is_primary_metric(metric_name: str, testing_criteria: str) -> bool:
    """
    Check whether the given metric is the primary metric for a testing criteria.

    When an evaluator (testing criteria) maps to multiple metrics in
    ``_EvaluatorMetricMapping.EVALUATOR_NAME_METRICS_MAPPINGS``, only the first
    metric in the list is considered primary. Missing/NaN inputs, unknown
    criteria, single-metric evaluators, and metrics not present in the mapping
    are all treated as primary (i.e. this function returns True), matching the
    permissive default of the original logic.

    :param metric_name: The name of the metric.
    :type metric_name: str
    :param testing_criteria: The evaluator (testing criteria) name used to look
        up the metric mapping.
    :type testing_criteria: str
    :return: True if the metric is a primary metric, False otherwise.
    :rtype: bool
    """
    # Permissive defaults: anything we cannot positively classify as a
    # non-primary metric counts as primary.
    if _is_none_or_nan(metric_name) or _is_none_or_nan(testing_criteria):
        return True

    # Hoist the repeated mapping lookup; .get avoids a double dict access.
    metrics = _EvaluatorMetricMapping.EVALUATOR_NAME_METRICS_MAPPINGS.get(testing_criteria)
    if not isinstance(metrics, list) or len(metrics) <= 1:
        return True
    if metric_name not in metrics:
        return True

    # Multi-metric evaluator: only the first listed metric is primary
    # (case-insensitive comparison, as in the original condition).
    return metric_name.lower() == metrics[0].lower()
2338+
2339+
23172340
def _calculate_aoai_evaluation_summary(aoai_results: list, logger: logging.Logger) -> Dict[str, Any]:
23182341
"""
23192342
Calculate summary statistics for AOAI evaluation results.
@@ -2343,10 +2366,17 @@ def _calculate_aoai_evaluation_summary(aoai_results: list, logger: logging.Logge
23432366
f"Processing aoai_result with id: {getattr(aoai_result, 'id', 'unknown')}, results count: {len(aoai_result['results'])}"
23442367
)
23452368
for result_item in aoai_result["results"]:
2369+
is_primary_metric = True
23462370
if isinstance(result_item, dict):
2371+
testing_criteria = result_item.get("name", "")
2372+
is_primary_metric = _is_primary_metric(result_item.get("metric", ""), testing_criteria)
2373+
if not is_primary_metric:
2374+
logger.info(
2375+
f"Skip counts for non-primary metric for testing_criteria: {testing_criteria}, metric: {result_item.get('metric', '')}"
2376+
)
2377+
continue
23472378
# Check if the result has a 'passed' field
23482379
if "passed" in result_item and result_item["passed"] is not None:
2349-
testing_criteria = result_item.get("name", "")
23502380
if testing_criteria not in result_counts_stats:
23512381
result_counts_stats[testing_criteria] = {
23522382
"testing_criteria": testing_criteria,

0 commit comments

Comments
 (0)