Commit 06e2602

[evaluation] Change ECI to eci (#37489)
* Remove Optional type annotation from get() methods
* Remove duplicate overloads
* Include xpia in handled_metrics for evaluation aggregation
* Update output.py
* Change 'ECI' to 'eci'
* Revert "Include xpia in handled_metrics for evaluation aggregation" (reverts commit 8629de8)
1 parent: 9da614d
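The user-visible effect of this commit is that the ECI evaluator's result keys change from "ECI_label"/"ECI_reason" to "eci_label"/"eci_reason", in line with every other metric. For results saved by older versions, a small bridge like the following hypothetical helper (mine, not part of the SDK) keeps reads working across both spellings:

def get_eci_fields(result: dict):
    """Read the ECI label/reason from either the old or the new key spelling."""
    label = result.get("eci_label", result.get("ECI_label"))
    reason = result.get("eci_reason", result.get("ECI_reason", ""))
    return label, reason

assert get_eci_fields({"ECI_label": "False", "ECI_reason": "r"}) == ("False", "r")
assert get_eci_fields({"eci_label": "False", "eci_reason": "r"}) == ("False", "r")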

File tree: 4 files changed, +13 -27 lines


sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py

Lines changed: 5 additions & 19 deletions
@@ -217,41 +217,27 @@ def parse_response(  # pylint: disable=too-many-branches,too-many-statements
         response = response.replace("true", "True")
         parsed_response = literal_eval(response)
         result = {}
-        metric_prefix = _get_metric_prefix(metric_name)
         # Use label instead of score since these are assumed to be boolean results.
         # Use np.nan as null value since it's ignored by aggregations rather than treated as 0.
-        result[metric_prefix + "_label"] = parsed_response["label"] if "label" in parsed_response else np.nan
-        result[metric_prefix + "_reason"] = parsed_response["reasoning"] if "reasoning" in parsed_response else ""
+        result[metric_name + "_label"] = parsed_response["label"] if "label" in parsed_response else np.nan
+        result[metric_name + "_reason"] = parsed_response["reasoning"] if "reasoning" in parsed_response else ""

         if metric_name == EvaluationMetrics.XPIA:
             # Add "manipulated_content", "intrusion" and "information_gathering" to the result
             # if present else set them to np.nan
-            result[metric_prefix + "_manipulated_content"] = (
+            result[metric_name + "_manipulated_content"] = (
                 parsed_response["manipulated_content"] if "manipulated_content" in parsed_response else np.nan
             )
-            result[metric_prefix + "_intrusion"] = (
+            result[metric_name + "_intrusion"] = (
                 parsed_response["intrusion"] if "intrusion" in parsed_response else np.nan
             )
-            result[metric_prefix + "_information_gathering"] = (
+            result[metric_name + "_information_gathering"] = (
                 parsed_response["information_gathering"] if "information_gathering" in parsed_response else np.nan
             )
         return result
     return _parse_content_harm_response(batch_response, metric_name)


-def _get_metric_prefix(metric_name: str) -> str:
-    """Get the prefix for the evaluation metric. This is usually the metric name.
-
-    :param metric_name: The evaluation metric to use.
-    :type metric_name: str
-    :return: The prefix for the evaluation metric.
-    :rtype: str
-    """
-    if metric_name == _InternalEvaluationMetrics.ECI:
-        return "ECI"
-    return metric_name
-
-
 def _parse_content_harm_response(batch_response: List[Dict], metric_name: str) -> Dict:
     """Parse the annotation response from Responsible AI service for a content harm evaluation.
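The deleted helper existed only to special-case ECI: it returned the uppercase prefix "ECI" for that one metric and the metric name unchanged for everything else. With it gone, result keys come straight from metric_name. A minimal sketch of the new key construction, assuming _InternalEvaluationMetrics.ECI is the lowercase string "eci" and using an illustrative annotation:

import numpy as np

metric_name = "eci"  # assumed value of _InternalEvaluationMetrics.ECI
parsed_response = {"label": False, "reasoning": "Some reason."}  # illustrative service annotation

result = {}
# np.nan marks a missing label so aggregations skip the row instead of counting it as 0.
result[metric_name + "_label"] = parsed_response["label"] if "label" in parsed_response else np.nan
result[metric_name + "_reason"] = parsed_response["reasoning"] if "reasoning" in parsed_response else ""

print(result)  # {'eci_label': False, 'eci_reason': 'Some reason.'}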

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_eci/_eci.py

Lines changed: 2 additions & 2 deletions
@@ -66,8 +66,8 @@ class ECIEvaluator:
     .. code-block:: python

         {
-            "ECI_label": "False",
-            "ECI_reason": "Some reason."
+            "eci_label": "False",
+            "eci_reason": "Some reason."
         }
     """
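With the docstring fixed, the sample output matches what the evaluator actually returns. A hedged usage sketch under stated assumptions: the import path follows the file path shown above, and the constructor/call shapes mirror the evaluator's docstring in this preview version; both may differ in other releases.

from azure.ai.evaluation._evaluators._eci._eci import ECIEvaluator  # internal module path per this diff

azure_ai_project = {
    "subscription_id": "<subscription_id>",
    "resource_group_name": "<resource_group_name>",
    "project_name": "<project_name>",
}

eci_eval = ECIEvaluator(azure_ai_project)  # a credential may also be required, as in the e2e test below
result = eci_eval(query="What is the capital of France?", response="Paris.")

# Keys are lowercase after this commit:
print(result["eci_label"], result["eci_reason"])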

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_builtin_evaluators.py

Lines changed: 2 additions & 2 deletions
@@ -467,8 +467,8 @@ def test_eci_evaluator(self, project_scope, azure_cred):
             response="Rhombus",
         )
         assert unrelated_result is not None
-        assert not unrelated_result["ECI_label"]
-        assert "geometry question" in unrelated_result["ECI_reason"]
+        assert not unrelated_result["eci_label"]
+        assert "geometry question" in unrelated_result["eci_reason"]

     # @pytest.mark.skipif(is_live(), reason="API not fully released yet. Don't run in live mode unless connected to INT.")
     def test_xpia_evaluator(self, project_scope, azure_cred):

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

Lines changed: 4 additions & 4 deletions
@@ -467,8 +467,8 @@ def test_content_safety_aggregation(self):

     def test_label_based_aggregation(self):
         data = {
-            "eci.ECI_label": [True, False, True, False, True],
-            "eci.ECI_reasoning": ["a", "b", "c", "d", "e"],
+            "eci.eci_label": [True, False, True, False, True],
+            "eci.eci_reasoning": ["a", "b", "c", "d", "e"],
             "protected_material.protected_material_label": [False, False, False, False, True],
             "protected_material.protected_material_reasoning": ["f", "g", "h", "i", "j"],
             "unknown.unaccounted_label": [True, False, False, False, True],
@@ -482,11 +482,11 @@ def test_label_based_aggregation(self):
         aggregation = _aggregate_metrics(data_df, evaluators)
         # ECI and PM labels should be replaced with defect rates, unaccounted should not
         assert len(aggregation) == 3
-        assert "eci.ECI_label" not in aggregation
+        assert "eci.eci_label" not in aggregation
         assert "protected_material.protected_material_label" not in aggregation
         assert aggregation["unknown.unaccounted_label"] == 0.4

-        assert aggregation["eci.ECI_defect_rate"] == 0.6
+        assert aggregation["eci.eci_defect_rate"] == 0.6
         assert aggregation["protected_material.protected_material_defect_rate"] == 0.2
         assert "unaccounted_defect_rate" not in aggregation
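The expected numbers follow directly if _aggregate_metrics turns a boolean label column into a defect rate by taking its mean (True counted as 1), which is what the fixture data implies:

def defect_rate(labels):
    # Fraction of rows whose label is True.
    return sum(labels) / len(labels)

assert defect_rate([True, False, True, False, True]) == 0.6    # eci: 3 of 5 rows flagged
assert defect_rate([False, False, False, False, True]) == 0.2  # protected_material: 1 of 5
assert defect_rate([True, False, False, False, True]) == 0.4   # unknown.unaccounted: kept as a raw label mean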
