Commit 06e2602

[evaluation] Change ECI to eci (#37489)
* Remove Optional type annotation from get() methods
* Remove duplicate overloads
* Include xpia in handled_metrics for evaluation aggregation
* Update output.py
* Change 'ECI' to 'eci'
* Revert "Include xpia in handled_metrics for evaluation aggregation" (reverts commit 8629de8)
1 parent: 9da614d
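The user-visible effect of this commit is that the ECI evaluator's result keys change from "ECI_label"/"ECI_reason" to "eci_label"/"eci_reason", in line with every other metric. For results saved by older versions, a small bridge like the following hypothetical helper (mine, not part of the SDK) keeps reads working across both spellings:

def get_eci_fields(result: dict):
    """Read the ECI label/reason from either the old or the new key spelling."""
    label = result.get("eci_label", result.get("ECI_label"))
    reason = result.get("eci_reason", result.get("ECI_reason", ""))
    return label, reason

assert get_eci_fields({"ECI_label": "False", "ECI_reason": "r"}) == ("False", "r")
assert get_eci_fields({"eci_label": "False", "eci_reason": "r"}) == ("False", "r")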

File tree: 4 files changed, +13 -27 lines


sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py

Lines changed: 5 additions & 19 deletions
@@ -217,41 +217,27 @@ def parse_response(  # pylint: disable=too-many-branches,too-many-statements
         response = response.replace("true", "True")
         parsed_response = literal_eval(response)
         result = {}
-        metric_prefix = _get_metric_prefix(metric_name)
         # Use label instead of score since these are assumed to be boolean results.
         # Use np.nan as null value since it's ignored by aggregations rather than treated as 0.
-        result[metric_prefix + "_label"] = parsed_response["label"] if "label" in parsed_response else np.nan
-        result[metric_prefix + "_reason"] = parsed_response["reasoning"] if "reasoning" in parsed_response else ""
+        result[metric_name + "_label"] = parsed_response["label"] if "label" in parsed_response else np.nan
+        result[metric_name + "_reason"] = parsed_response["reasoning"] if "reasoning" in parsed_response else ""

         if metric_name == EvaluationMetrics.XPIA:
             # Add "manipulated_content", "intrusion" and "information_gathering" to the result
             # if present else set them to np.nan
-            result[metric_prefix + "_manipulated_content"] = (
+            result[metric_name + "_manipulated_content"] = (
                 parsed_response["manipulated_content"] if "manipulated_content" in parsed_response else np.nan
             )
-            result[metric_prefix + "_intrusion"] = (
+            result[metric_name + "_intrusion"] = (
                 parsed_response["intrusion"] if "intrusion" in parsed_response else np.nan
             )
-            result[metric_prefix + "_information_gathering"] = (
+            result[metric_name + "_information_gathering"] = (
                 parsed_response["information_gathering"] if "information_gathering" in parsed_response else np.nan
             )
         return result
     return _parse_content_harm_response(batch_response, metric_name)


-def _get_metric_prefix(metric_name: str) -> str:
-    """Get the prefix for the evaluation metric. This is usually the metric name.
-
-    :param metric_name: The evaluation metric to use.
-    :type metric_name: str
-    :return: The prefix for the evaluation metric.
-    :rtype: str
-    """
-    if metric_name == _InternalEvaluationMetrics.ECI:
-        return "ECI"
-    return metric_name
-
-
 def _parse_content_harm_response(batch_response: List[Dict], metric_name: str) -> Dict:
     """Parse the annotation response from Responsible AI service for a content harm evaluation.
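The deleted helper existed only to special-case ECI: it returned the uppercase prefix "ECI" for that one metric and the metric name unchanged for everything else. With it gone, result keys come straight from metric_name. A minimal sketch of the new key construction, assuming _InternalEvaluationMetrics.ECI is the lowercase string "eci" and using an illustrative annotation:

import numpy as np

metric_name = "eci"  # assumed value of _InternalEvaluationMetrics.ECI
parsed_response = {"label": False, "reasoning": "Some reason."}  # illustrative service annotation

result = {}
# np.nan marks a missing label so aggregations skip the row instead of counting it as 0.
result[metric_name + "_label"] = parsed_response["label"] if "label" in parsed_response else np.nan
result[metric_name + "_reason"] = parsed_response["reasoning"] if "reasoning" in parsed_response else ""

print(result)  # {'eci_label': False, 'eci_reason': 'Some reason.'}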

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_eci/_eci.py

Lines changed: 2 additions & 2 deletions
@@ -66,8 +66,8 @@ class ECIEvaluator:
     .. code-block:: python

         {
-            "ECI_label": "False",
-            "ECI_reason": "Some reason."
+            "eci_label": "False",
+            "eci_reason": "Some reason."
         }
     """
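With the docstring fixed, the sample output matches what the evaluator actually returns. A hedged usage sketch under stated assumptions: the import path follows the file path shown above, and the constructor/call shapes mirror the evaluator's docstring in this preview version; both may differ in other releases.

from azure.ai.evaluation._evaluators._eci._eci import ECIEvaluator  # internal module path per this diff

azure_ai_project = {
    "subscription_id": "<subscription_id>",
    "resource_group_name": "<resource_group_name>",
    "project_name": "<project_name>",
}

eci_eval = ECIEvaluator(azure_ai_project)  # a credential may also be required, as in the e2e test below
result = eci_eval(query="What is the capital of France?", response="Paris.")

# Keys are lowercase after this commit:
print(result["eci_label"], result["eci_reason"])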

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_builtin_evaluators.py

Lines changed: 2 additions & 2 deletions
@@ -467,8 +467,8 @@ def test_eci_evaluator(self, project_scope, azure_cred):
             response="Rhombus",
         )
         assert unrelated_result is not None
-        assert not unrelated_result["ECI_label"]
-        assert "geometry question" in unrelated_result["ECI_reason"]
+        assert not unrelated_result["eci_label"]
+        assert "geometry question" in unrelated_result["eci_reason"]

     # @pytest.mark.skipif(is_live(), reason="API not fully released yet. Don't run in live mode unless connected to INT.")
     def test_xpia_evaluator(self, project_scope, azure_cred):

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

Lines changed: 4 additions & 4 deletions
@@ -467,8 +467,8 @@ def test_content_safety_aggregation(self):

     def test_label_based_aggregation(self):
         data = {
-            "eci.ECI_label": [True, False, True, False, True],
-            "eci.ECI_reasoning": ["a", "b", "c", "d", "e"],
+            "eci.eci_label": [True, False, True, False, True],
+            "eci.eci_reasoning": ["a", "b", "c", "d", "e"],
             "protected_material.protected_material_label": [False, False, False, False, True],
             "protected_material.protected_material_reasoning": ["f", "g", "h", "i", "j"],
             "unknown.unaccounted_label": [True, False, False, False, True],
@@ -482,11 +482,11 @@ def test_label_based_aggregation(self):
         aggregation = _aggregate_metrics(data_df, evaluators)
         # ECI and PM labels should be replaced with defect rates, unaccounted should not
         assert len(aggregation) == 3
-        assert "eci.ECI_label" not in aggregation
+        assert "eci.eci_label" not in aggregation
         assert "protected_material.protected_material_label" not in aggregation
         assert aggregation["unknown.unaccounted_label"] == 0.4

-        assert aggregation["eci.ECI_defect_rate"] == 0.6
+        assert aggregation["eci.eci_defect_rate"] == 0.6
         assert aggregation["protected_material.protected_material_defect_rate"] == 0.2
         assert "unaccounted_defect_rate" not in aggregation
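The expected numbers follow directly if _aggregate_metrics turns a boolean label column into a defect rate by taking its mean (True counted as 1), which is what the fixture data implies:

def defect_rate(labels):
    # Fraction of rows whose label is True.
    return sum(labels) / len(labels)

assert defect_rate([True, False, True, False, True]) == 0.6    # eci: 3 of 5 rows flagged
assert defect_rate([False, False, False, False, True]) == 0.2  # protected_material: 1 of 5
assert defect_rate([True, False, False, False, True]) == 0.4   # unknown.unaccounted: kept as a raw label mean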
