Skip to content

Commit 8ac638c

Browse files
[fix] avoid failure when some raiservice evaluators only partially succeed (#44429)
Co-authored-by: zyysurely <[email protected]>
1 parent da88eec commit 8ac638c

File tree

2 files changed

+31
-0
lines changed

2 files changed

+31
-0
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,10 @@ def _aggregate_label_defect_metrics(df: pd.DataFrame) -> Tuple[List[str], Dict[s
235235

236236

237237
def _process_rows(row, detail_defect_rates):
238+
# Ignore failed rows (NaN, None, etc.), which are not the expected dict
239+
if _is_none_or_nan(row):
240+
return detail_defect_rates
241+
238242
for key, value in row.items():
239243
if key not in detail_defect_rates:
240244
detail_defect_rates[key] = []

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
_apply_target_to_data,
3838
_rename_columns_conditionally,
3939
_convert_results_to_aoai_evaluation_results,
40+
_process_rows,
41+
_aggregate_label_defect_metrics,
4042
)
4143
from azure.ai.evaluation._evaluate._utils import _convert_name_map_into_property_entries
4244
from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _trace_destination_from_project_scope
@@ -719,6 +721,31 @@ def test_general_aggregation(self):
719721
assert "bad_thing.boolean_with_nan" not in aggregation
720722
assert "bad_thing.boolean_with_none" not in aggregation
721723

724+
def test_aggregate_label_defect_metrics_with_nan_in_details(self):
    """Verify that NaN/None entries in a ``*_details`` column are skipped
    when per-detail defect rates are aggregated, while the label column is
    still aggregated over every row."""
    # Two rows carry valid detail dicts; the other two simulate failed
    # evaluations whose details are missing.
    detail_values = [
        {"detail1": 1, "detail2": 0},
        np.nan,  # Failed evaluation
        {"detail1": 0, "detail2": 1},
        None,  # Another failure case
    ]
    frame = pd.DataFrame(
        {
            "evaluator.protected_material_label": [True, False, True, False],
            "evaluator.protected_material_details": detail_values,
        }
    )

    label_cols, defect_rates = _aggregate_label_defect_metrics(frame)

    # Label defect rate is computed over all 4 rows: 2 True out of 4.
    assert "evaluator.protected_material_defect_rate" in defect_rates
    assert defect_rates["evaluator.protected_material_defect_rate"] == 0.5

    # Detail defect rates come only from the 2 valid dict rows: 1 of 2 each.
    assert "evaluator.protected_material_details.detail1_defect_rate" in defect_rates
    assert "evaluator.protected_material_details.detail2_defect_rate" in defect_rates
    assert defect_rates["evaluator.protected_material_details.detail1_defect_rate"] == 0.5
    assert defect_rates["evaluator.protected_material_details.detail2_defect_rate"] == 0.5
748+
722749
@pytest.mark.skip(reason="Breaking CI by crashing pytest somehow")
723750
def test_optional_inputs_with_data(self, questions_file, questions_answers_basic_file):
724751
from test_evaluators.test_inputs_evaluators import HalfOptionalEval, NoInputEval, NonOptionalEval, OptionalEval

0 commit comments

Comments
 (0)