37 | 37 | _apply_target_to_data, |
38 | 38 | _rename_columns_conditionally, |
39 | 39 | _convert_results_to_aoai_evaluation_results, |
| 40 | + _process_rows, |
| 41 | + _aggregate_label_defect_metrics, |
40 | 42 | ) |
41 | 43 | from azure.ai.evaluation._evaluate._utils import _convert_name_map_into_property_entries |
42 | 44 | from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _trace_destination_from_project_scope |
@@ -719,6 +721,31 @@ def test_general_aggregation(self): |
719 | 721 | assert "bad_thing.boolean_with_nan" not in aggregation |
720 | 722 | assert "bad_thing.boolean_with_none" not in aggregation |
721 | 723 |
| 724 | + def test_aggregate_label_defect_metrics_with_nan_in_details(self): |
| 725 | + """Test that NaN/None values in details column are properly ignored during aggregation.""" |
| 726 | + data = { |
| 727 | + "evaluator.protected_material_label": [True, False, True, False], |
| 728 | + "evaluator.protected_material_details": [ |
| 729 | + {"detail1": 1, "detail2": 0}, |
| 730 | + np.nan, # Failed evaluation |
| 731 | + {"detail1": 0, "detail2": 1}, |
| 732 | + None, # Another failure case |
| 733 | + ], |
| 734 | + } |
| 735 | + df = pd.DataFrame(data) |
| 736 | + |
| 737 | + label_cols, defect_rates = _aggregate_label_defect_metrics(df) |
| 738 | + |
| 739 | + # Should calculate defect rate for label column (all 4 rows) |
| 740 | + assert "evaluator.protected_material_defect_rate" in defect_rates |
| 741 | + assert defect_rates["evaluator.protected_material_defect_rate"] == 0.5 |
| 742 | + |
| 743 | + # Should calculate defect rates for detail keys (only from 2 valid dict rows) |
| 744 | + assert "evaluator.protected_material_details.detail1_defect_rate" in defect_rates |
| 745 | + assert "evaluator.protected_material_details.detail2_defect_rate" in defect_rates |
| 746 | + assert defect_rates["evaluator.protected_material_details.detail1_defect_rate"] == 0.5 |
| 747 | + assert defect_rates["evaluator.protected_material_details.detail2_defect_rate"] == 0.5 |
| 748 | + |
722 | 749 | @pytest.mark.skip(reason="Breaking CI by crashing pytest somehow") |
723 | 750 | def test_optional_inputs_with_data(self, questions_file, questions_answers_basic_file): |
724 | 751 | from test_evaluators.test_inputs_evaluators import HalfOptionalEval, NoInputEval, NonOptionalEval, OptionalEval |
|
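For reference, here is a minimal sketch of the aggregation behavior the new test pins down, assuming the real `_aggregate_label_defect_metrics` in `azure.ai.evaluation` works along these lines (the `_sketch` suffix, the return shape, and the column-naming rules below are inferred from the test's assertions, not taken from the library source): the `*_label` column contributes a defect rate over all rows, while the `*_details` column is aggregated only from rows whose value is actually a dict, so NaN/None entries left by failed evaluations are skipped.

```python
import pandas as pd


def _aggregate_label_defect_metrics_sketch(df: pd.DataFrame):
    """Sketch (not the library implementation) of label/details defect-rate aggregation."""
    label_cols = [c for c in df.columns if c.endswith("_label")]
    defect_rates = {}
    for col in label_cols:
        prefix = col[: -len("_label")]
        # Label defect rate: fraction of rows flagged True, computed over all rows.
        defect_rates[f"{prefix}_defect_rate"] = float(df[col].mean())

        details_col = f"{prefix}_details"
        if details_col in df.columns:
            # Only rows whose details value is a dict are aggregated; NaN/None are ignored.
            valid_details = [v for v in df[details_col] if isinstance(v, dict)]
            if valid_details:
                details_df = pd.DataFrame(valid_details)
                for key in details_df.columns:
                    defect_rates[f"{details_col}.{key}_defect_rate"] = float(details_df[key].mean())
    return label_cols, defect_rates
```

Applied to the DataFrame built in the test above, this sketch yields the same 0.5 defect rates the assertions expect: the label rate averages all four rows, while the detail rates average only the two rows that carry dictionaries.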