Skip to content

Commit a67562e

Browse files
nagkumar91, Nagkumar Arkalgud, Nagkumar Arkalgud, and Copilot
authored and committed
Chore/support updated oai sdk (#43053)
* Prepare evals SDK Release
* Fix bug
* Fix for ADV_CONV for FDP projects
* Update release date
* re-add pyrit to matrix
* Change grader ids
* Update unit test
* replace all old grader IDs in tests
* Update platform-matrix.json Add pyrit and not remove the other one
* Update test to ensure everything is mocked
* tox/black fixes
* Skip that test with issues
* update grader ID according to API View feedback
* Update test
* remove string check for grader ID
* Update changelog and officially start freeze
* update the enum according to suggestions
* update the changelog
* Finalize logic
* Initial plan
* Fix client request ID headers in azure-ai-evaluation Co-authored-by: nagkumar91 <[email protected]>
* Fix client request ID header format in rai_service.py Co-authored-by: nagkumar91 <[email protected]>
* Passing threshold in AzureOpenAIScoreModelGrader
* Add changelog
* Adding the self.pass_threshold instead of pass_threshold
* Add the python grader
* Remove redundant test
* Add class to exception list and format code
* Add properties to evaluation upload run for FDP
* Remove debug
* Remove the redundant property
* Fix changelog
* Fix the multiple features added section
* removed the properties in update
* Updates to support aoai 1.108.0 version with updated types for evals
* Black reformat
* Handle case when AOAI returns dict
---------
Co-authored-by: Nagkumar Arkalgud <[email protected]>
Co-authored-by: Nagkumar Arkalgud <[email protected]>
Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: nagkumar91 <[email protected]>
1 parent f5da6d0 commit a67562e

File tree

3 files changed

+48
-6
lines changed

3 files changed

+48
-6
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate_aoai.py

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -272,8 +272,33 @@ def _get_single_run_results(
272272
for row_result in all_results:
273273
listed_results["index"].append(row_result.datasource_item_id)
274274
for single_grader_row_result in row_result.results:
275-
grader_name = run_info["grader_name_map"][single_grader_row_result["name"]]
276-
for name, value in single_grader_row_result.items():
275+
if isinstance(single_grader_row_result, dict):
276+
result_dict = single_grader_row_result
277+
elif hasattr(single_grader_row_result, "model_dump"):
278+
result_dict = single_grader_row_result.model_dump()
279+
elif hasattr(single_grader_row_result, "dict"):
280+
result_dict = single_grader_row_result.dict()
281+
elif hasattr(single_grader_row_result, "__dict__"):
282+
result_dict = vars(single_grader_row_result)
283+
else:
284+
raise EvaluationException(
285+
message=("Unsupported AOAI evaluation result type: " f"{type(single_grader_row_result)!r}."),
286+
blame=ErrorBlame.UNKNOWN,
287+
category=ErrorCategory.FAILED_EXECUTION,
288+
target=ErrorTarget.AOAI_GRADER,
289+
)
290+
291+
grader_result_name = result_dict.get("name", None)
292+
if grader_result_name is None:
293+
raise EvaluationException(
294+
message="AOAI evaluation response missing grader result name; unable to map to original grader.",
295+
blame=ErrorBlame.UNKNOWN,
296+
category=ErrorCategory.FAILED_EXECUTION,
297+
target=ErrorTarget.AOAI_GRADER,
298+
)
299+
300+
grader_name = run_info["grader_name_map"][grader_result_name]
301+
for name, value in result_dict.items():
277302
if name in ["name"]:
278303
continue
279304
if name.lower() == "passed":

sdk/evaluation/azure-ai-evaluation/setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@
7575
"httpx>=0.25.1",
7676
# Dependencies added since Promptflow will soon be made optional
7777
"pandas>=2.1.2,<3.0.0",
78-
"openai>=1.78.0",
78+
"openai>=1.108.0",
7979
"ruamel.yaml>=0.17.10,<1.0.0",
8080
"msrest>=0.6.21",
8181
"Jinja2>=3.1.6",

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_aoai_score_model_grader.py

Lines changed: 20 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,22 @@
2020
)
2121

2222

23+
def _sampling_params_as_dict(value):
24+
"""Normalize sampling params to a plain dictionary for assertions."""
25+
26+
if value is None:
27+
return {}
28+
if isinstance(value, dict):
29+
return value
30+
if hasattr(value, "model_dump"):
31+
return value.model_dump(exclude_none=True)
32+
if hasattr(value, "dict"):
33+
return value.dict(exclude_none=True)
34+
if hasattr(value, "__dict__"):
35+
return {k: v for k, v in vars(value).items() if v is not None and not k.startswith("_")}
36+
return value
37+
38+
2339
@pytest.fixture
2440
def mock_aoai_model_config():
2541
"""Mock Azure OpenAI model configuration for testing."""
@@ -200,8 +216,9 @@ def test_grader_config_properties(self, mock_aoai_model_config, basic_score_grad
200216
assert config.input[0].role == "system"
201217
assert config.input[1].role == "user"
202218
assert config.range == [0.0, 1.0]
203-
assert config.sampling_params["temperature"] == 0.0
204-
assert config.sampling_params["max_tokens"] == 100
219+
sampling_params = _sampling_params_as_dict(config.sampling_params)
220+
assert sampling_params["temperature"] == 0.0
221+
assert sampling_params["max_tokens"] == 100
205222
assert grader.pass_threshold == 0.5
206223

207224
def test_different_score_ranges(self, mock_aoai_model_config):
@@ -534,7 +551,7 @@ def test_grader_with_complex_sampling_params(self, mock_aoai_model_config):
534551
sampling_params=complex_params,
535552
)
536553

537-
assert grader._grader_config.sampling_params == complex_params
554+
assert _sampling_params_as_dict(grader._grader_config.sampling_params) == complex_params
538555

539556
def test_grader_with_unicode_content(self, mock_aoai_model_config):
540557
"""Test grader with Unicode and special characters in content."""

0 commit comments

Comments
 (0)