Skip to content

Commit a9741f5

Browse files
authored
Add task_navigation_efficiency_label Output (#43332)
1 parent bfbbcff commit a9741f5

File tree

3 files changed: +17 -16 lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ class TaskNavigationEfficiencyEvaluator(EvaluatorBase):
 
 .. code-block:: python
 
-from azure.ai.evaluation import TaskNavigationEfficiencyEvaluator
+from azure.ai.evaluation._evaluators._task_navigation_efficiency import TaskNavigationEfficiencyEvaluator
 
 task_navigation_efficiency_eval = TaskNavigationEfficiencyEvaluator(
 matching_mode=TaskNavigationEfficiencyMatchingMode.EXACT_MATCH
@@ -320,8 +320,9 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str, Dict[s
 )
 
 return {
+"task_navigation_efficiency_label": match_result,
 "task_navigation_efficiency_result": EVALUATION_PASS_FAIL_MAPPING[match_result],
-"properties": additional_properties_metrics,
+"task_navigation_efficiency_details": additional_properties_metrics,
 }
 else:
 raise EvaluationException(

sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/task_navigation_efficiency.ipynb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -449,7 +449,7 @@
 " \n",
 " # Display the returned results\n",
 " for key, value in result.items():\n",
-" if key == \"properties\":\n",
+" if key == \"task_navigation_efficiency_details\":\n",
 " print(f\" {key}:\")\n",
 " for prop_key, prop_value in value.items():\n",
 " print(f\" {prop_key}: {prop_value:.3f}\")\n",

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_task_navigation_efficiency_evaluators.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -29,10 +29,10 @@ def test_exact_match_scenario(self):
 
 result = evaluator(response=response, ground_truth=ground_truth)
 assert result["task_navigation_efficiency_result"] == "pass"
-assert "properties" in result
-assert result["properties"]["precision_score"] == 1.0
-assert result["properties"]["recall_score"] == 1.0
-assert result["properties"]["f1_score"] == 1.0
+assert "task_navigation_efficiency_details" in result
+assert result["task_navigation_efficiency_details"]["precision_score"] == 1.0
+assert result["task_navigation_efficiency_details"]["recall_score"] == 1.0
+assert result["task_navigation_efficiency_details"]["f1_score"] == 1.0
 
 def test_in_order_match_with_extra_steps(self):
 """Test when agent has extra steps but maintains order."""
@@ -60,9 +60,9 @@ def test_in_order_match_with_extra_steps(self):
 
 result = evaluator(response=response, ground_truth=ground_truth)
 assert result["task_navigation_efficiency_result"] == "pass"
-assert result["properties"]["precision_score"] == 0.75 # 3/4
-assert result["properties"]["recall_score"] == 1.0 # 3/3
-assert result["properties"]["f1_score"] == pytest.approx(0.857, rel=1e-2)
+assert result["task_navigation_efficiency_details"]["precision_score"] == 0.75 # 3/4
+assert result["task_navigation_efficiency_details"]["recall_score"] == 1.0 # 3/3
+assert result["task_navigation_efficiency_details"]["f1_score"] == pytest.approx(0.857, rel=1e-2)
 
 def test_any_order_match(self):
 """Test when agent has all steps but in wrong order."""
@@ -88,9 +88,9 @@ def test_any_order_match(self):
 
 result = evaluator(response=response, ground_truth=ground_truth)
 assert result["task_navigation_efficiency_result"] == "pass"
-assert result["properties"]["precision_score"] == 1.0
-assert result["properties"]["recall_score"] == 1.0
-assert result["properties"]["f1_score"] == 1.0
+assert result["task_navigation_efficiency_details"]["precision_score"] == 1.0
+assert result["task_navigation_efficiency_details"]["recall_score"] == 1.0
+assert result["task_navigation_efficiency_details"]["f1_score"] == 1.0
 
 def test_exact_match_failure(self):
 """Test when exact match fails but other matches succeed."""
@@ -159,9 +159,9 @@ def test_tuple_format_with_parameters(self):
 
 result = evaluator(response=response, ground_truth=ground_truth)
 assert result["task_navigation_efficiency_result"] == "pass"
-assert result["properties"]["precision_score"] == 1.0
-assert result["properties"]["recall_score"] == 1.0
-assert result["properties"]["f1_score"] == 1.0
+assert result["task_navigation_efficiency_details"]["precision_score"] == 1.0
+assert result["task_navigation_efficiency_details"]["recall_score"] == 1.0
+assert result["task_navigation_efficiency_details"]["f1_score"] == 1.0
 
 def test_matching_mode_validation(self):
 """Test validation of matching_mode parameter."""

0 commit comments

Comments
 (0)