Commit 5ee4fc4

Update agent-evaluate-sdk.md
1 parent 137b6bc


articles/ai-foundry/how-to/develop/agent-evaluate-sdk.md

Lines changed: 21 additions & 21 deletions
@@ -78,7 +78,7 @@ def fetch_weather(location: str) -> str:
     :rtype: str
     """
     # In a real-world scenario, you'd integrate with a weather API.
-    # Here, we'll mock the response.
+    # In the following code snippet, we mock the response.
     mock_weather_data = {"Seattle": "Sunny, 25°C", "London": "Cloudy, 18°C", "Tokyo": "Rainy, 22°C"}
     weather = mock_weather_data.get(location, "Weather data not available for this location.")
     weather_json = json.dumps({"weather": weather})
@@ -89,7 +89,7 @@ user_functions: Set[Callable[..., Any]] = {
     fetch_weather,
 }

-# Add tools that agent will use.
+# Add tools that the agent will use.
 functions = FunctionTool(user_functions)

 toolset = ToolSet()
@@ -162,9 +162,9 @@ converted_data = converter.convert(thread_id, run_id)
 And that's it! You don't need to read the input requirements for each evaluator or do any work to parse them. You need only to select your evaluator and call it on this single run. For model choice, we recommend a strong reasoning model like `o3-mini` or later models. We set up a list of quality and safety evaluators in `quality_evaluators` and `safety_evaluators` and reference them in [Evaluate multiple agent runs or threads](#evaluate-multiple-agent-runs-or-threads).

 ```python
-# specific to agentic workflows
+# This is specific to agentic workflows.
 from azure.ai.evaluation import IntentResolutionEvaluator, TaskAdherenceEvaluator, ToolCallAccuracyEvaluator
-# other quality as well as risk and safety metrics
+# Other quality, risk, and safety metrics:
 from azure.ai.evaluation import RelevanceEvaluator, CoherenceEvaluator, CodeVulnerabilityEvaluator, ContentSafetyEvaluator, IndirectAttackEvaluator, FluencyEvaluator
 from azure.ai.projects.models import ConnectionType
 from azure.identity import DefaultAzureCredential
@@ -196,7 +196,7 @@ azure_ai_project = os.environ.get("AZURE_AI_PROJECT")

 safety_evaluators = {evaluator.__name__: evaluator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential()) for evaluator in [ContentSafetyEvaluator, IndirectAttackEvaluator, CodeVulnerabilityEvaluator]}

-# reference the quality and safety evaluator list above
+# Reference the quality and safety evaluator list above.
 quality_and_safety_evaluators = {**quality_evaluators, **safety_evaluators}

 for name, evaluator in quality_and_safety_evaluators.items():
@@ -268,10 +268,10 @@ To evaluate multiple agent runs or threads, we recommend using the batch `evalua
 import json
 from azure.ai.evaluation import AIAgentConverter

-# Initialize the converter
+# Initialize the converter.
 converter = AIAgentConverter(project_client)

-# Specify a file path to save agent output (which is evaluation input data)
+# Specify a file path to save the agent output (evaluation input data) to.
 filename = os.path.join(os.getcwd(), "evaluation_input_data.jsonl")

 evaluation_data = converter.prepare_evaluation_data(thread_ids=thread_id, filename=filename)
@@ -287,23 +287,23 @@ from dotenv import load_dotenv
 load_dotenv()


-# Batch evaluation API (local)
+# Batch evaluation API (local):
 from azure.ai.evaluation import evaluate

 response = evaluate(
     data=filename,
     evaluation_name="agent demo - batch run",
     evaluators=quality_and_safety_evaluators,
-    # optionally, log your results to your Azure AI Foundry project for rich visualization
+    # Optionally, log your results to your Azure AI Foundry project for rich visualization.
     azure_ai_project={
         "subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"],
         "project_name": os.environ["PROJECT_NAME"],
         "resource_group_name": os.environ["RESOURCE_GROUP_NAME"],
     }
 )
-# Inspect the average scores at a high-level
+# Inspect the average scores at a high level.
 print(response["metrics"])
-# Use the URL to inspect the results on the UI
+# Use the URL to inspect the results on the UI.
 print(f'AI Foundry URL: {response.get("studio_url")}')
 ```

@@ -358,8 +358,8 @@ model_config = AzureOpenAIModelConfiguration(

 intent_resolution_evaluator = IntentResolutionEvaluator(model_config)

-# Evaluating query and response as strings
-# A positive example. Intent is identified and understood and the response correctly resolves user intent
+# Evaluate the query and response as strings.
+# The following is a positive example. Intent is identified and understood, and the response correctly resolves the user's intent.
 result = intent_resolution_evaluator(
     query="What are the opening hours of the Eiffel Tower?",
     response="Opening hours of the Eiffel Tower are 9:00 AM to 11:00 PM.",
@@ -455,13 +455,13 @@ In agent message format, `query` and `response` are a list of OpenAI-style messa
 ```python
 import json

-# user asked a question
+# The user asked a question.
 query = [
     {
         "role": "system",
         "content": "You are a friendly and helpful customer service agent."
     },
-    # past interactions omitted
+    # Past interactions are omitted.
     # ...
     {
         "createdAt": "2025-03-14T06:14:20Z",
@@ -474,7 +474,7 @@ query = [
         ]
     }
 ]
-# the agent emits multiple messages to fulfill the request
+# The agent emits multiple messages to fulfill the request.
 response = [
     {
         "createdAt": "2025-03-14T06:14:30Z",
@@ -502,9 +502,9 @@ response = [
             }
         ]
     },
-    # many more messages omitted
+    # Many more messages are omitted.
     # ...
-    # here is the agent's final response
+    # Here is the agent's final response:
     {
         "createdAt": "2025-03-14T06:15:05Z",
         "run_id": "0",
@@ -518,7 +518,7 @@ response = [
     }
 ]

-# An example of tool definitions available to the agent
+# An example of tool definitions available to the agent:
 tool_definitions = [
     {
         "name": "get_orders",
@@ -533,14 +533,14 @@ tool_definitions = [
             }
         }
     },
-    # other tool definitions omitted
+    # Other tool definitions are omitted.
     # ...
 ]

 result = intent_resolution_evaluator(
     query=query,
     response=response,
-    # optionally provide the tool definitions
+    # Optionally, provide the tool definitions.
     tool_definitions=tool_definitions
 )
 print(json.dumps(result, indent=4))
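
Note: the `@@ -196,7` hunk above cuts off right at the loop over `quality_and_safety_evaluators`, so the single-run path the article describes ("call the evaluator on this single run") isn't fully visible in this diff. The following is a minimal sketch of that path, assuming `project_client`, `thread_id`, `run_id`, and the `quality_and_safety_evaluators` dictionary from the earlier snippets, and that the converted payload maps onto each evaluator's keyword arguments:

```python
import json

from azure.ai.evaluation import AIAgentConverter

# Convert a single agent run into evaluation input data,
# as shown in the context of the `@@ -162,9` hunk.
converter = AIAgentConverter(project_client)
converted_data = converter.convert(thread_id, run_id)

# Run every quality and safety evaluator on the converted run.
# Assumption: each evaluator accepts the converted data as
# keyword arguments (query, response, tool calls, and so on).
for name, evaluator in quality_and_safety_evaluators.items():
    result = evaluator(**converted_data)
    print(name)
    print(json.dumps(result, indent=4))
```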
