@@ -78,7 +78,7 @@ def fetch_weather(location: str) -> str:
:rtype: str
"""
# In a real-world scenario, you'd integrate with a weather API.
- # Here, we'll mock the response.
+ # In the following code snippet, we mock the response.
mock_weather_data = {"Seattle": "Sunny, 25°C", "London": "Cloudy, 18°C", "Tokyo": "Rainy, 22°C"}
weather = mock_weather_data.get(location, "Weather data not available for this location.")
weather_json = json.dumps({"weather": weather})
@@ -89,7 +89,7 @@ user_functions: Set[Callable[..., Any]] = {
fetch_weather,
}

- # Add tools that agent will use.
+ # Add tools that the agent will use.
functions = FunctionTool(user_functions)

toolset = ToolSet()
@@ -162,9 +162,9 @@ converted_data = converter.convert(thread_id, run_id)
And that's it! You don't need to read the input requirements for each evaluator or do any work to parse them. You only need to select your evaluator and call it on this single run; a minimal sketch follows the imports below. For model choice, we recommend a strong reasoning model such as `o3-mini` or later. We set up a list of quality and safety evaluators in `quality_evaluators` and `safety_evaluators` and reference them in [Evaluating multiple agent runs or a thread](#evaluate-multiple-agent-runs-or-threads).

```python
- # specific to agentic workflows
+ # This is specific to agentic workflows.
from azure.ai.evaluation import IntentResolutionEvaluator, TaskAdherenceEvaluator, ToolCallAccuracyEvaluator
- # other quality as well as risk and safety metrics
+ # Other quality, risk, and safety metrics:
from azure.ai.evaluation import RelevanceEvaluator, CoherenceEvaluator, CodeVulnerabilityEvaluator, ContentSafetyEvaluator, IndirectAttackEvaluator, FluencyEvaluator
from azure.ai.projects.models import ConnectionType
from azure.identity import DefaultAzureCredential
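# A minimal sketch: call a single evaluator directly on the converted single-run data.
# Assumes `converted_data` comes from converter.convert(thread_id, run_id) shown earlier
# and `model_config` is the AzureOpenAIModelConfiguration defined elsewhere in this article;
# the converted fields are passed straight through as keyword arguments.
intent_resolution = IntentResolutionEvaluator(model_config)
single_run_result = intent_resolution(**converted_data)
print(single_run_result)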
@@ -196,7 +196,7 @@ azure_ai_project = os.environ.get("AZURE_AI_PROJECT")

safety_evaluators = {evaluator.__name__: evaluator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential()) for evaluator in [ContentSafetyEvaluator, IndirectAttackEvaluator, CodeVulnerabilityEvaluator]}

- # reference the quality and safety evaluator list above
+ # Reference the quality and safety evaluator list above.
quality_and_safety_evaluators = {**quality_evaluators, **safety_evaluators}

for name, evaluator in quality_and_safety_evaluators.items():
@@ -268,10 +268,10 @@ To evaluate multiple agent runs or threads, we recommend using the batch `evalua
import json
from azure.ai.evaluation import AIAgentConverter

- # Initialize the converter
+ # Initialize the converter.
converter = AIAgentConverter(project_client)

- # Specify a file path to save agent output (which is evaluation input data)
+ # Specify a file path to save the agent output (evaluation input data) to.
filename = os.path.join(os.getcwd(), "evaluation_input_data.jsonl")

evaluation_data = converter.prepare_evaluation_data(thread_ids=thread_id, filename=filename)
@@ -287,23 +287,23 @@ from dotenv import load_dotenv
load_dotenv()


- # Batch evaluation API (local)
+ # Batch evaluation API (local):
from azure.ai.evaluation import evaluate

response = evaluate(
    data=filename,
    evaluation_name="agent demo - batch run",
    evaluators=quality_and_safety_evaluators,
- # optionally, log your results to your Azure AI Foundry project for rich visualization
+ # Optionally, log your results to your Azure AI Foundry project for rich visualization.
    azure_ai_project={
        "subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"],
        "project_name": os.environ["PROJECT_NAME"],
        "resource_group_name": os.environ["RESOURCE_GROUP_NAME"],
    }
)
- # Inspect the average scores at a high-level
+ # Inspect the average scores at a high level.
print(response["metrics"])
- # Use the URL to inspect the results on the UI
+ # Use the URL to inspect the results on the UI.
print(f'AI Foundry URL: {response.get("studio_url")}')
```
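Beyond the aggregate metrics, here is a minimal sketch of inspecting per-row results locally, assuming the `response` returned by `evaluate()` above exposes row-level output under a `rows` key (with `pandas` used only for display):

```python
import pandas as pd

# Assumption: row-level evaluation output is available under response["rows"].
rows_df = pd.DataFrame(response["rows"])
print(rows_df.head())
```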
@@ -358,8 +358,8 @@ model_config = AzureOpenAIModelConfiguration(

intent_resolution_evaluator = IntentResolutionEvaluator(model_config)

- # Evaluating query and response as strings
- # A positive example. Intent is identified and understood and the response correctly resolves user intent
+ # Evaluate the query and response as strings.
+ # The following is a positive example. The intent is identified and understood, and the response correctly resolves the user's intent.
result = intent_resolution_evaluator(
    query="What are the opening hours of the Eiffel Tower?",
    response="Opening hours of the Eiffel Tower are 9:00 AM to 11:00 PM.",
@@ -455,13 +455,13 @@ In agent message format, `query` and `response` are a list of OpenAI-style messa
```python
import json

- # user asked a question
+ # The user asked a question.
query = [
    {
        "role": "system",
        "content": "You are a friendly and helpful customer service agent."
    },
- # past interactions omitted
+ # Past interactions are omitted.
    # ...
    {
        "createdAt": "2025-03-14T06:14:20Z",
@@ -474,7 +474,7 @@ query = [
]
}
]
- # the agent emits multiple messages to fulfill the request
+ # The agent emits multiple messages to fulfill the request.
response = [
    {
        "createdAt": "2025-03-14T06:14:30Z",
@@ -502,9 +502,9 @@ response = [
}
]
},
- # many more messages omitted
+ # Many more messages are omitted.
# ...
- # here is the agent's final response
+ # Here is the agent's final response:
{
    "createdAt": "2025-03-14T06:15:05Z",
    "run_id": "0",
@@ -518,7 +518,7 @@ response = [
}
]

- # An example of tool definitions available to the agent
+ # An example of tool definitions available to the agent:
tool_definitions = [
    {
        "name": "get_orders",
@@ -533,14 +533,14 @@ tool_definitions = [
}
}
},
- # other tool definitions omitted
+ # Other tool definitions are omitted.
# ...
]

result = intent_resolution_evaluator(
    query=query,
    response=response,
- # optionally provide the tool definitions
+ # Optionally, provide the tool definitions.
    tool_definitions=tool_definitions
)
print(json.dumps(result, indent=4))
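# A minimal sketch, assuming TaskAdherenceEvaluator (imported earlier) accepts the same
# query/response/tool_definitions shape in agent message format; this is an illustrative
# assumption, not part of the example above.
task_adherence_evaluator = TaskAdherenceEvaluator(model_config)
adherence_result = task_adherence_evaluator(
    query=query,
    response=response,
    tool_definitions=tool_definitions
)
print(json.dumps(adherence_result, indent=4))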