@@ -73,33 +73,31 @@ import os
73
73
from azure.ai.evaluation import AzureOpenAIModelConfiguration
74
74
from azure.identity import DefaultAzureCredential
75
75
from azure.ai.evaluation import IntentResolutionEvaluator, ResponseCompletenessEvaluator
76
-
77
-
76
+
78
77
model_config = AzureOpenAIModelConfiguration(
79
78
azure_endpoint = os.environ[" AZURE_OPENAI_ENDPOINT" ],
80
79
api_key = os.environ[" AZURE_OPENAI_API_KEY" ],
81
80
api_version = os.environ[" AZURE_OPENAI_API_VERSION" ],
82
81
azure_deployment = os.environ[" MODEL_DEPLOYMENT_NAME" ],
83
82
)
84
-
83
+
85
84
intent_resolution_evaluator = IntentResolutionEvaluator(model_config)
86
- completeness_evaluator = CompletenessEvaluator (model_config = model_config)
87
-
85
+ response_completeness_evaluator = ResponseCompletenessEvaluator (model_config = model_config)
86
+
88
87
# Evaluating query and response as strings
89
88
# A positive example: the user's intent is identified and understood, and the response correctly resolves it.
90
89
result = intent_resolution_evaluator(
91
90
query = " What are the opening hours of the Eiffel Tower?" ,
92
91
response = " Opening hours of the Eiffel Tower are 9:00 AM to 11:00 PM." ,
93
92
)
94
93
print (result)
95
-
94
+
96
95
# A negative example: only half of the statements in the response are complete with respect to the ground truth.
97
- result = completeness_evaluator (
96
+ result = response_completeness_evaluator (
98
97
response = " Itinerary: Day 1 take a train to visit Disneyland outside of the city; Day 2 rests in hotel." ,
99
98
ground_truth = " Itinerary: Day 1 take a train to visit the downtown area for city sightseeing; Day 2 rests in hotel."
100
99
)
101
100
print (result)
102
-
103
101
```
104
102
105
103
Examples of `tool_calls` and `tool_definitions` for `ToolCallAccuracyEvaluator`:
0 commit comments