|
3 | 3 | { |
4 | 4 | "cell_type": "markdown", |
5 | 5 | "id": "bf5280e2", |
6 | | - "metadata": { |
7 | | - "vscode": { |
8 | | - "languageId": "plaintext" |
9 | | - } |
10 | | - }, |
| 6 | + "metadata": {}, |
11 | 7 | "source": [ |
12 | 8 | "# Evaluate Semantic Kernel AI (ChatCompletion) Agents in Azure AI Foundry" |
13 | 9 | ] |
|
66 | 62 | { |
67 | 63 | "cell_type": "markdown", |
68 | 64 | "id": "ba1d6576", |
69 | | - "metadata": { |
70 | | - "vscode": { |
71 | | - "languageId": "plaintext" |
72 | | - } |
73 | | - }, |
| 65 | + "metadata": {}, |
74 | 66 | "source": [ |
75 | 67 | "### Create an AzureChatCompletion service - [reference](https://learn.microsoft.com/en-us/semantic-kernel/concepts/ai-services/chat-completion/?tabs=csharp-AzureOpenAI%2Cpython-AzureOpenAI%2Cjava-AzureOpenAI&pivots=programming-language-python)" |
76 | 68 | ] |
|
106 | 98 | "from semantic_kernel.functions import kernel_function\n", |
107 | 99 | "from typing import Annotated\n", |
108 | 100 | "\n", |
| 101 | + "\n", |
109 | 102 | "# This is a sample plugin that provides tools\n", |
110 | 103 | "class MenuPlugin:\n", |
111 | 104 | " \"\"\"A sample Menu Plugin used for the concept sample.\"\"\"\n", |
|
122 | 115 | " def get_item_price(\n", |
123 | 116 | " self, menu_item: Annotated[str, \"The name of the menu item.\"]\n", |
124 | 117 | " ) -> Annotated[str, \"Returns the price of the menu item.\"]:\n", |
125 | | - " _ = menu_item # This is just to simulate a function that uses the input.\n", |
| 118 | + " _ = menu_item # This is just to simulate a function that uses the input.\n", |
126 | 119 | " return \"$9.99\"" |
127 | 120 | ] |
128 | 121 | }, |
129 | 122 | { |
130 | 123 | "cell_type": "code", |
131 | | - "execution_count": 4, |
| 124 | + "execution_count": null, |
132 | 125 | "id": "d6abead3", |
133 | 126 | "metadata": {}, |
134 | 127 | "outputs": [], |
|
146 | 139 | }, |
147 | 140 | { |
148 | 141 | "cell_type": "code", |
149 | | - "execution_count": 5, |
| 142 | + "execution_count": null, |
150 | 143 | "id": "3b7b9ba3", |
151 | 144 | "metadata": {}, |
152 | | - "outputs": [ |
153 | | - { |
154 | | - "name": "stdout", |
155 | | - "output_type": "stream", |
156 | | - "text": [ |
157 | | - "## User: Hello\n", |
158 | | - "## Chef: Hello! How can I assist you today? If you have any questions about the menu, feel free to ask!\n", |
159 | | - "\n", |
160 | | - "## User: What is the special drink today?\n", |
161 | | - "## Chef: The special drink today is Chai Tea. Would you like to know more about other specials or the menu?\n", |
162 | | - "\n", |
163 | | - "## User: What does that cost?\n", |
164 | | - "## Chef: The Chai Tea costs $9.99. Is there anything else you would like to know?\n", |
165 | | - "\n", |
166 | | - "## User: Thank you\n", |
167 | | - "## Chef: You're welcome! If you have any more questions or need assistance in the future, feel free to ask. Enjoy your day!\n", |
168 | | - "\n" |
169 | | - ] |
170 | | - } |
171 | | - ], |
| 145 | + "outputs": [], |
172 | 146 | "source": [ |
173 | 147 | "thread = None\n", |
174 | 148 | "\n", |
|
199 | 173 | "execution_count": null, |
200 | 174 | "id": "fcd6ac41", |
201 | 175 | "metadata": {}, |
202 | | - "outputs": [ |
203 | | - { |
204 | | - "name": "stdout", |
205 | | - "output_type": "stream", |
206 | | - "text": [ |
207 | | - "Available turn indices: [0, 1, 2, 3]\n" |
208 | | - ] |
209 | | - } |
210 | | - ], |
| 176 | + "outputs": [], |
211 | 177 | "source": [ |
212 | 178 | "from azure.ai.evaluation import SKAgentConverter\n", |
213 | 179 | "\n", |
214 | | - "# Get the avaiable turn indices for the thread, \n", |
| 180 | + "# Get the available turn indices for the thread,\n", |
215 | 181 | "# useful for selecting a specific turn for evaluation\n", |
216 | 182 | "turn_indices = await SKAgentConverter._get_thread_turn_indices(thread=thread)\n", |
217 | 183 | "print(f\"Available turn indices: {turn_indices}\")" |
218 | 184 | ] |
219 | 185 | }, |
220 | 186 | { |
221 | 187 | "cell_type": "code", |
222 | | - "execution_count": 7, |
| 188 | + "execution_count": null, |
223 | 189 | "id": "d1d4ae12", |
224 | 190 | "metadata": {}, |
225 | | - "outputs": [ |
226 | | - { |
227 | | - "name": "stderr", |
228 | | - "output_type": "stream", |
229 | | - "text": [ |
230 | | - "Class SKAgentConverter: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" |
231 | | - ] |
232 | | - } |
233 | | - ], |
| 191 | + "outputs": [], |
234 | 192 | "source": [ |
235 | 193 | "converter = SKAgentConverter()\n", |
236 | 194 | "\n", |
237 | 195 | "# Get a single agent run data\n", |
238 | 196 | "evaluation_data_single_run = await converter.convert(\n", |
239 | 197 | " thread=thread,\n", |
240 | | - " turn_index=2, # Specify the turn index you want to evaluate\n", |
241 | | - " agent=agent # Pass it to include the instructions and plugins in the evaluation data\n", |
| 198 | + " turn_index=2, # Specify the turn index you want to evaluate\n", |
| 199 | + " agent=agent, # Pass it to include the instructions and plugins in the evaluation data\n", |
242 | 200 | ")" |
243 | 201 | ] |
244 | 202 | }, |
245 | 203 | { |
246 | 204 | "cell_type": "code", |
247 | | - "execution_count": 8, |
| 205 | + "execution_count": null, |
248 | 206 | "id": "7813b5eb", |
249 | 207 | "metadata": {}, |
250 | | - "outputs": [ |
251 | | - { |
252 | | - "data": { |
253 | | - "text/plain": [ |
254 | | - "4" |
255 | | - ] |
256 | | - }, |
257 | | - "execution_count": 8, |
258 | | - "metadata": {}, |
259 | | - "output_type": "execute_result" |
260 | | - } |
261 | | - ], |
| 208 | + "outputs": [], |
262 | 209 | "source": [ |
263 | 210 | "import json\n", |
264 | 211 | "\n", |
265 | 212 | "file_name = \"evaluation_data.jsonl\"\n", |
266 | 213 | "# Save the agent thread data to a JSONL file (all turns)\n", |
267 | 214 | "evaluation_data = await converter.prepare_evaluation_data(threads=[thread], filename=file_name, agent=agent)\n", |
268 | 215 | "# print(json.dumps(evaluation_data, indent=4))\n", |
269 | | - "len(evaluation_data) # number of turns in the thread" |
| 216 | + "len(evaluation_data) # number of turns in the thread" |
270 | 217 | ] |
271 | 218 | }, |
272 | 219 | { |
|
288 | 235 | "execution_count": null, |
289 | 236 | "id": "e6ee09df", |
290 | 237 | "metadata": {}, |
291 | | - "outputs": [ |
292 | | - { |
293 | | - "name": "stderr", |
294 | | - "output_type": "stream", |
295 | | - "text": [ |
296 | | - "Class IntentResolutionEvaluator: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" |
297 | | - ] |
298 | | - }, |
299 | | - { |
300 | | - "name": "stderr", |
301 | | - "output_type": "stream", |
302 | | - "text": [ |
303 | | - "Class ToolCallAccuracyEvaluator: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", |
304 | | - "Class TaskAdherenceEvaluator: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" |
305 | | - ] |
306 | | - } |
307 | | - ], |
| 238 | + "outputs": [], |
308 | 239 | "source": [ |
309 | 240 | "import os\n", |
310 | 241 | "from pprint import pprint\n", |
|
332 | 263 | }, |
333 | 264 | { |
334 | 265 | "cell_type": "code", |
335 | | - "execution_count": 10, |
| 266 | + "execution_count": null, |
336 | 267 | "id": "80bd50ff", |
337 | 268 | "metadata": {}, |
338 | | - "outputs": [ |
339 | | - { |
340 | | - "name": "stdout", |
341 | | - "output_type": "stream", |
342 | | - "text": [ |
343 | | - "{\n", |
344 | | - " \"tool_call_accuracy\": 1.0,\n", |
345 | | - " \"tool_call_accuracy_result\": \"pass\",\n", |
346 | | - " \"tool_call_accuracy_threshold\": 0.8,\n", |
347 | | - " \"per_tool_call_details\": [\n", |
348 | | - " {\n", |
349 | | - " \"tool_call_accurate\": true,\n", |
350 | | - " \"tool_call_accurate_reason\": \"The tool call is relevant to the user's request for the price of the special drink, has appropriate parameters that match the tool definition, and uses a parameter value that is present in the conversation. Therefore, it is likely to help resolve the user's need.\",\n", |
351 | | - " \"tool_call_id\": \"call_4aGdX7zYtMV28tjngPTTvyXx\"\n", |
352 | | - " }\n", |
353 | | - " ]\n", |
354 | | - "}\n" |
355 | | - ] |
356 | | - } |
357 | | - ], |
| 269 | + "outputs": [], |
358 | 270 | "source": [ |
359 | 271 | "# Test a single evaluation run\n", |
360 | 272 | "evaluator = ToolCallAccuracyEvaluator(model_config=model_config)\n", |
|
437 | 349 | "mimetype": "text/x-python", |
438 | 350 | "name": "python", |
439 | 351 | "nbconvert_exporter": "python", |
440 | | - "pygments_lexer": "ipython3", |
441 | | - "version": "3.10.12" |
| 352 | + "pygments_lexer": "ipython3" |
442 | 353 | } |
443 | 354 | }, |
444 | 355 | "nbformat": 4, |
|
0 commit comments