|
9 | 9 | "## Model routing lab\n", |
10 | 10 | "\n", |
11 | 11 | "\n", |
12 | | - "Playground to try routing to an AI Foundry backend based on the requested model.\n", |
| 12 | + "Playground to try routing to an AI Foundry backend based on the requested model (Chat Completions and Responses API).\n", |
13 | 13 | "\n", |
14 | 14 | "### Prerequisites\n", |
15 | 15 | "\n", |
|
57 | 57 | "models_config = [{\"name\": \"gpt-4.1\", \"publisher\": \"OpenAI\", \"version\": \"2025-04-14\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry1\"},\n", |
58 | 58 | " {\"name\": \"gpt-4.1-mini\", \"publisher\": \"OpenAI\", \"version\": \"2025-04-14\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry2\"},\n", |
59 | 59 | " {\"name\": \"gpt-4.1-nano\", \"publisher\": \"OpenAI\", \"version\": \"2025-04-14\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry2\"},\n", |
60 | | - " {\"name\": \"model-router\", \"publisher\": \"OpenAI\", \"version\": \"2025-05-19\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry3\"}]\n", |
| 60 | + " {\"name\": \"model-router\", \"publisher\": \"OpenAI\", \"version\": \"2025-05-19\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry3\"},\n", |
| 61 | + " {\"name\": \"DeepSeek-R1\", \"publisher\": \"DeepSeek\", \"version\": \"1\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry3\"}]\n", |
61 | 62 | "\n", |
62 | 63 | "apim_sku = 'Basicv2'\n", |
63 | 64 | "apim_subscriptions_config = [{\"name\": \"subscription1\", \"displayName\": \"Subscription 1\"}]\n", |
|
179 | 180 | "metadata": {}, |
180 | 181 | "source": [ |
181 | 182 | "<a id='sdk'></a>\n", |
182 | | - "### 🧪 Test the API using the Azure OpenAI Python SDK\n" |
| 183 | + "### 🧪 Test the API using the Azure OpenAI Python SDK\n", |
| 184 | + "#### Chat Completions\n" |
183 | 185 | ] |
184 | 186 | }, |
185 | 187 | { |
|
200 | 202 | " api_version=inference_api_version\n", |
201 | 203 | ")\n", |
202 | 204 | "try:\n", |
203 | | - " response = client.chat.completions.with_raw_response.create(model=models_config[3]['name'], messages=messages)\n", |
204 | | - " print(\"headers \", response.headers)\n", |
205 | | - " print(\"x-ms-region: \", response.headers.get(\"x-ms-region\")) # this header is useful to determine the region of the backend that served the request\n", |
| 205 | + " for model in ['model-router', 'DeepSeek-R1', 'gpt-4.1']:\n", |
| 206 | + " completion = client.chat.completions.with_raw_response.create(model=model, messages=messages)\n", |
| 207 | + " # print(\"headers \", completion.headers)\n", |
| 208 | + " print(\"x-ms-region: \", completion.headers.get(\"x-ms-region\")) # this header is useful to determine the region of the backend that served the request\n", |
206 | 209 | "\n", |
207 | | - " completion = response.parse() \n", |
| 210 | + " completion = completion.parse()\n", |
| 211 | + "\n", |
| 212 | + " print(f\"Model: {completion.model} 💬: {completion.choices[0].message.content}\\n\")\n", |
| 213 | + "except Exception as e:\n", |
| 214 | + " print(f\"Error: {e}\")\n" |
| 215 | + ] |
| 216 | + }, |
| 217 | + { |
| 218 | + "cell_type": "markdown", |
| 219 | + "metadata": {}, |
| 220 | + "source": [ |
| 221 | + "#### Responses API\n", |
| 222 | + "*Note*: The OpenAI Model Router only supports the Chat Completions API.\n\n", |
| 223 | + "*Note*: The Responses API currently only supports Azure OpenAI models." |
| 224 | + ] |
| 225 | + }, |
| 226 | + { |
| 227 | + "cell_type": "code", |
| 228 | + "execution_count": null, |
| 229 | + "metadata": {}, |
| 230 | + "outputs": [], |
| 231 | + "source": [ |
| 232 | + "start_time = time.time()\n", |
| 233 | + "input_message = \"which model are you using?\"\n", |
| 234 | + "\n", |
| 235 | + "client = AzureOpenAI(\n", |
| 236 | + " azure_endpoint=f\"{apim_resource_gateway_url}/{inference_api_path}\",\n", |
| 237 | + " api_key=api_key,\n", |
| 238 | + " api_version=inference_api_version\n", |
| 239 | + ")\n", |
| 240 | + "try:\n", |
| 241 | + " for model in ['gpt-4.1-mini', 'gpt-4.1-nano', 'gpt-4.1']:\n", |
| 242 | + " responses = client.responses.with_raw_response.create(model=model, input=input_message)\n", |
| 243 | + " # print(\"headers \", responses.headers)\n", |
| 244 | + " print(\"x-ms-region: \", responses.headers.get(\"x-ms-region\"))\n", |
| 245 | + " output = responses.parse()\n", |
| 246 | + " print(f\"Model: {output.model} 💬: {output.output_text}\\n\")\n", |
208 | 247 | "\n", |
209 | | - " print(f\"Model: {completion.model} 💬: {completion.choices[0].message.content}\")\n", |
210 | 248 | "except Exception as e:\n", |
211 | 249 | " print(f\"Error: {e}\")\n" |
212 | 250 | ] |
|
225 | 263 | ], |
226 | 264 | "metadata": { |
227 | 265 | "kernelspec": { |
228 | | - "display_name": "Python 3", |
| 266 | + "display_name": "myenv", |
229 | 267 | "language": "python", |
230 | 268 | "name": "python3" |
231 | 269 | }, |
|
239 | 277 | "name": "python", |
240 | 278 | "nbconvert_exporter": "python", |
241 | 279 | "pygments_lexer": "ipython3", |
242 | | - "version": "3.12.10" |
| 280 | + "version": "3.13.5" |
243 | 281 | } |
244 | 282 | }, |
245 | 283 | "nbformat": 4, |
|
0 commit comments