Skip to content

Commit 5639a4d

Browse files
Updates to the model-routing lab: (#206)
- Added support for the new "model-router" and "DeepSeek-R1" models. - Updated the policy to route requests based on the requested model for both the chat completions and responses APIs. - Updated the graphical representation of the lab to include the new models.
1 parent f940c9a commit 5639a4d

File tree

4 files changed

+85
-23
lines changed

4 files changed

+85
-23
lines changed

images/model-routing.gif

-243 KB
Loading

labs/model-routing/clean-up-resources.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
],
2727
"metadata": {
2828
"kernelspec": {
29-
"display_name": "Python 3",
29+
"display_name": "myenv",
3030
"language": "python",
3131
"name": "python3"
3232
},
@@ -40,7 +40,7 @@
4040
"name": "python",
4141
"nbconvert_exporter": "python",
4242
"pygments_lexer": "ipython3",
43-
"version": "3.12.10"
43+
"version": "3.13.5"
4444
}
4545
},
4646
"nbformat": 4,

labs/model-routing/model-routing.ipynb

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"## Model routing lab\n",
1010
"![flow](../../images/model-routing.gif)\n",
1111
"\n",
12-
"Playground to try routing to an AI Foundry backend based on the requested model.\n",
12+
"Playground to try routing to an AI Foundry backend based on the requested model (Chat Completions and Responses API).\n",
1313
"\n",
1414
"### Prerequisites\n",
1515
"\n",
@@ -57,7 +57,8 @@
5757
"models_config = [{\"name\": \"gpt-4.1\", \"publisher\": \"OpenAI\", \"version\": \"2025-04-14\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry1\"},\n",
5858
" {\"name\": \"gpt-4.1-mini\", \"publisher\": \"OpenAI\", \"version\": \"2025-04-14\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry2\"},\n",
5959
" {\"name\": \"gpt-4.1-nano\", \"publisher\": \"OpenAI\", \"version\": \"2025-04-14\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry2\"},\n",
60-
" {\"name\": \"model-router\", \"publisher\": \"OpenAI\", \"version\": \"2025-05-19\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry3\"}]\n",
60+
" {\"name\": \"model-router\", \"publisher\": \"OpenAI\", \"version\": \"2025-05-19\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry3\"},\n",
61+
" {\"name\": \"DeepSeek-R1\", \"publisher\": \"DeepSeek\", \"version\": \"1\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry3\"}]\n",
6162
"\n",
6263
"apim_sku = 'Basicv2'\n",
6364
"apim_subscriptions_config = [{\"name\": \"subscription1\", \"displayName\": \"Subscription 1\"}]\n",
@@ -179,7 +180,8 @@
179180
"metadata": {},
180181
"source": [
181182
"<a id='sdk'></a>\n",
182-
"### 🧪 Test the API using the Azure OpenAI Python SDK\n"
183+
"### 🧪 Test the API using the Azure OpenAI Python SDK\n",
184+
"#### Chat Completions\n"
183185
]
184186
},
185187
{
@@ -200,13 +202,49 @@
200202
" api_version=inference_api_version\n",
201203
")\n",
202204
"try:\n",
203-
" response = client.chat.completions.with_raw_response.create(model=models_config[3]['name'], messages=messages)\n",
204-
" print(\"headers \", response.headers)\n",
205-
" print(\"x-ms-region: \", response.headers.get(\"x-ms-region\")) # this header is useful to determine the region of the backend that served the request\n",
205+
" for model in ['model-router', 'DeepSeek-R1', 'gpt-4.1']:\n",
206+
" completion = client.chat.completions.with_raw_response.create(model=model, messages=messages)\n",
207+
" # print(\"headers \", completion.headers)\n",
208+
" print(\"x-ms-region: \", completion.headers.get(\"x-ms-region\")) # this header is useful to determine the region of the backend that served the request\n",
206209
"\n",
207-
" completion = response.parse() \n",
210+
" completion = completion.parse()\n",
211+
"\n",
212+
" print(f\"Model: {completion.model} 💬: {completion.choices[0].message.content}\\n\")\n",
213+
"except Exception as e:\n",
214+
" print(f\"Error: {e}\")\n"
215+
]
216+
},
217+
{
218+
"cell_type": "markdown",
219+
"metadata": {},
220+
"source": [
221+
"#### Responses API\n",
222+
"*Note* OpenAI Model Router only supports Chat Completions API\n",
223+
"*Note* Responses API currently only supports Azure OpenAI models"
224+
]
225+
},
226+
{
227+
"cell_type": "code",
228+
"execution_count": null,
229+
"metadata": {},
230+
"outputs": [],
231+
"source": [
232+
"start_time = time.time()\n",
233+
"input_message = \"which model are you using?\"\n",
234+
"\n",
235+
"client = AzureOpenAI(\n",
236+
" azure_endpoint=f\"{apim_resource_gateway_url}/{inference_api_path}\",\n",
237+
" api_key=api_key,\n",
238+
" api_version=inference_api_version\n",
239+
")\n",
240+
"try:\n",
241+
" for model in ['gpt-4.1-mini', 'gpt-4.1-nano', 'gpt-4.1']:\n",
242+
" responses = client.responses.with_raw_response.create(model=model, input=input_message)\n",
243+
" # print(\"headers \", responses.headers)\n",
244+
" print(\"x-ms-region: \", responses.headers.get(\"x-ms-region\"))\n",
245+
" output = responses.parse()\n",
246+
" print(f\"Model: {output.model} 💬: {output.output_text}\\n\")\n",
208247
"\n",
209-
" print(f\"Model: {completion.model} 💬: {completion.choices[0].message.content}\")\n",
210248
"except Exception as e:\n",
211249
" print(f\"Error: {e}\")\n"
212250
]
@@ -225,7 +263,7 @@
225263
],
226264
"metadata": {
227265
"kernelspec": {
228-
"display_name": "Python 3",
266+
"display_name": "myenv",
229267
"language": "python",
230268
"name": "python3"
231269
},
@@ -239,7 +277,7 @@
239277
"name": "python",
240278
"nbconvert_exporter": "python",
241279
"pygments_lexer": "ipython3",
242-
"version": "3.12.10"
280+
"version": "3.13.5"
243281
}
244282
},
245283
"nbformat": 4,

labs/model-routing/policy.xml

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,53 @@
1+
<!-- /policies -->
12
<policies>
23
<inbound>
34
<base />
4-
5-
<!-- Use a <choose> block to evaluate deployment-id -->
5+
<!-- 1a – deployment-id from the route template -->
6+
<set-variable name="deployment" value="@(context.Request.MatchedParameters.ContainsKey("deployment-id")
7+
? context.Request.MatchedParameters["deployment-id"]
8+
: string.Empty)" />
9+
<!-- 1b – model from the request body (JSON) -->
10+
<set-variable name="reqBody" value="@(context.Request.Body?.As<JObject>(preserveContent:true)
11+
?? new JObject())" />
12+
<set-variable name="model" value="@( ((JObject)context.Variables["reqBody"])
13+
.Property("model")?.Value?.ToString()
14+
?? string.Empty)" />
15+
<!-- 1c – first non-empty of deployment-id or model -->
16+
<set-variable name="requestedModel" value="@( !string.IsNullOrEmpty((string)context.Variables["deployment"])
17+
? (string)context.Variables["deployment"]
18+
: (string)context.Variables["model"] )" />
19+
<!-- 2. Decide what to do with the request -->
620
<choose>
7-
<when condition="@(context.Request.MatchedParameters["deployment-id"] == "gpt-4.1")">
21+
<!-- route tier-1 GPT-4.1 -->
22+
<when condition="@( ((string)context.Variables["requestedModel"]) == "gpt-4.1")">
823
<set-backend-service backend-id="foundry1" />
924
</when>
10-
<when condition="@(context.Request.MatchedParameters["deployment-id"] == "gpt-4.1-mini" || context.Request.MatchedParameters["deployment-id"] == "gpt-4.1-nano")">
25+
<when condition="@( ((string)context.Variables["requestedModel"]) == "gpt-4.1-mini"
26+
|| ((string)context.Variables["requestedModel"]) == "gpt-4.1-nano")">
1127
<set-backend-service backend-id="foundry2" />
1228
</when>
13-
<when condition="@(context.Request.MatchedParameters["deployment-id"] == "model-router")">
29+
<when condition="@( ((string)context.Variables["requestedModel"]) == "model-router"
30+
|| ((string)context.Variables["requestedModel"]) == "DeepSeek-R1")">
1431
<set-backend-service backend-id="foundry3" />
1532
</when>
33+
<!-- gate any GPT-4o* variants -->
34+
<when condition="@( ((string)context.Variables["requestedModel"] ?? string.Empty)
35+
.StartsWith("gpt-4o"))">
36+
<return-response>
37+
<set-status code="403" reason="Forbidden" />
38+
<set-body>@("{\"error\":\"Model '" + (string)context.Variables["requestedModel"] + "' is not permitted.\"}")</set-body>
39+
</return-response>
40+
</when>
41+
<!-- catch-all -->
1642
<otherwise>
1743
<return-response>
1844
<set-status code="400" reason="Bad Request" />
1945
<set-header name="Content-Type" exists-action="override">
2046
<value>application/json</value>
2147
</set-header>
22-
<set-body>
23-
{
24-
"error": "Invalid deployment-id. Please provide a valid deployment-id."
25-
}
26-
</set-body>
48+
<set-body>{
49+
"error": "Invalid model or deployment-id. Supply a valid name in the URL or JSON body."
50+
}</set-body>
2751
</return-response>
2852
</otherwise>
2953
</choose>
@@ -37,4 +61,4 @@
3761
<on-error>
3862
<base />
3963
</on-error>
40-
</policies>
64+
</policies>

0 commit comments

Comments (0)