Skip to content

Commit 5639a4d

Browse files
Updates to the model-routing lab: (#206)
- Added support for the new "model-router" and "DeepSeek-R1" models. - Updated the policy to route requests based on the requested model for both the chat completions and responses APIs. - Updated the graphical representation of the lab to include the new models.
1 parent f940c9a commit 5639a4d

File tree

4 files changed

+85
-23
lines changed

4 files changed

+85
-23
lines changed

images/model-routing.gif

-243 KB
Loading

labs/model-routing/clean-up-resources.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
],
2727
"metadata": {
2828
"kernelspec": {
29-
"display_name": "Python 3",
29+
"display_name": "myenv",
3030
"language": "python",
3131
"name": "python3"
3232
},
@@ -40,7 +40,7 @@
4040
"name": "python",
4141
"nbconvert_exporter": "python",
4242
"pygments_lexer": "ipython3",
43-
"version": "3.12.10"
43+
"version": "3.13.5"
4444
}
4545
},
4646
"nbformat": 4,

labs/model-routing/model-routing.ipynb

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"## Model routing lab\n",
1010
"![flow](../../images/model-routing.gif)\n",
1111
"\n",
12-
"Playground to try routing to an AI Foundry backend based on the requested model.\n",
12+
"Playground to try routing to an AI Foundry backend based on the requested model (Chat Completions and Responses API).\n",
1313
"\n",
1414
"### Prerequisites\n",
1515
"\n",
@@ -57,7 +57,8 @@
5757
"models_config = [{\"name\": \"gpt-4.1\", \"publisher\": \"OpenAI\", \"version\": \"2025-04-14\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry1\"},\n",
5858
" {\"name\": \"gpt-4.1-mini\", \"publisher\": \"OpenAI\", \"version\": \"2025-04-14\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry2\"},\n",
5959
" {\"name\": \"gpt-4.1-nano\", \"publisher\": \"OpenAI\", \"version\": \"2025-04-14\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry2\"},\n",
60-
" {\"name\": \"model-router\", \"publisher\": \"OpenAI\", \"version\": \"2025-05-19\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry3\"}]\n",
60+
" {\"name\": \"model-router\", \"publisher\": \"OpenAI\", \"version\": \"2025-05-19\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry3\"},\n",
61+
" {\"name\": \"DeepSeek-R1\", \"publisher\": \"DeepSeek\", \"version\": \"1\", \"sku\": \"GlobalStandard\", \"capacity\": 20, \"aiservice\": \"foundry3\"}]\n",
6162
"\n",
6263
"apim_sku = 'Basicv2'\n",
6364
"apim_subscriptions_config = [{\"name\": \"subscription1\", \"displayName\": \"Subscription 1\"}]\n",
@@ -179,7 +180,8 @@
179180
"metadata": {},
180181
"source": [
181182
"<a id='sdk'></a>\n",
182-
"### 🧪 Test the API using the Azure OpenAI Python SDK\n"
183+
"### 🧪 Test the API using the Azure OpenAI Python SDK\n",
184+
"#### Chat Completions\n"
183185
]
184186
},
185187
{
@@ -200,13 +202,49 @@
200202
" api_version=inference_api_version\n",
201203
")\n",
202204
"try:\n",
203-
" response = client.chat.completions.with_raw_response.create(model=models_config[3]['name'], messages=messages)\n",
204-
" print(\"headers \", response.headers)\n",
205-
" print(\"x-ms-region: \", response.headers.get(\"x-ms-region\")) # this header is useful to determine the region of the backend that served the request\n",
205+
" for model in ['model-router', 'DeepSeek-R1', 'gpt-4.1']:\n",
206+
" completion = client.chat.completions.with_raw_response.create(model=model, messages=messages)\n",
207+
" # print(\"headers \", completion.headers)\n",
208+
" print(\"x-ms-region: \", completion.headers.get(\"x-ms-region\")) # this header is useful to determine the region of the backend that served the request\n",
206209
"\n",
207-
" completion = response.parse() \n",
210+
" completion = completion.parse()\n",
211+
"\n",
212+
" print(f\"Model: {completion.model} 💬: {completion.choices[0].message.content}\\n\")\n",
213+
"except Exception as e:\n",
214+
" print(f\"Error: {e}\")\n"
215+
]
216+
},
217+
{
218+
"cell_type": "markdown",
219+
"metadata": {},
220+
"source": [
221+
"#### Responses API\n",
222+
"*Note* OpenAI Model Router only supports Chat Completions API\n",
223+
"*Note* Responses API currently only supports Azure OpenAI models"
224+
]
225+
},
226+
{
227+
"cell_type": "code",
228+
"execution_count": null,
229+
"metadata": {},
230+
"outputs": [],
231+
"source": [
232+
"start_time = time.time()\n",
233+
"input_message = \"which model are you using?\"\n",
234+
"\n",
235+
"client = AzureOpenAI(\n",
236+
" azure_endpoint=f\"{apim_resource_gateway_url}/{inference_api_path}\",\n",
237+
" api_key=api_key,\n",
238+
" api_version=inference_api_version\n",
239+
")\n",
240+
"try:\n",
241+
" for model in ['gpt-4.1-mini', 'gpt-4.1-nano', 'gpt-4.1']:\n",
242+
" responses = client.responses.with_raw_response.create(model=model, input=input_message)\n",
243+
" # print(\"headers \", responses.headers)\n",
244+
" print(\"x-ms-region: \", responses.headers.get(\"x-ms-region\"))\n",
245+
" output = responses.parse()\n",
246+
" print(f\"Model: {output.model} 💬: {output.output_text}\\n\")\n",
208247
"\n",
209-
" print(f\"Model: {completion.model} 💬: {completion.choices[0].message.content}\")\n",
210248
"except Exception as e:\n",
211249
" print(f\"Error: {e}\")\n"
212250
]
@@ -225,7 +263,7 @@
225263
],
226264
"metadata": {
227265
"kernelspec": {
228-
"display_name": "Python 3",
266+
"display_name": "myenv",
229267
"language": "python",
230268
"name": "python3"
231269
},
@@ -239,7 +277,7 @@
239277
"name": "python",
240278
"nbconvert_exporter": "python",
241279
"pygments_lexer": "ipython3",
242-
"version": "3.12.10"
280+
"version": "3.13.5"
243281
}
244282
},
245283
"nbformat": 4,

labs/model-routing/policy.xml

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,53 @@
1+
<!-- /policies -->
12
<policies>
23
<inbound>
34
<base />
4-
5-
<!-- Use a <choose> block to evaluate deployment-id -->
5+
<!-- 1a – deployment-id from the route template -->
6+
<set-variable name="deployment" value="@(context.Request.MatchedParameters.ContainsKey("deployment-id")
7+
? context.Request.MatchedParameters["deployment-id"]
8+
: string.Empty)" />
9+
<!-- 1b – model from the request body (JSON) -->
10+
<set-variable name="reqBody" value="@(context.Request.Body?.As<JObject>(preserveContent:true)
11+
?? new JObject())" />
12+
<set-variable name="model" value="@( ((JObject)context.Variables["reqBody"])
13+
.Property("model")?.Value?.ToString()
14+
?? string.Empty)" />
15+
<!-- 1c – first non-empty of deployment-id or model -->
16+
<set-variable name="requestedModel" value="@( !string.IsNullOrEmpty((string)context.Variables["deployment"])
17+
? (string)context.Variables["deployment"]
18+
: (string)context.Variables["model"] )" />
19+
<!-- 2. Decide what to do with the request -->
620
<choose>
7-
<when condition="@(context.Request.MatchedParameters["deployment-id"] == "gpt-4.1")">
21+
<!-- route tier-1 GPT-4.1 -->
22+
<when condition="@( ((string)context.Variables["requestedModel"]) == "gpt-4.1")">
823
<set-backend-service backend-id="foundry1" />
924
</when>
10-
<when condition="@(context.Request.MatchedParameters["deployment-id"] == "gpt-4.1-mini" || context.Request.MatchedParameters["deployment-id"] == "gpt-4.1-nano")">
25+
<when condition="@( ((string)context.Variables["requestedModel"]) == "gpt-4.1-mini"
26+
|| ((string)context.Variables["requestedModel"]) == "gpt-4.1-nano")">
1127
<set-backend-service backend-id="foundry2" />
1228
</when>
13-
<when condition="@(context.Request.MatchedParameters["deployment-id"] == "model-router")">
29+
<when condition="@( ((string)context.Variables["requestedModel"]) == "model-router"
30+
|| ((string)context.Variables["requestedModel"]) == "DeepSeek-R1")">
1431
<set-backend-service backend-id="foundry3" />
1532
</when>
33+
<!-- gate any GPT-4o* variants -->
34+
<when condition="@( ((string)context.Variables["requestedModel"] ?? string.Empty)
35+
.StartsWith("gpt-4o"))">
36+
<return-response>
37+
<set-status code="403" reason="Forbidden" />
38+
<set-body>@("{\"error\":\"Model '" + (string)context.Variables["requestedModel"] + "' is not permitted.\"}")</set-body>
39+
</return-response>
40+
</when>
41+
<!-- catch-all -->
1642
<otherwise>
1743
<return-response>
1844
<set-status code="400" reason="Bad Request" />
1945
<set-header name="Content-Type" exists-action="override">
2046
<value>application/json</value>
2147
</set-header>
22-
<set-body>
23-
{
24-
"error": "Invalid deployment-id. Please provide a valid deployment-id."
25-
}
26-
</set-body>
48+
<set-body>{
49+
"error": "Invalid model or deployment-id. Supply a valid name in the URL or JSON body."
50+
}</set-body>
2751
</return-response>
2852
</otherwise>
2953
</choose>
@@ -37,4 +61,4 @@
3761
<on-error>
3862
<base />
3963
</on-error>
40-
</policies>
64+
</policies>

0 commit comments

Comments (0)