Skip to content

Commit 557b48c

Browse files
Fix missing priority & weight properties
1 parent 158f3bc commit 557b48c

File tree

4 files changed: 18 additions and 10 deletions.

labs/backend-pool-load-balancing/backend-pool-load-balancing.ipynb

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -67,10 +67,11 @@
6767
"\n",
6868
"apim_sku = 'Basicv2'\n",
6969
"\n",
70+
"# Prioritize UK South until exhaustion (simulate PTU with TPM), then equally distribute between Sweden and France (consumption fallback)\n",
7071
"openai_resources = [\n",
71-
" {\"name\": \"openai1\", \"location\": \"uksouth\", \"priority\": 1, \"weight\": 80},\n",
72-
" {\"name\": \"openai2\", \"location\": \"swedencentral\", \"priority\": 1, \"weight\": 10},\n",
73-
" {\"name\": \"openai3\", \"location\": \"francecentral\", \"priority\": 1, \"weight\": 10}\n",
72+
" {\"name\": \"openai1\", \"location\": \"uksouth\", \"priority\": 1},\n",
73+
" {\"name\": \"openai2\", \"location\": \"swedencentral\", \"priority\": 2, \"weight\": 50},\n",
74+
" {\"name\": \"openai3\", \"location\": \"francecentral\", \"priority\": 2, \"weight\": 50}\n",
7475
"]\n",
7576
"\n",
7677
"openai_deployment_name = \"gpt-35-turbo\"\n",
@@ -79,7 +80,7 @@
7980
"openai_model_capacity = 8\n",
8081
"openai_api_version = \"2024-02-01\"\n",
8182
"\n",
82-
"utils.print_ok('Notebook initiaized')"
83+
"utils.print_ok('Notebook initialized')"
8384
]
8485
},
8586
{
@@ -208,7 +209,7 @@
208209
"source": [
209210
"import requests, time\n",
210211
"\n",
211-
"runs = 10\n",
212+
"runs = 20\n",
212213
"sleep_time_ms = 100\n",
213214
"url = f\"{apim_resource_gateway_url}/openai/deployments/{openai_deployment_name}/chat/completions?api-version={openai_api_version}\"\n",
214215
"api_runs = []\n",
@@ -265,7 +266,9 @@
265266
"metadata": {},
266267
"source": [
267268
"<a id='plot'></a>\n",
268-
"### 🔍 Analyze Load Balancing results\n"
269+
"### 🔍 Analyze Load Balancing results\n",
270+
"\n",
271+
"The priority 1 backend will be used until TPM exhaustion sets in, then distribution will occur near equally across the two priority 2 backends with 50/50 weights."
269272
]
270273
},
271274
{
@@ -323,7 +326,7 @@
323326
"import time\n",
324327
"from openai import AzureOpenAI\n",
325328
"\n",
326-
"runs = 10\n",
329+
"runs = 20\n",
327330
"sleep_time_ms = 100\n",
328331
"\n",
329332
"client = AzureOpenAI(\n",
34.7 KB
Loading

modules/apim/v1/openai-api.bicep

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -153,9 +153,10 @@ resource backendPoolOpenAI 'Microsoft.ApiManagement/service/backends@2024-06-01-
153153
type: 'Pool'
154154
pool: {
155155
services: [for (config, i) in openAIConfig: {
156-
id: '/backends/${backendOpenAI[i].name}'
157-
}
158-
]
156+
id: '/backends/${backendOpenAI[i].name}'
157+
priority: config.?priority
158+
weight: config.?weight
159+
}]
159160
}
160161
}
161162
}

modules/cognitive-services/v1/openai.bicep

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -112,8 +112,12 @@ resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [
112112
// ------------------
113113

114114
output extendedOpenAIConfig array = [for (config, i) in openAIConfig: {
115+
// Original openAIConfig properties
115116
name: config.name
116117
location: config.location
118+
priority: config.?priority
119+
weight: config.?weight
120+
// Additional properties
117121
sku: openAISku
118122
deploymentName: openAIDeploymentName
119123
modelName: openAIModelName

0 commit comments

Comments
 (0)