|
67 | 67 | "\n", |
68 | 68 | "apim_sku = 'Basicv2'\n", |
69 | 69 | "\n", |
| 70 | + "# Prioritize UK South until exhaustion (simulate PTU with TPM), then equally distribute between Sweden and France (consumption fallback)\n", |
70 | 71 | "openai_resources = [\n", |
71 | | - " {\"name\": \"openai1\", \"location\": \"uksouth\", \"priority\": 1, \"weight\": 80},\n", |
72 | | - " {\"name\": \"openai2\", \"location\": \"swedencentral\", \"priority\": 1, \"weight\": 10},\n", |
73 | | - " {\"name\": \"openai3\", \"location\": \"francecentral\", \"priority\": 1, \"weight\": 10}\n", |
| 72 | + " {\"name\": \"openai1\", \"location\": \"uksouth\", \"priority\": 1},\n", |
| 73 | + " {\"name\": \"openai2\", \"location\": \"swedencentral\", \"priority\": 2, \"weight\": 50},\n", |
| 74 | + " {\"name\": \"openai3\", \"location\": \"francecentral\", \"priority\": 2, \"weight\": 50}\n", |
74 | 75 | "]\n", |
75 | 76 | "\n", |
76 | 77 | "openai_deployment_name = \"gpt-35-turbo\"\n", |
|
79 | 80 | "openai_model_capacity = 8\n", |
80 | 81 | "openai_api_version = \"2024-02-01\"\n", |
81 | 82 | "\n", |
82 | | - "utils.print_ok('Notebook initiaized')" |
| 83 | + "utils.print_ok('Notebook initialized')" |
83 | 84 | ] |
84 | 85 | }, |
85 | 86 | { |
|
208 | 209 | "source": [ |
209 | 210 | "import requests, time\n", |
210 | 211 | "\n", |
211 | | - "runs = 10\n", |
| 212 | + "runs = 20\n", |
212 | 213 | "sleep_time_ms = 100\n", |
213 | 214 | "url = f\"{apim_resource_gateway_url}/openai/deployments/{openai_deployment_name}/chat/completions?api-version={openai_api_version}\"\n", |
214 | 215 | "api_runs = []\n", |
|
265 | 266 | "metadata": {}, |
266 | 267 | "source": [ |
267 | 268 | "<a id='plot'></a>\n", |
268 | | - "### 🔍 Analyze Load Balancing results\n" |
| 269 | + "### 🔍 Analyze Load Balancing results\n", |
| 270 | + "\n", |
| 271 | + "The priority 1 backend will be used until TPM exhaustion sets in; traffic will then be distributed nearly equally across the two priority 2 backends, which have 50/50 weights." |
269 | 272 | ] |
270 | 273 | }, |
271 | 274 | { |
|
323 | 326 | "import time\n", |
324 | 327 | "from openai import AzureOpenAI\n", |
325 | 328 | "\n", |
326 | | - "runs = 10\n", |
| 329 | + "runs = 20\n", |
327 | 330 | "sleep_time_ms = 100\n", |
328 | 331 | "\n", |
329 | 332 | "client = AzureOpenAI(\n", |
|
0 commit comments