|
21 | 21 | "\n", |
22 | 22 | "### TOC\n", |
23 | 23 | "- [0️⃣ Initialize notebook variables](#0)\n", |
24 | | - "- [1️⃣ Create the Azure Resource Group](#1)\n", |
| 24 | + "- [1️⃣ Verify the Azure CLI and the connected Azure subscription](#1)\n", |
25 | 25 | "- [2️⃣ Create deployment using 🦾 Bicep](#2)\n", |
26 | 26 | "- [3️⃣ Get the deployment outputs](#3)\n", |
27 | 27 | "- [🧪 Test the API using a direct HTTP call](#requests)\n", |
|
53 | 53 | }, |
54 | 54 | { |
55 | 55 | "cell_type": "code", |
56 | | - "execution_count": 1, |
| 56 | + "execution_count": null, |
57 | 57 | "metadata": {}, |
58 | 58 | "outputs": [], |
59 | 59 | "source": [ |
60 | | - "import os\n", |
| 60 | + "import os, sys, json\n", |
| 61 | + "sys.path.insert(1, '../../shared') # add the shared directory to the Python path\n", |
| 62 | + "import utils\n", |
61 | 63 | "\n", |
62 | 64 | "deployment_name = os.path.basename(os.path.dirname(globals()['__vsc_ipynb_file__']))\n", |
63 | 65 | "resource_group_name = f\"lab-{deployment_name}\" # change the name to match your naming style\n", |
64 | 66 | "resource_group_location = \"westeurope\"\n", |
65 | 67 | "\n", |
| 68 | + "apim_sku = 'Basicv2'\n", |
| 69 | + "\n", |
66 | 70 | "openai_resources = [\n", |
67 | 71 | " {\"name\": \"openai1\", \"location\": \"uksouth\", \"priority\": 1, \"weight\": 80},\n", |
68 | 72 | " {\"name\": \"openai2\", \"location\": \"swedencentral\", \"priority\": 1, \"weight\": 10},\n", |
69 | 73 | " {\"name\": \"openai3\", \"location\": \"francecentral\", \"priority\": 1, \"weight\": 10}\n", |
70 | 74 | "]\n", |
71 | 75 | "\n", |
| 76 | + "openai_deployment_name = \"gpt-35-turbo\"\n", |
72 | 77 | "openai_model_name = \"gpt-35-turbo\"\n", |
73 | 78 | "openai_model_version = \"0613\"\n", |
74 | | - "openai_deployment_name = \"gpt-35-turbo\"\n", |
75 | | - "openai_api_version = \"2024-02-01\"\n" |
| 79 | + "openai_model_capacity = 8\n", |
| 80 | + "openai_api_version = \"2024-02-01\"\n", |
| 81 | + "\n", |
| 82 | + "utils.print_ok('Notebook initialized')" |
76 | 83 | ] |
77 | 84 | }, |
78 | 85 | { |
79 | 86 | "cell_type": "markdown", |
80 | 87 | "metadata": {}, |
81 | 88 | "source": [ |
82 | 89 | "<a id='1'></a>\n", |
83 | | - "### 1️⃣ Create the Azure Resource Group\n", |
84 | | - "All resources deployed in this lab will be created in the specified resource group. Skip this step if you want to use an existing resource group." |
| 90 | + "### 1️⃣ Verify the Azure CLI and the connected Azure subscription\n", |
| 91 | + "\n", |
| 92 | + "The following commands ensure that you have the latest version of the Azure CLI and that the Azure CLI is connected to your Azure subscription." |
85 | 93 | ] |
86 | 94 | }, |
87 | 95 | { |
|
90 | 98 | "metadata": {}, |
91 | 99 | "outputs": [], |
92 | 100 | "source": [ |
93 | | - "# %load ../../shared/snippets/create-az-resource-group.py\n", |
94 | | - "\n", |
95 | | - "# type: ignore\n", |
96 | | - "\n", |
97 | | - "import datetime\n", |
98 | | - "\n", |
99 | | - "resource_group_stdout = ! az group create --name {resource_group_name} --location {resource_group_location}\n", |
100 | | - "\n", |
101 | | - "if resource_group_stdout.n.startswith(\"ERROR\"):\n", |
102 | | - " print(resource_group_stdout)\n", |
103 | | - "else:\n", |
104 | | - " print(f\"✅ Azure Resource Group {resource_group_name} created ⌚ {datetime.datetime.now().time()}\")\n" |
| 101 | + "output = utils.run(\"az account show\", \"Retrieved az account\", \"Failed to get the current az account\")\n", |
| 102 | + "if output.success and output.json_data:\n", |
| 103 | + " current_user = output.json_data['user']['name']\n", |
| 104 | + " subscription_id = output.json_data['id']\n", |
| 105 | + " tenant_id = output.json_data['tenantId']" |
105 | 106 | ] |
106 | 107 | }, |
107 | 108 | { |
|
111 | 112 | "<a id='2'></a>\n", |
112 | 113 | "### 2️⃣ Create deployment using 🦾 Bicep\n", |
113 | 114 | "\n", |
114 | | - "This lab uses [Bicep](https://learn.microsoft.com/azure/azure-resource-manager/bicep/overview?tabs=bicep) to declarative define all the resources that will be deployed. Change the parameters or the [main.bicep](main.bicep) directly to try different configurations. \n", |
| 115 | + "This lab uses [Bicep](https://learn.microsoft.com/azure/azure-resource-manager/bicep/overview?tabs=bicep) to declaratively define all the resources that will be deployed in the specified resource group. Change the parameters or the [main.bicep](main.bicep) directly to try different configurations. \n", |
115 | 116 | "\n", |
116 | | - "`openAIModelCapacity` is set intentionally low to `8` (8k tokens per minute) in _main.bicep_ to showcase the retry logic in the load balancer." |
| 117 | + "`openAIModelCapacity` is set intentionally low to `8` (8k tokens per minute) to showcase the retry logic in the load balancer." |
117 | 118 | ] |
118 | 119 | }, |
119 | 120 | { |
|
122 | 123 | "metadata": {}, |
123 | 124 | "outputs": [], |
124 | 125 | "source": [ |
125 | | - "# %load ../../shared/snippets/create-az-deployment.py\n", |
126 | | - "\n", |
127 | | - "# type: ignore\n", |
128 | | - "\n", |
129 | | - "import json\n", |
130 | | - "\n", |
131 | | - "backend_id = \"openai-backend-pool\" if len(openai_resources) > 1 else openai_resources[0].get(\"name\")\n", |
| 126 | + "# create the resource group if it doesn't exist\n", |
| 127 | + "utils.create_resource_group(True, resource_group_name, resource_group_location)\n", |
132 | 128 | "\n", |
| 129 | + "# update the APIM policy file before the deployment\n", |
| 130 | + "policy_xml = None\n", |
133 | 131 | "with open(\"policy.xml\", 'r') as policy_xml_file:\n", |
134 | | - " policy_xml = policy_xml_file.read()\n", |
135 | | - "\n", |
136 | | - " if \"{backend-id}\" in policy_xml:\n", |
137 | | - " policy_xml = policy_xml.replace(\"{backend-id}\", backend_id)\n", |
138 | | - "\n", |
139 | | - " if \"{aad-client-application-id}\" in policy_xml:\n", |
140 | | - " policy_xml = policy_xml.replace(\"{aad-client-application-id}\", client_id)\n", |
141 | | - "\n", |
142 | | - " if \"{aad-tenant-id}\" in policy_xml:\n", |
143 | | - " policy_xml = policy_xml.replace(\"{aad-tenant-id}\", tenant_id)\n", |
144 | | - "\n", |
| 132 | + " policy_template_xml = policy_xml_file.read()\n", |
| 133 | + " if \"{backend-id}\" in policy_template_xml:\n", |
| 134 | + " policy_xml = policy_template_xml.replace(\"{backend-id}\", str(\"openai-backend-pool\" if len(openai_resources) > 1 else openai_resources[0].get(\"name\"))) \n", |
145 | 135 | " policy_xml_file.close()\n", |
146 | | - "open(\"policy-updated.xml\", 'w').write(policy_xml)\n", |
| 136 | + "if policy_xml is not None:\n", |
| 137 | + " open(\"policy.xml\", 'w').write(policy_xml)\n", |
147 | 138 | "\n", |
| 139 | + "# define the BICEP parameters\n", |
148 | 140 | "bicep_parameters = {\n", |
149 | 141 | " \"$schema\": \"https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#\",\n", |
150 | 142 | " \"contentVersion\": \"1.0.0.0\",\n", |
151 | 143 | " \"parameters\": {\n", |
| 144 | + " \"apimSku\": { \"value\": apim_sku },\n", |
152 | 145 | " \"openAIConfig\": { \"value\": openai_resources },\n", |
153 | 146 | " \"openAIDeploymentName\": { \"value\": openai_deployment_name },\n", |
154 | 147 | " \"openAIModelName\": { \"value\": openai_model_name },\n", |
155 | 148 | " \"openAIModelVersion\": { \"value\": openai_model_version },\n", |
| 149 | + " \"openAIModelCapacity\": { \"value\": openai_model_capacity },\n", |
156 | 150 | " \"openAIAPIVersion\": { \"value\": openai_api_version }\n", |
157 | 151 | " }\n", |
158 | 152 | "}\n", |
159 | 153 | "\n", |
| 154 | + "# write the parameters to a file \n", |
160 | 155 | "with open('params.json', 'w') as bicep_parameters_file:\n", |
161 | 156 | " bicep_parameters_file.write(json.dumps(bicep_parameters))\n", |
162 | 157 | "\n", |
163 | | - "! az deployment group create --name {deployment_name} --resource-group {resource_group_name} --template-file \"main.bicep\" --parameters \"params.json\"\n" |
| 158 | + "# run the deployment\n", |
| 159 | + "output = utils.run(f\"az deployment group create --name {deployment_name} --resource-group {resource_group_name} --template-file main.bicep --parameters params.json\", \n", |
| 160 | + " f\"Deployment '{deployment_name}' succeeded\", f\"Deployment '{deployment_name}' failed\")\n", |
| 161 | + "open(\"policy.xml\", 'w').write(policy_template_xml)\n", |
| 162 | + "\n" |
164 | 163 | ] |
165 | 164 | }, |
166 | 165 | { |
|
170 | 169 | "<a id='3'></a>\n", |
171 | 170 | "### 3️⃣ Get the deployment outputs\n", |
172 | 171 | "\n", |
173 | | - "We are now at the stage where we only need to retrieve the gateway URL and the subscription before we are ready for testing." |
| 172 | + "Retrieve the required outputs from the Bicep deployment." |
174 | 173 | ] |
175 | 174 | }, |
176 | 175 | { |
177 | 176 | "cell_type": "code", |
178 | | - "execution_count": 1, |
| 177 | + "execution_count": null, |
179 | 178 | "metadata": {}, |
180 | 179 | "outputs": [], |
181 | 180 | "source": [ |
182 | | - "# %load ../../shared/snippets/deployment-outputs.py\n", |
183 | | - "# type: ignore\n", |
184 | | - "\n", |
185 | 181 | "# Obtain all of the outputs from the deployment\n", |
186 | | - "stdout = ! az deployment group show --name {deployment_name} -g {resource_group_name} --query properties.outputs -o json\n", |
187 | | - "outputs = json.loads(stdout.n)\n", |
188 | | - "\n", |
189 | | - "# Extract the individual properties\n", |
190 | | - "apim_service_id = outputs.get('apimServiceId', {}).get('value', '')\n", |
191 | | - "apim_subscription_key = outputs.get('apimSubscriptionKey', {}).get('value', '')\n", |
192 | | - "apim_subscription1_key = outputs.get('apimSubscription1Key', {}).get('value', '')\n", |
193 | | - "apim_subscription2_key = outputs.get('apimSubscription2Key', {}).get('value', '')\n", |
194 | | - "apim_subscription3_key = outputs.get('apimSubscription3Key', {}).get('value', '')\n", |
195 | | - "apim_resource_gateway_url = outputs.get('apimResourceGatewayURL', {}).get('value', '')\n", |
196 | | - "workspace_id = outputs.get('logAnalyticsWorkspaceId', {}).get('value', '')\n", |
197 | | - "app_id = outputs.get('applicationInsightsAppId', {}).get('value', '')\n", |
198 | | - "function_app_resource_name = outputs.get('functionAppResourceName', {}).get('value', '')\n", |
199 | | - "cosmosdb_connection_string = outputs.get('cosmosDBConnectionString', {}).get('value', '')\n", |
200 | | - "\n", |
201 | | - "# Print the extracted properties if they are not empty\n", |
202 | | - "if apim_service_id:\n", |
203 | | - " print(f\"👉🏻 APIM Service Id: {apim_service_id}\")\n", |
204 | | - "\n", |
205 | | - "if apim_subscription_key:\n", |
206 | | - " print(f\"👉🏻 APIM Subscription Key (masked): ****{apim_subscription_key[-4:]}\")\n", |
207 | | - "\n", |
208 | | - "if apim_subscription1_key:\n", |
209 | | - " print(f\"👉🏻 APIM Subscription Key 1 (masked): ****{apim_subscription1_key[-4:]}\")\n", |
210 | | - "\n", |
211 | | - "if apim_subscription2_key:\n", |
212 | | - " print(f\"👉🏻 APIM Subscription Key 2 (masked): ****{apim_subscription2_key[-4:]}\")\n", |
213 | | - "\n", |
214 | | - "if apim_subscription3_key:\n", |
215 | | - " print(f\"👉🏻 APIM Subscription Key 3 (masked): ****{apim_subscription3_key[-4:]}\")\n", |
216 | | - "\n", |
217 | | - "if apim_resource_gateway_url:\n", |
218 | | - " print(f\"👉🏻 APIM API Gateway URL: {apim_resource_gateway_url}\")\n", |
219 | | - "\n", |
220 | | - "if workspace_id:\n", |
221 | | - " print(f\"👉🏻 Workspace ID: {workspace_id}\")\n", |
222 | | - "\n", |
223 | | - "if app_id:\n", |
224 | | - " print(f\"👉🏻 App ID: {app_id}\")\n", |
225 | | - "\n", |
226 | | - "if function_app_resource_name:\n", |
227 | | - " print(f\"👉🏻 Function Name: {function_app_resource_name}\")\n", |
228 | | - "\n", |
229 | | - "if cosmosdb_connection_string:\n", |
230 | | - " print(f\"👉🏻 Cosmos DB Connection String: {cosmosdb_connection_string}\")\n" |
| 182 | + "output = utils.run(f\"az deployment group show --name {deployment_name} -g {resource_group_name}\", f\"Retrieved deployment: {deployment_name}\", f\"Failed to retrieve deployment: {deployment_name}\")\n", |
| 183 | + "if output.success and output.json_data:\n", |
| 184 | + " apim_service_id = utils.get_deployment_output(output, 'apimServiceId', 'APIM Service Id')\n", |
| 185 | + " apim_subscription_key = utils.get_deployment_output(output, 'apimSubscriptionKey', 'APIM Subscription Key (masked)', True)\n", |
| 186 | + " apim_resource_gateway_url = utils.get_deployment_output(output, 'apimResourceGatewayURL', 'APIM API Gateway URL')\n", |
| 187 | + "\n" |
231 | 188 | ] |
232 | 189 | }, |
233 | 190 | { |
|
249 | 206 | "metadata": {}, |
250 | 207 | "outputs": [], |
251 | 208 | "source": [ |
252 | | - "# %load ../../shared/snippets/api-http-requests.py\n", |
253 | | - "\n", |
254 | | - "import json\n", |
255 | | - "import requests\n", |
256 | | - "import time\n", |
| 209 | + "import requests, time\n", |
257 | 210 | "\n", |
258 | 211 | "runs = 10\n", |
259 | 212 | "sleep_time_ms = 100\n", |
|
267 | 220 | "# Initialize a session for connection pooling\n", |
268 | 221 | "session = requests.Session()\n", |
269 | 222 | "# Set default headers\n", |
270 | | - "session.headers.update({'api-key': apim_subscription_key})\n", |
| 223 | + "session.headers.update({'api-key': apim_subscription_key}) # type: ignore\n", |
271 | 224 | "\n", |
272 | 225 | "try:\n", |
273 | 226 | " for i in range(runs):\n", |
|
288 | 241 | "\n", |
289 | 242 | " # Print the response status with the appropriate formatting\n", |
290 | 243 | " print(f\"Response status: {status_code_str}\")\n", |
291 | | - " print(f\"Response headers: {json.dumps(dict(response.headers), indent = 4)}\")\n", |
| 244 | + " # print(f\"Response headers: {json.dumps(dict(response.headers), indent = 4)}\")\n", |
292 | 245 | "\n", |
293 | 246 | " if \"x-ms-region\" in response.headers:\n", |
294 | 247 | " print(f\"x-ms-region: \\x1b[1;32m{response.headers.get(\"x-ms-region\")}\\x1b[0m\") # this header is useful to determine the region of the backend that served the request\n", |
|
367 | 320 | "metadata": {}, |
368 | 321 | "outputs": [], |
369 | 322 | "source": [ |
370 | | - "# %load ../../shared/snippets/openai-api-requests.py\n", |
371 | | - "\n", |
372 | 323 | "import time\n", |
373 | 324 | "from openai import AzureOpenAI\n", |
374 | 325 | "\n", |
375 | 326 | "runs = 10\n", |
376 | 327 | "sleep_time_ms = 100\n", |
377 | 328 | "\n", |
378 | 329 | "client = AzureOpenAI(\n", |
379 | | - " azure_endpoint = apim_resource_gateway_url,\n", |
| 330 | + " azure_endpoint = apim_resource_gateway_url, # type: ignore\n", |
380 | 331 | " api_key = apim_subscription_key,\n", |
381 | 332 | " api_version = openai_api_version\n", |
382 | 333 | ")\n", |
|
390 | 341 | " print(f\"▶️ Run {i+1}/{runs}:\")\n", |
391 | 342 | "\n", |
392 | 343 | " start_time = time.time()\n", |
393 | | - " response = client.chat.completions.create(model = openai_model_name, messages = messages) # type: ignore\n", |
| 344 | + " raw_response = client.chat.completions.with_raw_response.create(model = openai_model_name, messages = messages) # type: ignore\n", |
394 | 345 | " response_time = time.time() - start_time\n", |
395 | 346 | " print(f\"⌚ {response_time:.2f} seconds\")\n", |
| 347 | + " print(f\"x-ms-region: \\x1b[1;32m{raw_response.headers.get(\"x-ms-region\")}\\x1b[0m\") # this header is useful to determine the region of the backend that served the request\n", |
| 348 | + " response = raw_response.parse()\n", |
396 | 349 | " if response.usage:\n", |
397 | 350 | " print(f\"Token usage: Total tokens: {response.usage.total_tokens} (Prompt tokens: {response.usage.prompt_tokens} & Completion tokens: {response.usage.completion_tokens})\")\n", |
398 | 351 | " print(f\"💬 {response.choices[0].message.content}\\n\")\n", |
|
414 | 367 | ], |
415 | 368 | "metadata": { |
416 | 369 | "kernelspec": { |
417 | | - "display_name": ".venv", |
| 370 | + "display_name": "Python 3", |
418 | 371 | "language": "python", |
419 | 372 | "name": "python3" |
420 | 373 | }, |
|
0 commit comments