Howie/use azd quota validation (#148)

howieleung · web-flow · commit 0a5ebd2f14a5 · 2025-06-30T09:13:45.000-07:00
* use bicep quota validation

* update
diff --git a/azure.yaml b/azure.yaml
@@ -18,17 +18,6 @@ hooks:
       run: ./scripts/validate_env_vars.ps1
       interactive: true
       continueOnError: false      
-  preprovision:
-    posix:
-      shell: sh
-      run: chmod u+r+x ./scripts/set_default_models.sh; chmod u+r+x ./scripts/resolve_model_quota.sh; ./scripts/set_default_models.sh
-      interactive: true
-      continueOnError: false
-    windows:
-      shell: pwsh
-      run: ./scripts/set_default_models.ps1
-      interactive: true
-      continueOnError: false      
   postprovision:
     windows:
       shell: pwsh
diff --git a/docs/deploy_customization.md b/docs/deploy_customization.md
@@ -66,9 +66,9 @@ Set the version of the agent model:
 azd env set AZURE_AI_AGENT_MODEL_VERSION 2024-07-18
 ```
 
-### Setting capacity and deployment SKU
+### Setting models, capacity, and deployment SKU
 
-For quota regions, you may find yourself needing to modify the default capacity and deployment SKU using environment variables as below. The default tokens per minute deployed in this template is 80,000 for agent model and 50,000 for the embedding model that is enough for all operations.  If the region has quota less the these numbers, you will be prompt to input a lower capacity up to the available limit.
+By default, this template deploys the agent model with a capacity of 80,000 tokens per minute and, for AI Search, the embedding model with a capacity of 50,000 tokens per minute. Due to current Bicep limitations, only the agent (chat) model quota is validated when you select a location during `azd up`; the embedding model quota is not checked. If you want to change these defaults, first set the desired region in advance using `azd env set AZURE_LOCATION <region>` (for example, `eastus`), which bypasses the quota validation performed at location selection. Then follow the instructions below to update the model settings before running `azd up`.
 
 Change the default capacity (in thousands of tokens per minute) of the agent deployment:
 
diff --git a/infra/main.bicep b/infra/main.bicep
@@ -20,10 +20,9 @@ param environmentName string
 @metadata({
   azd: {
     type: 'location'
-    // quota-validation for ai models: gpt-4o-mini & text-embedding-3-small
+    // quota-validation for ai models: gpt-4o-mini
     usageName: [
-      'OpenAI.GlobalStandard.gpt-4o-mini,30'
-      'OpenAI.GlobalStandard.text-embedding-3-small,30'
+      'OpenAI.GlobalStandard.gpt-4o-mini,80'
     ]
   }
 })
diff --git a/infra/main.parameters.json b/infra/main.parameters.json
@@ -60,42 +60,42 @@
       "value": "${AZURE_EXISTING_AGENT_ID}"
     },
     "agentDeploymentName": {
-      "value": "${AZURE_AI_AGENT_MODEL_NAME}"
+      "value": "${AZURE_AI_AGENT_MODEL_NAME=gpt-4o-mini}"
     },
     "agentModelFormat": {
-      "value": "${AZURE_AI_AGENT_MODEL_FORMAT}"
+      "value": "${AZURE_AI_AGENT_MODEL_FORMAT=OpenAI}"
     },
     "agentModelName": {
-      "value": "${AZURE_AI_AGENT_MODEL_NAME}"
+      "value": "${AZURE_AI_AGENT_MODEL_NAME=gpt-4o-mini}"
     },
     "agentModelVersion": {
-      "value": "${AZURE_AI_AGENT_MODEL_VERSION}"
+      "value": "${AZURE_AI_AGENT_MODEL_VERSION=2024-07-18}"
     },
     "agentDeploymentSku": {
-      "value": "${AZURE_AI_AGENT_DEPLOYMENT_SKU}"
+      "value": "${AZURE_AI_AGENT_DEPLOYMENT_SKU=GlobalStandard}"
     },
     "agentDeploymentCapacity": {
-      "value": "${AZURE_AI_AGENT_DEPLOYMENT_CAPACITY}"
+      "value": "${AZURE_AI_AGENT_DEPLOYMENT_CAPACITY=80}"
     },
     "embeddingDeploymentName": {
-      "value": "${AZURE_AI_EMBED_DEPLOYMENT_NAME}"
+      "value": "${AZURE_AI_EMBED_DEPLOYMENT_NAME=text-embedding-3-small}"
     },
     "embedModelFormat": {
-      "value": "${AZURE_AI_EMBED_MODEL_FORMAT}"
+      "value": "${AZURE_AI_EMBED_MODEL_FORMAT=OpenAI}"
     },
     "embedModelName": {
-      "value": "${AZURE_AI_EMBED_MODEL_NAME}"
+      "value": "${AZURE_AI_EMBED_MODEL_NAME=text-embedding-3-small}"
     },
     "embedModelVersion": {
-      "value": "${AZURE_AI_EMBED_MODEL_VERSION}"
+      "value": "${AZURE_AI_EMBED_MODEL_VERSION=1}"
     },
     "embedDeploymentSku": {
-      "value": "${AZURE_AI_EMBED_DEPLOYMENT_SKU}"
+      "value": "${AZURE_AI_EMBED_DEPLOYMENT_SKU=Standard}"
     },
     "embedDeploymentCapacity": {
-      "value": "${AZURE_AI_EMBED_DEPLOYMENT_CAPACITY}"
+      "value": "${AZURE_AI_EMBED_DEPLOYMENT_CAPACITY=50}"
     },
-    "embeddingDeploymentDimensions": {
+        "embeddingDeploymentDimensions": {
       "value": "${AZURE_AI_EMBED_DIMENSIONS=100}"
     },
     "apiAppExists": {

Original file line number	Diff line number	Diff line change
`@@ -20,10 +20,9 @@ param environmentName string`
`20`	`20`	`@metadata({`
`21`	`21`	`azd: {`
`22`	`22`	`type: 'location'`
`23`		`- // quota-validation for ai models: gpt-4o-mini & text-embedding-3-small`
	`23`	`+ // quota-validation for ai models: gpt-4o-mini`
`24`	`24`	`usageName: [`
`25`		`- 'OpenAI.GlobalStandard.gpt-4o-mini,30'`
`26`		`- 'OpenAI.GlobalStandard.text-embedding-3-small,30'`
	`25`	`+ 'OpenAI.GlobalStandard.gpt-4o-mini,80'`
`27`	`26`	`]`
`28`	`27`	`}`
`29`	`28`	`})`