fixed model deployment issue (#752)

AjitPadhi-Microsoft · web-flow · commit b467e870a147 · 2025-11-19T12:11:25.000+05:30
diff --git a/docs/CustomizingAzdParameters.md b/docs/CustomizingAzdParameters.md
@@ -11,9 +11,9 @@ By default this template will use the environment name as the prefix to prevent
 | -----------------------------| ------- | ------------------- | ---------------------------------------------------------------------------------------------------- |
 | `AZURE_ENV_NAME`            | string  | `azdtemp`           | Used as a prefix for all resource names to ensure uniqueness across environments.                    |
 | `AZURE_LOCATION`            | string  | `<User selects during deployment>`         | Sets the Azure region for resource deployment.  |
-| `AZURE_OPENAI_MODEL_DEPLOYMENT_TYPE`             | string  | `Standard`    | Change the Model Deployment Type (allowed values: Standard, GlobalStandard).                         |
-| `AZURE_OPENAI_DEPLOYMENT_MODEL`               | string  | `gpt-35-turbo`            | Set the GPT model name (allowed values: `gpt-35-turbo`, `gpt-4`, `gpt-4o`).                                                      |
-| `AZURE_OPENAI_API_VERSION`     | string  | `0125`        | Set the Azure OpenAI model version.                                       |
+| `AZURE_OPENAI_MODEL_DEPLOYMENT_TYPE`             | string  | `GlobalStandard`    | Change the Model Deployment Type (allowed values: Standard, GlobalStandard).                         |
+| `AZURE_OPENAI_DEPLOYMENT_MODEL`               | string  | `gpt-4.1-mini`            | Set the GPT model name (allowed values: `gpt-4.1-mini`, `gpt-4`, `gpt-4o`).                                                      |
+| `AZURE_OPENAI_API_VERSION`     | string  | `2025-04-14`        | Set the GPT model version to deploy (a model release version such as `2025-04-14`, not a REST API version). |
 | `AZURE_OPENAI_DEPLOYMENT_MODEL_CAPACITY`     | integer | `30`               | Set the model capacity for GPT deployment. Choose based on your Azure quota and usage needs.         |
 | `AZURE_OPENAI_EMBEDDING_MODEL`            | string  | `text-embedding-ada-002`  | Set the model name used for embeddings.                                                              |
 | `AZURE_OPENAI_EMBEDDING_MODEL_VERSION`            | string  | `2`  | Set the version for the embedding model.                                                              |
diff --git a/docs/QuotaCheck.md b/docs/QuotaCheck.md
@@ -1,7 +1,7 @@
 ## Check Quota Availability Before Deployment
 
 Before deploying the accelerator, **ensure sufficient quota availability** for the required model.
-> **For Global Standard | GPT-35-turbo - increase the capacity to at least 50K tokens for optimal performance.**
+> **For Global Standard | gpt-4.1-mini - increase the capacity to at least 50K tokens for optimal performance.**
 
 ### Login if you have not done so already
 ```
@@ -11,7 +11,7 @@ azd auth login
 
 ### 📌 Default Models & Capacities:
 ```
-gpt-35-turbo:30, text-embedding-ada-002:45
+gpt-4.1-mini:30, text-embedding-ada-002:45
 ```
 ### 📌 Default Regions:
 ```
@@ -37,19 +37,19 @@ australiaeast, francecentral, japaneast, northcentralus, southcentralus, westus,
    ```
 ✔️ Check specific model(s) in default regions:
   ```
-  ./quota_check_params.sh --models gpt-35-turbo:30,text-embedding-ada-002:45
+  ./quota_check_params.sh --models gpt-4.1-mini:30,text-embedding-ada-002:45
   ```
 ✔️ Check default models in specific region(s):
   ```
 ./quota_check_params.sh --regions eastus,westus
   ```
 ✔️ Passing Both models and regions:  
   ```
-  ./quota_check_params.sh --models gpt-35-turbo:30 --regions eastus,westus
+  ./quota_check_params.sh --models gpt-4.1-mini:30 --regions eastus,westus
   ```
 ✔️ All parameters combined:
   ```
- ./quota_check_params.sh --models gpt-35-turbo:30,text-embedding-ada-002:45 --regions eastus,westus --verbose
+ ./quota_check_params.sh --models gpt-4.1-mini:30,text-embedding-ada-002:45 --regions eastus,westus --verbose
   ```
 
 ### **Sample Output**
diff --git a/infra/main.bicep b/infra/main.bicep
@@ -16,13 +16,13 @@ param location string = resourceGroup().location
   'Standard'
   'GlobalStandard'
 ])
-param gptModelDeploymentType string = 'Standard'
+param gptModelDeploymentType string = 'GlobalStandard'
 
 @description('Optional. Name of the GPT model to deploy:')
-param gptModelName string = 'gpt-35-turbo'
+param gptModelName string = 'gpt-4.1-mini'
 
 @description('Optional. Version of the GPT model to deploy:')
-param gptModelVersion string = '0125'
+param gptModelVersion string = '2025-04-14'
 
 @minValue(10)
 @description('Optional. Capacity of the GPT deployment:')
@@ -107,7 +107,7 @@ var solutionSuffix = toLower(trim(replace(
   '*',
   ''
 )))
-var resourceGroupName = resourceGroup().name
+
 var baseUrl = 'https://raw.githubusercontent.com/microsoft/Build-your-own-copilot-Solution-Accelerator/byoc-researcher/'
 
 var allTags = union(
@@ -1070,9 +1070,9 @@ module webSite 'modules/web-sites.bicep' = {
           AZURE_SEARCH_URL_COLUMN:'publicurl'
           AZURE_OPENAI_RESOURCE:azOpenAI.outputs.endpoint
           AZURE_OPENAI_ENDPOINT:azOpenAI.outputs.endpoint
-          AZURE_OPENAI_MODEL:'gpt-35-turbo'
+          AZURE_OPENAI_MODEL:gptModelName
           AZURE_OPENAI_KEY:'@Microsoft.KeyVault(SecretUri=${openAIKeyUri})'
-          AZURE_OPENAI_MODEL_NAME:'gpt-35-turbo'
+          AZURE_OPENAI_MODEL_NAME:gptModelName
           AZURE_OPENAI_TEMPERATURE:'0'
           AZURE_OPENAI_TOP_P:'1'
           AZURE_OPENAI_MAX_TOKENS:'1000'
diff --git a/infra/main.json b/infra/main.json
@@ -6,7 +6,7 @@
     "_generator": {
       "name": "bicep",
       "version": "0.37.4.10188",
-      "templateHash": "7195801196475709586"
+      "templateHash": "3418531138421621433"
     }
   },
   "parameters": {
@@ -31,7 +31,7 @@
     },
     "gptModelDeploymentType": {
       "type": "string",
-      "defaultValue": "Standard",
+      "defaultValue": "GlobalStandard",
       "allowedValues": [
         "Standard",
         "GlobalStandard"
@@ -43,14 +43,14 @@
     },
     "gptModelName": {
       "type": "string",
-      "defaultValue": "gpt-35-turbo",
+      "defaultValue": "gpt-4.1-mini",
       "metadata": {
         "description": "Optional. Name of the GPT model to deploy:"
       }
     },
     "gptModelVersion": {
       "type": "string",
-      "defaultValue": "0125",
+      "defaultValue": "2025-04-14",
       "metadata": {
         "description": "Optional. Version of the GPT model to deploy:"
       }
@@ -220,7 +220,6 @@
   },
   "variables": {
     "solutionSuffix": "[toLower(trim(replace(replace(replace(replace(replace(replace(format('{0}{1}', parameters('solutionName'), parameters('solutionUniqueText')), '-', ''), '_', ''), '.', ''), '/', ''), ' ', ''), '*', '')))]",
-    "resourceGroupName": "[resourceGroup().name]",
     "baseUrl": "https://raw.githubusercontent.com/microsoft/Build-your-own-copilot-Solution-Accelerator/byoc-researcher/",
     "allTags": "[union(createObject('azd-env-name', parameters('solutionName')), parameters('tags'))]",
     "replicaRegionPairs": {
@@ -22463,10 +22462,10 @@
         }
       },
       "dependsOn": [
-        "[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageDfs)]",
         "[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageFile)]",
         "[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageBlob)]",
         "[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageQueue)]",
+        "[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageDfs)]",
         "userAssignedIdentity",
         "virtualNetwork"
       ]
@@ -47219,9 +47218,9 @@
                   "AZURE_SEARCH_URL_COLUMN": "publicurl",
                   "AZURE_OPENAI_RESOURCE": "[reference('azOpenAI').outputs.endpoint.value]",
                   "AZURE_OPENAI_ENDPOINT": "[reference('azOpenAI').outputs.endpoint.value]",
-                  "AZURE_OPENAI_MODEL": "gpt-35-turbo",
+                  "AZURE_OPENAI_MODEL": "[parameters('gptModelName')]",
                   "AZURE_OPENAI_KEY": "[format('@Microsoft.KeyVault(SecretUri={0})', reference('azOpenAI').outputs.exportedSecrets.value['AZURE-OPENAI-KEY'].secretUri)]",
-                  "AZURE_OPENAI_MODEL_NAME": "gpt-35-turbo",
+                  "AZURE_OPENAI_MODEL_NAME": "[parameters('gptModelName')]",
                   "AZURE_OPENAI_TEMPERATURE": "0",
                   "AZURE_OPENAI_TOP_P": "1",
                   "AZURE_OPENAI_MAX_TOKENS": "1000",
diff --git a/infra/scripts/aihub_scripts/flows/DraftFlow.zip b/infra/scripts/aihub_scripts/flows/DraftFlow.zip
diff --git a/infra/scripts/checkquota.sh b/infra/scripts/checkquota.sh
@@ -32,7 +32,9 @@ echo "✅ Azure subscription set successfully."
 
 # Define models and their minimum required capacities
+# Keys appear to follow "<provider>.<deployment type>.<model>"; the GPT key uses
+# GlobalStandard to match the default gptModelDeploymentType in infra/main.bicep.
 declare -A MIN_CAPACITY=(
-    ["OpenAI.Standard.gpt-35-turbo"]=$GPT_MIN_CAPACITY
+    ["OpenAI.GlobalStandard.gpt-4.1-mini"]=$GPT_MIN_CAPACITY
     ["OpenAI.GlobalStandard.text-embedding-ada-002"]=$TEXT_EMBEDDING_MIN_CAPACITY
 )
 
diff --git a/infra/scripts/quota_check_params.sh b/infra/scripts/quota_check_params.sh
@@ -47,7 +47,7 @@ log_verbose() {
 }
 
 # Default Models and Capacities (Comma-separated in "model:capacity" format)
-DEFAULT_MODEL_CAPACITY="gpt-35-turbo:30,text-embedding-ada-002:45"
+DEFAULT_MODEL_CAPACITY="gpt-4.1-mini:30,text-embedding-ada-002:45"
 
 # Convert the comma-separated string into an array
 IFS=',' read -r -a MODEL_CAPACITY_PAIRS <<< "$DEFAULT_MODEL_CAPACITY"
diff --git a/src/.env.sample b/src/.env.sample
@@ -17,7 +17,7 @@ AZURE_SEARCH_STRICTNESS=3
 AZURE_OPENAI_RESOURCE=
 AZURE_OPENAI_MODEL=
 AZURE_OPENAI_KEY=
-AZURE_OPENAI_MODEL_NAME=gpt-35-turbo
+AZURE_OPENAI_MODEL_NAME=gpt-4.1-mini
 AZURE_OPENAI_TEMPERATURE=0
 AZURE_OPENAI_TOP_P=1.0
 AZURE_OPENAI_MAX_TOKENS=1000
diff --git a/src/app.py b/src/app.py
@@ -86,8 +86,8 @@ def assets(path):
 )
 AZURE_OPENAI_STREAM = os.environ.get("AZURE_OPENAI_STREAM", "true")
 AZURE_OPENAI_MODEL_NAME = os.environ.get(
-    "AZURE_OPENAI_MODEL_NAME", "gpt-35-turbo"
-)  # Name of the model, e.g. 'gpt-35-turbo' or 'gpt-4'
+    "AZURE_OPENAI_MODEL_NAME", "gpt-4.1-mini"
+)  # Name of the model, e.g. 'gpt-4.1-mini' or 'gpt-4'
 AZURE_OPENAI_EMBEDDING_ENDPOINT = os.environ.get("AZURE_OPENAI_EMBEDDING_ENDPOINT")
 AZURE_OPENAI_EMBEDDING_KEY = os.environ.get("AZURE_OPENAI_EMBEDDING_KEY")
 AZURE_OPENAI_EMBEDDING_NAME = os.environ.get("AZURE_OPENAI_EMBEDDING_NAME", "")
@@ -108,7 +108,13 @@ def assets(path):
 def is_chat_model():
+    """Return True when AZURE_OPENAI_MODEL_NAME is treated as a chat model.
+
+    Any name containing "gpt-4" matches the substring check, which already
+    covers the new default "gpt-4.1-mini"; the explicit list keeps the
+    GPT-3.5 chat names recognized so existing deployments do not regress.
+    """
     if (
         "gpt-4" in AZURE_OPENAI_MODEL_NAME.lower()
         or AZURE_OPENAI_MODEL_NAME.lower() in ["gpt-35-turbo-4k", "gpt-35-turbo"]
     ):
         return True
     return False
diff --git a/src/test_app.py b/src/test_app.py
@@ -48,7 +48,7 @@ def test_is_chat_model_with_gpt35_turbo_4k():
 
 
 def test_is_chat_model_with_gpt35_turbo_16k():
-    with patch("app.AZURE_OPENAI_MODEL_NAME", "gpt-35-turbo"):
+    with patch("app.AZURE_OPENAI_MODEL_NAME", "gpt-4.1-mini"):
         assert is_chat_model() is True
 
 
@@ -291,7 +291,7 @@ def test_stream_with_data_azure_success():
     with patch("requests.Session.post") as mock_post:
         mock_response = MagicMock()
         mock_response.iter_lines.return_value = [
-            b'data: {"id":"1","model":"gpt-35-turbo","created":1736397875,"object":"extensions.chat.completion.chunk","choices":[{"index":0,"delta":{"context":{"messages":[{"role":"tool","content":"hello","end_turn":false}]}},"end_turn":false,"finish_reason":"None"}]}'
+            b'data: {"id":"1","model":"gpt-4.1-mini","created":1736397875,"object":"extensions.chat.completion.chunk","choices":[{"index":0,"delta":{"context":{"messages":[{"role":"tool","content":"hello","end_turn":false}]}},"end_turn":false,"finish_reason":"None"}]}'
         ]
         mock_response.headers = {"apim-request-id": "test-request-id"}
         mock_post.return_value.__enter__.return_value = mock_response
@@ -381,7 +381,7 @@ def test_stream_with_data_azure_error():
         #     body = mock_body
         mock_response = MagicMock()
         mock_response.iter_lines.return_value = [
-            b'data: {"id":"1","model":"gpt-35-turbo","created":1736397875,"object":"extensions.chat.completion.chunk","choices":[{"index":0,"delta":{"context":{"messages":[{"role":"tool","content":"hello","end_turn":false}]}},"end_turn":false,"finish_reason":"None"}]}'
+            b'data: {"id":"1","model":"gpt-4.1-mini","created":1736397875,"object":"extensions.chat.completion.chunk","choices":[{"index":0,"delta":{"context":{"messages":[{"role":"tool","content":"hello","end_turn":false}]}},"end_turn":false,"finish_reason":"None"}]}'
         ]
         mock_response.headers = {"apim-request-id": "test-request-id"}
         mock_post.return_value.__enter__.return_value = mock_response

Original file line number	Diff line number	Diff line change
`@@ -32,7 +32,7 @@ echo "✅ Azure subscription set successfully."`
`32`	`32`
`33`	`33`	`# Define models and their minimum required capacities`
`34`	`34`	`declare -A MIN_CAPACITY=(`
`35`		`- ["OpenAI.Standard.gpt-35-turbo"]=$GPT_MIN_CAPACITY`
	`35`	`+ ["OpenAI.GlobalStandard.gpt-4.1-mini"]=$GPT_MIN_CAPACITY`
`36`	`36`	`["OpenAI.GlobalStandard.text-embedding-ada-002"]=$TEXT_EMBEDDING_MIN_CAPACITY`
`37`	`37`	`)`
`38`	`38`
Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,7 @@ log_verbose() {`
`47`	`47`	`}`
`48`	`48`
`49`	`49`	`# Default Models and Capacities (Comma-separated in "model:capacity" format)`
`50`		`-DEFAULT_MODEL_CAPACITY="gpt-35-turbo:30,text-embedding-ada-002:45"`
	`50`	`+DEFAULT_MODEL_CAPACITY="gpt-4.1-mini:30,text-embedding-ada-002:45"`
`51`	`51`
`52`	`52`	`# Convert the comma-separated string into an array`
`53`	`53`	`IFS=',' read -r -a MODEL_CAPACITY_PAIRS <<< "$DEFAULT_MODEL_CAPACITY"`