Skip to content

Commit b467e87

Browse files
fixed model deployment issue (#752)
1 parent 3924e6d commit b467e87

File tree

10 files changed

+30
-31
lines changed

10 files changed

+30
-31
lines changed

docs/CustomizingAzdParameters.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ By default this template will use the environment name as the prefix to prevent
1111
| -----------------------------| ------- | ------------------- | ---------------------------------------------------------------------------------------------------- |
1212
| `AZURE_ENV_NAME` | string | `azdtemp` | Used as a prefix for all resource names to ensure uniqueness across environments. |
1313
| `AZURE_LOCATION` | string | `<User selects during deployment>` | Sets the Azure region for resource deployment. |
14-
| `AZURE_OPENAI_MODEL_DEPLOYMENT_TYPE` | string | `Standard` | Change the Model Deployment Type (allowed values: Standard, GlobalStandard). |
15-
| `AZURE_OPENAI_DEPLOYMENT_MODEL` | string | `gpt-35-turbo` | Set the GPT model name (allowed values: `gpt-35-turbo`, `gpt-4`, `gpt-4o`). |
16-
| `AZURE_OPENAI_API_VERSION` | string | `0125` | Set the Azure OpenAI model version. |
14+
| `AZURE_OPENAI_MODEL_DEPLOYMENT_TYPE` | string | `GlobalStandard` | Change the Model Deployment Type (allowed values: Standard, GlobalStandard). |
15+
| `AZURE_OPENAI_DEPLOYMENT_MODEL` | string | `gpt-4.1-mini` | Set the GPT model name (allowed values: `gpt-4.1-mini`, `gpt-4`, `gpt-4o`). |
16+
| `AZURE_OPENAI_API_VERSION` | string | `2025-04-14` | Set the version of the GPT model to deploy (for example, `2025-04-14` for the default `gpt-4.1-mini`). |
1717
| `AZURE_OPENAI_DEPLOYMENT_MODEL_CAPACITY` | integer | `30` | Set the model capacity for GPT deployment. Choose based on your Azure quota and usage needs. |
1818
| `AZURE_OPENAI_EMBEDDING_MODEL` | string | `text-embedding-ada-002` | Set the model name used for embeddings. |
1919
| `AZURE_OPENAI_EMBEDDING_MODEL_VERSION` | string | `2` | Set the version for the embedding model. |

docs/QuotaCheck.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
## Check Quota Availability Before Deployment
22

33
Before deploying the accelerator, **ensure sufficient quota availability** for the required model.
4-
> **For Global Standard | GPT-35-turbo - increase the capacity to at least 50K tokens for optimal performance.**
4+
> **For Global Standard | gpt-4.1-mini - increase the capacity to at least 50K tokens for optimal performance.**
55
66
### Login if you have not done so already
77
```
@@ -11,7 +11,7 @@ azd auth login
1111

1212
### 📌 Default Models & Capacities:
1313
```
14-
gpt-35-turbo:30, text-embedding-ada-002:45
14+
gpt-4.1-mini:30, text-embedding-ada-002:45
1515
```
1616
### 📌 Default Regions:
1717
```
@@ -37,19 +37,19 @@ australiaeast, francecentral, japaneast, northcentralus, southcentralus, westus,
3737
```
3838
✔️ Check specific model(s) in default regions:
3939
```
40-
./quota_check_params.sh --models gpt-35-turbo:30,text-embedding-ada-002:45
40+
./quota_check_params.sh --models gpt-4.1-mini:30,text-embedding-ada-002:45
4141
```
4242
✔️ Check default models in specific region(s):
4343
```
4444
./quota_check_params.sh --regions eastus,westus
4545
```
4646
✔️ Check specific model(s) in specific region(s):
4747
```
48-
./quota_check_params.sh --models gpt-35-turbo:30 --regions eastus,westus
48+
./quota_check_params.sh --models gpt-4.1-mini:30 --regions eastus,westus
4949
```
5050
✔️ All parameters combined:
5151
```
52-
./quota_check_params.sh --models gpt-35-turbo:30,text-embedding-ada-002:45 --regions eastus,westus --verbose
52+
./quota_check_params.sh --models gpt-4.1-mini:30,text-embedding-ada-002:45 --regions eastus,westus --verbose
5353
```
5454

5555
### **Sample Output**

infra/main.bicep

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@ param location string = resourceGroup().location
1616
'Standard'
1717
'GlobalStandard'
1818
])
19-
param gptModelDeploymentType string = 'Standard'
19+
param gptModelDeploymentType string = 'GlobalStandard'
2020

2121
@description('Optional. Name of the GPT model to deploy:')
22-
param gptModelName string = 'gpt-35-turbo'
22+
param gptModelName string = 'gpt-4.1-mini'
2323

2424
@description('Optional. Version of the GPT model to deploy:')
25-
param gptModelVersion string = '0125'
25+
param gptModelVersion string = '2025-04-14'
2626

2727
@minValue(10)
2828
@description('Optional. Capacity of the GPT deployment:')
@@ -107,7 +107,7 @@ var solutionSuffix = toLower(trim(replace(
107107
'*',
108108
''
109109
)))
110-
var resourceGroupName = resourceGroup().name
110+
111111
var baseUrl = 'https://raw.githubusercontent.com/microsoft/Build-your-own-copilot-Solution-Accelerator/byoc-researcher/'
112112

113113
var allTags = union(
@@ -1070,9 +1070,9 @@ module webSite 'modules/web-sites.bicep' = {
10701070
AZURE_SEARCH_URL_COLUMN:'publicurl'
10711071
AZURE_OPENAI_RESOURCE:azOpenAI.outputs.endpoint
10721072
AZURE_OPENAI_ENDPOINT:azOpenAI.outputs.endpoint
1073-
AZURE_OPENAI_MODEL:'gpt-35-turbo'
1073+
AZURE_OPENAI_MODEL:gptModelName
10741074
AZURE_OPENAI_KEY:'@Microsoft.KeyVault(SecretUri=${openAIKeyUri})'
1075-
AZURE_OPENAI_MODEL_NAME:'gpt-35-turbo'
1075+
AZURE_OPENAI_MODEL_NAME:gptModelName
10761076
AZURE_OPENAI_TEMPERATURE:'0'
10771077
AZURE_OPENAI_TOP_P:'1'
10781078
AZURE_OPENAI_MAX_TOKENS:'1000'

infra/main.json

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"_generator": {
77
"name": "bicep",
88
"version": "0.37.4.10188",
9-
"templateHash": "7195801196475709586"
9+
"templateHash": "3418531138421621433"
1010
}
1111
},
1212
"parameters": {
@@ -31,7 +31,7 @@
3131
},
3232
"gptModelDeploymentType": {
3333
"type": "string",
34-
"defaultValue": "Standard",
34+
"defaultValue": "GlobalStandard",
3535
"allowedValues": [
3636
"Standard",
3737
"GlobalStandard"
@@ -43,14 +43,14 @@
4343
},
4444
"gptModelName": {
4545
"type": "string",
46-
"defaultValue": "gpt-35-turbo",
46+
"defaultValue": "gpt-4.1-mini",
4747
"metadata": {
4848
"description": "Optional. Name of the GPT model to deploy:"
4949
}
5050
},
5151
"gptModelVersion": {
5252
"type": "string",
53-
"defaultValue": "0125",
53+
"defaultValue": "2025-04-14",
5454
"metadata": {
5555
"description": "Optional. Version of the GPT model to deploy:"
5656
}
@@ -220,7 +220,6 @@
220220
},
221221
"variables": {
222222
"solutionSuffix": "[toLower(trim(replace(replace(replace(replace(replace(replace(format('{0}{1}', parameters('solutionName'), parameters('solutionUniqueText')), '-', ''), '_', ''), '.', ''), '/', ''), ' ', ''), '*', '')))]",
223-
"resourceGroupName": "[resourceGroup().name]",
224223
"baseUrl": "https://raw.githubusercontent.com/microsoft/Build-your-own-copilot-Solution-Accelerator/byoc-researcher/",
225224
"allTags": "[union(createObject('azd-env-name', parameters('solutionName')), parameters('tags'))]",
226225
"replicaRegionPairs": {
@@ -22463,10 +22462,10 @@
2246322462
}
2246422463
},
2246522464
"dependsOn": [
22466-
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageDfs)]",
2246722465
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageFile)]",
2246822466
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageBlob)]",
2246922467
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageQueue)]",
22468+
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageDfs)]",
2247022469
"userAssignedIdentity",
2247122470
"virtualNetwork"
2247222471
]
@@ -47219,9 +47218,9 @@
4721947218
"AZURE_SEARCH_URL_COLUMN": "publicurl",
4722047219
"AZURE_OPENAI_RESOURCE": "[reference('azOpenAI').outputs.endpoint.value]",
4722147220
"AZURE_OPENAI_ENDPOINT": "[reference('azOpenAI').outputs.endpoint.value]",
47222-
"AZURE_OPENAI_MODEL": "gpt-35-turbo",
47221+
"AZURE_OPENAI_MODEL": "[parameters('gptModelName')]",
4722347222
"AZURE_OPENAI_KEY": "[format('@Microsoft.KeyVault(SecretUri={0})', reference('azOpenAI').outputs.exportedSecrets.value['AZURE-OPENAI-KEY'].secretUri)]",
47224-
"AZURE_OPENAI_MODEL_NAME": "gpt-35-turbo",
47223+
"AZURE_OPENAI_MODEL_NAME": "[parameters('gptModelName')]",
4722547224
"AZURE_OPENAI_TEMPERATURE": "0",
4722647225
"AZURE_OPENAI_TOP_P": "1",
4722747226
"AZURE_OPENAI_MAX_TOKENS": "1000",
-1.85 KB
Binary file not shown.

infra/scripts/checkquota.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ echo "✅ Azure subscription set successfully."
3232

3333
# Define models and their minimum required capacities
3434
declare -A MIN_CAPACITY=(
35-
["OpenAI.Standard.gpt-35-turbo"]=$GPT_MIN_CAPACITY
35+
["OpenAI.Standard.gpt-4.1-mini"]=$GPT_MIN_CAPACITY
3636
["OpenAI.GlobalStandard.text-embedding-ada-002"]=$TEXT_EMBEDDING_MIN_CAPACITY
3737
)
3838

infra/scripts/quota_check_params.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ log_verbose() {
4747
}
4848

4949
# Default Models and Capacities (Comma-separated in "model:capacity" format)
50-
DEFAULT_MODEL_CAPACITY="gpt-35-turbo:30,text-embedding-ada-002:45"
50+
DEFAULT_MODEL_CAPACITY="gpt-4.1-mini:30,text-embedding-ada-002:45"
5151

5252
# Convert the comma-separated string into an array
5353
IFS=',' read -r -a MODEL_CAPACITY_PAIRS <<< "$DEFAULT_MODEL_CAPACITY"

src/.env.sample

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ AZURE_SEARCH_STRICTNESS=3
1717
AZURE_OPENAI_RESOURCE=
1818
AZURE_OPENAI_MODEL=
1919
AZURE_OPENAI_KEY=
20-
AZURE_OPENAI_MODEL_NAME=gpt-35-turbo
20+
AZURE_OPENAI_MODEL_NAME=gpt-4.1-mini
2121
AZURE_OPENAI_TEMPERATURE=0
2222
AZURE_OPENAI_TOP_P=1.0
2323
AZURE_OPENAI_MAX_TOKENS=1000

src/app.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,8 @@ def assets(path):
8686
)
8787
AZURE_OPENAI_STREAM = os.environ.get("AZURE_OPENAI_STREAM", "true")
8888
AZURE_OPENAI_MODEL_NAME = os.environ.get(
89-
"AZURE_OPENAI_MODEL_NAME", "gpt-35-turbo"
90-
) # Name of the model, e.g. 'gpt-35-turbo' or 'gpt-4'
89+
"AZURE_OPENAI_MODEL_NAME", "gpt-4.1-mini"
90+
)
9191
AZURE_OPENAI_EMBEDDING_ENDPOINT = os.environ.get("AZURE_OPENAI_EMBEDDING_ENDPOINT")
9292
AZURE_OPENAI_EMBEDDING_KEY = os.environ.get("AZURE_OPENAI_EMBEDDING_KEY")
9393
AZURE_OPENAI_EMBEDDING_NAME = os.environ.get("AZURE_OPENAI_EMBEDDING_NAME", "")
@@ -108,7 +108,7 @@ def assets(path):
108108
def is_chat_model():
109109
if (
110110
"gpt-4" in AZURE_OPENAI_MODEL_NAME.lower()
111-
or AZURE_OPENAI_MODEL_NAME.lower() in ["gpt-35-turbo-4k", "gpt-35-turbo"]
111+
or AZURE_OPENAI_MODEL_NAME.lower() in ["gpt-35-turbo-4k", "gpt-4.1-mini"]
112112
):
113113
return True
114114
return False

src/test_app.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def test_is_chat_model_with_gpt35_turbo_4k():
4848

4949

5050
def test_is_chat_model_with_gpt35_turbo_16k():
51-
with patch("app.AZURE_OPENAI_MODEL_NAME", "gpt-35-turbo"):
51+
with patch("app.AZURE_OPENAI_MODEL_NAME", "gpt-4.1-mini"):
5252
assert is_chat_model() is True
5353

5454

@@ -291,7 +291,7 @@ def test_stream_with_data_azure_success():
291291
with patch("requests.Session.post") as mock_post:
292292
mock_response = MagicMock()
293293
mock_response.iter_lines.return_value = [
294-
b'data: {"id":"1","model":"gpt-35-turbo","created":1736397875,"object":"extensions.chat.completion.chunk","choices":[{"index":0,"delta":{"context":{"messages":[{"role":"tool","content":"hello","end_turn":false}]}},"end_turn":false,"finish_reason":"None"}]}'
294+
b'data: {"id":"1","model":"gpt-4.1-mini","created":1736397875,"object":"extensions.chat.completion.chunk","choices":[{"index":0,"delta":{"context":{"messages":[{"role":"tool","content":"hello","end_turn":false}]}},"end_turn":false,"finish_reason":"None"}]}'
295295
]
296296
mock_response.headers = {"apim-request-id": "test-request-id"}
297297
mock_post.return_value.__enter__.return_value = mock_response
@@ -381,7 +381,7 @@ def test_stream_with_data_azure_error():
381381
# body = mock_body
382382
mock_response = MagicMock()
383383
mock_response.iter_lines.return_value = [
384-
b'data: {"id":"1","model":"gpt-35-turbo","created":1736397875,"object":"extensions.chat.completion.chunk","choices":[{"index":0,"delta":{"context":{"messages":[{"role":"tool","content":"hello","end_turn":false}]}},"end_turn":false,"finish_reason":"None"}]}'
384+
b'data: {"id":"1","model":"gpt-4.1-mini","created":1736397875,"object":"extensions.chat.completion.chunk","choices":[{"index":0,"delta":{"context":{"messages":[{"role":"tool","content":"hello","end_turn":false}]}},"end_turn":false,"finish_reason":"None"}]}'
385385
]
386386
mock_response.headers = {"apim-request-id": "test-request-id"}
387387
mock_post.return_value.__enter__.return_value = mock_response

0 commit comments

Comments
 (0)