diff --git a/.github/workflows/deploy-waf.yml b/.github/workflows/deploy-waf.yml index 0a3394259..0b567ff6b 100644 --- a/.github/workflows/deploy-waf.yml +++ b/.github/workflows/deploy-waf.yml @@ -21,7 +21,7 @@ jobs: export AZURE_TENANT_ID=${{ secrets.AZURE_TENANT_ID }} export AZURE_CLIENT_SECRET=${{ secrets.AZURE_CLIENT_SECRET }} export AZURE_SUBSCRIPTION_ID="${{ secrets.AZURE_SUBSCRIPTION_ID }}" - export GPT_MIN_CAPACITY="5" + export GPT_MIN_CAPACITY="150" export AZURE_REGIONS="${{ vars.AZURE_REGIONS }}" chmod +x infra/scripts/checkquota.sh diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index c8e5b6c97..5f81962c2 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,7 +1,6 @@ name: Validate Deployment on: - push: workflow_run: workflows: ["Build Docker and Optional Push"] types: diff --git a/docs/quota_check.md b/docs/quota_check.md index bf59bc36d..f8cae1a5b 100644 --- a/docs/quota_check.md +++ b/docs/quota_check.md @@ -1,7 +1,7 @@ ## Check Quota Availability Before Deployment Before deploying the accelerator, **ensure sufficient quota availability** for the required model. 
-> **For Global Standard | GPT-4o - the capacity to at least 140k tokens for optimal performance.** +> **For Global Standard | GPT-4o - increase the capacity to at least 150k tokens for optimal performance.** ### Login if you have not done so already ``` @@ -11,7 +11,7 @@ azd auth login ### 📌 Default Models & Capacities: ``` -gpt-4o:140 +gpt-4o:150 ``` ### 📌 Default Regions: ``` @@ -37,7 +37,7 @@ eastus, uksouth, eastus2, northcentralus, swedencentral, westus, westus2, southc ``` ✔️ Check specific model(s) in default regions: ``` - ./quota_check_params.sh --models gpt-4o:140 + ./quota_check_params.sh --models gpt-4o:150 ``` ✔️ Check default models in specific region(s): ``` @@ -45,11 +45,11 @@ eastus, uksouth, eastus2, northcentralus, swedencentral, westus, westus2, southc ``` ✔️ Passing Both models and regions: ``` - ./quota_check_params.sh --models gpt-4o:140 --regions eastus,westus2 + ./quota_check_params.sh --models gpt-4o:150 --regions eastus,westus2 ``` ✔️ All parameters combined: ``` - ./quota_check_params.sh --models gpt-4o:140 --regions eastus,westus --verbose + ./quota_check_params.sh --models gpt-4o:150 --regions eastus,westus --verbose ``` ### **Sample Output** diff --git a/infra/scripts/quota_check_params.sh b/infra/scripts/quota_check_params.sh index 71df64e0f..6182e4497 100644 --- a/infra/scripts/quota_check_params.sh +++ b/infra/scripts/quota_check_params.sh @@ -47,7 +47,7 @@ log_verbose() { } # Default Models and Capacities (Comma-separated in "model:capacity" format) -DEFAULT_MODEL_CAPACITY="gpt-4o:50" +DEFAULT_MODEL_CAPACITY="gpt-4o:150" # Convert the comma-separated string into an array IFS=',' read -r -a MODEL_CAPACITY_PAIRS <<< "$DEFAULT_MODEL_CAPACITY"