From cb2a475d5f3bb6229a1868228637003aaec192e7 Mon Sep 17 00:00:00 2001 From: Travis Hilbert Date: Thu, 22 May 2025 15:19:42 -0700 Subject: [PATCH 1/2] Updated README, Default set to NonWAF, Param for tokenCount --- documentation/DeploymentGuide.md | 2 +- infra/main.bicep | 6 +++++- infra/main.bicepparam | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/documentation/DeploymentGuide.md b/documentation/DeploymentGuide.md index 4abae3955..cc7b799f0 100644 --- a/documentation/DeploymentGuide.md +++ b/documentation/DeploymentGuide.md @@ -148,7 +148,7 @@ Once you've opened the project in [Codespaces](#github-codespaces), [Dev Contain 3. Provide an `azd` environment name (e.g., "macaeapp"). 4. Select a subscription from your Azure account and choose a location that has quota for all the resources. - - This deployment will take _4-6 minutes_ to provision the resources in your account and set up the solution with sample data. + - The default deployment will take _4-6 minutes_ to provision the resources in your account and set up the solution with sample data. The WAF aligned deployment will take between _25-45 minutes_. - If you encounter an error or timeout during deployment, changing the location may help, as there could be availability constraints for the resources. 5. Once the deployment has completed successfully, open the [Azure Portal](https://portal.azure.com/), go to the deployed resource group, find the App Service, and get the app URL from `Default domain`. diff --git a/infra/main.bicep b/infra/main.bicep index db151ddc3..3d3ba3fdc 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -5,6 +5,9 @@ metadata description = 'This module contains the resources required to deploy th @maxLength(19) param solutionPrefix string = 'macae${uniqueString(deployer().objectId, deployer().tenantId, subscription().subscriptionId, resourceGroup().id)}' +@description('Capacity of the AI Foundry AI Services resource. 
The default value is 140.') +param aiFoundryCapacity int + @description('Optional. Location for all Resources.') param solutionLocation string = resourceGroup().location @@ -717,7 +720,8 @@ var aiFoundryAiServicesModelDeployment = { version: '2024-08-06' sku: { name: 'GlobalStandard' - capacity: 50 + //Currently the capacity is set to 140 for optimal performance. + capacity: aiFoundryCapacity } raiPolicyName: 'Microsoft.Default' } diff --git a/infra/main.bicepparam b/infra/main.bicepparam index 2d707c5b7..3cddaffc8 100644 --- a/infra/main.bicepparam +++ b/infra/main.bicepparam @@ -3,6 +3,7 @@ using './main.bicep' param enableTelemetry = true param solutionPrefix = null //Type a string value to customize the prefix for your resource names param solutionLocation = 'australiaeast' +param aiFoundryCapacity= 140 param logAnalyticsWorkspaceConfiguration = { dataRetentionInDays: 30 } @@ -10,7 +11,7 @@ param applicationInsightsConfiguration = { retentionInDays: 30 } param virtualNetworkConfiguration = { - enabled: true + enabled: false } param aiFoundryStorageAccountConfiguration = { sku: 'Standard_LRS' From 6883d52830f3d724c47d69db9ffe632d51b00776 Mon Sep 17 00:00:00 2001 From: Prashant-Microsoft Date: Tue, 3 Jun 2025 09:43:11 +0530 Subject: [PATCH 2/2] feat: added rate limit error popup --- src/backend/app_kernel.py | 2 +- src/backend/kernel_agents/planner_agent.py | 8 +++++++- src/frontend/wwwroot/home/home.js | 10 ++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/backend/app_kernel.py b/src/backend/app_kernel.py index 674290a26..2e2b05200 100644 --- a/src/backend/app_kernel.py +++ b/src/backend/app_kernel.py @@ -195,7 +195,7 @@ async def input_task_endpoint(input_task: InputTask, request: Request): "error": str(e), }, ) - raise HTTPException(status_code=400, detail="Error creating plan") + raise HTTPException(status_code=400, detail=f"Error creating plan: {e}") @app.post("/api/human_feedback") diff --git 
a/src/backend/kernel_agents/planner_agent.py b/src/backend/kernel_agents/planner_agent.py index c9d9d651b..fbea47ab1 100644 --- a/src/backend/kernel_agents/planner_agent.py +++ b/src/backend/kernel_agents/planner_agent.py @@ -439,7 +439,13 @@ async def _create_structured_plan( return plan, steps except Exception as e: - logging.exception(f"Error creating structured plan: {e}") + error_message = str(e) + if "Rate limit is exceeded" in error_message: + logging.warning("Rate limit hit. Consider retrying after some delay.") + raise + else: + logging.exception(f"Error creating structured plan: {e}") + # Create a fallback dummy plan when parsing fails logging.info("Creating fallback dummy plan due to parsing error") diff --git a/src/frontend/wwwroot/home/home.js b/src/frontend/wwwroot/home/home.js index 00cd04755..dd1828793 100644 --- a/src/frontend/wwwroot/home/home.js +++ b/src/frontend/wwwroot/home/home.js @@ -103,6 +103,16 @@ }) .then((response) => response.json()) .then((data) => { + // Check if 'detail' field contains rate limit error + if (data.detail && data.detail.includes("Rate limit is exceeded")) { + notyf.error("Application temporarily unavailable due to quota limits. Please try again later."); + newTaskPrompt.disabled = false; + startTaskButton.disabled = false; + startTaskButton.classList.remove("is-loading"); + hideOverlay(); + return; + } + if (data.status == "Plan not created" || data.plan_id == "") { notyf.error("Unable to create plan for this task."); newTaskPrompt.disabled = false;