11 | 11 | from semantic_kernel.agents.open_ai import AzureAssistantAgent |
12 | 12 | from semantic_kernel.contents.chat_message_content import ChatMessageContent |
13 | 13 | from semantic_kernel.contents.utils.author_role import AuthorRole |
| 14 | +from semantic_kernel.exceptions.agent_exceptions import AgentInvokeException  # Raised when an agent invocation fails (e.g., on rate limiting) |
14 | 15 |
15 | 16 | from common.config.config import Config |
16 | 17 | from helpers.utils import format_stream_response |
@@ -145,50 +146,70 @@ async def stream_chat_request(self, request_body, conversation_id, query): |
145 | 146 | history_metadata = request_body.get("history_metadata", {}) |
146 | 147 |
147 | 148 | async def generate(): |
148 | | - assistant_content = "" |
149 | | - # Call the OpenAI streaming method |
150 | | - response = await self.stream_openai_text(conversation_id, query) |
151 | | - # Stream chunks of data |
152 | | - async for chunk in response.body_iterator: |
153 | | - if isinstance(chunk, dict): |
154 | | - chunk = json.dumps(chunk) # Convert dict to JSON string |
155 | | - assistant_content += chunk |
156 | | - chat_completion_chunk = { |
157 | | - "id": "", |
158 | | - "model": "", |
159 | | - "created": 0, |
160 | | - "object": "", |
161 | | - "choices": [ |
162 | | - { |
163 | | - "messages": [], |
164 | | - "delta": {}, |
165 | | - } |
166 | | - ], |
167 | | - "history_metadata": history_metadata, |
168 | | - "apim-request-id": "", |
169 | | - } |
170 | | - |
171 | | - chat_completion_chunk["id"] = str(uuid.uuid4()) |
172 | | - chat_completion_chunk["model"] = "rag-model" |
173 | | - chat_completion_chunk["created"] = int(time.time()) |
174 | | - # chat_completion_chunk["object"] = assistant_content |
175 | | - chat_completion_chunk["object"] = "extensions.chat.completion.chunk" |
176 | | - chat_completion_chunk["apim-request-id"] = response.headers.get( |
177 | | - "apim-request-id", "" |
178 | | - ) |
179 | | - chat_completion_chunk["choices"][0]["messages"].append( |
180 | | - {"role": "assistant", "content": assistant_content} |
181 | | - ) |
182 | | - chat_completion_chunk["choices"][0]["delta"] = { |
183 | | - "role": "assistant", |
184 | | - "content": assistant_content, |
185 | | - } |
186 | | - |
187 | | - completion_chunk_obj = json.loads( |
188 | | - json.dumps(chat_completion_chunk), |
189 | | - object_hook=lambda d: SimpleNamespace(**d), |
190 | | - ) |
191 | | - yield json.dumps(format_stream_response(completion_chunk_obj, history_metadata, response.headers.get("apim-request-id", ""))) + "\n\n" |
| 149 | + try: |
| 150 | + assistant_content = "" |
| 151 | + # Call the OpenAI streaming method |
| 152 | + response = await self.stream_openai_text(conversation_id, query) |
| 153 | + # Stream chunks of data |
| 154 | + async for chunk in response.body_iterator: |
| 155 | + if isinstance(chunk, dict): |
| 156 | + chunk = json.dumps(chunk) # Convert dict to JSON string |
| 157 | + assistant_content += chunk |
| 158 | + chat_completion_chunk = { |
| 159 | + "id": "", |
| 160 | + "model": "", |
| 161 | + "created": 0, |
| 162 | + "object": "", |
| 163 | + "choices": [ |
| 164 | + { |
| 165 | + "messages": [], |
| 166 | + "delta": {}, |
| 167 | + } |
| 168 | + ], |
| 169 | + "history_metadata": history_metadata, |
| 170 | + "apim-request-id": "", |
| 171 | + } |
| 172 | + |
| 173 | + chat_completion_chunk["id"] = str(uuid.uuid4()) |
| 174 | + chat_completion_chunk["model"] = "rag-model" |
| 175 | + chat_completion_chunk["created"] = int(time.time()) |
| 176 | + # chat_completion_chunk["object"] = assistant_content |
| 177 | + chat_completion_chunk["object"] = "extensions.chat.completion.chunk" |
| 178 | + chat_completion_chunk["apim-request-id"] = response.headers.get( |
| 179 | + "apim-request-id", "" |
| 180 | + ) |
| 181 | + chat_completion_chunk["choices"][0]["messages"].append( |
| 182 | + {"role": "assistant", "content": assistant_content} |
| 183 | + ) |
| 184 | + chat_completion_chunk["choices"][0]["delta"] = { |
| 185 | + "role": "assistant", |
| 186 | + "content": assistant_content, |
| 187 | + } |
| 188 | + |
| 189 | + completion_chunk_obj = json.loads( |
| 190 | + json.dumps(chat_completion_chunk), |
| 191 | + object_hook=lambda d: SimpleNamespace(**d), |
| 192 | + ) |
| 193 | + yield json.dumps(format_stream_response(completion_chunk_obj, history_metadata, response.headers.get("apim-request-id", ""))) + "\n\n" |
| 194 | + |
| 195 | + except AgentInvokeException as e: |
| 196 | + error_message = str(e) |
| 197 | +                retry_after = "a while"  # fallback shown to the user when no retry delay can be parsed |
| 198 | + if "Rate limit is exceeded" in error_message: |
| 199 | + import re |
| 200 | + match = re.search(r"Try again in (\d+) seconds", error_message) |
| 201 | + if match: |
| 202 | + retry_after = f"{match.group(1)} seconds" |
| 203 | + logger.error(f"Rate limit error: {error_message}") |
| 204 | + yield json.dumps({"error": f"Rate limit is exceeded. Try again in {retry_after}."}) + "\n\n" |
| 205 | + else: |
| 206 | + logger.error(f"AgentInvokeException: {error_message}") |
| 207 | + yield json.dumps({"error": "An error occurred. Please try again later."}) + "\n\n" |
| 208 | + |
| 209 | + except Exception as e: |
| 210 | + logger.error(f"Error in stream_chat_request: {e}", exc_info=True) |
| 211 | + yield json.dumps({"error": "An error occurred while processing the request."}) + "\n\n" |
| 212 | + |
192 | 213 | return generate() |
193 | 214 |
194 | 215 | async def complete_chat_request(self, query, last_rag_response=None): |
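A note on the chunk conversion: `generate()` serializes the chunk dict and re-parses it with an `object_hook` so that `format_stream_response` receives attribute-accessible objects rather than plain dicts. A minimal standalone sketch of that round trip:

```python
import json
from types import SimpleNamespace

# A nested dict shaped like chat_completion_chunk above.
chunk = {"choices": [{"delta": {"role": "assistant", "content": "hi"}}]}

# Serializing and re-parsing with an object_hook rebuilds every dict
# level as a SimpleNamespace, enabling dot access instead of key lookups.
obj = json.loads(json.dumps(chunk), object_hook=lambda d: SimpleNamespace(**d))
assert obj.choices[0].delta.content == "hi"
```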
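The rate-limit branch of the new `AgentInvokeException` handler extracts the retry delay from the exception text with a regex. A standalone sketch, assuming the service message matches the "Try again in N seconds" wording the handler searches for:

```python
import re

# Assumed shape of the service's rate-limit message; only the
# "Try again in N seconds" fragment matters to the handler.
error_message = "Rate limit is exceeded. Try again in 26 seconds."

retry_after = "a while"  # fallback when no delay can be parsed
match = re.search(r"Try again in (\d+) seconds", error_message)
if match:
    retry_after = f"{match.group(1)} seconds"

print(f"Rate limit is exceeded. Try again in {retry_after}.")
# -> Rate limit is exceeded. Try again in 26 seconds.
```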
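Every item `generate()` yields, success or error, is a JSON document terminated by a blank line, so a client can parse each item on its own. A hypothetical consumer sketch; `fake_stream` stands in for the real generator, and the success payload shape is illustrative (the real one is whatever `format_stream_response` emits):

```python
import asyncio
import json

# Hypothetical stand-in for generate(): yields JSON documents followed
# by a blank line, covering both the success and error cases.
async def fake_stream():
    yield json.dumps(
        {"choices": [{"delta": {"role": "assistant", "content": "Hello"}}]}
    ) + "\n\n"
    yield json.dumps(
        {"error": "Rate limit is exceeded. Try again in 26 seconds."}
    ) + "\n\n"

async def main():
    async for item in fake_stream():
        payload = json.loads(item)
        if "error" in payload:
            print("stream error:", payload["error"])
        else:
            print("delta:", payload["choices"][0]["delta"].get("content", ""))

asyncio.run(main())
```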