Skip to content

Commit a97e3d2

Browse files
Merge pull request #351 from microsoft/psl-pk-ratelimit
fix: Adjust Hosting Plan, Tags, and Improve Error Handling
2 parents 3e2a688 + 92afdc8 commit a97e3d2

File tree

7 files changed

+78
-57
lines changed

7 files changed

+78
-57
lines changed

.github/workflows/deploy-KMGeneric.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ jobs:
103103
104104
- name: Determine Tag Name Based on Branch
105105
id: determine_tag
106-
run: echo "tagname=${{ github.ref_name == 'main' && 'latest_migra' || github.ref_name == 'dev' && 'dev' || github.ref_name == 'demo' && 'demo' || github.ref_name == 'dependabotchanges' && 'dependabotchanges' || github.head_ref || 'default' }}" >> $GITHUB_OUTPUT
106+
run: echo "tagname=${{ github.ref_name == 'main' && 'latest_migrated' || github.ref_name == 'dev' && 'dev' || github.ref_name == 'demo' && 'demo' || github.ref_name == 'dependabotchanges' && 'dependabotchanges' || github.head_ref || 'default' }}" >> $GITHUB_OUTPUT
107107

108108
- name: Deploy Bicep Template
109109
id: deploy

.github/workflows/docker-build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ jobs:
4949
id: determine_tag
5050
run: |
5151
if [[ "${{ github.ref_name }}" == "main" ]]; then
52-
echo "tagname=latest_migra" >> $GITHUB_OUTPUT
52+
echo "tagname=latest_migrated" >> $GITHUB_OUTPUT
5353
elif [[ "${{ github.ref_name }}" == "dev" ]]; then
5454
echo "tagname=dev" >> $GITHUB_OUTPUT
5555
elif [[ "${{ github.ref_name }}" == "demo" ]]; then

infra/deploy_app_service_plan.bicep

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ param HostingPlanName string = '${ solutionName }-app-service-plan'
88
@allowed(
99
['F1', 'D1', 'B1', 'B2', 'B3', 'S1', 'S2', 'S3', 'P1', 'P2', 'P3', 'P4','P0v3']
1010
)
11-
param HostingPlanSku string = 'P0v3'
11+
param HostingPlanSku string = 'B2'
1212

1313
resource HostingPlan 'Microsoft.Web/serverfarms@2020-06-01' = {
1414
name: HostingPlanName

infra/main.bicep

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ param embeddingModel string = 'text-embedding-ada-002'
6363
@description('Capacity of the Embedding Model deployment')
6464
param embeddingDeploymentCapacity int = 80
6565

66-
param imageTag string = 'latest_migra'
66+
param imageTag string = 'latest_migrated'
6767

6868
var uniqueId = toLower(uniqueString(subscription().id, environmentName, resourceGroup().location))
6969
var solutionPrefix = 'km${padLeft(take(uniqueId, 12), 12, '0')}'

infra/main.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"_generator": {
66
"name": "bicep",
77
"version": "0.34.44.8038",
8-
"templateHash": "13730134018880843517"
8+
"templateHash": "11969070936051168983"
99
}
1010
},
1111
"parameters": {
@@ -92,7 +92,7 @@
9292
},
9393
"imageTag": {
9494
"type": "string",
95-
"defaultValue": "latest_migra"
95+
"defaultValue": "latest_migrated"
9696
}
9797
},
9898
"variables": {
@@ -1875,7 +1875,7 @@
18751875
"_generator": {
18761876
"name": "bicep",
18771877
"version": "0.34.44.8038",
1878-
"templateHash": "18192780555217274649"
1878+
"templateHash": "5000589525239764864"
18791879
},
18801880
"description": "Creates an Azure App Service plan."
18811881
},
@@ -1892,7 +1892,7 @@
18921892
},
18931893
"HostingPlanSku": {
18941894
"type": "string",
1895-
"defaultValue": "P0v3",
1895+
"defaultValue": "B2",
18961896
"allowedValues": [
18971897
"F1",
18981898
"D1",

src/App/src/components/Chat/Chat.tsx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ const Chat: React.FC<ChatProps> = ({
7373
payload: true,
7474
});
7575

76-
if ((reqType !== 'graph' && reqType !== 'error') && isCharthDisplayDefault){
76+
if (((reqType !== 'graph' && reqType !== 'error') && messages[messages.length - 1].role !== ERROR) && isCharthDisplayDefault ){
7777
setIsChartLoading(true);
7878
setTimeout(()=>{
7979
makeApiRequestForChart('show in a graph by default', convId, messages[messages.length - 1].content as string)
@@ -267,7 +267,7 @@ const Chat: React.FC<ChatProps> = ({
267267
const errorMsg = JSON.parse(runningText).error;
268268
const errorMessage: ChatMessage = {
269269
id: generateUUIDv4(),
270-
role: ASSISTANT,
270+
role: ERROR,
271271
content: errorMsg,
272272
date: new Date().toISOString(),
273273
};
@@ -332,7 +332,7 @@ const Chat: React.FC<ChatProps> = ({
332332
parsedChartResponse?.object?.message;
333333
const errorMessage: ChatMessage = {
334334
id: generateUUIDv4(),
335-
role: ASSISTANT,
335+
role: ERROR,
336336
content: errorMsg,
337337
date: new Date().toISOString(),
338338
};
@@ -546,7 +546,7 @@ const Chat: React.FC<ChatProps> = ({
546546

547547
const errorMessage: ChatMessage = {
548548
id: generateUUIDv4(),
549-
role: ASSISTANT,
549+
role: ERROR,
550550
content: errorMsg,
551551
date: new Date().toISOString(),
552552
};
@@ -612,7 +612,7 @@ const Chat: React.FC<ChatProps> = ({
612612
parsedChartResponse?.object?.message;
613613
const errorMessage: ChatMessage = {
614614
id: generateUUIDv4(),
615-
role: ASSISTANT,
615+
role: ERROR,
616616
content: errorMsg,
617617
date: new Date().toISOString(),
618618
};

src/api/services/chat_service.py

Lines changed: 65 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from semantic_kernel.agents.open_ai import AzureAssistantAgent
1212
from semantic_kernel.contents.chat_message_content import ChatMessageContent
1313
from semantic_kernel.contents.utils.author_role import AuthorRole
14+
from semantic_kernel.exceptions.agent_exceptions import AgentInvokeException # Import the exception
1415

1516
from common.config.config import Config
1617
from helpers.utils import format_stream_response
@@ -145,50 +146,70 @@ async def stream_chat_request(self, request_body, conversation_id, query):
145146
history_metadata = request_body.get("history_metadata", {})
146147

147148
async def generate():
148-
assistant_content = ""
149-
# Call the OpenAI streaming method
150-
response = await self.stream_openai_text(conversation_id, query)
151-
# Stream chunks of data
152-
async for chunk in response.body_iterator:
153-
if isinstance(chunk, dict):
154-
chunk = json.dumps(chunk) # Convert dict to JSON string
155-
assistant_content += chunk
156-
chat_completion_chunk = {
157-
"id": "",
158-
"model": "",
159-
"created": 0,
160-
"object": "",
161-
"choices": [
162-
{
163-
"messages": [],
164-
"delta": {},
165-
}
166-
],
167-
"history_metadata": history_metadata,
168-
"apim-request-id": "",
169-
}
170-
171-
chat_completion_chunk["id"] = str(uuid.uuid4())
172-
chat_completion_chunk["model"] = "rag-model"
173-
chat_completion_chunk["created"] = int(time.time())
174-
# chat_completion_chunk["object"] = assistant_content
175-
chat_completion_chunk["object"] = "extensions.chat.completion.chunk"
176-
chat_completion_chunk["apim-request-id"] = response.headers.get(
177-
"apim-request-id", ""
178-
)
179-
chat_completion_chunk["choices"][0]["messages"].append(
180-
{"role": "assistant", "content": assistant_content}
181-
)
182-
chat_completion_chunk["choices"][0]["delta"] = {
183-
"role": "assistant",
184-
"content": assistant_content,
185-
}
186-
187-
completion_chunk_obj = json.loads(
188-
json.dumps(chat_completion_chunk),
189-
object_hook=lambda d: SimpleNamespace(**d),
190-
)
191-
yield json.dumps(format_stream_response(completion_chunk_obj, history_metadata, response.headers.get("apim-request-id", ""))) + "\n\n"
149+
try:
150+
assistant_content = ""
151+
# Call the OpenAI streaming method
152+
response = await self.stream_openai_text(conversation_id, query)
153+
# Stream chunks of data
154+
async for chunk in response.body_iterator:
155+
if isinstance(chunk, dict):
156+
chunk = json.dumps(chunk) # Convert dict to JSON string
157+
assistant_content += chunk
158+
chat_completion_chunk = {
159+
"id": "",
160+
"model": "",
161+
"created": 0,
162+
"object": "",
163+
"choices": [
164+
{
165+
"messages": [],
166+
"delta": {},
167+
}
168+
],
169+
"history_metadata": history_metadata,
170+
"apim-request-id": "",
171+
}
172+
173+
chat_completion_chunk["id"] = str(uuid.uuid4())
174+
chat_completion_chunk["model"] = "rag-model"
175+
chat_completion_chunk["created"] = int(time.time())
176+
# chat_completion_chunk["object"] = assistant_content
177+
chat_completion_chunk["object"] = "extensions.chat.completion.chunk"
178+
chat_completion_chunk["apim-request-id"] = response.headers.get(
179+
"apim-request-id", ""
180+
)
181+
chat_completion_chunk["choices"][0]["messages"].append(
182+
{"role": "assistant", "content": assistant_content}
183+
)
184+
chat_completion_chunk["choices"][0]["delta"] = {
185+
"role": "assistant",
186+
"content": assistant_content,
187+
}
188+
189+
completion_chunk_obj = json.loads(
190+
json.dumps(chat_completion_chunk),
191+
object_hook=lambda d: SimpleNamespace(**d),
192+
)
193+
yield json.dumps(format_stream_response(completion_chunk_obj, history_metadata, response.headers.get("apim-request-id", ""))) + "\n\n"
194+
195+
except AgentInvokeException as e:
196+
error_message = str(e)
197+
retry_after = "sometime"
198+
if "Rate limit is exceeded" in error_message:
199+
import re
200+
match = re.search(r"Try again in (\d+) seconds", error_message)
201+
if match:
202+
retry_after = f"{match.group(1)} seconds"
203+
logger.error(f"Rate limit error: {error_message}")
204+
yield json.dumps({"error": f"Rate limit is exceeded. Try again in {retry_after}."}) + "\n\n"
205+
else:
206+
logger.error(f"AgentInvokeException: {error_message}")
207+
yield json.dumps({"error": "An error occurred. Please try again later."}) + "\n\n"
208+
209+
except Exception as e:
210+
logger.error(f"Error in stream_chat_request: {e}", exc_info=True)
211+
yield json.dumps({"error": "An error occurred while processing the request."}) + "\n\n"
212+
192213
return generate()
193214

194215
async def complete_chat_request(self, query, last_rag_response=None):

0 commit comments

Comments (0)