
Commit a297572

handle rate limit from backend
1 parent 56147fb commit a297572

5 files changed: +70 −49 lines

.github/workflows/deploy-KMGeneric.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -103,7 +103,7 @@ jobs:
 
       - name: Determine Tag Name Based on Branch
         id: determine_tag
-        run: echo "tagname=${{ github.ref_name == 'main' && 'latest_migra' || github.ref_name == 'dev' && 'dev' || github.ref_name == 'demo' && 'demo' || github.ref_name == 'dependabotchanges' && 'dependabotchanges' || github.head_ref || 'default' }}" >> $GITHUB_OUTPUT
+        run: echo "tagname=${{ github.ref_name == 'main' && 'latest_migrated' || github.ref_name == 'dev' && 'dev' || github.ref_name == 'demo' && 'demo' || github.ref_name == 'dependabotchanges' && 'dependabotchanges' || github.head_ref || 'default' }}" >> $GITHUB_OUTPUT
 
       - name: Deploy Bicep Template
         id: deploy
```

.github/workflows/docker-build.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -49,7 +49,7 @@ jobs:
         id: determine_tag
         run: |
           if [[ "${{ github.ref_name }}" == "main" ]]; then
-            echo "tagname=latest_migra" >> $GITHUB_OUTPUT
+            echo "tagname=latest_migrated" >> $GITHUB_OUTPUT
           elif [[ "${{ github.ref_name }}" == "dev" ]]; then
             echo "tagname=dev" >> $GITHUB_OUTPUT
           elif [[ "${{ github.ref_name }}" == "demo" ]]; then
```

infra/main.bicep

Lines changed: 1 addition & 1 deletion
```diff
@@ -63,7 +63,7 @@ param embeddingModel string = 'text-embedding-ada-002'
 @description('Capacity of the Embedding Model deployment')
 param embeddingDeploymentCapacity int = 80
 
-param imageTag string = 'latest_migra'
+param imageTag string = 'latest_migrated'
 
 var uniqueId = toLower(uniqueString(subscription().id, environmentName, resourceGroup().location))
 var solutionPrefix = 'km${padLeft(take(uniqueId, 12), 12, '0')}'
```

infra/main.json

Lines changed: 2 additions & 2 deletions
main.json is the ARM template that Bicep compiles from main.bicep, so its `imageTag` default and `templateHash` change in lockstep with the edit above.

```diff
@@ -5,7 +5,7 @@
     "_generator": {
       "name": "bicep",
       "version": "0.34.44.8038",
-      "templateHash": "13730134018880843517"
+      "templateHash": "2995962395312305521"
     }
   },
   "parameters": {
@@ -92,7 +92,7 @@
     },
     "imageTag": {
       "type": "string",
-      "defaultValue": "latest_migra"
+      "defaultValue": "latest_migrated"
     }
   },
   "variables": {
```

src/api/services/chat_service.py

Lines changed: 65 additions & 44 deletions
```diff
@@ -11,6 +11,7 @@
 from semantic_kernel.agents.open_ai import AzureAssistantAgent
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
+from semantic_kernel.exceptions.agent_exceptions import AgentInvokeException  # Import the exception
 
 from common.config.config import Config
 from helpers.utils import format_stream_response
@@ -145,50 +146,70 @@ async def stream_chat_request(self, request_body, conversation_id, query):
         history_metadata = request_body.get("history_metadata", {})
 
         async def generate():
-            assistant_content = ""
-            # Call the OpenAI streaming method
-            response = await self.stream_openai_text(conversation_id, query)
-            # Stream chunks of data
-            async for chunk in response.body_iterator:
-                if isinstance(chunk, dict):
-                    chunk = json.dumps(chunk)  # Convert dict to JSON string
-                assistant_content += chunk
-                chat_completion_chunk = {
-                    "id": "",
-                    "model": "",
-                    "created": 0,
-                    "object": "",
-                    "choices": [
-                        {
-                            "messages": [],
-                            "delta": {},
-                        }
-                    ],
-                    "history_metadata": history_metadata,
-                    "apim-request-id": "",
-                }
-
-                chat_completion_chunk["id"] = str(uuid.uuid4())
-                chat_completion_chunk["model"] = "rag-model"
-                chat_completion_chunk["created"] = int(time.time())
-                # chat_completion_chunk["object"] = assistant_content
-                chat_completion_chunk["object"] = "extensions.chat.completion.chunk"
-                chat_completion_chunk["apim-request-id"] = response.headers.get(
-                    "apim-request-id", ""
-                )
-                chat_completion_chunk["choices"][0]["messages"].append(
-                    {"role": "assistant", "content": assistant_content}
-                )
-                chat_completion_chunk["choices"][0]["delta"] = {
-                    "role": "assistant",
-                    "content": assistant_content,
-                }
-
-                completion_chunk_obj = json.loads(
-                    json.dumps(chat_completion_chunk),
-                    object_hook=lambda d: SimpleNamespace(**d),
-                )
-                yield json.dumps(format_stream_response(completion_chunk_obj, history_metadata, response.headers.get("apim-request-id", ""))) + "\n\n"
+            try:
+                assistant_content = ""
+                # Call the OpenAI streaming method
+                response = await self.stream_openai_text(conversation_id, query)
+                # Stream chunks of data
+                async for chunk in response.body_iterator:
+                    if isinstance(chunk, dict):
+                        chunk = json.dumps(chunk)  # Convert dict to JSON string
+                    assistant_content += chunk
+                    chat_completion_chunk = {
+                        "id": "",
+                        "model": "",
+                        "created": 0,
+                        "object": "",
+                        "choices": [
+                            {
+                                "messages": [],
+                                "delta": {},
+                            }
+                        ],
+                        "history_metadata": history_metadata,
+                        "apim-request-id": "",
+                    }
+
+                    chat_completion_chunk["id"] = str(uuid.uuid4())
+                    chat_completion_chunk["model"] = "rag-model"
+                    chat_completion_chunk["created"] = int(time.time())
+                    # chat_completion_chunk["object"] = assistant_content
+                    chat_completion_chunk["object"] = "extensions.chat.completion.chunk"
+                    chat_completion_chunk["apim-request-id"] = response.headers.get(
+                        "apim-request-id", ""
+                    )
+                    chat_completion_chunk["choices"][0]["messages"].append(
+                        {"role": "assistant", "content": assistant_content}
+                    )
+                    chat_completion_chunk["choices"][0]["delta"] = {
+                        "role": "assistant",
+                        "content": assistant_content,
+                    }
+
+                    completion_chunk_obj = json.loads(
+                        json.dumps(chat_completion_chunk),
+                        object_hook=lambda d: SimpleNamespace(**d),
+                    )
+                    yield json.dumps(format_stream_response(completion_chunk_obj, history_metadata, response.headers.get("apim-request-id", ""))) + "\n\n"
+
+            except AgentInvokeException as e:
+                error_message = str(e)
+                retry_after = "sometime"
+                if "Rate limit is exceeded" in error_message:
+                    import re
+                    match = re.search(r"Try again in (\d+) seconds", error_message)
+                    if match:
+                        retry_after = f"{match.group(1)} seconds"
+                    logger.error(f"Rate limit error: {error_message}")
+                    yield json.dumps({"error": f"Rate limit is exceeded. Try again in {retry_after}."}) + "\n\n"
+                else:
+                    logger.error(f"AgentInvokeException: {error_message}")
+                    yield json.dumps({"error": "An error occurred. Please try again later."}) + "\n\n"
+
+            except Exception as e:
+                logger.error(f"Error in stream_chat_request: {e}", exc_info=True)
+                yield json.dumps({"error": "An error occurred while processing the request."}) + "\n\n"
+
         return generate()
 
     async def complete_chat_request(self, query, last_rag_response=None):
```
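The new `except AgentInvokeException` branch surfaces backend rate limits to the client: it scans the exception message for a retry window and streams a structured error instead of letting the generator crash. A standalone sketch of just that parsing step, assuming the backend phrases its errors as "Rate limit is exceeded. Try again in N seconds." (the `parse_retry_after` helper name is hypothetical, not part of the commit):

```python
import re


def parse_retry_after(error_message: str) -> str:
    """Extract the retry window, mirroring the regex in the except-branch above."""
    retry_after = "sometime"  # fallback wording used by the handler
    if "Rate limit is exceeded" in error_message:
        match = re.search(r"Try again in (\d+) seconds", error_message)
        if match:
            retry_after = f"{match.group(1)} seconds"
    return retry_after


# Messages in the expected format yield a concrete window; anything else
# falls back to the generic wording.
assert parse_retry_after("Rate limit is exceeded. Try again in 26 seconds.") == "26 seconds"
assert parse_retry_after("unexpected failure") == "sometime"
```

One small design note: hoisting `import re` to module scope, rather than importing inside the handler as the diff does, would be the more idiomatic choice.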

0 commit comments