11 | 11 | from semantic_kernel.agents.open_ai import AzureAssistantAgent |
12 | 12 | from semantic_kernel.contents.chat_message_content import ChatMessageContent |
13 | 13 | from semantic_kernel.contents.utils.author_role import AuthorRole |
| 14 | +from semantic_kernel.exceptions.agent_exceptions import AgentInvokeException  # Raised when an agent invocation fails (e.g., on rate limiting) |
14 | 15 |
15 | 16 | from common.config.config import Config |
16 | 17 | from helpers.utils import format_stream_response |
@@ -145,50 +146,70 @@ async def stream_chat_request(self, request_body, conversation_id, query): |
145 | 146 | history_metadata = request_body.get("history_metadata", {}) |
146 | 147 |
147 | 148 | async def generate(): |
148 | | - assistant_content = "" |
149 | | - # Call the OpenAI streaming method |
150 | | - response = await self.stream_openai_text(conversation_id, query) |
151 | | - # Stream chunks of data |
152 | | - async for chunk in response.body_iterator: |
153 | | - if isinstance(chunk, dict): |
154 | | - chunk = json.dumps(chunk) # Convert dict to JSON string |
155 | | - assistant_content += chunk |
156 | | - chat_completion_chunk = { |
157 | | - "id": "", |
158 | | - "model": "", |
159 | | - "created": 0, |
160 | | - "object": "", |
161 | | - "choices": [ |
162 | | - { |
163 | | - "messages": [], |
164 | | - "delta": {}, |
165 | | - } |
166 | | - ], |
167 | | - "history_metadata": history_metadata, |
168 | | - "apim-request-id": "", |
169 | | - } |
170 | | - |
171 | | - chat_completion_chunk["id"] = str(uuid.uuid4()) |
172 | | - chat_completion_chunk["model"] = "rag-model" |
173 | | - chat_completion_chunk["created"] = int(time.time()) |
174 | | - # chat_completion_chunk["object"] = assistant_content |
175 | | - chat_completion_chunk["object"] = "extensions.chat.completion.chunk" |
176 | | - chat_completion_chunk["apim-request-id"] = response.headers.get( |
177 | | - "apim-request-id", "" |
178 | | - ) |
179 | | - chat_completion_chunk["choices"][0]["messages"].append( |
180 | | - {"role": "assistant", "content": assistant_content} |
181 | | - ) |
182 | | - chat_completion_chunk["choices"][0]["delta"] = { |
183 | | - "role": "assistant", |
184 | | - "content": assistant_content, |
185 | | - } |
186 | | - |
187 | | - completion_chunk_obj = json.loads( |
188 | | - json.dumps(chat_completion_chunk), |
189 | | - object_hook=lambda d: SimpleNamespace(**d), |
190 | | - ) |
191 | | - yield json.dumps(format_stream_response(completion_chunk_obj, history_metadata, response.headers.get("apim-request-id", ""))) + "\n\n" |
| 149 | + try: |
| 150 | + assistant_content = "" |
| 151 | + # Call the OpenAI streaming method |
| 152 | + response = await self.stream_openai_text(conversation_id, query) |
| 153 | + # Stream chunks of data |
| 154 | + async for chunk in response.body_iterator: |
| 155 | + if isinstance(chunk, dict): |
| 156 | + chunk = json.dumps(chunk) # Convert dict to JSON string |
| 157 | + assistant_content += chunk |
| 158 | + chat_completion_chunk = { |
| 159 | + "id": "", |
| 160 | + "model": "", |
| 161 | + "created": 0, |
| 162 | + "object": "", |
| 163 | + "choices": [ |
| 164 | + { |
| 165 | + "messages": [], |
| 166 | + "delta": {}, |
| 167 | + } |
| 168 | + ], |
| 169 | + "history_metadata": history_metadata, |
| 170 | + "apim-request-id": "", |
| 171 | + } |
| 172 | + |
| 173 | + chat_completion_chunk["id"] = str(uuid.uuid4()) |
| 174 | + chat_completion_chunk["model"] = "rag-model" |
| 175 | + chat_completion_chunk["created"] = int(time.time()) |
| 176 | + # chat_completion_chunk["object"] = assistant_content |
| 177 | + chat_completion_chunk["object"] = "extensions.chat.completion.chunk" |
| 178 | + chat_completion_chunk["apim-request-id"] = response.headers.get( |
| 179 | + "apim-request-id", "" |
| 180 | + ) |
| 181 | + chat_completion_chunk["choices"][0]["messages"].append( |
| 182 | + {"role": "assistant", "content": assistant_content} |
| 183 | + ) |
| 184 | + chat_completion_chunk["choices"][0]["delta"] = { |
| 185 | + "role": "assistant", |
| 186 | + "content": assistant_content, |
| 187 | + } |
| 188 | + |
| 189 | + completion_chunk_obj = json.loads( |
| 190 | + json.dumps(chat_completion_chunk), |
| 191 | + object_hook=lambda d: SimpleNamespace(**d), |
| 192 | + ) |
| 193 | + yield json.dumps(format_stream_response(completion_chunk_obj, history_metadata, response.headers.get("apim-request-id", ""))) + "\n\n" |
| 194 | + |
| 195 | + except AgentInvokeException as e: |
| 196 | + error_message = str(e) |
| 197 | +                retry_after = "a while"  # fallback shown to the user when no retry delay can be parsed |
| 198 | + if "Rate limit is exceeded" in error_message: |
| 199 | + import re |
| 200 | + match = re.search(r"Try again in (\d+) seconds", error_message) |
| 201 | + if match: |
| 202 | + retry_after = f"{match.group(1)} seconds" |
| 203 | + logger.error(f"Rate limit error: {error_message}") |
| 204 | + yield json.dumps({"error": f"Rate limit is exceeded. Try again in {retry_after}."}) + "\n\n" |
| 205 | + else: |
| 206 | + logger.error(f"AgentInvokeException: {error_message}") |
| 207 | + yield json.dumps({"error": "An error occurred. Please try again later."}) + "\n\n" |
| 208 | + |
| 209 | + except Exception as e: |
| 210 | + logger.error(f"Error in stream_chat_request: {e}", exc_info=True) |
| 211 | + yield json.dumps({"error": "An error occurred while processing the request."}) + "\n\n" |
| 212 | + |
192 | 213 | return generate() |
193 | 214 |
194 | 215 | async def complete_chat_request(self, query, last_rag_response=None): |
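A note on the chunk conversion: `generate()` serializes the chunk dict and re-parses it with an `object_hook` so that `format_stream_response` receives attribute-accessible objects rather than plain dicts. A minimal standalone sketch of that round trip:

```python
import json
from types import SimpleNamespace

# A nested dict shaped like chat_completion_chunk above.
chunk = {"choices": [{"delta": {"role": "assistant", "content": "hi"}}]}

# Serializing and re-parsing with an object_hook rebuilds every dict
# level as a SimpleNamespace, enabling dot access instead of key lookups.
obj = json.loads(json.dumps(chunk), object_hook=lambda d: SimpleNamespace(**d))
assert obj.choices[0].delta.content == "hi"
```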
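The rate-limit branch of the new `AgentInvokeException` handler extracts the retry delay from the exception text with a regex. A standalone sketch, assuming the service message matches the "Try again in N seconds" wording the handler searches for:

```python
import re

# Assumed shape of the service's rate-limit message; only the
# "Try again in N seconds" fragment matters to the handler.
error_message = "Rate limit is exceeded. Try again in 26 seconds."

retry_after = "a while"  # fallback when no delay can be parsed
match = re.search(r"Try again in (\d+) seconds", error_message)
if match:
    retry_after = f"{match.group(1)} seconds"

print(f"Rate limit is exceeded. Try again in {retry_after}.")
# -> Rate limit is exceeded. Try again in 26 seconds.
```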
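Every item `generate()` yields, success or error, is a JSON document terminated by a blank line, so a client can parse each item on its own. A hypothetical consumer sketch; `fake_stream` stands in for the real generator, and the success payload shape is illustrative (the real one is whatever `format_stream_response` emits):

```python
import asyncio
import json

# Hypothetical stand-in for generate(): yields JSON documents followed
# by a blank line, covering both the success and error cases.
async def fake_stream():
    yield json.dumps(
        {"choices": [{"delta": {"role": "assistant", "content": "Hello"}}]}
    ) + "\n\n"
    yield json.dumps(
        {"error": "Rate limit is exceeded. Try again in 26 seconds."}
    ) + "\n\n"

async def main():
    async for item in fake_stream():
        payload = json.loads(item)
        if "error" in payload:
            print("stream error:", payload["error"])
        else:
            print("delta:", payload["choices"][0]["delta"].get("content", ""))

asyncio.run(main())
```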