|
1 | 1 | # -*- coding: utf-8 -*- |
2 | 2 | """OpenAI Client.""" |
| 3 | +import copy |
3 | 4 | import os |
4 | 5 | from typing import Any, AsyncGenerator, Callable, Dict, Literal, Type |
5 | 6 |
|
|
13 | 14 | from openjudge.utils.utils import repair_and_load_json |
14 | 15 |
|
15 | 16 |
|
16 | | -def _format_audio_data_for_qwen_omni(messages: list[dict | ChatMessage]) -> None: |
| 17 | +def _format_audio_data_for_qwen_omni(messages: list[dict | ChatMessage]) -> list[dict]: |
17 | 18 | """Qwen-omni uses OpenAI-compatible API but requires different audio |
18 | 19 | data format than OpenAI with "data:;base64," prefix. |
19 | 20 | Refer to `Qwen-omni documentation |
20 | | - <https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2867839>`_ |
| 21 | + <https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2867839>`_ |
21 | 22 | for more details. |
22 | 23 |
|
23 | 24 | Args: |
24 | 25 | messages (`list[dict | ChatMessage]`): |
25 | 26 | The list of message dictionaries from OpenAI formatter. |
26 | 27 | """ |
| 28 | + format_data = [] |
27 | 29 | for msg in messages: |
28 | | - msg_dict = msg.to_dict() if isinstance(msg, ChatMessage) else msg |
29 | | - if isinstance(msg_dict.get("content"), list): |
30 | | - for block in msg_dict["content"]: |
31 | | - if ( |
32 | | - isinstance(block, dict) |
33 | | - and "input_audio" in block |
34 | | - and isinstance(block["input_audio"].get("data"), str) |
35 | | - ): |
36 | | - if not block["input_audio"]["data"].startswith("http"): |
| 30 | + try: |
| 31 | + msg_copy = copy.deepcopy(msg) |
| 32 | + msg_dict = msg_copy.to_dict() if isinstance(msg_copy, ChatMessage) else msg_copy |
| 33 | + if isinstance(msg_dict.get("content"), list): |
| 34 | + for block in msg_dict["content"]: |
| 35 | + if ( |
| 36 | + isinstance(block, dict) |
| 37 | + and "input_audio" in block |
| 38 | + and isinstance(block["input_audio"].get("data"), str) |
| 39 | + and not block["input_audio"]["data"].startswith("http") |
| 40 | + ): |
37 | 41 | block["input_audio"]["data"] = "data:;base64," + block["input_audio"]["data"] |
| 42 | + format_data.append(msg_dict) |
| 43 | + except Exception as e: |
| 44 | + logger.error(f"Failed to format audio data: {type(e).__name__}: {e}", exc_info=True) |
| 45 | + format_data.append(msg.to_dict() if isinstance(msg, ChatMessage) else msg) |
| 46 | + return format_data |
38 | 47 |
|
39 | 48 |
|
40 | 49 | class OpenAIChatModel(BaseChatModel): |
@@ -150,7 +159,7 @@ async def achat( |
150 | 159 |
|
151 | 160 | # Qwen-omni requires a different base64 audio format than OpenAI |
152 | 161 | if "omni" in self.model.lower(): |
153 | | - _format_audio_data_for_qwen_omni(messages) |
| 162 | + messages = _format_audio_data_for_qwen_omni(messages) |
154 | 163 |
|
155 | 164 | kwargs = { |
156 | 165 | "model": self.model, |
@@ -188,9 +197,14 @@ async def achat( |
188 | 197 |
|
189 | 198 | # Use simple json_object format for models that don't support complex JSON schema |
190 | 199 | if "qwen" in self.model.lower() or "gemini" in self.model.lower(): |
191 | | - structured_model = {"type": "json_object"} # type: ignore |
| 200 | + logger.warning( |
| 201 | + "Qwen models do not support Pydantic structured output via `response_format`. " |
| 202 | + "Update the unstructured JSON mode with `response_format={'type': 'json_object'}`." |
| 203 | + ) |
| 204 | + structured_model = {"type": "json_object"} |
192 | 205 |
|
193 | 206 | kwargs["response_format"] = structured_model |
| 207 | + |
194 | 208 | if not self.stream: |
195 | 209 | response = await self.client.chat.completions.parse(**kwargs) |
196 | 210 | else: |
|
0 commit comments