|
| 1 | +from collections.abc import Generator |
| 2 | + |
| 3 | +from memos.configs.llm import QwenLLMConfig |
| 4 | +from memos.llms.openai import OpenAILLM |
| 5 | +from memos.llms.utils import remove_thinking_tags |
| 6 | +from memos.log import get_logger |
| 7 | +from memos.types import MessageList |
| 8 | + |
| 9 | + |
| 10 | +logger = get_logger(__name__) |
| 11 | + |
| 12 | + |
class QwenLLM(OpenAILLM):
    """Qwen (DashScope) LLM class via OpenAI-compatible API."""

    def __init__(self, config: QwenLLMConfig):
        """Initialize the LLM by delegating to ``OpenAILLM`` with ``config``."""
        super().__init__(config)

    def generate(self, messages: MessageList) -> str:
        """Generate a response from Qwen LLM.

        Args:
            messages: Chat messages forwarded unchanged to the completions API.

        Returns:
            The content of the first choice. When
            ``config.remove_think_prefix`` is set, the content is passed
            through ``remove_thinking_tags`` first. A missing (``None``)
            content is returned as an empty string.
        """
        response = self.client.chat.completions.create(
            model=self.config.model_name_or_path,
            messages=messages,
            extra_body=self.config.extra_body,
            temperature=self.config.temperature,
            max_tokens=self.config.max_tokens,
            top_p=self.config.top_p,
        )
        logger.info(f"Response from Qwen: {response.model_dump_json()}")
        # `content` is optional in the completions schema; normalise it so the
        # declared `str` return type holds and the tag stripper never sees None.
        response_content = response.choices[0].message.content or ""
        if self.config.remove_think_prefix:
            return remove_thinking_tags(response_content)
        return response_content

    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
        """Stream response from Qwen LLM.

        Args:
            messages: Chat messages forwarded unchanged to the completions API.
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            Text fragments in arrival order. Unless
            ``config.remove_think_prefix`` is set, a run of reasoning
            fragments is wrapped in ``<think>`` / ``</think>`` markers.
        """
        response = self.client.chat.completions.create(
            model=self.config.model_name_or_path,
            messages=messages,
            stream=True,
            temperature=self.config.temperature,
            max_tokens=self.config.max_tokens,
            top_p=self.config.top_p,
            extra_body=self.config.extra_body,
        )

        emit_think_tags = not self.config.remove_think_prefix
        reasoning_started = False
        for chunk in response:
            # Streams may contain chunks with no choices (for example a
            # trailing usage-only chunk); indexing [0] on those would raise.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta

            # Some models may have separate `reasoning_content` vs `content`
            # For Qwen (DashScope), likely only `content` is used
            reasoning_piece = getattr(delta, "reasoning_content", None)
            content_piece = getattr(delta, "content", None)

            if reasoning_piece:
                if emit_think_tags and not reasoning_started:
                    yield "<think>"
                    reasoning_started = True
                # NOTE(review): reasoning text is still yielded when
                # remove_think_prefix is true (only the tags are dropped) —
                # confirm this is the intended streaming behaviour.
                yield reasoning_piece
            elif content_piece:
                if reasoning_started:
                    yield "</think>"
                    reasoning_started = False
                yield content_piece

        # The stream can end while reasoning is still open (e.g. truncated
        # output); close the marker so consumers always see balanced tags.
        if reasoning_started:
            yield "</think>"
0 commit comments