Skip to content

Commit 0937497

Browse files
Support qwen agent format (#2722)
1 parent da336a3 commit 0937497

File tree

10 files changed

+128
-28
lines changed

10 files changed

+128
-28
lines changed

swift/llm/infer/infer_engine/infer_engine.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,12 +136,13 @@ def _gen_wrapper():
136136
pass
137137
return self._update_metrics(res, metrics)
138138

139-
def _get_toolcall(self, response: Union[str, List[Dict[str,
140-
Any]]]) -> Optional[List[ChatCompletionMessageToolCall]]:
139+
def _get_toolcall(self,
140+
response: Union[str, List[Dict[str, Any]]],
141+
tools_prompt='react_en') -> Optional[List[ChatCompletionMessageToolCall]]:
141142
if not isinstance(response, str):
142143
response = '\n'.join([resp['text'] for resp in response if resp['type'] == 'text'])
143144

144-
action, action_input = split_action_action_input(response)
145+
action, action_input = split_action_action_input(response, tools_prompt=tools_prompt)
145146
if action is None:
146147
return None
147148

swift/llm/infer/infer_engine/lmdeploy_engine.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ async def _infer_stream_async(
211211
usage_info = self._get_usage_info(len(inputs['input_ids']), output.num_token)
212212
toolcall = None
213213
if is_finished:
214-
toolcall = self._get_toolcall(template.decode(output.token_ids))
214+
toolcall = self._get_toolcall(template.decode(output.token_ids), template.tools_prompt)
215215
finish_reason = self._get_finish_reason(generation_config.max_new_tokens, output.num_token,
216216
output.status.name == 'FINISH')
217217
choices = [
@@ -237,7 +237,7 @@ async def _infer_full_async(self, template: Template, inputs: Dict[str, Any],
237237
logprobs = self._get_logprobs(template.tokenizer, output.logprobs, output.token_ids, generation_config.logprobs)
238238

239239
usage_info = self._get_usage_info(len(inputs['input_ids']), output.num_token)
240-
toolcall = self._get_toolcall(response)
240+
toolcall = self._get_toolcall(response, template.tools_prompt)
241241
finish_reason = self._get_finish_reason(generation_config.max_new_tokens, output.num_token,
242242
output.status.name == 'FINISH')
243243
choices = [

swift/llm/infer/infer_engine/pt_engine.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def _model_generate(**kwargs):
230230
usage_info = self._get_usage_info(num_prompt_tokens, len(generate_ids))
231231
toolcall = None
232232
if is_finished[i]:
233-
toolcall = self._get_toolcall(template.decode(generate_ids))
233+
toolcall = self._get_toolcall(template.decode(generate_ids), template.tools_prompt)
234234
finish_reason = self._get_finish_reason(generation_config.max_new_tokens, num_prompt_tokens,
235235
is_finished[i])
236236

@@ -291,7 +291,7 @@ def _infer_full(self,
291291
usage_info = self._get_usage_info(num_prompt_tokens, len(generate_ids))
292292
response = template.decode(generate_ids, template_inputs=template_inputs[i])
293293
finish_reason = self._get_finish_reason(generation_config.max_new_tokens, num_prompt_tokens, True)
294-
toolcall = self._get_toolcall(response)
294+
toolcall = self._get_toolcall(response, template.tools_prompt)
295295
choices = [
296296
ChatCompletionResponseChoice(
297297
index=0,

swift/llm/infer/infer_engine/vllm_engine.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ async def _infer_stream_async(self, template: Template, inputs: Dict[str, Any],
300300
token_idxs[output.index] = len(output.token_ids)
301301
toolcall = None
302302
if output.is_finished:
303-
toolcall = self._get_toolcall(template.decode(output.token_ids))
303+
toolcall = self._get_toolcall(template.decode(output.token_ids), template.tools_prompt)
304304
choice = ChatCompletionResponseStreamChoice(
305305
index=output.index,
306306
delta=DeltaMessage(role='assistant', content=output.delta_text, tool_calls=toolcall),
@@ -328,7 +328,7 @@ async def _infer_full_async(self,
328328
response = template.decode(output.token_ids)
329329
logprobs = self._get_logprobs(template.tokenizer, output.logprobs, output.token_ids,
330330
generation_config.logprobs)
331-
toolcall = self._get_toolcall(response)
331+
toolcall = self._get_toolcall(response, template.tools_prompt)
332332
choice = ChatCompletionResponseChoice(
333333
index=output.index,
334334
message=ChatMessage(role='assistant', content=response, tool_calls=toolcall),

swift/llm/template/template_inputs.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ class StdTemplateInputs:
113113
videos: List[str] = field(default_factory=list)
114114
objects: List[Dict[str, Any]] = field(default_factory=list)
115115

116+
agent_keyword: Optional[Dict[str, str]] = None
117+
116118
def __post_init__(self):
117119
self.image_idx = 0
118120
self.audio_idx = 0
@@ -125,6 +127,8 @@ def __post_init__(self):
125127
self.videos = [self.videos]
126128
if self.audios and not isinstance(self.audios, (list, tuple)):
127129
self.audios = [self.audios]
130+
if self.agent_keyword is None:
131+
self.agent_keyword = {}
128132

129133
def to_history(self):
130134
if not self.messages:
@@ -137,7 +141,7 @@ def is_multimodal(self):
137141

138142
@classmethod
139143
def from_dict(cls, inputs: Dict[str, Any], *, tools_prompt: str = 'react_en') -> 'StdTemplateInputs':
140-
from swift.plugin import get_tools_prompt
144+
from swift.plugin import get_tools_prompt, get_tools_keyword
141145
inputs = deepcopy(inputs)
142146
kwargs = {}
143147
for key in ['rejected_response', 'label']:
@@ -153,12 +157,15 @@ def from_dict(cls, inputs: Dict[str, Any], *, tools_prompt: str = 'react_en') ->
153157
else:
154158
system = None
155159

160+
keyword = None
156161
if tools is not None:
157162
if system is not None:
158-
logger.warning_once('You have tools prompt but you also have a system field, which will be ignored')
163+
logger.warning_once(
164+
'You have tools prompt but you also have a system field, so the system field will be ignored')
159165
if isinstance(tools, str):
160166
tools = json.loads(tools)
161167
system = get_tools_prompt(tools, tools_prompt)
168+
keyword = get_tools_keyword(tools_prompt)
162169

163170
media_kwargs = StdTemplateInputs.remove_messages_media(messages)
164171
for k in list(media_kwargs.keys()):
@@ -173,8 +180,8 @@ def from_dict(cls, inputs: Dict[str, Any], *, tools_prompt: str = 'react_en') ->
173180
else:
174181
media_kwargs[k] = inputs_mm_data
175182

176-
StdTemplateInputs.messages_join_observation(messages)
177-
return cls(messages=messages, system=system, objects=objects, **kwargs, **media_kwargs)
183+
StdTemplateInputs.messages_join_observation(messages, tools_prompt)
184+
return cls(messages=messages, system=system, objects=objects, agent_keyword=keyword, **kwargs, **media_kwargs)
178185

179186
@staticmethod
180187
def remove_messages_media(messages: Messages) -> Dict[str, Any]:
@@ -204,7 +211,7 @@ def remove_messages_media(messages: Messages) -> Dict[str, Any]:
204211
return res
205212

206213
@staticmethod
207-
def messages_join_observation(messages: Messages) -> None:
214+
def messages_join_observation(messages: Messages, tools_prompt='react_en') -> None:
208215
"""
209216
Joins observations from 'tool' message into the 'assistant' response.
210217
@@ -228,12 +235,14 @@ def messages_join_observation(messages: Messages) -> None:
228235
if len(messages) < 2:
229236
return
230237
i = 1
238+
from swift.plugin import get_tools_keyword
239+
keyword = get_tools_keyword(tools_prompt)
231240
while i < len(messages):
232241
pre_message, message = messages[i - 1], messages[i]
233242
pre_role, pre_content = pre_message['role'], pre_message['content']
234243
role, content = message['role'], message['content']
235-
if pre_role == 'assistant' and role == 'tool' and isinstance(pre_content,
236-
str) and pre_content.endswith('Observation:'):
244+
if (pre_role == 'assistant' and role == 'tool' and isinstance(pre_content, str)
245+
and pre_content.endswith(keyword.get('observation'))):
237246
assert isinstance(pre_content, str)
238247
pre_message['content'] = pre_content + content # assistant
239248
messages.pop(i) # remove tool

swift/llm/template/utils.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,18 +184,24 @@ def split_parts_by_regex(text_list: list, regex_delimiters: Dict[str, List[float
184184
text_list[i:i + 1] = segments
185185

186186

187-
def split_action_action_input(response: str) -> Tuple[Optional[str], Optional[str]]:
187+
def split_action_action_input(response: str, tools_prompt='react_en') -> Tuple[Optional[str], Optional[str]]:
188+
188189
agent_keyword = [
189190
'action:', 'Action:', 'ACTION:', 'action input:', 'Action Input:', 'Action input:', 'ACTION INPUT:', 'Thought:',
190191
'Final Answer:', 'Observation:'
191192
]
193+
from swift.plugin import get_tools_keyword
194+
keyword = get_tools_keyword(tools_prompt)
195+
for key in keyword.values():
196+
if key not in agent_keyword:
197+
agent_keyword.append(key)
192198
agent_parts = split_str_parts_by(response, agent_keyword)
193199
action = None
194200
action_input = None
195201
for c in agent_parts:
196-
if c['key'].lower() == 'action:':
202+
if c['key'].lower() == keyword['action'].lower():
197203
action = c['content']
198-
elif c['key'].lower() == 'action input:':
204+
elif c['key'].lower() == keyword['action_input'].lower():
199205
action_input = c['content']
200206
if action:
201207
action = action.strip().replace('\n', '')

swift/plugin/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from .loss_scale import loss_scale_map
1111
from .metric import InferStats, MeanMetric, Metric, compute_acc, get_metric
1212
from .optimizer import optimizers_map
13-
from .tools import get_tools_prompt
13+
from .tools import get_tools_prompt, get_tools_keyword
1414
from .tuner import Tuner, extra_tuners
1515

1616
else:
@@ -22,7 +22,7 @@
2222
'loss_scale': ['loss_scale_map'],
2323
'metric': ['InferStats', 'MeanMetric', 'Metric', 'compute_acc', 'get_metric'],
2424
'optimizer': ['optimizers_map'],
25-
'tools': ['get_tools_prompt'],
25+
'tools': ['get_tools_prompt', 'get_tools_keyword'],
2626
'tuner': ['Tuner', 'extra_tuners'],
2727
}
2828

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"✿FUNCTION✿:": [2.0, 2.0],
3+
"✿ARGS✿:": [2.0, 2.0],
4+
"✿RETURN✿:": [1.0, 1.0],
5+
"✿RESULT✿:": [2.0, 0.0]
6+
}

swift/plugin/loss_scale.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,20 @@ def get_loss_scale(self,
154154
return super().get_loss_scale(context, context_type, is_last_round)
155155

156156

157+
class QwenLossScale(LossScale):
158+
loss_scale_config = 'qwen_loss_scale_config.json'
159+
160+
def get_loss_scale(self,
161+
context: str,
162+
context_type: ContextType,
163+
is_last_round: bool,
164+
*,
165+
query: Optional[str] = None):
166+
if context_type == ContextType.RESPONSE:
167+
return calculate_loss_scale(query, context, self.loss_scale_map)
168+
return super().get_loss_scale(context, context_type, is_last_round)
169+
170+
157171
class AlphaUmiLossScale(REACTLossScale):
158172
loss_scale_config = 'alpha_umi_loss_scale_config.json'
159173

@@ -171,5 +185,6 @@ def get_loss_scale(self, context: str, context_type: ContextType, *args, **kwarg
171185
'alpha_umi': AlphaUmiLossScale(),
172186
'default': LossScale(),
173187
'last_round': LastRoundLossScale(),
188+
'qwen': QwenLossScale(),
174189
'all': TrainAllLossScale(),
175190
}

swift/plugin/tools.py

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
# Copyright (c) Alibaba, Inc. and its affiliates.
2+
import datetime as dt
3+
from dataclasses import dataclass
24
from typing import Dict, List, Optional, Union
35

6+
import json
7+
8+
9+
@dataclass
10+
class AgentKeyword:
11+
action: str = 'Action:'
12+
action_input: str = 'Action Input:'
13+
observation: str = 'Observation:'
14+
415

516
def format_react_en(tool_names, tool_descs):
617
REACT_PROMPT = """Answer the following questions as best as you can. You have access to the following tools:
@@ -18,6 +29,7 @@ def format_react_en(tool_names, tool_descs):
1829
1930
Begin!
2031
"""
32+
tool_descs = [json.dumps(t) if not isinstance(t, str) else t for t in tool_descs]
2133
return REACT_PROMPT.format(tool_list='\n\n'.join(tool_descs), tool_names=','.join(tool_names))
2234

2335

@@ -37,6 +49,7 @@ def format_react_zh(tool_names, tool_descs):
3749
3850
开始!
3951
"""
52+
tool_descs = [json.dumps(t) if not isinstance(t, str) else t for t in tool_descs]
4053
return REACT_ZH_PROMPT.format(tool_list='\n\n'.join(tool_descs), tool_names=','.join(tool_names))
4154

4255

@@ -46,6 +59,7 @@ def format_glm4(tool_names, tool_descs):
4659
# 可用工具
4760
4861
{tool_list}"""
62+
tool_descs = [json.dumps(t) if not isinstance(t, str) else t for t in tool_descs]
4963
tool_list = ''
5064
for name, tool in zip(tool_names, tool_descs):
5165
tool_list += f'## {name}\n\n{tool}\n\n'
@@ -78,28 +92,72 @@ def format_toolbench(tool_names, tool_descs):
7892
use function Finish->give_up_and_restart.
7993
2.Do not use origin tool names, use only subfunctions' names.
8094
Specifically, you have access to the following APIs: {tool_list}"""
95+
tool_descs = [json.dumps(t) if not isinstance(t, str) else t for t in tool_descs]
8196
return TOOLBENCH_PROMPT.format(tool_list='\n\n'.join(tool_descs))
8297

8398

99+
def format_qwen(tool_names, tool_descs):
100+
PROMPT = '''You are a helpful assistant.
101+
102+
当前时间:{date}
103+
104+
# 工具
105+
106+
## 你拥有如下工具:
107+
108+
{tool_list}
109+
110+
## 你可以在回复中插入以下命令以调用这些工具:
111+
112+
{format_list}
113+
'''
114+
# 定义星期映射
115+
weekdays = {0: '星期一', 1: '星期二', 2: '星期三', 3: '星期四', 4: '星期五', 5: '星期六', 6: '星期日'}
116+
now = dt.datetime.now()
117+
year = now.year
118+
month = now.month
119+
day = now.day
120+
weekday = weekdays[now.weekday()]
121+
formatted_date = f'{year}{month:02d}{day:02d}日,{weekday}'
122+
PROMPT = PROMPT.replace('{date}', formatted_date)
123+
tool_list = ''
124+
for name, tool in zip(tool_names, tool_descs):
125+
tool_list += f'### {name} \n{name}: {tool["description"]} 输入参数: {json.dumps(tool["parameters"])}\n'
126+
127+
PROMPT = PROMPT.replace('{tool_list}', tool_list)
128+
129+
format_list = ''
130+
for i, _ in enumerate(tool_names):
131+
format_list += f'✿FUNCTION✿:工具{i+1}的名称\n✿ARGS✿:工具{i + 1}的输入\n✿RESULT✿:工具{i + 1}的结果\n'
132+
PROMPT = PROMPT.replace('{format_list}', format_list)
133+
return PROMPT
134+
135+
84136
def format_custom(tool_names, tool_descs):
85137
PROMPT = '''你是一个人工智能助手。你的任务是针对用户的问题和要求提供适当的答复和支持。
86138
87139
# 可用工具
88140
89141
{tool_list}'''
90142
tool_list = ''
143+
tool_descs = [json.dumps(t) if not isinstance(t, str) else t for t in tool_descs]
91144
for name, tool in zip(tool_names, tool_descs):
92145
tool_list += f'## {name}\n\n{tool}\n\n'
93146
return PROMPT.format(tool_list=tool_list)
94147

95148

96149
# Add your prompt here, use --tools_prompt to train
97150
tools_prompt = {
98-
'react_en': format_react_en,
99-
'react_zh': format_react_zh,
100-
'glm4': format_glm4,
101-
'toolbench': format_toolbench,
102-
'custom': format_custom,
151+
'react_en': (format_react_en, AgentKeyword().__dict__),
152+
'react_zh': (format_react_zh, AgentKeyword().__dict__),
153+
'glm4': (format_glm4, AgentKeyword().__dict__),
154+
'toolbench': (format_toolbench, AgentKeyword().__dict__),
155+
'qwen': (format_qwen, AgentKeyword(
156+
action='✿FUNCTION✿:',
157+
action_input='✿ARGS✿:',
158+
observation='✿RESULT✿:',
159+
).__dict__),
160+
'custom': (format_custom, AgentKeyword().__dict__),
103161
}
104162

105163

@@ -111,10 +169,15 @@ def get_tools_prompt(tools: List[Dict[str, Union[str, Dict]]], prompt_format: st
111169
if isinstance(info, dict) and 'function' in info:
112170
info = info['function']
113171
tool_names.append(info['name'])
114-
tool_descs.append(str(info)) # info: dict
172+
tool_descs.append(info) # info: dict
115173
except KeyError:
116174
print('invalid tools format, please check'
117175
'https://github.com/modelscope/swift/blob/main/docs/source_en/LLM/Agent-deployment-best-practice.md')
118176
return None
119-
prompt_format = tools_prompt.get(prompt_format) or format_toolbench
177+
prompt_format = tools_prompt.get(prompt_format, (None, None))[0] or format_toolbench
120178
return prompt_format(tool_names, tool_descs)
179+
180+
181+
def get_tools_keyword(prompt_format: str = 'react_en') -> Dict[str, str]:
182+
keyword = tools_prompt.get(prompt_format, (None, None))[1] or AgentKeyword().__dict__
183+
return keyword

0 commit comments

Comments (0)