Skip to content

Commit 225724c

Browse files
Fix the conflict between agent and CT (#379)
1 parent 0248e8a commit 225724c

File tree

5 files changed

+14
-5
lines changed

5 files changed

+14
-5
lines changed

docs/source/LLM/命令行参数.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
- `--gpu_memory_fraction`: 默认为None. 该参数旨在指定显卡最大可用显存比例的情况下运行训练,用于极限测试.
9797
- `--train_dataset_mix_ratio`: 默认为0. 该参数定义了如何进行数据集打混训练. 指定该参数时, 训练集会以`train_dataset_mix_ratio`倍数混合`train_dataset_mix_ds`指定的通用知识数据集, 使整体数据集长度达到`train_dataset_sample`.
9898
- `--train_dataset_mix_ds`: 默认为`ms-bench`. 用于防止知识遗忘的通用知识数据集.
99+
- `--use_loss_scale`: 默认为True. 生效时会将Agent的部分字段(Action/Action Input部分)的loss权重加强以强化CoT, 对普通SFT场景没有任何效果.
99100

100101
### AdaLoRA微调参数
101102

swift/llm/agent/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ def split_agent_parts_by(text: str, delimiters: List[str]):
5656
return text_list
5757

5858

59-
def calculate_loss_scale(response: str) -> Tuple[List[str], List[float]]:
59+
def calculate_loss_scale(response: str,
60+
use_loss_scale=True) -> Tuple[List[str], List[float]]:
6061
"""Calculate the loss scale by splitting the agent response.
6162
6263
This algorithm comes from paper: https://arxiv.org/pdf/2309.00986.pdf
@@ -76,11 +77,12 @@ def calculate_loss_scale(response: str) -> Tuple[List[str], List[float]]:
7677
7778
Args:
7879
response: The response text
80+
use_loss_scale: Use weighted loss. With this, some part of the loss will be enhanced to improve performance.
7981
8082
Returns:
8183
A tuple of agent response parts and their weights.
8284
"""
83-
if 'Action:' in response and 'Thought:' in response:
85+
if 'Action:' in response and 'Observation:' in response and use_loss_scale:
8486
agent_keyword = [
8587
'Action:', 'Action Input:', 'Thought:', 'Final Answer:',
8688
'Observation:'

swift/llm/sft.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ def llm_sft(args: SftArguments) -> Dict[str, Union[str, Any]]:
134134
use_model = template_info.get('use_model', False)
135135
if use_model:
136136
template_kwargs['model'] = model
137+
template_kwargs['use_loss_scale'] = args.use_loss_scale
137138
template: Template = get_template(args.template_type, tokenizer,
138139
args.system, args.max_length,
139140
args.truncation_strategy,

swift/llm/utils/argument.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ class SftArguments:
7070
train_dataset_mix_ds: List[str] = field(
7171
default_factory=lambda: ['ms-bench'])
7272
val_dataset_sample: Optional[int] = None # -1: all dataset
73+
use_loss_scale: Optional[bool] = True
7374
system: Optional[str] = None
7475
max_length: int = 2048 # -1: no limit
7576
truncation_strategy: Literal['delete', 'truncation_left'] = 'delete'

swift/llm/utils/template.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ def _init_template(self,
177177
self.max_length = max_length
178178
self.truncation_strategy = truncation_strategy
179179
self.model = kwargs.get('model', None)
180+
self.use_loss_scale = kwargs.get('use_loss_scale', True)
180181
for key in [
181182
'prefix', 'prompt', 'chat_sep', 'suffix', 'prefix_has_system'
182183
]:
@@ -207,6 +208,8 @@ def encode(
207208
system = None
208209
else:
209210
assert self.prefix_has_system is not None, 'The template does not support `system`.'
211+
if query is None:
212+
query = ''
210213
inputs, tokenizer_kwargs = self._encode(query, response, history,
211214
system,
212215
self.truncation_strategy)
@@ -233,7 +236,8 @@ def _concat_context_list(
233236
if isinstance(context, str):
234237
if '{{RESPONSE}}' == context:
235238
assert response is not None
236-
content_part, weight_part = calculate_loss_scale(response)
239+
content_part, weight_part = calculate_loss_scale(
240+
response, self.use_loss_scale)
237241
res_context_list.extend(content_part)
238242
compute_loss_idx.extend(weight_part)
239243
continue
@@ -330,7 +334,7 @@ def _encode(
330334
# last response
331335
context_list.append('{{RESPONSE}}')
332336
context_list += self.suffix
333-
if q is not None:
337+
if q or r:
334338
self._concat_context_list(
335339
context_list,
336340
res_context_list,
@@ -457,7 +461,7 @@ def register_template(template_type: str,
457461
class DefaultGenerationTemplate(Template):
458462

459463
def __init__(self):
460-
return super().__init__([], ['{{QUERY}}'], None, [['eos_token_id']])
464+
super().__init__([], ['{{QUERY}}'], None, [['eos_token_id']])
461465

462466

463467
register_template(TemplateType.default_generation, DefaultGenerationTemplate())

0 commit comments

Comments
 (0)