2 files changed: +4 −10 lines changed

@@ -146,14 +146,6 @@ class DPOModelArgument:
         default=None,
         metadata={"help": "whether to fuse first up and gate proj in mlp block"},
     )
-    use_sparse_head_and_loss_fn: bool = field(
-        default=True,
-        metadata={"help": "Whether to use sparse indexing for loss calculation."},
-    )
-    use_fused_head_and_loss_fn: bool = field(
-        default=True,
-        metadata={"help": "Whether to use fused kernel to calculate lm head and loss."},
-    )
     use_attn_mask_startend_row_indices: bool = field(
         default=True,
         metadata={"help": "Sparse attention mode."},

@@ -152,6 +152,8 @@ def main():
         model_args.model_name_or_path,
         dtype=dtype,
     )
+    ref_model_config._attn_implementation = model_args.attn_impl
+
     LlmMetaConfig.set_llm_config(ref_model_config, training_args)

     if training_args.pipeline_parallel_degree > 1:
@@ -309,8 +311,8 @@ def main():
             collate_fn,
             tokenizer=tokenizer,
             max_seq_len=max_seq_len,
-            use_sparse_head_and_loss_fn=model_args.use_sparse_head_and_loss_fn,
-            use_fused_head_and_loss_fn=model_args.use_fused_head_and_loss_fn,
+            use_sparse_head_and_loss_fn=model_config.use_sparse_head_and_loss_fn,
+            use_fused_head_and_loss_fn=model_config.use_fused_head_and_loss_fn,
         ),
         ignore_eos_token=True,
     )