Skip to content

Commit 5d87095

Browse files
Fix prompt-completion labeling with add_generation_prompt and warning (#4201)
Co-authored-by: behroozazarkhalili <ermiaazarkhalili>
Co-authored-by: Quentin Gallouédec <[email protected]>
Co-authored-by: Quentin Gallouédec <[email protected]>
1 parent 8265800 commit 5d87095

File tree

1 file changed

+10
-9
lines changed

1 file changed

+10
-9
lines changed

trl/trainer/sft_trainer.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -937,6 +937,7 @@ def tokenize_fn(example, processing_class, dataset_text_field, assistant_only_lo
937937
prompt_ids = processing_class.apply_chat_template(
938938
example["prompt"],
939939
tokenize=True,
940+
add_generation_prompt=True,
940941
tools=example.get("tools"),
941942
**example.get("chat_template_kwargs", {}),
942943
)
@@ -974,7 +975,7 @@ def tokenize_fn(example, processing_class, dataset_text_field, assistant_only_lo
974975
"token handling. Verify that the tokenizer is processing text consistently."
975976
)
976977

977-
# Create a completion mask
978+
# Create completion mask
978979
completion_mask = [0] * len(prompt_ids) + [1] * (len(prompt_completion_ids) - len(prompt_ids))
979980
output["input_ids"] = prompt_completion_ids
980981
output["completion_mask"] = completion_mask
@@ -994,17 +995,17 @@ def tokenize_fn(example, processing_class, dataset_text_field, assistant_only_lo
994995
# Fix transformers inconsistency: for VLMs, apply_chat_template returns lists of lists
995996
# even for single examples, while for LLMs it returns lists of ints.
996997
processed = {k: v[0] if isinstance(v[0], list) else v for k, v in processed.items()}
997-
if "assistant_masks" in processed and 1 not in processed["assistant_masks"]:
998-
raise RuntimeError(
999-
"You're using `assistant_only_loss=True`, but at least one example has no "
1000-
"assistant tokens. This usually means the tokenizer's chat template doesn't "
1001-
"generate assistant masks — it may be missing the `{% generation %}` keyword. Please "
1002-
"check the template and ensure it's correctly configured to support assistant "
1003-
"masking."
1004-
)
1005998
output = {k: processed[k] for k in ("input_ids", "assistant_masks") if k in processed}
1006999
else:
10071000
output = {"input_ids": processing_class(text=example[dataset_text_field])["input_ids"]}
1001+
1002+
if "assistant_masks" in output and 1 not in output["assistant_masks"]:
1003+
raise RuntimeError(
1004+
"You're using `assistant_only_loss=True`, but at least one example has no assistant "
1005+
"tokens. This usually means the tokenizer's chat template doesn't generate assistant "
1006+
"masks — it may be missing the `{% generation %}` keyword. Please check the template and "
1007+
"ensure it's correctly configured to support assistant masking."
1008+
)
10081009
return output
10091010

10101011
dataset = dataset.map(

0 commit comments

Comments (0)