unslothai · danielhanchen · Feb 26, 2026 · gemini-code-assist · Feb 26, 2026
diff --git a/nb/Qwen2_5_7B_VL_GRPO.ipynb b/nb/Qwen2_5_7B_VL_GRPO.ipynb
@@ -1106,8 +1106,19 @@
       "source": [
         "from unsloth_zoo.utils import Version\n",
         "\n",
-        "# Only apply chat template for TRL < 0.24.0, otherwise TRL handles it\n",
-        "if Version(\"trl\") < Version(\"0.24.0\"):\n",
+        "# Apply chat template whenever prompts are still structured messages.\n",
+        "# This keeps multimodal placeholder handling consistent across TRL versions.\n",
+        "if len(train_dataset) != 0 and isinstance(train_dataset[0][\"prompt\"], list):\n",
+        "    train_dataset = train_dataset.map(\n",
+        "        lambda example: {\n",
+        "            \"prompt\": tokenizer.apply_chat_template(\n",
+        "                example[\"prompt\"],\n",
+        "                tokenize = False,\n",
+        "                add_generation_prompt = True, # Append the assistant turn header so generation starts there\n",
+        "            )\n",
+        "        }\n",
+        "    )\n",
+        "elif Version(\"trl\") < Version(\"0.24.0\"):\n",
         "    train_dataset = train_dataset.map(\n",
         "        lambda example: {\n",
         "            \"prompt\": tokenizer.apply_chat_template(\n",