Explaining padding_side left

sergiopaniego · sergiopaniego · commit 0789a31ebd46 · 2025-07-28T17:58:52.000+02:00
diff --git a/notebooks/en/fine_tuning_vlm_grpo_trl.ipynb b/notebooks/en/fine_tuning_vlm_grpo_trl.ipynb
@@ -119,7 +119,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 2,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -170,7 +170,9 @@
         "\n",
         "The system prompt is extracted from DeepSeek R1. Refer to [this previous recipe](https://huggingface.co/learn/cookbook/fine_tuning_llm_grpo_trl) for more details.\n",
         "\n",
-        "We convert the dataset samples into conversation samples, including the system prompt and one image and problem description per sample, since this is how the GRPO trainer expects them."
+        "We convert the dataset samples into conversation samples, including the system prompt and one image and problem description per sample, since this is how the GRPO trainer expects them.\n",
+        "\n",
+        "We also set `padding_side=\"left\"` to ensure that completions generated during training are concatenated directly after the prompt, which is essential for GRPO to correctly compare token-level probabilities across the group of completions sampled for each prompt."
       ],
       "metadata": {
         "id": "6isapXWue91d"
@@ -217,7 +219,7 @@
         "id": "NeXuwO6KkYZi",
         "outputId": "9255ad62-3fa1-4578-85d7-15942ec3bd35"
       },
-      "execution_count": 4,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -249,7 +251,7 @@
         "id": "s48vCAy3e1x0",
         "outputId": "5fa28404-101d-4232-bbde-fdb51a60c0de"
       },
-      "execution_count": 5,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -292,7 +294,7 @@
           "base_uri": "https://localhost:8080/"
         }
       },
-      "execution_count": 6,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -331,7 +333,7 @@
         "id": "EaY8lUYSHyhA",
         "outputId": "e805bb0c-3c6d-4bf4-d1b3-a8ae7190bf2b"
       },
-      "execution_count": 7,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -422,7 +424,7 @@
           ]
         }
       },
-      "execution_count": 8,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -453,7 +455,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 9,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -517,7 +519,7 @@
       "metadata": {
         "id": "BE7ZgN_sDPNg"
       },
-      "execution_count": 10,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -584,7 +586,7 @@
       "metadata": {
         "id": "P3VIGZL4FLxA"
       },
-      "execution_count": 11,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -602,7 +604,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 12,
+      "execution_count": null,
       "metadata": {
         "id": "SbqX1pQUKaSM"
       },
@@ -659,7 +661,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 14,
+      "execution_count": null,
       "metadata": {
         "id": "k_jk-U7ULYtA",
         "outputId": "c42b4d97-9692-48d3-f03a-b1eaf5687559",
@@ -710,7 +712,7 @@
           "height": 1000
         }
       },
-      "execution_count": 15,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -894,7 +896,7 @@
         },
         "outputId": "bc87c9ad-fe94-438d-a9b2-085f6b46a11b"
       },
-      "execution_count": 16,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -940,7 +942,7 @@
       "metadata": {
         "id": "N7ZzgFrWFVgL"
       },
-      "execution_count": 17,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -1029,7 +1031,7 @@
         "id": "9qxxduOlEOv2",
         "outputId": "87878eb2-8ce4-4c8c-d115-11c3a6d1a0cb"
       },
-      "execution_count": 18,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -1171,7 +1173,7 @@
       "metadata": {
         "id": "h8pjRAXBFheQ"
       },
-      "execution_count": 19,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -1196,7 +1198,7 @@
         "id": "4cfWEyRxF4T0",
         "outputId": "a2e937d5-ad6a-454d-caa1-596fd25d4ed0"
       },
-      "execution_count": 20,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1245,7 +1247,7 @@
         "id": "MiKf8SNFGNWe",
         "outputId": "05a3c6f2-7cc0-46a6-a51b-a646615450a2"
       },
-      "execution_count": 21,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1284,7 +1286,7 @@
         "id": "svQYQTZ4KA-f",
         "outputId": "d2401a51-a70c-4add-e4a0-e4ac4dbd8723"
       },
-      "execution_count": 22,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",