Skip to content

Commit 0789a31

Browse files
committed
Explaining padding_side left
1 parent ea01824 commit 0789a31

File tree

1 file changed

+22
-20
lines changed

1 file changed

+22
-20
lines changed

notebooks/en/fine_tuning_vlm_grpo_trl.ipynb

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@
119119
},
120120
{
121121
"cell_type": "code",
122-
"execution_count": 2,
122+
"execution_count": null,
123123
"metadata": {
124124
"colab": {
125125
"base_uri": "https://localhost:8080/"
@@ -170,7 +170,9 @@
170170
"\n",
171171
"The system prompt is extracted from DeepSeek R1. Refer to [this previous recipe](https://huggingface.co/learn/cookbook/fine_tuning_llm_grpo_trl) for more details.\n",
172172
"\n",
173-
"We convert the dataset samples into conversation samples, including the system prompt and one image and problem description per sample, since this is how the GRPO trainer expects them."
173+
"We convert the dataset samples into conversation samples, including the system prompt and one image and problem description per sample, since this is how the GRPO trainer expects them.\n",
174+
"\n",
175+
"We also set `padding_side=\"left\"` to ensure that generated completions during trainig are concatenated directly after the prompt, which is essential for GRPO to correctly compare token-level probabilities between preferred and rejected responses."
174176
],
175177
"metadata": {
176178
"id": "6isapXWue91d"
@@ -217,7 +219,7 @@
217219
"id": "NeXuwO6KkYZi",
218220
"outputId": "9255ad62-3fa1-4578-85d7-15942ec3bd35"
219221
},
220-
"execution_count": 4,
222+
"execution_count": null,
221223
"outputs": [
222224
{
223225
"output_type": "stream",
@@ -249,7 +251,7 @@
249251
"id": "s48vCAy3e1x0",
250252
"outputId": "5fa28404-101d-4232-bbde-fdb51a60c0de"
251253
},
252-
"execution_count": 5,
254+
"execution_count": null,
253255
"outputs": [
254256
{
255257
"output_type": "stream",
@@ -292,7 +294,7 @@
292294
"base_uri": "https://localhost:8080/"
293295
}
294296
},
295-
"execution_count": 6,
297+
"execution_count": null,
296298
"outputs": [
297299
{
298300
"output_type": "execute_result",
@@ -331,7 +333,7 @@
331333
"id": "EaY8lUYSHyhA",
332334
"outputId": "e805bb0c-3c6d-4bf4-d1b3-a8ae7190bf2b"
333335
},
334-
"execution_count": 7,
336+
"execution_count": null,
335337
"outputs": [
336338
{
337339
"output_type": "stream",
@@ -422,7 +424,7 @@
422424
]
423425
}
424426
},
425-
"execution_count": 8,
427+
"execution_count": null,
426428
"outputs": [
427429
{
428430
"output_type": "display_data",
@@ -453,7 +455,7 @@
453455
},
454456
{
455457
"cell_type": "code",
456-
"execution_count": 9,
458+
"execution_count": null,
457459
"metadata": {
458460
"colab": {
459461
"base_uri": "https://localhost:8080/"
@@ -517,7 +519,7 @@
517519
"metadata": {
518520
"id": "BE7ZgN_sDPNg"
519521
},
520-
"execution_count": 10,
522+
"execution_count": null,
521523
"outputs": []
522524
},
523525
{
@@ -584,7 +586,7 @@
584586
"metadata": {
585587
"id": "P3VIGZL4FLxA"
586588
},
587-
"execution_count": 11,
589+
"execution_count": null,
588590
"outputs": []
589591
},
590592
{
@@ -602,7 +604,7 @@
602604
},
603605
{
604606
"cell_type": "code",
605-
"execution_count": 12,
607+
"execution_count": null,
606608
"metadata": {
607609
"id": "SbqX1pQUKaSM"
608610
},
@@ -659,7 +661,7 @@
659661
},
660662
{
661663
"cell_type": "code",
662-
"execution_count": 14,
664+
"execution_count": null,
663665
"metadata": {
664666
"id": "k_jk-U7ULYtA",
665667
"outputId": "c42b4d97-9692-48d3-f03a-b1eaf5687559",
@@ -710,7 +712,7 @@
710712
"height": 1000
711713
}
712714
},
713-
"execution_count": 15,
715+
"execution_count": null,
714716
"outputs": [
715717
{
716718
"output_type": "display_data",
@@ -894,7 +896,7 @@
894896
},
895897
"outputId": "bc87c9ad-fe94-438d-a9b2-085f6b46a11b"
896898
},
897-
"execution_count": 16,
899+
"execution_count": null,
898900
"outputs": [
899901
{
900902
"output_type": "stream",
@@ -940,7 +942,7 @@
940942
"metadata": {
941943
"id": "N7ZzgFrWFVgL"
942944
},
943-
"execution_count": 17,
945+
"execution_count": null,
944946
"outputs": []
945947
},
946948
{
@@ -1029,7 +1031,7 @@
10291031
"id": "9qxxduOlEOv2",
10301032
"outputId": "87878eb2-8ce4-4c8c-d115-11c3a6d1a0cb"
10311033
},
1032-
"execution_count": 18,
1034+
"execution_count": null,
10331035
"outputs": [
10341036
{
10351037
"output_type": "display_data",
@@ -1171,7 +1173,7 @@
11711173
"metadata": {
11721174
"id": "h8pjRAXBFheQ"
11731175
},
1174-
"execution_count": 19,
1176+
"execution_count": null,
11751177
"outputs": []
11761178
},
11771179
{
@@ -1196,7 +1198,7 @@
11961198
"id": "4cfWEyRxF4T0",
11971199
"outputId": "a2e937d5-ad6a-454d-caa1-596fd25d4ed0"
11981200
},
1199-
"execution_count": 20,
1201+
"execution_count": null,
12001202
"outputs": [
12011203
{
12021204
"output_type": "stream",
@@ -1245,7 +1247,7 @@
12451247
"id": "MiKf8SNFGNWe",
12461248
"outputId": "05a3c6f2-7cc0-46a6-a51b-a646615450a2"
12471249
},
1248-
"execution_count": 21,
1250+
"execution_count": null,
12491251
"outputs": [
12501252
{
12511253
"output_type": "execute_result",
@@ -1284,7 +1286,7 @@
12841286
"id": "svQYQTZ4KA-f",
12851287
"outputId": "d2401a51-a70c-4add-e4a0-e4ac4dbd8723"
12861288
},
1287-
"execution_count": 22,
1289+
"execution_count": null,
12881290
"outputs": [
12891291
{
12901292
"output_type": "stream",

0 commit comments

Comments
 (0)