|
119 | 119 | }, |
120 | 120 | { |
121 | 121 | "cell_type": "code", |
122 | | - "execution_count": 2, |
| 122 | + "execution_count": null, |
123 | 123 | "metadata": { |
124 | 124 | "colab": { |
125 | 125 | "base_uri": "https://localhost:8080/" |
|
170 | 170 | "\n", |
171 | 171 | "The system prompt is extracted from DeepSeek R1. Refer to [this previous recipe](https://huggingface.co/learn/cookbook/fine_tuning_llm_grpo_trl) for more details.\n", |
172 | 172 | "\n", |
173 | | - "We convert the dataset samples into conversation samples, including the system prompt and one image and problem description per sample, since this is how the GRPO trainer expects them." |
| 173 | + "We convert the dataset samples into conversation samples, including the system prompt and one image and problem description per sample, since this is how the GRPO trainer expects them.\n", |
| 174 | + "\n", |
| 175 | + "We also set `padding_side=\"left\"` to ensure that generated completions during training are concatenated directly after the prompt, which is essential for GRPO to correctly compute token-level probabilities for the group of sampled completions it compares for each prompt." |
174 | 176 | ], |
175 | 177 | "metadata": { |
176 | 178 | "id": "6isapXWue91d" |
|
217 | 219 | "id": "NeXuwO6KkYZi", |
218 | 220 | "outputId": "9255ad62-3fa1-4578-85d7-15942ec3bd35" |
219 | 221 | }, |
220 | | - "execution_count": 4, |
| 222 | + "execution_count": null, |
221 | 223 | "outputs": [ |
222 | 224 | { |
223 | 225 | "output_type": "stream", |
|
249 | 251 | "id": "s48vCAy3e1x0", |
250 | 252 | "outputId": "5fa28404-101d-4232-bbde-fdb51a60c0de" |
251 | 253 | }, |
252 | | - "execution_count": 5, |
| 254 | + "execution_count": null, |
253 | 255 | "outputs": [ |
254 | 256 | { |
255 | 257 | "output_type": "stream", |
|
292 | 294 | "base_uri": "https://localhost:8080/" |
293 | 295 | } |
294 | 296 | }, |
295 | | - "execution_count": 6, |
| 297 | + "execution_count": null, |
296 | 298 | "outputs": [ |
297 | 299 | { |
298 | 300 | "output_type": "execute_result", |
|
331 | 333 | "id": "EaY8lUYSHyhA", |
332 | 334 | "outputId": "e805bb0c-3c6d-4bf4-d1b3-a8ae7190bf2b" |
333 | 335 | }, |
334 | | - "execution_count": 7, |
| 336 | + "execution_count": null, |
335 | 337 | "outputs": [ |
336 | 338 | { |
337 | 339 | "output_type": "stream", |
|
422 | 424 | ] |
423 | 425 | } |
424 | 426 | }, |
425 | | - "execution_count": 8, |
| 427 | + "execution_count": null, |
426 | 428 | "outputs": [ |
427 | 429 | { |
428 | 430 | "output_type": "display_data", |
|
453 | 455 | }, |
454 | 456 | { |
455 | 457 | "cell_type": "code", |
456 | | - "execution_count": 9, |
| 458 | + "execution_count": null, |
457 | 459 | "metadata": { |
458 | 460 | "colab": { |
459 | 461 | "base_uri": "https://localhost:8080/" |
|
517 | 519 | "metadata": { |
518 | 520 | "id": "BE7ZgN_sDPNg" |
519 | 521 | }, |
520 | | - "execution_count": 10, |
| 522 | + "execution_count": null, |
521 | 523 | "outputs": [] |
522 | 524 | }, |
523 | 525 | { |
|
584 | 586 | "metadata": { |
585 | 587 | "id": "P3VIGZL4FLxA" |
586 | 588 | }, |
587 | | - "execution_count": 11, |
| 589 | + "execution_count": null, |
588 | 590 | "outputs": [] |
589 | 591 | }, |
590 | 592 | { |
|
602 | 604 | }, |
603 | 605 | { |
604 | 606 | "cell_type": "code", |
605 | | - "execution_count": 12, |
| 607 | + "execution_count": null, |
606 | 608 | "metadata": { |
607 | 609 | "id": "SbqX1pQUKaSM" |
608 | 610 | }, |
|
659 | 661 | }, |
660 | 662 | { |
661 | 663 | "cell_type": "code", |
662 | | - "execution_count": 14, |
| 664 | + "execution_count": null, |
663 | 665 | "metadata": { |
664 | 666 | "id": "k_jk-U7ULYtA", |
665 | 667 | "outputId": "c42b4d97-9692-48d3-f03a-b1eaf5687559", |
|
710 | 712 | "height": 1000 |
711 | 713 | } |
712 | 714 | }, |
713 | | - "execution_count": 15, |
| 715 | + "execution_count": null, |
714 | 716 | "outputs": [ |
715 | 717 | { |
716 | 718 | "output_type": "display_data", |
|
894 | 896 | }, |
895 | 897 | "outputId": "bc87c9ad-fe94-438d-a9b2-085f6b46a11b" |
896 | 898 | }, |
897 | | - "execution_count": 16, |
| 899 | + "execution_count": null, |
898 | 900 | "outputs": [ |
899 | 901 | { |
900 | 902 | "output_type": "stream", |
|
940 | 942 | "metadata": { |
941 | 943 | "id": "N7ZzgFrWFVgL" |
942 | 944 | }, |
943 | | - "execution_count": 17, |
| 945 | + "execution_count": null, |
944 | 946 | "outputs": [] |
945 | 947 | }, |
946 | 948 | { |
|
1029 | 1031 | "id": "9qxxduOlEOv2", |
1030 | 1032 | "outputId": "87878eb2-8ce4-4c8c-d115-11c3a6d1a0cb" |
1031 | 1033 | }, |
1032 | | - "execution_count": 18, |
| 1034 | + "execution_count": null, |
1033 | 1035 | "outputs": [ |
1034 | 1036 | { |
1035 | 1037 | "output_type": "display_data", |
|
1171 | 1173 | "metadata": { |
1172 | 1174 | "id": "h8pjRAXBFheQ" |
1173 | 1175 | }, |
1174 | | - "execution_count": 19, |
| 1176 | + "execution_count": null, |
1175 | 1177 | "outputs": [] |
1176 | 1178 | }, |
1177 | 1179 | { |
|
1196 | 1198 | "id": "4cfWEyRxF4T0", |
1197 | 1199 | "outputId": "a2e937d5-ad6a-454d-caa1-596fd25d4ed0" |
1198 | 1200 | }, |
1199 | | - "execution_count": 20, |
| 1201 | + "execution_count": null, |
1200 | 1202 | "outputs": [ |
1201 | 1203 | { |
1202 | 1204 | "output_type": "stream", |
|
1245 | 1247 | "id": "MiKf8SNFGNWe", |
1246 | 1248 | "outputId": "05a3c6f2-7cc0-46a6-a51b-a646615450a2" |
1247 | 1249 | }, |
1248 | | - "execution_count": 21, |
| 1250 | + "execution_count": null, |
1249 | 1251 | "outputs": [ |
1250 | 1252 | { |
1251 | 1253 | "output_type": "execute_result", |
|
1284 | 1286 | "id": "svQYQTZ4KA-f", |
1285 | 1287 | "outputId": "d2401a51-a70c-4add-e4a0-e4ac4dbd8723" |
1286 | 1288 | }, |
1287 | | - "execution_count": 22, |
| 1289 | + "execution_count": null, |
1288 | 1290 | "outputs": [ |
1289 | 1291 | { |
1290 | 1292 | "output_type": "stream", |
|
0 commit comments