|
12 | 12 | }, |
13 | 13 | { |
14 | 14 | "cell_type": "code", |
15 | | - "execution_count": 5, |
| 15 | + "execution_count": 2, |
16 | 16 | "id": "a8613c94-93ef-4eeb-a916-06728100a352", |
17 | 17 | "metadata": {}, |
18 | 18 | "outputs": [], |
|
22 | 22 | }, |
23 | 23 | { |
24 | 24 | "cell_type": "code", |
25 | | - "execution_count": 6, |
| 25 | + "execution_count": 2, |
26 | 26 | "id": "9ddb39eb-e50b-4678-8fb0-d65ab8269e0a", |
27 | 27 | "metadata": {}, |
28 | 28 | "outputs": [ |
|
49 | 49 | "└──────────┘" |
50 | 50 | ] |
51 | 51 | }, |
52 | | - "execution_count": 6, |
| 52 | + "execution_count": 2, |
53 | 53 | "metadata": {}, |
54 | 54 | "output_type": "execute_result" |
55 | 55 | } |
|
62 | 62 | }, |
63 | 63 | { |
64 | 64 | "cell_type": "code", |
65 | | - "execution_count": 2, |
| 65 | + "execution_count": 3, |
66 | 66 | "id": "32217883-4097-4ddf-a5df-cdc838e1b5f8", |
67 | 67 | "metadata": {}, |
68 | 68 | "outputs": [ |
|
89 | 89 | "└────────────┴──────────┘" |
90 | 90 | ] |
91 | 91 | }, |
92 | | - "execution_count": 2, |
| 92 | + "execution_count": 3, |
93 | 93 | "metadata": {}, |
94 | 94 | "output_type": "execute_result" |
95 | 95 | } |
|
409 | 409 | "source": [ |
410 | 410 | "math_students = pl.read_parquet(\"maths.parquet\")\n", |
411 | 411 | "\n", |
412 | | - "(math_students.group_by(\"age\").agg(passes=pl.col(\"G3\")))" |
| 412 | + "(\n", |
| 413 | + " math_students.group_by(\"age\").agg(\n", |
| 414 | + " passes=pl.col(\"G3\"),\n", |
| 415 | + " )\n", |
| 416 | + ")" |
413 | 417 | ] |
414 | 418 | }, |
415 | 419 | { |
416 | 420 | "cell_type": "code", |
417 | | - "execution_count": 15, |
| 421 | + "execution_count": 6, |
418 | 422 | "id": "123401a9-20f4-40ca-b32a-a48850d1cd23", |
419 | 423 | "metadata": {}, |
420 | 424 | "outputs": [ |
|
428 | 432 | " white-space: pre-wrap;\n", |
429 | 433 | "}\n", |
430 | 434 | "</style>\n", |
431 | | - "<small>shape: (8, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>age</th><th>passes</th></tr><tr><td>i64</td><td>list[i64]</td></tr></thead><tbody><tr><td>22</td><td>[]</td></tr><tr><td>19</td><td>[13, 13, … 13]</td></tr><tr><td>15</td><td>[15, 13, … 15]</td></tr><tr><td>17</td><td>[13, 18, … 15]</td></tr><tr><td>21</td><td>[]</td></tr><tr><td>18</td><td>[14, 18, … 13]</td></tr><tr><td>20</td><td>[]</td></tr><tr><td>16</td><td>[15, 14, … 18]</td></tr></tbody></table></div>" |
| 435 | + "<small>shape: (8, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>age</th><th>passes</th></tr><tr><td>i64</td><td>list[i64]</td></tr></thead><tbody><tr><td>16</td><td>[15, 14, … 18]</td></tr><tr><td>19</td><td>[13, 13, … 13]</td></tr><tr><td>21</td><td>[]</td></tr><tr><td>18</td><td>[14, 18, … 13]</td></tr><tr><td>20</td><td>[]</td></tr><tr><td>22</td><td>[]</td></tr><tr><td>15</td><td>[15, 13, … 15]</td></tr><tr><td>17</td><td>[13, 18, … 15]</td></tr></tbody></table></div>" |
432 | 436 | ], |
433 | 437 | "text/plain": [ |
434 | 438 | "shape: (8, 2)\n", |
|
437 | 441 | "│ --- ┆ --- │\n", |
438 | 442 | "│ i64 ┆ list[i64] │\n", |
439 | 443 | "╞═════╪════════════════╡\n", |
440 | | - "│ 22 ┆ [] │\n", |
| 444 | + "│ 16 ┆ [15, 14, … 18] │\n", |
441 | 445 | "│ 19 ┆ [13, 13, … 13] │\n", |
442 | | - "│ 15 ┆ [15, 13, … 15] │\n", |
443 | | - "│ 17 ┆ [13, 18, … 15] │\n", |
444 | 446 | "│ 21 ┆ [] │\n", |
445 | 447 | "│ 18 ┆ [14, 18, … 13] │\n", |
446 | 448 | "│ 20 ┆ [] │\n", |
447 | | - "│ 16 ┆ [15, 14, … 18] │\n", |
| 449 | + "│ 22 ┆ [] │\n", |
| 450 | + "│ 15 ┆ [15, 13, … 15] │\n", |
| 451 | + "│ 17 ┆ [13, 18, … 15] │\n", |
448 | 452 | "└─────┴────────────────┘" |
449 | 453 | ] |
450 | 454 | }, |
451 | | - "execution_count": 15, |
| 455 | + "execution_count": 6, |
452 | 456 | "metadata": {}, |
453 | 457 | "output_type": "execute_result" |
454 | 458 | } |
|
459 | 463 | "(\n", |
460 | 464 | " math_students.group_by(\"age\").agg(\n", |
461 | 465 | " passes=pl.col(\"G3\").filter(\n", |
462 | | - " pl.col(\"absences\") > pl.col(\"absences\").mean(), pl.col(\"G3\") > 12\n", |
| 466 | + " pl.col(\"absences\") > pl.col(\"absences\").mean(), pl.col(\"G3\") >= 13\n", |
463 | 467 | " )\n", |
464 | 468 | " )\n", |
465 | 469 | ")" |
466 | 470 | ] |
467 | 471 | }, |
468 | 472 | { |
469 | 473 | "cell_type": "code", |
470 | | - "execution_count": 18, |
471 | | - "id": "6e9e82f9-cb64-47d1-ac31-bac558a7a711", |
| 474 | + "execution_count": 5, |
| 475 | + "id": "02146ea3-03f1-4150-ad50-e1e6f610db34", |
472 | 476 | "metadata": {}, |
473 | 477 | "outputs": [ |
474 | 478 | { |
|
481 | 485 | " white-space: pre-wrap;\n", |
482 | 486 | "}\n", |
483 | 487 | "</style>\n", |
484 | | - "<small>shape: (8, 4)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>age</th><th>passes</th><th>total_students</th><th>percentage</th></tr><tr><td>i64</td><td>u32</td><td>u32</td><td>f64</td></tr></thead><tbody><tr><td>15</td><td>15</td><td>82</td><td>18.292683</td></tr><tr><td>16</td><td>11</td><td>104</td><td>10.576923</td></tr><tr><td>17</td><td>8</td><td>98</td><td>8.163265</td></tr><tr><td>18</td><td>11</td><td>82</td><td>13.414634</td></tr><tr><td>19</td><td>4</td><td>24</td><td>16.666667</td></tr><tr><td>20</td><td>0</td><td>3</td><td>0.0</td></tr><tr><td>21</td><td>0</td><td>1</td><td>0.0</td></tr><tr><td>22</td><td>0</td><td>1</td><td>0.0</td></tr></tbody></table></div>" |
| 488 | + "<small>shape: (8, 4)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>age</th><th>passes</th><th>poor_attenders</th><th>percentage</th></tr><tr><td>i64</td><td>u32</td><td>u32</td><td>f64</td></tr></thead><tbody><tr><td>15</td><td>15</td><td>32</td><td>46.875</td></tr><tr><td>16</td><td>11</td><td>39</td><td>28.205128</td></tr><tr><td>17</td><td>8</td><td>29</td><td>27.586207</td></tr><tr><td>18</td><td>11</td><td>31</td><td>35.483871</td></tr><tr><td>19</td><td>4</td><td>10</td><td>40.0</td></tr><tr><td>20</td><td>0</td><td>1</td><td>0.0</td></tr><tr><td>21</td><td>0</td><td>0</td><td>0.0</td></tr><tr><td>22</td><td>0</td><td>0</td><td>0.0</td></tr></tbody></table></div>" |
485 | 489 | ], |
486 | 490 | "text/plain": [ |
487 | 491 | "shape: (8, 4)\n", |
488 | 492 | "┌─────┬────────┬────────────────┬────────────┐\n", |
489 | | - "│ age ┆ passes ┆ total_students ┆ percentage │\n", |
| 493 | + "│ age ┆ passes ┆ poor_attenders ┆ percentage │\n", |
490 | 494 | "│ --- ┆ --- ┆ --- ┆ --- │\n", |
491 | 495 | "│ i64 ┆ u32 ┆ u32 ┆ f64 │\n", |
492 | 496 | "╞═════╪════════╪════════════════╪════════════╡\n", |
493 | | - "│ 15 ┆ 15 ┆ 82 ┆ 18.292683 │\n", |
494 | | - "│ 16 ┆ 11 ┆ 104 ┆ 10.576923 │\n", |
495 | | - "│ 17 ┆ 8 ┆ 98 ┆ 8.163265 │\n", |
496 | | - "│ 18 ┆ 11 ┆ 82 ┆ 13.414634 │\n", |
497 | | - "│ 19 ┆ 4 ┆ 24 ┆ 16.666667 │\n", |
498 | | - "│ 20 ┆ 0 ┆ 3 ┆ 0.0 │\n", |
499 | | - "│ 21 ┆ 0 ┆ 1 ┆ 0.0 │\n", |
500 | | - "│ 22 ┆ 0 ┆ 1 ┆ 0.0 │\n", |
| 497 | + "│ 15 ┆ 15 ┆ 32 ┆ 46.875 │\n", |
| 498 | + "│ 16 ┆ 11 ┆ 39 ┆ 28.205128 │\n", |
| 499 | + "│ 17 ┆ 8 ┆ 29 ┆ 27.586207 │\n", |
| 500 | + "│ 18 ┆ 11 ┆ 31 ┆ 35.483871 │\n", |
| 501 | + "│ 19 ┆ 4 ┆ 10 ┆ 40.0 │\n", |
| 502 | + "│ 20 ┆ 0 ┆ 1 ┆ 0.0 │\n", |
| 503 | + "│ 21 ┆ 0 ┆ 0 ┆ 0.0 │\n", |
| 504 | + "│ 22 ┆ 0 ┆ 0 ┆ 0.0 │\n", |
501 | 505 | "└─────┴────────┴────────────────┴────────────┘" |
502 | 506 | ] |
503 | 507 | }, |
504 | | - "execution_count": 18, |
| 508 | + "execution_count": 5, |
505 | 509 | "metadata": {}, |
506 | 510 | "output_type": "execute_result" |
507 | 511 | } |
|
514 | 518 | " .agg(\n", |
515 | 519 | " passes=pl.col(\"G3\")\n", |
516 | 520 | " .filter(\n", |
517 | | - " pl.col(\"absences\") > pl.col(\"absences\").mean(), pl.col(\"G3\") > 12\n", |
| 521 | + " pl.col(\"absences\") > pl.col(\"absences\").mean(), pl.col(\"G3\") >= 13\n", |
518 | 522 | " )\n", |
519 | 523 | " .count(),\n", |
520 | | - " total_students=pl.col(\"student_id\").count(),\n", |
| 524 | + " poor_attenders=pl.col(\"G3\")\n", |
| 525 | + " .filter(pl.col(\"absences\") > pl.col(\"absences\").mean())\n", |
| 526 | + " .count(),\n", |
521 | 527 | " )\n", |
522 | 528 | " .select(\n", |
523 | | - " pl.col(\"age\"),\n", |
524 | | - " pl.col(\"passes\"),\n", |
525 | | - " pl.col(\"total_students\"),\n", |
526 | | - " percentage=pl.col(\"passes\") / pl.col(\"total_students\") * 100,\n", |
| 529 | + " pl.col(\"age\", \"passes\", \"poor_attenders\"),\n", |
| 530 | + " percentage=pl.col(\"passes\") / pl.col(\"poor_attenders\") * 100,\n", |
527 | 531 | " )\n", |
| 532 | + " .with_columns(pl.col(\"percentage\").replace(float(\"NaN\"), 0))\n", |
528 | 533 | " .sort(\"age\")\n", |
529 | 534 | ")" |
530 | 535 | ] |
|
766 | 771 | }, |
767 | 772 | { |
768 | 773 | "cell_type": "code", |
769 | | - "execution_count": 23, |
| 774 | + "execution_count": 6, |
770 | 775 | "id": "188f6fbd-6cb9-421c-9d86-7e4bea105130", |
771 | 776 | "metadata": {}, |
772 | 777 | "outputs": [ |
|
803 | 808 | "└─────────┴────────┴────────────┴────────┴──────────┴───────────────┘" |
804 | 809 | ] |
805 | 810 | }, |
806 | | - "execution_count": 23, |
| 811 | + "execution_count": 6, |
807 | 812 | "metadata": {}, |
808 | 813 | "output_type": "execute_result" |
809 | 814 | } |
|
813 | 818 | "\n", |
814 | 819 | "all_students.select(\n", |
815 | 820 | " pl.col(\"subject\", \"school\", \"student_id\", \"reason\", \"absences\"),\n", |
816 | | - " mean_absences=pl.col(\"absences\")\n", |
817 | | - " .mean()\n", |
818 | | - " .over(\"subject\", \"school\", \"reason\"),\n", |
| 821 | + " mean_absences=(\n", |
| 822 | + " pl.col(\"absences\").mean().over(\"subject\", \"school\", \"reason\")\n", |
| 823 | + " ),\n", |
819 | 824 | ")" |
820 | 825 | ] |
821 | 826 | }, |
822 | 827 | { |
823 | 828 | "cell_type": "code", |
824 | | - "execution_count": 24, |
| 829 | + "execution_count": 47, |
825 | 830 | "id": "4fedbbbd-2335-461b-a33c-95e0cb89a6a0", |
826 | 831 | "metadata": {}, |
827 | 832 | "outputs": [ |
|
858 | 863 | "└─────────┴────────┴────────────┴────────────┴──────────┴───────────────┘" |
859 | 864 | ] |
860 | 865 | }, |
861 | | - "execution_count": 24, |
| 866 | + "execution_count": 47, |
862 | 867 | "metadata": {}, |
863 | 868 | "output_type": "execute_result" |
864 | 869 | } |
|
868 | 873 | "\n", |
869 | 874 | "all_students.select(\n", |
870 | 875 | " pl.col(\"subject\", \"school\", \"student_id\", \"reason\", \"absences\"),\n", |
871 | | - " mean_absences=pl.col(\"absences\")\n", |
872 | | - " .mean()\n", |
873 | | - " .over(\"subject\", \"school\", \"reason\"),\n", |
| 876 | + " mean_absences=(\n", |
| 877 | + " pl.col(\"absences\").mean().over(\"subject\", \"school\", \"reason\")\n", |
| 878 | + " ),\n", |
874 | 879 | ").filter(pl.col(\"absences\") > pl.col(\"mean_absences\"))" |
875 | 880 | ] |
876 | 881 | }, |
|
934 | 939 | }, |
935 | 940 | { |
936 | 941 | "cell_type": "code", |
937 | | - "execution_count": 26, |
| 942 | + "execution_count": 14, |
938 | 943 | "id": "7005577f-ff6f-43eb-9f78-a582fe35311b", |
939 | 944 | "metadata": {}, |
940 | 945 | "outputs": [ |
|
962 | 967 | "└─────────┴─────────────┴─────────────┴─────────────┴─────────────┘" |
963 | 968 | ] |
964 | 969 | }, |
965 | | - "execution_count": 26, |
| 970 | + "execution_count": 14, |
966 | 971 | "metadata": {}, |
967 | 972 | "output_type": "execute_result" |
968 | 973 | } |
|
977 | 982 | " values=[\"absences\", \"failures\"],\n", |
978 | 983 | " aggregate_function=\"mean\",\n", |
979 | 984 | " ).select(\n", |
980 | | - " [\n", |
981 | | - " pl.col(\"subject\"),\n", |
982 | | - " pl.col(\"absences_GP\"),\n", |
983 | | - " pl.col(\"failures_GP\"),\n", |
984 | | - " pl.col(\"absences_MS\"),\n", |
985 | | - " pl.col(\"failures_MS\"),\n", |
986 | | - " ]\n", |
| 985 | + " pl.col(\n", |
| 986 | + " \"subject\",\n", |
| 987 | + " \"absences_GP\",\n", |
| 988 | + " \"failures_GP\",\n", |
| 989 | + " \"absences_MS\",\n", |
| 990 | + " \"failures_MS\",\n", |
| 991 | + " ),\n", |
987 | 992 | " )\n", |
988 | 993 | ")" |
989 | 994 | ] |
990 | 995 | }, |
991 | 996 | { |
992 | 997 | "cell_type": "code", |
993 | | - "execution_count": 27, |
| 998 | + "execution_count": 16, |
994 | 999 | "id": "b048a592-3a14-458e-aadb-e4b973119625", |
995 | 1000 | "metadata": {}, |
996 | 1001 | "outputs": [ |
|
1024 | 1029 | "└─────────┴────────────┴─────────────┴─────────────┴─────────────┴─────────────┘" |
1025 | 1030 | ] |
1026 | 1031 | }, |
1027 | | - "execution_count": 27, |
| 1032 | + "execution_count": 16, |
1028 | 1033 | "metadata": {}, |
1029 | 1034 | "output_type": "execute_result" |
1030 | 1035 | } |
|
1038 | 1043 | " values=[\"absences\", \"failures\"],\n", |
1039 | 1044 | " aggregate_function=\"mean\",\n", |
1040 | 1045 | " ).select(\n", |
1041 | | - " [\n", |
1042 | | - " pl.col(\"subject\"),\n", |
1043 | | - " pl.col(\"reason\"),\n", |
1044 | | - " pl.col(\"absences_GP\"),\n", |
1045 | | - " pl.col(\"failures_GP\"),\n", |
1046 | | - " pl.col(\"absences_MS\"),\n", |
1047 | | - " pl.col(\"failures_MS\"),\n", |
1048 | | - " ]\n", |
| 1046 | + " pl.col(\n", |
| 1047 | + " \"subject\",\n", |
| 1048 | + " \"reason\",\n", |
| 1049 | + " \"absences_GP\",\n", |
| 1050 | + " \"failures_GP\",\n", |
| 1051 | + " \"absences_MS\",\n", |
| 1052 | + " \"failures_MS\",\n", |
| 1053 | + " ),\n", |
1049 | 1054 | " )\n", |
1050 | 1055 | ")" |
1051 | 1056 | ] |
|
0 commit comments