Skip to content

Commit 36fc0bf

Browse files
committed
post TR2
1 parent c6b8e51 commit 36fc0bf

File tree

2 files changed

+85
-80
lines changed

2 files changed

+85
-80
lines changed

polars-groupby/solutions.ipynb

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
},
7575
{
7676
"cell_type": "code",
77-
"execution_count": 28,
77+
"execution_count": 18,
7878
"id": "c108a17a-d4d9-4183-b0eb-58c6abe70a7a",
7979
"metadata": {},
8080
"outputs": [
@@ -88,23 +88,23 @@
8888
" white-space: pre-wrap;\n",
8989
"}\n",
9090
"</style>\n",
91-
"<small>shape: (4, 4)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>subject</th><th>internet</th><th>passes</th><th>total</th></tr><tr><td>str</td><td>str</td><td>u32</td><td>u32</td></tr></thead><tbody><tr><td>&quot;M&quot;</td><td>&quot;yes&quot;</td><td>117</td><td>329</td></tr><tr><td>&quot;M&quot;</td><td>&quot;no&quot;</td><td>14</td><td>66</td></tr><tr><td>&quot;P&quot;</td><td>&quot;no&quot;</td><td>45</td><td>151</td></tr><tr><td>&quot;P&quot;</td><td>&quot;yes&quot;</td><td>231</td><td>498</td></tr></tbody></table></div>"
91+
"<small>shape: (4, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>subject</th><th>internet</th><th>total</th><th>passes</th><th>percentage</th></tr><tr><td>str</td><td>str</td><td>u32</td><td>u32</td><td>str</td></tr></thead><tbody><tr><td>&quot;M&quot;</td><td>&quot;yes&quot;</td><td>329</td><td>117</td><td>&quot;35.56%&quot;</td></tr><tr><td>&quot;M&quot;</td><td>&quot;no&quot;</td><td>66</td><td>14</td><td>&quot;21.21%&quot;</td></tr><tr><td>&quot;P&quot;</td><td>&quot;yes&quot;</td><td>498</td><td>231</td><td>&quot;46.39%&quot;</td></tr><tr><td>&quot;P&quot;</td><td>&quot;no&quot;</td><td>151</td><td>45</td><td>&quot;29.8%&quot;</td></tr></tbody></table></div>"
9292
],
9393
"text/plain": [
94-
"shape: (4, 4)\n",
95-
"┌─────────┬──────────┬────────┬───────┐\n",
96-
"│ subject ┆ internet ┆ passes ┆ total │\n",
97-
"│ --- ┆ --- ┆ --- ┆ --- │\n",
98-
"│ str ┆ str ┆ u32 ┆ u32 │\n",
99-
"╞═════════╪══════════╪════════╪═══════╡\n",
100-
"│ M ┆ yes ┆ 117 ┆ 329 │\n",
101-
"│ M ┆ no ┆ 14 ┆ 66 │\n",
102-
"│ P ┆ no ┆ 45 ┆ 151 │\n",
103-
"│ P ┆ yes ┆ 231 ┆ 498 │\n",
104-
"└─────────┴──────────┴────────┴───────┘"
94+
"shape: (4, 5)\n",
95+
"┌─────────┬──────────┬───────┬────────┬────────────┐\n",
96+
"│ subject ┆ internet ┆ total ┆ passes ┆ percentage\n",
97+
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- \n",
98+
"│ str ┆ str ┆ u32 ┆ u32 ┆ str \n",
99+
"╞═════════╪══════════╪═══════╪════════╪════════════╡\n",
100+
"│ M ┆ yes ┆ 329 ┆ 117 ┆ 35.56% \n",
101+
"│ M ┆ no ┆ 66 ┆ 14 ┆ 21.21% \n",
102+
"│ P ┆ yes ┆ 498 ┆ 231 ┆ 46.39% \n",
103+
"│ P ┆ no ┆ 151 ┆ 45 ┆ 29.8% \n",
104+
"└─────────┴──────────┴───────┴────────┴────────────┘"
105105
]
106106
},
107-
"execution_count": 28,
107+
"execution_count": 18,
108108
"metadata": {},
109109
"output_type": "execute_result"
110110
}
@@ -121,10 +121,10 @@
121121
" passes=pl.col(\"G3\").filter(pl.col(\"G3\") > 12).count(),\n",
122122
" )\n",
123123
" .select(\n",
124-
" pl.col(\"subject\"),\n",
125-
" pl.col(\"internet\"),\n",
126-
" pl.col(\"passes\"),\n",
127-
" pl.col(\"total\"),\n",
124+
" pl.col(\"subject\", \"internet\", \"total\", \"passes\"),\n",
125+
" percentage=pl.format(\n",
126+
" \"{}%\", (pl.col(\"passes\") * 100 / pl.col(\"total\")).round(2)\n",
127+
" ),\n",
128128
" )\n",
129129
" .sort(\"subject\")\n",
130130
")"

polars-groupby/tutorial_code.ipynb

Lines changed: 67 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
},
1313
{
1414
"cell_type": "code",
15-
"execution_count": 5,
15+
"execution_count": 2,
1616
"id": "a8613c94-93ef-4eeb-a916-06728100a352",
1717
"metadata": {},
1818
"outputs": [],
@@ -22,7 +22,7 @@
2222
},
2323
{
2424
"cell_type": "code",
25-
"execution_count": 6,
25+
"execution_count": 2,
2626
"id": "9ddb39eb-e50b-4678-8fb0-d65ab8269e0a",
2727
"metadata": {},
2828
"outputs": [
@@ -49,7 +49,7 @@
4949
"└──────────┘"
5050
]
5151
},
52-
"execution_count": 6,
52+
"execution_count": 2,
5353
"metadata": {},
5454
"output_type": "execute_result"
5555
}
@@ -62,7 +62,7 @@
6262
},
6363
{
6464
"cell_type": "code",
65-
"execution_count": 2,
65+
"execution_count": 3,
6666
"id": "32217883-4097-4ddf-a5df-cdc838e1b5f8",
6767
"metadata": {},
6868
"outputs": [
@@ -89,7 +89,7 @@
8989
"└────────────┴──────────┘"
9090
]
9191
},
92-
"execution_count": 2,
92+
"execution_count": 3,
9393
"metadata": {},
9494
"output_type": "execute_result"
9595
}
@@ -409,12 +409,16 @@
409409
"source": [
410410
"math_students = pl.read_parquet(\"maths.parquet\")\n",
411411
"\n",
412-
"(math_students.group_by(\"age\").agg(passes=pl.col(\"G3\")))"
412+
"(\n",
413+
" math_students.group_by(\"age\").agg(\n",
414+
" passes=pl.col(\"G3\"),\n",
415+
" )\n",
416+
")"
413417
]
414418
},
415419
{
416420
"cell_type": "code",
417-
"execution_count": 15,
421+
"execution_count": 6,
418422
"id": "123401a9-20f4-40ca-b32a-a48850d1cd23",
419423
"metadata": {},
420424
"outputs": [
@@ -428,7 +432,7 @@
428432
" white-space: pre-wrap;\n",
429433
"}\n",
430434
"</style>\n",
431-
"<small>shape: (8, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>age</th><th>passes</th></tr><tr><td>i64</td><td>list[i64]</td></tr></thead><tbody><tr><td>22</td><td>[]</td></tr><tr><td>19</td><td>[13, 13, … 13]</td></tr><tr><td>15</td><td>[15, 13, … 15]</td></tr><tr><td>17</td><td>[13, 18, … 15]</td></tr><tr><td>21</td><td>[]</td></tr><tr><td>18</td><td>[14, 18, … 13]</td></tr><tr><td>20</td><td>[]</td></tr><tr><td>16</td><td>[15, 14, … 18]</td></tr></tbody></table></div>"
435+
"<small>shape: (8, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>age</th><th>passes</th></tr><tr><td>i64</td><td>list[i64]</td></tr></thead><tbody><tr><td>16</td><td>[15, 14, … 18]</td></tr><tr><td>19</td><td>[13, 13, … 13]</td></tr><tr><td>21</td><td>[]</td></tr><tr><td>18</td><td>[14, 18, … 13]</td></tr><tr><td>20</td><td>[]</td></tr><tr><td>22</td><td>[]</td></tr><tr><td>15</td><td>[15, 13, … 15]</td></tr><tr><td>17</td><td>[13, 18, … 15]</td></tr></tbody></table></div>"
432436
],
433437
"text/plain": [
434438
"shape: (8, 2)\n",
@@ -437,18 +441,18 @@
437441
"│ --- ┆ --- │\n",
438442
"│ i64 ┆ list[i64] │\n",
439443
"╞═════╪════════════════╡\n",
440-
"│ 22 ┆ [] │\n",
444+
"│ 16 ┆ [15, 14, … 18] │\n",
441445
"│ 19 ┆ [13, 13, … 13] │\n",
442-
"│ 15 ┆ [15, 13, … 15] │\n",
443-
"│ 17 ┆ [13, 18, … 15] │\n",
444446
"│ 21 ┆ [] │\n",
445447
"│ 18 ┆ [14, 18, … 13] │\n",
446448
"│ 20 ┆ [] │\n",
447-
"│ 16 ┆ [15, 14, … 18] │\n",
449+
"│ 22 ┆ [] │\n",
450+
"│ 15 ┆ [15, 13, … 15] │\n",
451+
"│ 17 ┆ [13, 18, … 15] │\n",
448452
"└─────┴────────────────┘"
449453
]
450454
},
451-
"execution_count": 15,
455+
"execution_count": 6,
452456
"metadata": {},
453457
"output_type": "execute_result"
454458
}
@@ -459,16 +463,16 @@
459463
"(\n",
460464
" math_students.group_by(\"age\").agg(\n",
461465
" passes=pl.col(\"G3\").filter(\n",
462-
" pl.col(\"absences\") > pl.col(\"absences\").mean(), pl.col(\"G3\") > 12\n",
466+
" pl.col(\"absences\") > pl.col(\"absences\").mean(), pl.col(\"G3\") >= 13\n",
463467
" )\n",
464468
" )\n",
465469
")"
466470
]
467471
},
468472
{
469473
"cell_type": "code",
470-
"execution_count": 18,
471-
"id": "6e9e82f9-cb64-47d1-ac31-bac558a7a711",
474+
"execution_count": 5,
475+
"id": "02146ea3-03f1-4150-ad50-e1e6f610db34",
472476
"metadata": {},
473477
"outputs": [
474478
{
@@ -481,27 +485,27 @@
481485
" white-space: pre-wrap;\n",
482486
"}\n",
483487
"</style>\n",
484-
"<small>shape: (8, 4)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>age</th><th>passes</th><th>total_students</th><th>percentage</th></tr><tr><td>i64</td><td>u32</td><td>u32</td><td>f64</td></tr></thead><tbody><tr><td>15</td><td>15</td><td>82</td><td>18.292683</td></tr><tr><td>16</td><td>11</td><td>104</td><td>10.576923</td></tr><tr><td>17</td><td>8</td><td>98</td><td>8.163265</td></tr><tr><td>18</td><td>11</td><td>82</td><td>13.414634</td></tr><tr><td>19</td><td>4</td><td>24</td><td>16.666667</td></tr><tr><td>20</td><td>0</td><td>3</td><td>0.0</td></tr><tr><td>21</td><td>0</td><td>1</td><td>0.0</td></tr><tr><td>22</td><td>0</td><td>1</td><td>0.0</td></tr></tbody></table></div>"
488+
"<small>shape: (8, 4)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>age</th><th>passes</th><th>poor_attenders</th><th>percentage</th></tr><tr><td>i64</td><td>u32</td><td>u32</td><td>f64</td></tr></thead><tbody><tr><td>15</td><td>15</td><td>32</td><td>46.875</td></tr><tr><td>16</td><td>11</td><td>39</td><td>28.205128</td></tr><tr><td>17</td><td>8</td><td>29</td><td>27.586207</td></tr><tr><td>18</td><td>11</td><td>31</td><td>35.483871</td></tr><tr><td>19</td><td>4</td><td>10</td><td>40.0</td></tr><tr><td>20</td><td>0</td><td>1</td><td>0.0</td></tr><tr><td>21</td><td>0</td><td>0</td><td>0.0</td></tr><tr><td>22</td><td>0</td><td>0</td><td>0.0</td></tr></tbody></table></div>"
485489
],
486490
"text/plain": [
487491
"shape: (8, 4)\n",
488492
"┌─────┬────────┬────────────────┬────────────┐\n",
489-
"│ age ┆ passes ┆ total_students ┆ percentage │\n",
493+
"│ age ┆ passes ┆ poor_attenders ┆ percentage │\n",
490494
"│ --- ┆ --- ┆ --- ┆ --- │\n",
491495
"│ i64 ┆ u32 ┆ u32 ┆ f64 │\n",
492496
"╞═════╪════════╪════════════════╪════════════╡\n",
493-
"│ 15 ┆ 15 ┆ 82 ┆ 18.292683 │\n",
494-
"│ 16 ┆ 11 ┆ 104 ┆ 10.576923 │\n",
495-
"│ 17 ┆ 8 ┆ 98 ┆ 8.163265 │\n",
496-
"│ 18 ┆ 11 ┆ 82 ┆ 13.414634 │\n",
497-
"│ 19 ┆ 4 ┆ 24 ┆ 16.666667 │\n",
498-
"│ 20 ┆ 0 ┆ 3 ┆ 0.0 │\n",
499-
"│ 21 ┆ 0 ┆ 1 ┆ 0.0 │\n",
500-
"│ 22 ┆ 0 ┆ 1 ┆ 0.0 │\n",
497+
"│ 15 ┆ 15 ┆ 32 ┆ 46.875 │\n",
498+
"│ 16 ┆ 11 ┆ 39 ┆ 28.205128 │\n",
499+
"│ 17 ┆ 8 ┆ 29 ┆ 27.586207 │\n",
500+
"│ 18 ┆ 11 ┆ 31 ┆ 35.483871 │\n",
501+
"│ 19 ┆ 4 ┆ 10 ┆ 40.0 │\n",
502+
"│ 20 ┆ 0 ┆ 1 ┆ 0.0 │\n",
503+
"│ 21 ┆ 0 ┆ 0 ┆ 0.0 │\n",
504+
"│ 22 ┆ 0 ┆ 0 ┆ 0.0 │\n",
501505
"└─────┴────────┴────────────────┴────────────┘"
502506
]
503507
},
504-
"execution_count": 18,
508+
"execution_count": 5,
505509
"metadata": {},
506510
"output_type": "execute_result"
507511
}
@@ -514,17 +518,18 @@
514518
" .agg(\n",
515519
" passes=pl.col(\"G3\")\n",
516520
" .filter(\n",
517-
" pl.col(\"absences\") > pl.col(\"absences\").mean(), pl.col(\"G3\") > 12\n",
521+
" pl.col(\"absences\") > pl.col(\"absences\").mean(), pl.col(\"G3\") >= 13\n",
518522
" )\n",
519523
" .count(),\n",
520-
" total_students=pl.col(\"student_id\").count(),\n",
524+
" poor_attenders=pl.col(\"G3\")\n",
525+
" .filter(pl.col(\"absences\") > pl.col(\"absences\").mean())\n",
526+
" .count(),\n",
521527
" )\n",
522528
" .select(\n",
523-
" pl.col(\"age\"),\n",
524-
" pl.col(\"passes\"),\n",
525-
" pl.col(\"total_students\"),\n",
526-
" percentage=pl.col(\"passes\") / pl.col(\"total_students\") * 100,\n",
529+
" pl.col(\"age\", \"passes\", \"poor_attenders\"),\n",
530+
" percentage=pl.col(\"passes\") / pl.col(\"poor_attenders\") * 100,\n",
527531
" )\n",
532+
" .with_columns(pl.col(\"percentage\").replace(float(\"NaN\"), 0))\n",
528533
" .sort(\"age\")\n",
529534
")"
530535
]
@@ -766,7 +771,7 @@
766771
},
767772
{
768773
"cell_type": "code",
769-
"execution_count": 23,
774+
"execution_count": 6,
770775
"id": "188f6fbd-6cb9-421c-9d86-7e4bea105130",
771776
"metadata": {},
772777
"outputs": [
@@ -803,7 +808,7 @@
803808
"└─────────┴────────┴────────────┴────────┴──────────┴───────────────┘"
804809
]
805810
},
806-
"execution_count": 23,
811+
"execution_count": 6,
807812
"metadata": {},
808813
"output_type": "execute_result"
809814
}
@@ -813,15 +818,15 @@
813818
"\n",
814819
"all_students.select(\n",
815820
" pl.col(\"subject\", \"school\", \"student_id\", \"reason\", \"absences\"),\n",
816-
" mean_absences=pl.col(\"absences\")\n",
817-
" .mean()\n",
818-
" .over(\"subject\", \"school\", \"reason\"),\n",
821+
" mean_absences=(\n",
822+
" pl.col(\"absences\").mean().over(\"subject\", \"school\", \"reason\")\n",
823+
" ),\n",
819824
")"
820825
]
821826
},
822827
{
823828
"cell_type": "code",
824-
"execution_count": 24,
829+
"execution_count": 47,
825830
"id": "4fedbbbd-2335-461b-a33c-95e0cb89a6a0",
826831
"metadata": {},
827832
"outputs": [
@@ -858,7 +863,7 @@
858863
"└─────────┴────────┴────────────┴────────────┴──────────┴───────────────┘"
859864
]
860865
},
861-
"execution_count": 24,
866+
"execution_count": 47,
862867
"metadata": {},
863868
"output_type": "execute_result"
864869
}
@@ -868,9 +873,9 @@
868873
"\n",
869874
"all_students.select(\n",
870875
" pl.col(\"subject\", \"school\", \"student_id\", \"reason\", \"absences\"),\n",
871-
" mean_absences=pl.col(\"absences\")\n",
872-
" .mean()\n",
873-
" .over(\"subject\", \"school\", \"reason\"),\n",
876+
" mean_absences=(\n",
877+
" pl.col(\"absences\").mean().over(\"subject\", \"school\", \"reason\")\n",
878+
" ),\n",
874879
").filter(pl.col(\"absences\") > pl.col(\"mean_absences\"))"
875880
]
876881
},
@@ -934,7 +939,7 @@
934939
},
935940
{
936941
"cell_type": "code",
937-
"execution_count": 26,
942+
"execution_count": 14,
938943
"id": "7005577f-ff6f-43eb-9f78-a582fe35311b",
939944
"metadata": {},
940945
"outputs": [
@@ -962,7 +967,7 @@
962967
"└─────────┴─────────────┴─────────────┴─────────────┴─────────────┘"
963968
]
964969
},
965-
"execution_count": 26,
970+
"execution_count": 14,
966971
"metadata": {},
967972
"output_type": "execute_result"
968973
}
@@ -977,20 +982,20 @@
977982
" values=[\"absences\", \"failures\"],\n",
978983
" aggregate_function=\"mean\",\n",
979984
" ).select(\n",
980-
" [\n",
981-
" pl.col(\"subject\"),\n",
982-
" pl.col(\"absences_GP\"),\n",
983-
" pl.col(\"failures_GP\"),\n",
984-
" pl.col(\"absences_MS\"),\n",
985-
" pl.col(\"failures_MS\"),\n",
986-
" ]\n",
985+
" pl.col(\n",
986+
" \"subject\",\n",
987+
" \"absences_GP\",\n",
988+
" \"failures_GP\",\n",
989+
" \"absences_MS\",\n",
990+
" \"failures_MS\",\n",
991+
" ),\n",
987992
" )\n",
988993
")"
989994
]
990995
},
991996
{
992997
"cell_type": "code",
993-
"execution_count": 27,
998+
"execution_count": 16,
994999
"id": "b048a592-3a14-458e-aadb-e4b973119625",
9951000
"metadata": {},
9961001
"outputs": [
@@ -1024,7 +1029,7 @@
10241029
"└─────────┴────────────┴─────────────┴─────────────┴─────────────┴─────────────┘"
10251030
]
10261031
},
1027-
"execution_count": 27,
1032+
"execution_count": 16,
10281033
"metadata": {},
10291034
"output_type": "execute_result"
10301035
}
@@ -1038,14 +1043,14 @@
10381043
" values=[\"absences\", \"failures\"],\n",
10391044
" aggregate_function=\"mean\",\n",
10401045
" ).select(\n",
1041-
" [\n",
1042-
" pl.col(\"subject\"),\n",
1043-
" pl.col(\"reason\"),\n",
1044-
" pl.col(\"absences_GP\"),\n",
1045-
" pl.col(\"failures_GP\"),\n",
1046-
" pl.col(\"absences_MS\"),\n",
1047-
" pl.col(\"failures_MS\"),\n",
1048-
" ]\n",
1046+
" pl.col(\n",
1047+
" \"subject\",\n",
1048+
" \"reason\",\n",
1049+
" \"absences_GP\",\n",
1050+
" \"failures_GP\",\n",
1051+
" \"absences_MS\",\n",
1052+
" \"failures_MS\",\n",
1053+
" ),\n",
10491054
" )\n",
10501055
")"
10511056
]

0 commit comments

Comments
 (0)