|
15 | 15 | "metadata": {}, |
16 | 16 | "outputs": [], |
17 | 17 | "source": [ |
18 | | - "!python -m pip install polars\n" |
| 18 | + "!python -m pip install polars" |
19 | 19 | ] |
20 | 20 | }, |
21 | 21 | { |
|
29 | 29 | "\n", |
30 | 30 | "tips = pl.scan_parquet(\"tips.parquet\")\n", |
31 | 31 | "\n", |
32 | | - "tips.collect()\n" |
| 32 | + "tips.collect()" |
33 | 33 | ] |
34 | 34 | }, |
35 | 35 | { |
|
39 | 39 | "metadata": {}, |
40 | 40 | "outputs": [], |
41 | 41 | "source": [ |
42 | | - "(\n", |
43 | | - " tips\n", |
44 | | - " .null_count()\n", |
45 | | - ").collect()\n" |
| 42 | + "(tips.null_count()).collect()" |
46 | 43 | ] |
47 | 44 | }, |
48 | 45 | { |
|
64 | 61 | "\n", |
65 | 62 | "tips = pl.scan_parquet(\"tips.parquet\")\n", |
66 | 63 | "\n", |
67 | | - "(\n", |
68 | | - " tips\n", |
69 | | - " .filter(\n", |
70 | | - " pl.col(\"total\").is_null() & pl.col(\"tip\").is_null()\n", |
71 | | - " )\n", |
72 | | - ").collect()\n" |
| 64 | + "(tips.filter(pl.col(\"total\").is_null() & pl.col(\"tip\").is_null())).collect()" |
73 | 65 | ] |
74 | 66 | }, |
75 | 67 | { |
|
80 | 72 | "outputs": [], |
81 | 73 | "source": [ |
82 | 74 | "(\n", |
83 | | - " tips\n", |
84 | | - " .drop_nulls(pl.col(\"total\"))\n", |
85 | | - " .filter(\n", |
| 75 | + " tips.drop_nulls(pl.col(\"total\")).filter(\n", |
86 | 76 | " pl.col(\"total\").is_null() & pl.col(\"tip\").is_null()\n", |
87 | 77 | " )\n", |
88 | | - ").collect()\n" |
| 78 | + ").collect()" |
89 | 79 | ] |
90 | 80 | }, |
91 | 81 | { |
|
99 | 89 | " tips.drop_nulls(pl.col(\"total\"))\n", |
100 | 90 | " .with_columns(pl.col(\"tip\").fill_null(0))\n", |
101 | 91 | " .filter(pl.col(\"tip\").is_null())\n", |
102 | | - ").collect()\n" |
| 92 | + ").collect()" |
103 | 93 | ] |
104 | 94 | }, |
105 | 95 | { |
|
121 | 111 | "\n", |
122 | 112 | "tips = pl.scan_parquet(\"tips.parquet\")\n", |
123 | 113 | "\n", |
124 | | - "(tips.filter(pl.col(\"time\").is_null())).collect()\n" |
| 114 | + "(tips.filter(pl.col(\"time\").is_null())).collect()" |
125 | 115 | ] |
126 | 116 | }, |
127 | 117 | { |
|
131 | 121 | "metadata": {}, |
132 | 122 | "outputs": [], |
133 | 123 | "source": [ |
134 | | - "(\n", |
135 | | - " tips\n", |
136 | | - " .filter(\n", |
137 | | - " pl.col(\"record_id\").is_in([2, 3, 4, 14, 15, 16])\n", |
138 | | - " )\n", |
139 | | - ").collect()\n" |
| 124 | + "(tips.filter(pl.col(\"record_id\").is_in([2, 3, 4, 14, 15, 16]))).collect()" |
140 | 125 | ] |
141 | 126 | }, |
142 | 127 | { |
|
147 | 132 | "outputs": [], |
148 | 133 | "source": [ |
149 | 134 | "(\n", |
150 | | - " tips\n", |
151 | | - " .drop_nulls(\"total\")\n", |
| 135 | + " tips.drop_nulls(\"total\")\n", |
152 | 136 | " .with_columns(pl.col(\"tip\").fill_null(0))\n", |
153 | 137 | " .with_columns(pl.col(\"time\").fill_null(strategy=\"forward\"))\n", |
154 | 138 | " .filter(pl.col(\"record_id\").is_in([3, 15]))\n", |
155 | | - ").collect()\n" |
| 139 | + ").collect()" |
156 | 140 | ] |
157 | 141 | }, |
158 | 142 | { |
|
174 | 158 | "\n", |
175 | 159 | "tips = pl.scan_parquet(\"tips.parquet\")\n", |
176 | 160 | "\n", |
177 | | - "(\n", |
178 | | - " tips\n", |
179 | | - " .filter(\n", |
180 | | - " pl.all_horizontal(pl.col(\"total\", \"tip\").is_null())\n", |
181 | | - " )\n", |
182 | | - ").collect()\n" |
| 161 | + "(tips.filter(pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))).collect()" |
183 | 162 | ] |
184 | 163 | }, |
185 | 164 | { |
|
191 | 170 | "source": [ |
192 | 171 | "tips = pl.scan_parquet(\"tips.parquet\")\n", |
193 | 172 | "\n", |
194 | | - "(\n", |
195 | | - " tips\n", |
196 | | - " .filter(\n", |
197 | | - " ~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null())\n", |
198 | | - " )\n", |
199 | | - ").collect()\n" |
| 173 | + "(tips.filter(~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))).collect()" |
200 | 174 | ] |
201 | 175 | }, |
202 | 176 | { |
|
211 | 185 | "tips = pl.scan_parquet(\"tips.parquet\")\n", |
212 | 186 | "\n", |
213 | 187 | "(\n", |
214 | | - " tips\n", |
215 | | - " .filter(\n", |
216 | | - " ~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null())\n", |
217 | | - " )\n", |
| 188 | + " tips.filter(~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))\n", |
218 | 189 | " .with_columns(pl.col(\"tip\").fill_null(0))\n", |
219 | 190 | " .with_columns(pl.col(\"time\").fill_null(strategy=\"forward\"))\n", |
220 | | - ").null_count().collect()\n" |
| 191 | + ").null_count().collect()" |
221 | 192 | ] |
222 | 193 | }, |
223 | 194 | { |
|
247 | 218 | " }\n", |
248 | 219 | ")\n", |
249 | 220 | "\n", |
250 | | - "scientists.collect()\n" |
| 221 | + "scientists.collect()" |
251 | 222 | ] |
252 | 223 | }, |
253 | 224 | { |
|
263 | 234 | " scientists.with_columns(cs.string().fill_null(\"Unknown\")).with_columns(\n", |
264 | 235 | " cs.integer().fill_null(0)\n", |
265 | 236 | " )\n", |
266 | | - ").collect()\n" |
| 237 | + ").collect()" |
267 | 238 | ] |
268 | 239 | }, |
269 | 240 | { |
|
285 | 256 | "\n", |
286 | 257 | "sales_trends = pl.scan_csv(\"sales_trends.csv\")\n", |
287 | 258 | "\n", |
288 | | - "sales_trends.collect()\n" |
| 259 | + "sales_trends.collect()" |
289 | 260 | ] |
290 | 261 | }, |
291 | 262 | { |
|
296 | 267 | "outputs": [], |
297 | 268 | "source": [ |
298 | 269 | "(\n", |
299 | | - " sales_trends\n", |
300 | | - " .with_columns(\n", |
| 270 | + " sales_trends.with_columns(\n", |
301 | 271 | " pl.col(\"next_year\").replace(\n", |
302 | 272 | " [float(\"inf\"), -float(\"inf\"), float(\"NaN\")], None\n", |
303 | 273 | " )\n", |
304 | 274 | " )\n", |
305 | | - ").collect()\n" |
| 275 | + ").collect()" |
306 | 276 | ] |
307 | 277 | }, |
308 | 278 | { |
|
313 | 283 | "outputs": [], |
314 | 284 | "source": [ |
315 | 285 | "(\n", |
316 | | - " sales_trends\n", |
317 | | - " .with_columns(\n", |
| 286 | + " sales_trends.with_columns(\n", |
318 | 287 | " pl.col(\"next_year\").replace(\n", |
319 | 288 | " [float(\"inf\"), -float(\"inf\"), float(\"NaN\")], None\n", |
320 | 289 | " )\n", |
321 | | - " )\n", |
322 | | - " .with_columns(\n", |
| 290 | + " ).with_columns(\n", |
323 | 291 | " pl.col(\"next_year\").fill_null(\n", |
324 | 292 | " pl.col(\"current_year\")\n", |
325 | 293 | " + (pl.col(\"current_year\") - pl.col(\"last_year\"))\n", |
326 | 294 | " )\n", |
327 | 295 | " )\n", |
328 | | - ").collect()\n" |
| 296 | + ").collect()" |
329 | 297 | ] |
330 | 298 | }, |
331 | 299 | { |
|
347 | 315 | "\n", |
348 | 316 | "episodes = pl.scan_parquet(\"ft_exercise.parquet\")\n", |
349 | 317 | "\n", |
350 | | - "episodes.null_count().collect()\n" |
| 318 | + "episodes.null_count().collect()" |
351 | 319 | ] |
352 | 320 | }, |
353 | 321 | { |
|
362 | 330 | "episodes = pl.scan_parquet(\"ft_exercise.parquet\")\n", |
363 | 331 | "\n", |
364 | 332 | "(\n", |
365 | | - " episodes\n", |
366 | | - " .with_columns(\n", |
| 333 | + " episodes.with_columns(\n", |
367 | 334 | " pl.when(pl.col(\"episode\") == 6)\n", |
368 | 335 | " .then(pl.col(\"series\").fill_null(strategy=\"forward\"))\n", |
369 | 336 | " .otherwise(pl.col(\"series\").fill_null(strategy=\"backward\"))\n", |
|
373 | 340 | " .then(pl.col(\"title\").fill_null(\"The Hotel Inspectors\"))\n", |
374 | 341 | " .otherwise(pl.col(\"title\").fill_null(\"Waldorf Salad\"))\n", |
375 | 342 | " )\n", |
376 | | - " .with_columns(\n", |
377 | | - " pl.col(\"original_date\").interpolate()\n", |
378 | | - " )\n", |
379 | | - ").null_count().collect()\n" |
| 343 | + " .with_columns(pl.col(\"original_date\").interpolate())\n", |
| 344 | + ").null_count().collect()" |
380 | 345 | ] |
381 | 346 | } |
382 | 347 | ], |
|
0 commit comments