Skip to content

Commit 28e9943

Browse files
authored
Merge pull request #53 from marimo-team/aka/polars-touchup
author: attribute peter-gy
2 parents 7b36ccd + 5e743f8 commit 28e9943

File tree

2 files changed

+36
-35
lines changed

2 files changed

+36
-35
lines changed

polars/09_strings.py

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
app = marimo.App(width="medium")
1515

1616

17-
@app.cell
17+
@app.cell(hide_code=True)
1818
def _(mo):
1919
mo.md(
2020
r"""
@@ -30,7 +30,7 @@ def _(mo):
3030
return
3131

3232

33-
@app.cell
33+
@app.cell(hide_code=True)
3434
def _(mo):
3535
mo.md(
3636
r"""
@@ -43,7 +43,7 @@ def _(mo):
4343
return
4444

4545

46-
@app.cell(hide_code=True)
46+
@app.cell
4747
def _(pl):
4848
pip_metadata_raw_df = pl.DataFrame(
4949
[
@@ -56,7 +56,7 @@ def _(pl):
5656
return (pip_metadata_raw_df,)
5757

5858

59-
@app.cell
59+
@app.cell(hide_code=True)
6060
def _(mo):
6161
mo.md(r"""We can use the [`json_decode`](https://docs.pola.rs/api/python/stable/reference/series/api/polars.Series.str.json_decode.html) expression to parse the raw JSON strings into Polars-native structs, and then the [`unnest`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.unnest.html) dataframe operation to get a dedicated column per parsed attribute.""")
6262
return
@@ -69,13 +69,13 @@ def _(pip_metadata_raw_df, pl):
6969
return (pip_metadata_df,)
7070

7171

72-
@app.cell
72+
@app.cell(hide_code=True)
7373
def _(mo):
7474
mo.md(r"""This is already a much friendlier representation of the data we started out with, but note that since the JSON entries had only string attributes, all values are strings, even the temporal `released_at` and numerical `size_mb` columns.""")
7575
return
7676

7777

78-
@app.cell
78+
@app.cell(hide_code=True)
7979
def _(mo):
8080
mo.md(r"""As we know that the `size_mb` column should have a decimal representation, we go ahead and use [`to_decimal`](https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.str.to_decimal.html#polars.Expr.str.to_decimal) to perform the conversion.""")
8181
return
@@ -91,7 +91,7 @@ def _(pip_metadata_df, pl):
9191
return
9292

9393

94-
@app.cell
94+
@app.cell(hide_code=True)
9595
def _(mo):
9696
mo.md(
9797
r"""
@@ -127,7 +127,7 @@ def _(pip_metadata_df, pl):
127127
return
128128

129129

130-
@app.cell
130+
@app.cell(hide_code=True)
131131
def _(mo):
132132
mo.md(r"""Alternatively, instead of using three different functions to perform the conversion to date, we can use a single one, [`strptime`](https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.str.strptime.html), which takes the desired temporal data type as its first parameter.""")
133133
return
@@ -145,7 +145,7 @@ def _(pip_metadata_df, pl):
145145
return
146146

147147

148-
@app.cell
148+
@app.cell(hide_code=True)
149149
def _(mo):
150150
mo.md(r"""And to wrap up this section on parsing and conversion, let's consider a final scenario. What if we don't want to parse the entire raw JSON string, because we only need a subset of its attributes? Well, in this case we can leverage the [`json_path_match`](https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.str.json_path_match.html) expression to extract only the desired attributes using standard [JSONPath](https://goessner.net/articles/JsonPath/) syntax.""")
151151
return
@@ -163,7 +163,7 @@ def _(pip_metadata_raw_df, pl):
163163
return
164164

165165

166-
@app.cell
166+
@app.cell(hide_code=True)
167167
def _(mo):
168168
mo.md(
169169
r"""
@@ -217,7 +217,7 @@ def list_expr_meta() -> list[dict]:
217217
return expressions_df, list_expr_meta, list_members
218218

219219

220-
@app.cell
220+
@app.cell(hide_code=True)
221221
def _(mo):
222222
mo.md(r"""As the following visualization shows, `str` is one of the richest Polars expression namespaces, with dozens of functions in it.""")
223223
return
@@ -232,7 +232,7 @@ def _(alt, expressions_df):
232232
return
233233

234234

235-
@app.cell
235+
@app.cell(hide_code=True)
236236
def _(mo):
237237
mo.md(
238238
r"""
@@ -260,7 +260,7 @@ def _(expressions_df, pl):
260260
return (docstring_length_df,)
261261

262262

263-
@app.cell
263+
@app.cell(hide_code=True)
264264
def _(mo):
265265
mo.md(r"""As the dataframe preview above and the scatterplot below show, the docstring length measured in bytes is almost always bigger than the length expressed in characters. This is due to the fact that the docstrings include characters which require more than a single byte to represent, such as "╞" for displaying dataframe header and body separators.""")
266266
return
@@ -276,7 +276,7 @@ def _(alt, docstring_length_df):
276276
return
277277

278278

279-
@app.cell
279+
@app.cell(hide_code=True)
280280
def _(mo):
281281
mo.md(
282282
r"""
@@ -298,7 +298,7 @@ def _(expressions_df, pl):
298298
return
299299

300300

301-
@app.cell
301+
@app.cell(hide_code=True)
302302
def _(mo):
303303
mo.md(
304304
r"""
@@ -338,7 +338,7 @@ def _(mo, padded_df, padding):
338338
return
339339

340340

341-
@app.cell
341+
@app.cell(hide_code=True)
342342
def _(mo):
343343
mo.md(
344344
r"""
@@ -362,7 +362,7 @@ def _(expressions_df, pl):
362362
return
363363

364364

365-
@app.cell
365+
@app.cell(hide_code=True)
366366
def _(mo):
367367
mo.md(
368368
r"""
@@ -388,7 +388,7 @@ def _(expressions_df, pl):
388388
return
389389

390390

391-
@app.cell
391+
@app.cell(hide_code=True)
392392
def _(mo):
393393
mo.md(
394394
r"""
@@ -412,7 +412,7 @@ def _(expressions_df, pl):
412412
return
413413

414414

415-
@app.cell
415+
@app.cell(hide_code=True)
416416
def _(mo):
417417
mo.md(
418418
r"""
@@ -434,7 +434,7 @@ def _(expressions_df, pl):
434434
return
435435

436436

437-
@app.cell
437+
@app.cell(hide_code=True)
438438
def _(mo):
439439
mo.md(
440440
r"""
@@ -460,7 +460,7 @@ def _(expressions_df, pl):
460460
return
461461

462462

463-
@app.cell
463+
@app.cell(hide_code=True)
464464
def _(mo):
465465
mo.md(r"""When we want to check for several substrings at once, we can use the [`contains`](https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.str.contains.html) expression to test for the presence of various patterns.""")
466466
return
@@ -476,7 +476,7 @@ def _(expressions_df, pl):
476476
return
477477

478478

479-
@app.cell
479+
@app.cell(hide_code=True)
480480
def _(mo):
481481
mo.md(
482482
r"""
@@ -506,7 +506,7 @@ def _(expressions_df, pl):
506506
return
507507

508508

509-
@app.cell
509+
@app.cell(hide_code=True)
510510
def _(mo):
511511
mo.md(r"""A related application example is to *find* the first index where a particular pattern is present, so that it can be used for downstream processing such as slicing. Below we use the [`find`](https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.str.find.html) expression to determine the index at which a code example starts in the docstring - identified by the Python shell substring `">>>"`.""")
512512
return
@@ -522,7 +522,7 @@ def _(expressions_df, pl):
522522
return
523523

524524

525-
@app.cell
525+
@app.cell(hide_code=True)
526526
def _(mo):
527527
mo.md(
528528
r"""
@@ -562,7 +562,7 @@ def _(mo, slice, sliced_df):
562562
return
563563

564564

565-
@app.cell
565+
@app.cell(hide_code=True)
566566
def _(mo):
567567
mo.md(
568568
r"""
@@ -589,7 +589,7 @@ def _(expressions_df, pl):
589589
return
590590

591591

592-
@app.cell
592+
@app.cell(hide_code=True)
593593
def _(mo):
594594
mo.md(r"""As a more practical example, we can use the `split` expression with some aggregation to count the number of times a particular word occurs in member names across all namespaces. This enables us to create a word cloud of the API members' constituents!""")
595595
return
@@ -643,7 +643,7 @@ def _(alt, expressions_df, pl, random, wordcloud_height, wordcloud_width):
643643
return wordcloud, wordcloud_df
644644

645645

646-
@app.cell
646+
@app.cell(hide_code=True)
647647
def _(mo):
648648
mo.md(
649649
r"""
@@ -677,7 +677,7 @@ def _(expressions_df, pl):
677677
return (descriptions_df,)
678678

679679

680-
@app.cell
680+
@app.cell(hide_code=True)
681681
def _(mo):
682682
mo.md(
683683
r"""
@@ -706,7 +706,7 @@ def _(descriptions_df, mo, pl):
706706
return
707707

708708

709-
@app.cell
709+
@app.cell(hide_code=True)
710710
def _(mo):
711711
mo.md(
712712
r"""
@@ -734,7 +734,7 @@ def _(expressions_df, pl):
734734
return (url_pattern,)
735735

736736

737-
@app.cell
737+
@app.cell(hide_code=True)
738738
def _(mo):
739739
mo.md(
740740
r"""
@@ -758,7 +758,7 @@ def _(expressions_df, pl):
758758
return
759759

760760

761-
@app.cell
761+
@app.cell(hide_code=True)
762762
def _(mo):
763763
mo.md(
764764
r"""
@@ -783,7 +783,7 @@ def _(expressions_df, pl):
783783
return
784784

785785

786-
@app.cell
786+
@app.cell(hide_code=True)
787787
def _(mo):
788788
mo.md(
789789
r"""
@@ -807,7 +807,7 @@ def _(expressions_df, pl):
807807
return
808808

809809

810-
@app.cell
810+
@app.cell(hide_code=True)
811811
def _(mo):
812812
mo.md(
813813
r"""
@@ -830,7 +830,7 @@ def _(expressions_df, pl):
830830
return (encoded_df,)
831831

832832

833-
@app.cell
833+
@app.cell(hide_code=True)
834834
def _(mo):
835835
mo.md(r"""And of course, you can convert back into a human-readable representation using the [`decode`](https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.str.decode.html) expression.""")
836836
return
@@ -845,7 +845,7 @@ def _(encoded_df, pl):
845845
return
846846

847847

848-
@app.cell
848+
@app.cell(hide_code=True)
849849
def _(mo):
850850
mo.md(
851851
r"""

polars/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ You can also open notebooks in our online playground by appending marimo.app/ to
2323
Thanks to all our notebook authors!
2424

2525
* [Koushik Khan](https://github.com/koushikkhan)
26+
* [Péter Gyarmati](https://github.com/peter-gy)

0 commit comments

Comments
 (0)