@@ -38,6 +38,50 @@ def _(mo):
def _():
    # Shared imports and toy inputs for the join / cumulative-sum comparison.
    from datetime import datetime

    import pandas as pd
    import polars as pl

    data1 = {"store": [1, 1, 2], "date_id": [4, 5, 6]}
    data2 = {"store": [1, 2], "sales": [7, 8]}

    pandas_df1 = pd.DataFrame(data1)
    pandas_df2 = pd.DataFrame(data2)

    # The outputs are the same
    # Rebuild and print the joined frame five times so the runs can be compared.
    for _ in range(5):
        # Left join on "store"
        pandas_df = pandas_df1.merge(pandas_df2, on="store", how="left")

        # Running total of "sales" inside each store group
        per_store_sales = pandas_df.groupby("store")["sales"]
        pandas_df["cumulative_sales"] = per_store_sales.cumsum()

        print(pandas_df)
    return data1, data2, datetime, pd, pl
60+
61+
@app.cell
def _(data1, data2, pl):
    # Same join + per-store cumulative sum as the pandas cell, expressed as a
    # lazy Polars query and collected with the streaming engine.
    polars_df1 = pl.DataFrame(data1).lazy()
    polars_df2 = pl.DataFrame(data2).lazy()

    # Underscore-prefixed so the name stays private to this cell.
    _query = polars_df1.join(polars_df2, on="store", how="left").with_columns(
        cumulative_sales=pl.col("sales").cum_sum().over("store")
    )

    # The outputs are not the same
    # Collect and print the query five times so the runs can be compared.
    for _ in range(5):
        print(_query.collect(engine="streaming"))
    return
75+
76+
@app.cell(hide_code=True)
def _(mo):
    # Section heading rendered as markdown; the cell's code is hidden.
    mo.md(
        r"""## Eager-only solution"""
    )
    return
81+
82+
83+ @app .cell
84+ def _ (datetime , pd ):
4185 data = {
4286 "sale_date" : [
4387 datetime (2025 , 5 , 22 ),
@@ -57,31 +101,10 @@ def _():
57101 ],
58102 "sales" : [1100 , None , 1450 , 501 , 500 , None ],
59103 }
60- return (data ,)
61104
62-
63- @app .cell
64- def _ (data , pd ):
65105 pdf = pd .DataFrame (data )
66- pdf ["sales" ] = pdf .groupby ("store" )["sales" ].ffill ()
67- pdf
68- return
69-
70-
71- @app .cell
72- def _ (data , pl ):
73- lazy_df = pl .DataFrame (data ).lazy ()
74- lazy_df .with_columns (
75- pl .col ("sales" ).fill_null (strategy = "forward" ).over ("store" )
76- ).collect ()
77- # ⚠️ This may not work as expected unless you specify order_by="sale_date"
78- return
79-
80-
81- @app .cell (hide_code = True )
82- def _ (mo ):
83- mo .md (r"""## Eager-only solution""" )
84- return
106+ print (pdf )
107+ return (data ,)
85108
86109
87110@app .cell
@@ -103,18 +126,19 @@ def agnostic_ffill_by_store(df_native: IntoFrameT) -> IntoFrameT:
103126
104127
@app.cell
def _(agnostic_ffill_by_store, data, pd):
    # Run the dataframe-agnostic forward fill on a pandas.DataFrame.
    # The call is kept as the last expression before the return.
    df_pandas = pd.DataFrame(data=data)
    agnostic_ffill_by_store(df_pandas)
    return (df_pandas,)
134+
113135
@app.cell
def _(agnostic_ffill_by_store, data, pl):
    # Run the dataframe-agnostic forward fill on a polars.DataFrame.
    # The call is kept as the last expression before the return.
    df_polars = pl.DataFrame(data=data)
    agnostic_ffill_by_store(df_polars)
    return (df_polars,)
118142
119143
120144@app .cell
@@ -170,7 +194,12 @@ def _(agnostic_ffill_by_store_improved, df_polars):
@app.cell
def _(agnostic_ffill_by_store_improved, df_pandas):
    # Note that it still supports pandas
    # Underscore-prefixed so the intermediate stays private to this cell.
    _filled = agnostic_ffill_by_store_improved(df_pandas)
    print(_filled)
    return
199+
200+
@app.cell
def _():
    # Empty cell: no inputs, no statements, nothing exported.
    return
175204
176205
0 commit comments