Skip to content

Commit 38afbf9

Browse files
committed
prep for TR
1 parent cc1f668 commit 38afbf9

File tree

8 files changed

+62
-89
lines changed

8 files changed

+62
-89
lines changed

pandas-iterate-over-rows/README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1-
# Placeholder
1+
# How to Iterate Over a pandas DataFrame's Rows
22

3-
Info...
3+
In these supplementary materials you'll find the examples discussed in the tutorial, along with some bonus materials.
4+
5+
The main extra dependency for the bonus materials is [perfplot](https://github.com/nschloe/perfplot). To install all dependencies into your virtual environment, run:
6+
7+
```shell
8+
$ python -m pip install pandas httpx codetiming perfplot
9+
```
10+
11+
You'll also find some additional code samples along the way that show alternatives to looping, such as list comprehensions, as well as alternative ways of looping.

pandas-iterate-over-rows/time_cumulative_sum.py renamed to pandas-iterate-over-rows/cumulative_sum_codetiming.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,28 @@
22
import pandas as pd
33

44

5-
def pandas_cumsum(products):
6-
return products.assign(
7-
income=lambda df: df["sales"] * df["unit_price"],
8-
cumulative_income=lambda df: df["income"].cumsum(),
9-
).drop(columns="income")
10-
11-
125
def loop_cumsum(products):
    """Add a ``cumulative_income`` column computed with an explicit loop.

    Walks the rows of ``products`` with ``itertuples()``, multiplies each
    row's ``sales`` by its ``unit_price``, and keeps a running total of
    those products.

    Returns the result of ``products.assign(...)`` with the running totals
    stored under ``cumulative_income``.
    """
    running_totals = []
    for record in products.itertuples():
        income = record.sales * record.unit_price
        if not running_totals:
            # The first row starts the running total.
            running_totals.append(income)
            continue
        running_totals.append(running_totals[-1] + income)
    return products.assign(cumulative_income=running_totals)
2415

2516

17+
def pandas_cumsum(products):
    """Add a ``cumulative_income`` column using vectorized pandas operations.

    Multiplies the ``sales`` and ``unit_price`` columns into a temporary
    ``income`` column, takes its cumulative sum as ``cumulative_income``,
    and drops the temporary column before returning.
    """
    with_income = products.assign(
        income=products["sales"] * products["unit_price"]
    )
    with_totals = with_income.assign(
        cumulative_income=with_income["income"].cumsum()
    )
    # "income" is only a stepping stone and is not part of the result.
    return with_totals.drop(columns="income")
22+
23+
2624
for f in [pandas_cumsum, loop_cumsum]:
2725
products = pd.read_csv("resources/products.csv")
28-
products = pd.concat(products for _ in range(200))
29-
26+
products = pd.concat(products for _ in range(1000))
3027
with codetiming.Timer(
3128
name=f.__name__, text="{name:20}: {milliseconds:.2f} ms"
3229
):
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import pandas as pd
2+
import perfplot
3+
4+
5+
def loop_cumsum(products):
    """Add a ``cumulative_income`` column computed with an explicit loop.

    Walks the rows of ``products`` with ``itertuples()``, multiplies each
    row's ``sales`` by its ``unit_price``, and keeps a running total of
    those products.

    Returns the result of ``products.assign(...)`` with the running totals
    stored under ``cumulative_income``.
    """
    running_totals = []
    for record in products.itertuples():
        income = record.sales * record.unit_price
        if not running_totals:
            # The first row starts the running total.
            running_totals.append(income)
            continue
        running_totals.append(running_totals[-1] + income)
    return products.assign(cumulative_income=running_totals)
15+
16+
17+
def pandas_cumsum(products):
    """Add a ``cumulative_income`` column using vectorized pandas operations.

    Multiplies the ``sales`` and ``unit_price`` columns into a temporary
    ``income`` column, takes its cumulative sum as ``cumulative_income``,
    and drops the temporary column before returning.
    """
    with_income = products.assign(
        income=products["sales"] * products["unit_price"]
    )
    with_totals = with_income.assign(
        cumulative_income=with_income["income"].cumsum()
    )
    # "income" is only a stepping stone and is not part of the result.
    return with_totals.drop(columns="income")
22+
23+
24+
# Base data set; each benchmark input is built from n copies of it.
products = pd.read_csv("resources/products.csv")

# Time both implementations over a range of input sizes. The equality
# check between kernel outputs is switched off via equality_check=None.
plot = perfplot.bench(
    setup=lambda n: pd.concat([products] * n),
    kernels=[pandas_cumsum, loop_cumsum],
    labels=["pandas cumsum", "loop cumsum"],
    n_range=[i**2 for i in range(1, 1000, 100)],
    equality_check=None,
)

# Show the results twice: first with default axes, then with logy=True.
plot.show()
plot.show(logy=True)

pandas-iterate-over-rows/examples.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

pandas-iterate-over-rows/fix_place_of_pub.py renamed to pandas-iterate-over-rows/fix_place_of_pub_perfplot.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def get_books(n):
6767
return pd.concat([books for _ in range((n // len(books)) + 1)]).iloc[:n]
6868

6969

70-
perfplot.live(
70+
plot = perfplot.bench(
7171
setup=lambda n: get_books(n),
7272
kernels=[
7373
clean_pub_replace,
@@ -79,5 +79,7 @@ def get_books(n):
7979
labels=["replace", "itertuples", "iterrows", "apply", "list comp"],
8080
n_range=[i**2 for i in range(1, 40, 2)],
8181
equality_check=None,
82-
logy=True,
8382
)
83+
84+
plot.show()
85+
plot.show(logy=True)

pandas-iterate-over-rows/generate_data.py

Lines changed: 0 additions & 24 deletions
This file was deleted.

pandas-iterate-over-rows/take_sum_codetiming.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def python_sum(webs):
1919

2020
for f in [pandas_sum, loop_sum, python_sum]:
2121
webs = pd.read_csv("resources/popular_websites.csv", index_col=0)
22+
webs = pd.concat([webs for _ in range(1000)])
2223
with codetiming.Timer(
2324
name=f.__name__, text="{name:20}: {milliseconds:.2f} ms"
2425
):

pandas-iterate-over-rows/take_sum_perfplot.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,12 @@ def python_sum(webs):
1919

2020
webs = pd.read_csv("resources/popular_websites.csv", index_col=0)
2121

22-
perfplot.live(
22+
plot = perfplot.bench(
2323
n_range=[i**2 for i in range(1, 1000, 100)],
2424
setup=lambda n: pd.concat([webs for _ in range(n)]),
2525
kernels=[pandas_sum, loop_sum, python_sum],
2626
labels=["pandas sum", "loop sum", "python sum"],
27-
# equality_check=None,
28-
logy=True,
2927
)
28+
29+
plot.show()
30+
plot.show(logy=True)

0 commit comments

Comments
 (0)