Skip to content

Commit f259bd4

Browse files
committed
TR suggestions
1 parent 5440c88 commit f259bd4

File tree

6 files changed

+26
-22
lines changed

6 files changed

+26
-22
lines changed

pandas-iterate-over-rows/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# How to Iterate Over a pandas DataFrame's Rows
22

3-
In these supplementary materials you'll find the examples discussed in the tutorial, along with some bonus materials.
3+
In these supplementary materials, you'll find the examples discussed in the tutorial [How to Iterate Over Rows in pandas, and Why You Shouldn't](https://realpython.com/pandas-iterate-over-rows/), along with some bonus materials.
44

5-
The main extra dependency for the bonus materials is [perfplot](https://github.com/nschloe/perfplot). To install all dependencies into your venv
5+
In addition to the third-party packages used in the tutorial, you should also install [perfplot](https://github.com/nschloe/perfplot) to play with the bonus examples. To install all dependencies into your virtual environment, you can run the following command:
66

7-
```shell
7+
```console
88
$ python -m pip install pandas httpx codetiming perfplot
99
```
1010

pandas-iterate-over-rows/cumulative_sum_perfplot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def get_products(n):
3535
kernels=[pandas_cumsum, loop_cumsum],
3636
labels=["pandas cumsum", "loop cumsum"],
3737
equality_check=None,
38-
title="Loop vs Pandas Cumulative Sum",
38+
title="Loop vs pandas Cumulative Sum",
3939
xlabel="Number of Rows",
4040
)
4141

pandas-iterate-over-rows/how_to_loop.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,14 @@ def check_connection(name, url):
2525
for website in websites.itertuples():
2626
check_connection(website.name, website.url)
2727

28-
# %%
28+
# %% You may use .iterrows() if you have dynamic column names
29+
name_column = "name"
30+
url_column = "url"
2931
for _, website in websites.iterrows():
30-
check_connection(website["name"], website["url"])
32+
check_connection(website[name_column], website[url_column])
3133

3234
# %% Use list comprehension to iterate through all rows
35+
# Note that this creates a list that is thrown away again
3336
[
3437
check_connection(website.name, website.url)
3538
for website in websites.itertuples()
@@ -43,5 +46,8 @@ def check_connection(name, url):
4346
for website in websites.T.to_dict().values():
4447
print(website)
4548

46-
# %%
47-
websites.aggregate(["sum"])
49+
# %% Use .agg() to aggregate over columns
50+
websites.agg(
51+
total_views=("total_views", "sum"),
52+
average_views=("total_views", "mean"),
53+
)

pandas-iterate-over-rows/products.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
else:
1414
cumulative_sum.append(income)
1515

16-
products.assign(cumulative_income=cumulative_sum)
16+
products = products.assign(cumulative_income=cumulative_sum)
1717

1818
# %% To get cumulative sum, instead of looping, you can create intermediate
1919
# columns and use .cumsum()

pandas-iterate-over-rows/take_sum.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,22 @@
22

33
import pandas as pd
44

5-
# %%
5+
# %% Read demo data
66

77
websites = pd.read_csv("resources/popular_websites.csv", index_col=0)
88

9-
# %% Best way: use the dedicated pandas method
9+
# %% Best way: use the dedicated pandas .sum() method
1010

1111
websites["total_views"].sum()
1212

13-
# %% List comprehension
13+
# %% List comprehension with .itertuples()
1414

1515
sum(website.total_views for website in websites.itertuples())
1616

17-
# %% itertuples()
17+
# %% Manual loop with .itertuples()
1818

1919
total = 0
2020
for website in websites.itertuples():
2121
total += website.total_views
2222

2323
total
24-
25-
# %%

pandas-iterate-over-rows/take_sum_perfplot.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,6 @@
22
import perfplot
33

44

5-
def pandas_sum(websites):
6-
return websites["total_views"].sum()
7-
8-
95
def loop_sum(websites):
106
total = 0
117
for row in websites.itertuples():
@@ -17,6 +13,10 @@ def python_sum(websites):
1713
return sum(row.total_views for row in websites.itertuples())
1814

1915

16+
def pandas_sum(websites):
17+
return websites["total_views"].sum()
18+
19+
2020
def get_websites(n):
2121
websites = pd.read_csv("resources/popular_websites.csv", index_col=0)
2222
if n < len(websites):
@@ -29,9 +29,9 @@ def get_websites(n):
2929
plot = perfplot.bench(
3030
n_range=[2**i for i in range(17)],
3131
setup=get_websites,
32-
kernels=[pandas_sum, loop_sum, python_sum],
33-
labels=["pandas sum", "loop sum", "python sum"],
34-
title="Python vs Pandas sum",
32+
kernels=[loop_sum, python_sum, pandas_sum],
33+
labels=["loop sum", "python sum", "pandas sum"],
34+
title="Python vs pandas sum",
3535
xlabel="Number of Rows",
3636
)
3737

0 commit comments

Comments
 (0)