Skip to content

Commit df05537

Browse files
committed
First after TR
1 parent defe608 commit df05537

File tree

4 files changed

+5722
-0
lines changed

4 files changed

+5722
-0
lines changed

pivot_tables/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
These materials supplement the Real Python tutorial [How to Create a Pivot Table With pandas](https://realpython.com/how-to-pandas-pivot-table/).
2+
3+
The files available are:
4+
5+
- `code_downloads.ipynb` - Contains the code used in the tutorial.
6+
- `solutions.ipynb` - Contains solutions to each of the exercises.
7+
- `sales_data.csv` - Contains the source data used in the tutorial.

pivot_tables/code_downloads.ipynb

Lines changed: 334 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,334 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "58f1cebe-9313-47be-91a7-a292c721fa70",
6+
"metadata": {},
7+
"source": [
8+
"**Installing the libraries.**"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": null,
14+
"id": "043947ca-ebbd-4f3b-973b-02aa8064d407",
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"!pip install pandas pyarrow"
19+
]
20+
},
21+
{
22+
"cell_type": "markdown",
23+
"id": "809281a1-0122-4332-b39e-7603e11d1f62",
24+
"metadata": {},
25+
"source": [
26+
"**Reading the data.**"
27+
]
28+
},
29+
{
30+
"cell_type": "code",
31+
"execution_count": null,
32+
"id": "9d7828ae-7a2a-4beb-b92e-a47786e7eb2c",
33+
"metadata": {},
34+
"outputs": [],
35+
"source": [
36+
"import pandas as pd\n",
37+
"\n",
38+
"sales_data = pd.read_csv(\n",
39+
" \"sales_data.csv\",\n",
40+
" parse_dates=[\"order_date\"],\n",
41+
" dayfirst=True,\n",
42+
").convert_dtypes(dtype_backend=\"pyarrow\")\n",
43+
"\n",
44+
"sales_data.head(2)"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": null,
50+
"id": "239f06eb-db0b-45bb-9825-9192797bc55d",
51+
"metadata": {},
52+
"outputs": [],
53+
"source": [
54+
"sales_data.dtypes"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"id": "f2f57af3-d689-45d7-a017-3578895c5179",
61+
"metadata": {},
62+
"outputs": [],
63+
"source": [
64+
"sales_data.info()"
65+
]
66+
},
67+
{
68+
"cell_type": "markdown",
69+
"id": "6ef0b664-c295-41d2-89c2-33086caee992",
70+
"metadata": {},
71+
"source": [
72+
"**Creating your first pivot table.**"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"id": "df0d98f9-8c4e-4027-82d3-eca1b3e549f8",
79+
"metadata": {},
80+
"outputs": [],
81+
"source": [
82+
"pd.options.display.float_format = \"${:,.2f}\".format\n",
83+
"\n",
84+
"sales_data.pivot_table(\n",
85+
" values=\"sale_price\",\n",
86+
" index=\"sales_region\",\n",
87+
" columns=\"order_type\",\n",
88+
" aggfunc=\"sum\",\n",
89+
" margins=True,\n",
90+
" margins_name=\"Totals:\",\n",
91+
")"
92+
]
93+
},
94+
{
95+
"cell_type": "markdown",
96+
"id": "5e91d372-b055-4447-a8f8-fe07962867a6",
97+
"metadata": {},
98+
"source": [
99+
"**Including sub-columns within your pivot table.**"
100+
]
101+
},
102+
{
103+
"cell_type": "code",
104+
"execution_count": null,
105+
"id": "b888e088-1f2c-465a-a2bc-b8832c137e62",
106+
"metadata": {},
107+
"outputs": [],
108+
"source": [
109+
"import pandas as pd\n",
110+
"\n",
111+
"pd.options.display.float_format = \"${:,.2f}\".format\n",
112+
"\n",
113+
"sales_data.pivot_table(\n",
114+
" values=\"sale_price\",\n",
115+
" index=\"customer_state\",\n",
116+
" columns=[\"customer_type\", \"order_type\"],\n",
117+
" aggfunc=\"mean\",\n",
118+
")"
119+
]
120+
},
121+
{
122+
"cell_type": "markdown",
123+
"id": "c19c30fc-3253-446a-b2e4-77dd4cc53c36",
124+
"metadata": {},
125+
"source": [
126+
"**Calculating multiple values in your pivot table.**"
127+
]
128+
},
129+
{
130+
"cell_type": "code",
131+
"execution_count": null,
132+
"id": "a59d0173-87a2-4960-9ece-461820392363",
133+
"metadata": {},
134+
"outputs": [],
135+
"source": [
136+
"import pandas as pd\n",
137+
"\n",
138+
"pd.options.display.float_format = \"${:,.2f}\".format\n",
139+
"\n",
140+
"sales_data.pivot_table(\n",
141+
" index=[\"sales_region\", \"product_category\"],\n",
142+
" values=[\"sale_price\", \"quantity\"],\n",
143+
" aggfunc=\"sum\",\n",
144+
" fill_value=0,\n",
145+
")"
146+
]
147+
},
148+
{
149+
"cell_type": "code",
150+
"execution_count": null,
151+
"id": "138491eb-7082-47dd-a48d-993ad3219214",
152+
"metadata": {},
153+
"outputs": [],
154+
"source": [
155+
"# This example ensures column order matches the order in the values parameter.\n",
156+
"\n",
157+
"import pandas as pd\n",
158+
"\n",
159+
"pd.options.display.float_format = \"${:,.2f}\".format\n",
160+
"\n",
161+
"sales_data.pivot_table(\n",
162+
" index=[\"sales_region\", \"product_category\"],\n",
163+
" values=[\"sale_price\", \"quantity\"],\n",
164+
" aggfunc=\"sum\",\n",
165+
" fill_value=0,\n",
166+
").loc[:, [\"sale_price\", \"quantity\"]]"
167+
]
168+
},
169+
{
170+
"cell_type": "markdown",
171+
"id": "c323235b-3479-469e-b6cb-ee08615897d5",
172+
"metadata": {},
173+
"source": [
174+
"**Performing more advanced aggregations.**"
175+
]
176+
},
177+
{
178+
"cell_type": "code",
179+
"execution_count": null,
180+
"id": "d2a2979b-694c-4880-aea7-9a1e8cf4573d",
181+
"metadata": {},
182+
"outputs": [],
183+
"source": [
184+
"pd.options.display.float_format = \"${:,.2f}\".format\n",
185+
"\n",
186+
"sales_data.pivot_table(\n",
187+
" values=[\"sale_price\"],\n",
188+
" index=\"product_category\",\n",
189+
" columns=\"customer_type\",\n",
190+
" aggfunc=[\"max\", \"min\"],\n",
191+
")"
192+
]
193+
},
194+
{
195+
"cell_type": "code",
196+
"execution_count": null,
197+
"id": "45b1a6d7-1b72-458f-a25d-a3c1c7109a46",
198+
"metadata": {},
199+
"outputs": [],
200+
"source": [
201+
"pd.options.display.float_format = \"${:,.2f}\".format\n",
202+
"\n",
203+
"sales_data.pivot_table(\n",
204+
" values=[\"sale_price\", \"quantity\"],\n",
205+
" index=[\"product_category\"],\n",
206+
" columns=\"customer_type\",\n",
207+
" aggfunc={\"sale_price\": \"mean\", \"quantity\": \"max\"},\n",
208+
")"
209+
]
210+
},
211+
{
212+
"cell_type": "code",
213+
"execution_count": null,
214+
"id": "93db010a-71c9-49d0-babb-ce99e0d8f801",
215+
"metadata": {},
216+
"outputs": [],
217+
"source": [
218+
"sales_data.pivot_table(\n",
219+
" values=\"employee_id\", index=\"sales_region\", aggfunc=\"count\"\n",
220+
")"
221+
]
222+
},
223+
{
224+
"cell_type": "code",
225+
"execution_count": null,
226+
"id": "57f47424-fddf-4ab7-bff4-b3f973cb8565",
227+
"metadata": {},
228+
"outputs": [],
229+
"source": [
230+
"def find_unique(s):\n",
231+
" return len(s.unique())\n",
232+
"\n",
233+
"\n",
234+
"sales_data.pivot_table(\n",
235+
" values=\"employee_id\", index=[\"sales_region\"], aggfunc=find_unique\n",
236+
")"
237+
]
238+
},
239+
{
240+
"cell_type": "markdown",
241+
"id": "a17481c1-eacb-4c6f-9b6d-889ade86f62f",
242+
"metadata": {},
243+
"source": [
244+
"**Using `.groupby()` and `crosstab()` for Aggregation**"
245+
]
246+
},
247+
{
248+
"cell_type": "code",
249+
"execution_count": null,
250+
"id": "2a483cf2-1bf0-48a6-b547-e537df15ceb9",
251+
"metadata": {},
252+
"outputs": [],
253+
"source": [
254+
"sales_data.pivot_table(\n",
255+
" values=\"sale_price\",\n",
256+
" index=\"product_category\",\n",
257+
" aggfunc=[\"min\", \"mean\", \"max\", \"std\"],\n",
258+
")"
259+
]
260+
},
261+
{
262+
"cell_type": "code",
263+
"execution_count": null,
264+
"id": "b0031b51-22ab-4c64-bfd2-81de360db501",
265+
"metadata": {},
266+
"outputs": [],
267+
"source": [
268+
"(\n",
269+
" sales_data.groupby(\"product_category\").agg(\n",
270+
" low_price=(\"sale_price\", \"min\"),\n",
271+
" average_price=(\"sale_price\", \"mean\"),\n",
272+
" high_price=(\"sale_price\", \"max\"),\n",
273+
" standard_deviation=(\"sale_price\", \"std\"),\n",
274+
" )\n",
275+
")"
276+
]
277+
},
278+
{
279+
"cell_type": "code",
280+
"execution_count": null,
281+
"id": "ef30559b-8be1-4eee-b644-5cc66d12f112",
282+
"metadata": {},
283+
"outputs": [],
284+
"source": [
285+
"pd.crosstab(\n",
286+
" index=sales_data.job_title,\n",
287+
" columns=sales_data.sales_region,\n",
288+
" margins=True,\n",
289+
" margins_name=\"Totals:\",\n",
290+
")"
291+
]
292+
},
293+
{
294+
"cell_type": "code",
295+
"execution_count": null,
296+
"id": "6f47c7ac-e889-412e-98c9-8742c7b9d4e1",
297+
"metadata": {},
298+
"outputs": [],
299+
"source": [
300+
"(\n",
301+
" pd.crosstab(\n",
302+
" index=sales_data.job_title,\n",
303+
" columns=sales_data.sales_region,\n",
304+
" margins=True,\n",
305+
" margins_name=\"Totals:\",\n",
306+
" normalize=True,\n",
307+
" )\n",
308+
" * 100\n",
309+
").map(\"{:.2f}%\".format)"
310+
]
311+
}
312+
],
313+
"metadata": {
314+
"kernelspec": {
315+
"display_name": "Python 3 (ipykernel)",
316+
"language": "python",
317+
"name": "python3"
318+
},
319+
"language_info": {
320+
"codemirror_mode": {
321+
"name": "ipython",
322+
"version": 3
323+
},
324+
"file_extension": ".py",
325+
"mimetype": "text/x-python",
326+
"name": "python",
327+
"nbconvert_exporter": "python",
328+
"pygments_lexer": "ipython3",
329+
"version": "3.12.0"
330+
}
331+
},
332+
"nbformat": 4,
333+
"nbformat_minor": 5
334+
}

0 commit comments

Comments
 (0)