Skip to content

Commit 4124962

Browse files
authored
Merge pull request #109 from realpython/pandas-gradebook
Add materials for Pandas Gradebook tutorial
2 parents f82dff3 + 1cd7f5d commit 4124962

20 files changed

+2201
-0
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""Calculate student grades by combining data from many sources.
2+
3+
Using Pandas, this script combines data from the:
4+
5+
* Roster
6+
* Homework & Exam grades
7+
* Quiz grades
8+
9+
to calculate final grades for a class.
10+
"""
11+
from pathlib import Path
12+
import pandas as pd
13+
14+
HERE = Path(__file__).parent
15+
DATA_FOLDER = HERE / "data"
16+
17+
# ----------------------
18+
# 01 - LOADING THE DATA
19+
# ----------------------
20+
21+
# Roster: only the section, e-mail address and NetID columns are read. NetID
# and e-mail are lower-cased on load and NetID becomes the index.
roster = pd.read_csv(
    DATA_FOLDER / "roster.csv",
    converters={"NetID": str.lower, "Email Address": str.lower},
    usecols=["Section", "Email Address", "NetID"],
    index_col="NetID",
)

# Homework and exam grades: every column whose name does not contain
# "Submission" is kept; the lower-cased SID becomes the index.
hw_exam_grades = pd.read_csv(
    DATA_FOLDER / "hw_exam_grades.csv",
    converters={"SID": str.lower, "Email Address": str.lower},
    usecols=lambda x: "Submission" not in x,
    index_col="SID",
)

# Quiz grades: one CSV per quiz. Each file contributes a single column named
# after the first two words of its file name ("quiz_1_grades" -> "Quiz 1"),
# indexed by lower-cased e-mail address.
# The paths are sorted because glob() yields them in arbitrary order, which
# would otherwise make the column order differ between runs/platforms. The
# frames are joined with one concat instead of growing an (initially empty)
# DataFrame inside the loop.
quiz_frames = []
for file_path in sorted(DATA_FOLDER.glob("quiz_*_grades.csv")):
    quiz_name = " ".join(file_path.stem.title().split("_")[:2])
    quiz = pd.read_csv(
        file_path,
        converters={"Email": str.lower},
        index_col=["Email"],
        usecols=["Email", "Grade"],
    ).rename(columns={"Grade": quiz_name})
    quiz_frames.append(quiz)

# With no quiz files the result stays an empty DataFrame, as before
# (pd.concat raises on an empty list).
quiz_grades = pd.concat(quiz_frames, axis=1) if quiz_frames else pd.DataFrame()
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""Calculate student grades by combining data from many sources.
2+
3+
Using Pandas, this script combines data from the:
4+
5+
* Roster
6+
* Homework & Exam grades
7+
* Quiz grades
8+
9+
to calculate final grades for a class.
10+
"""
11+
from pathlib import Path
12+
import pandas as pd
13+
14+
HERE = Path(__file__).parent
15+
DATA_FOLDER = HERE / "data"
16+
17+
# ----------------------
18+
# 01 - LOADING THE DATA
19+
# ----------------------
20+
21+
# Roster: only the section, e-mail address and NetID columns are read. NetID
# and e-mail are lower-cased on load and NetID becomes the index.
roster = pd.read_csv(
    DATA_FOLDER / "roster.csv",
    converters={"NetID": str.lower, "Email Address": str.lower},
    usecols=["Section", "Email Address", "NetID"],
    index_col="NetID",
)

# Homework and exam grades: every column whose name does not contain
# "Submission" is kept; the lower-cased SID becomes the index.
hw_exam_grades = pd.read_csv(
    DATA_FOLDER / "hw_exam_grades.csv",
    converters={"SID": str.lower, "Email Address": str.lower},
    usecols=lambda x: "Submission" not in x,
    index_col="SID",
)

# Quiz grades: one CSV per quiz. Each file contributes a single column named
# after the first two words of its file name ("quiz_1_grades" -> "Quiz 1"),
# indexed by lower-cased e-mail address.
# The paths are sorted because glob() yields them in arbitrary order, which
# would otherwise make the column order differ between runs/platforms. The
# frames are joined with one concat instead of growing an (initially empty)
# DataFrame inside the loop.
quiz_frames = []
for file_path in sorted(DATA_FOLDER.glob("quiz_*_grades.csv")):
    quiz_name = " ".join(file_path.stem.title().split("_")[:2])
    quiz = pd.read_csv(
        file_path,
        converters={"Email": str.lower},
        index_col=["Email"],
        usecols=["Email", "Grade"],
    ).rename(columns={"Grade": quiz_name})
    quiz_frames.append(quiz)

# With no quiz files the result stays an empty DataFrame, as before
# (pd.concat raises on an empty list).
quiz_grades = pd.concat(quiz_frames, axis=1) if quiz_frames else pd.DataFrame()
45+
46+
# ------------------------
47+
# 02 - MERGING DATAFRAMES
48+
# ------------------------
49+
50+
# Build the combined table in one chain:
#   1. join the roster and the homework/exam grades on their indexes,
#   2. attach the quiz grades by matching the "Email Address" column against
#      the quiz table's index,
#   3. replace every remaining missing value with 0.
final_data = (
    roster.merge(hw_exam_grades, left_index=True, right_index=True)
    .merge(quiz_grades, left_on="Email Address", right_index=True)
    .fillna(0)
)
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
"""Calculate student grades by combining data from many sources.
2+
3+
Using Pandas, this script combines data from the:
4+
5+
* Roster
6+
* Homework & Exam grades
7+
* Quiz grades
8+
9+
to calculate final grades for a class.
10+
"""
11+
from pathlib import Path
12+
import pandas as pd
13+
import numpy as np
14+
15+
HERE = Path(__file__).parent
16+
DATA_FOLDER = HERE / "data"
17+
18+
# ----------------------
19+
# 01 - LOADING THE DATA
20+
# ----------------------
21+
22+
# Roster: only the section, e-mail address and NetID columns are read. NetID
# and e-mail are lower-cased on load and NetID becomes the index.
roster = pd.read_csv(
    DATA_FOLDER / "roster.csv",
    converters={"NetID": str.lower, "Email Address": str.lower},
    usecols=["Section", "Email Address", "NetID"],
    index_col="NetID",
)

# Homework and exam grades: every column whose name does not contain
# "Submission" is kept; the lower-cased SID becomes the index.
hw_exam_grades = pd.read_csv(
    DATA_FOLDER / "hw_exam_grades.csv",
    converters={"SID": str.lower, "Email Address": str.lower},
    usecols=lambda x: "Submission" not in x,
    index_col="SID",
)

# Quiz grades: one CSV per quiz. Each file contributes a single column named
# after the first two words of its file name ("quiz_1_grades" -> "Quiz 1"),
# indexed by lower-cased e-mail address.
# The paths are sorted because glob() yields them in arbitrary order, which
# would otherwise make the column order differ between runs/platforms. The
# frames are joined with one concat instead of growing an (initially empty)
# DataFrame inside the loop.
quiz_frames = []
for file_path in sorted(DATA_FOLDER.glob("quiz_*_grades.csv")):
    quiz_name = " ".join(file_path.stem.title().split("_")[:2])
    quiz = pd.read_csv(
        file_path,
        converters={"Email": str.lower},
        index_col=["Email"],
        usecols=["Email", "Grade"],
    ).rename(columns={"Grade": quiz_name})
    quiz_frames.append(quiz)

# With no quiz files the result stays an empty DataFrame, as before
# (pd.concat raises on an empty list).
quiz_grades = pd.concat(quiz_frames, axis=1) if quiz_frames else pd.DataFrame()
46+
47+
# ------------------------
48+
# 02 - MERGING DATAFRAMES
49+
# ------------------------
50+
51+
# Build the combined table in one chain:
#   1. join the roster and the homework/exam grades on their indexes,
#   2. attach the quiz grades by matching the "Email Address" column against
#      the quiz table's index,
#   3. replace every remaining missing value with 0.
final_data = (
    roster.merge(hw_exam_grades, left_index=True, right_index=True)
    .merge(quiz_grades, left_on="Email Address", right_index=True)
    .fillna(0)
)
58+
59+
# ------------------------
60+
# 03 - CALCULATING GRADES
61+
# ------------------------
62+
63+
# Exams: each exam score is the points earned divided by that exam's
# "Max Points" column.
n_exams = 3
for n in range(1, n_exams + 1):
    points_earned = final_data[f"Exam {n}"]
    points_possible = final_data[f"Exam {n} - Max Points"]
    final_data[f"Exam {n} Score"] = points_earned.div(points_possible)

# Homework: split the columns into points earned ("Homework N") and points
# available ("Homework N - ...").
homework_scores = final_data.filter(
    regex=r"^Homework \d\d?$", axis="columns"
)
homework_max_points = final_data.filter(
    regex=r"^Homework \d\d? -", axis="columns"
)

# First measure: total points earned over total points available.
sum_of_hw_scores = homework_scores.sum(axis="columns")
sum_of_hw_max = homework_max_points.sum(axis="columns")
final_data["Total Homework"] = sum_of_hw_scores.div(sum_of_hw_max)

# Second measure: the per-assignment fractions, summed and divided by the
# number of homework columns. The max-points frame is relabelled with the
# score column names so the element-wise division lines up column by column.
hw_max_renamed = homework_max_points.set_axis(
    homework_scores.columns, axis="columns"
)
average_hw_scores = homework_scores.div(hw_max_renamed).sum(axis="columns")
final_data["Average Homework"] = average_hw_scores / len(
    homework_scores.columns
)

# The homework score is the larger of the two measures for each student.
hw_measures = final_data[["Total Homework", "Average Homework"]]
final_data["Homework Score"] = hw_measures.max(axis="columns")
83+
84+
# Quiz columns are the ones named "Quiz <digit>". The maximum points for each
# quiz are hard-coded here.
quiz_scores = final_data.filter(regex=r"^Quiz \d$", axis=1)
quiz_max_points = pd.Series(
    {"Quiz 1": 11, "Quiz 2": 15, "Quiz 3": 17, "Quiz 4": 14, "Quiz 5": 12}
)

# First measure: total quiz points earned over total quiz points available.
# This previously divided the *homework* sums, which made "Total Quizzes" a
# copy of "Total Homework" and left the two quiz sums unused.
sum_of_quiz_scores = quiz_scores.sum(axis=1)
sum_of_quiz_max = quiz_max_points.sum()
final_data["Total Quizzes"] = sum_of_quiz_scores / sum_of_quiz_max

# Second measure: the per-quiz fractions, summed and divided by the number of
# quiz columns. Dividing the frame by the Series matches each quiz column to
# its maximum by label.
average_quiz_scores = (quiz_scores / quiz_max_points).sum(axis=1)
final_data["Average Quizzes"] = average_quiz_scores / quiz_scores.shape[1]

# The quiz score is the larger of the two measures for each student.
final_data["Quiz Score"] = final_data[
    ["Total Quizzes", "Average Quizzes"]
].max(axis=1)
99+
100+
# Share of the final score contributed by each component column.
weightings = pd.Series(
    {
        "Exam 1 Score": 0.05,
        "Exam 2 Score": 0.1,
        "Exam 3 Score": 0.15,
        "Quiz Score": 0.30,
        "Homework Score": 0.4,
    }
)

# Scale every component column by its weight (matched by column label) and
# add the weighted components up per student.
weighted_components = final_data[weightings.index].mul(weightings)
final_data["Final Score"] = weighted_components.sum(axis="columns")

# Express the final score out of 100 and round it up to a whole number.
final_data["Ceiling Score"] = np.ceil(final_data["Final Score"] * 100)
114+
115+
# Lowest numerical grade that earns each letter.
grades = {
    90: "A",
    80: "B",
    70: "C",
    60: "D",
    0: "F",
}


def grade_mapping(value):
    """Map numerical grade to letter grade.

    Args:
        value: Numerical grade on the same scale as the keys of ``grades``.

    Returns:
        The letter belonging to the highest threshold in ``grades`` that
        ``value`` meets or exceeds, or ``None`` when it meets none of them
        (for example a negative or NaN value).
    """
    # Walk the thresholds from highest to lowest explicitly instead of
    # relying on the order in which the ``grades`` literal is written.
    for threshold in sorted(grades, reverse=True):
        if value >= threshold:
            return grades[threshold]
    return None
129+
130+
131+
# Convert each ceiling score to its letter, then store the letters as an
# ordered categorical whose categories follow the order of ``grades``.
ceiling_scores = final_data["Ceiling Score"]
letter_grades = ceiling_scores.map(grade_mapping)
grade_dtype = pd.CategoricalDtype(
    categories=list(grades.values()), ordered=True
)
final_data["Final Grade"] = pd.Categorical(letter_grades, dtype=grade_dtype)
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
"""Calculate student grades by combining data from many sources.
2+
3+
Using Pandas, this script combines data from the:
4+
5+
* Roster
6+
* Homework & Exam grades
7+
* Quiz grades
8+
9+
to calculate final grades for a class.
10+
"""
11+
from pathlib import Path
12+
import pandas as pd
13+
import numpy as np
14+
15+
HERE = Path(__file__).parent
16+
DATA_FOLDER = HERE / "data"
17+
18+
# ----------------------
19+
# 01 - LOADING THE DATA
20+
# ----------------------
21+
22+
# Roster: only the section, e-mail address and NetID columns are read. NetID
# and e-mail are lower-cased on load and NetID becomes the index.
roster = pd.read_csv(
    DATA_FOLDER / "roster.csv",
    converters={"NetID": str.lower, "Email Address": str.lower},
    usecols=["Section", "Email Address", "NetID"],
    index_col="NetID",
)

# Homework and exam grades: every column whose name does not contain
# "Submission" is kept; the lower-cased SID becomes the index.
hw_exam_grades = pd.read_csv(
    DATA_FOLDER / "hw_exam_grades.csv",
    converters={"SID": str.lower, "Email Address": str.lower},
    usecols=lambda x: "Submission" not in x,
    index_col="SID",
)

# Quiz grades: one CSV per quiz. Each file contributes a single column named
# after the first two words of its file name ("quiz_1_grades" -> "Quiz 1"),
# indexed by lower-cased e-mail address.
# The paths are sorted because glob() yields them in arbitrary order, which
# would otherwise make the column order differ between runs/platforms. The
# frames are joined with one concat instead of growing an (initially empty)
# DataFrame inside the loop.
quiz_frames = []
for file_path in sorted(DATA_FOLDER.glob("quiz_*_grades.csv")):
    quiz_name = " ".join(file_path.stem.title().split("_")[:2])
    quiz = pd.read_csv(
        file_path,
        converters={"Email": str.lower},
        index_col=["Email"],
        usecols=["Email", "Grade"],
    ).rename(columns={"Grade": quiz_name})
    quiz_frames.append(quiz)

# With no quiz files the result stays an empty DataFrame, as before
# (pd.concat raises on an empty list).
quiz_grades = pd.concat(quiz_frames, axis=1) if quiz_frames else pd.DataFrame()
46+
47+
# ------------------------
48+
# 02 - MERGING DATAFRAMES
49+
# ------------------------
50+
51+
# Build the combined table in one chain:
#   1. join the roster and the homework/exam grades on their indexes,
#   2. attach the quiz grades by matching the "Email Address" column against
#      the quiz table's index,
#   3. replace every remaining missing value with 0.
final_data = (
    roster.merge(hw_exam_grades, left_index=True, right_index=True)
    .merge(quiz_grades, left_on="Email Address", right_index=True)
    .fillna(0)
)
58+
59+
# ------------------------
60+
# 03 - CALCULATING GRADES
61+
# ------------------------
62+
63+
# Exams: each exam score is the points earned divided by that exam's
# "Max Points" column.
n_exams = 3
for n in range(1, n_exams + 1):
    points_earned = final_data[f"Exam {n}"]
    points_possible = final_data[f"Exam {n} - Max Points"]
    final_data[f"Exam {n} Score"] = points_earned.div(points_possible)

# Homework: split the columns into points earned ("Homework N") and points
# available ("Homework N - ...").
homework_scores = final_data.filter(
    regex=r"^Homework \d\d?$", axis="columns"
)
homework_max_points = final_data.filter(
    regex=r"^Homework \d\d? -", axis="columns"
)

# First measure: total points earned over total points available.
sum_of_hw_scores = homework_scores.sum(axis="columns")
sum_of_hw_max = homework_max_points.sum(axis="columns")
final_data["Total Homework"] = sum_of_hw_scores.div(sum_of_hw_max)

# Second measure: the per-assignment fractions, summed and divided by the
# number of homework columns. The max-points frame is relabelled with the
# score column names so the element-wise division lines up column by column.
hw_max_renamed = homework_max_points.set_axis(
    homework_scores.columns, axis="columns"
)
average_hw_scores = homework_scores.div(hw_max_renamed).sum(axis="columns")
final_data["Average Homework"] = average_hw_scores / len(
    homework_scores.columns
)

# The homework score is the larger of the two measures for each student.
hw_measures = final_data[["Total Homework", "Average Homework"]]
final_data["Homework Score"] = hw_measures.max(axis="columns")
83+
84+
# Quiz columns are the ones named "Quiz <digit>". The maximum points for each
# quiz are hard-coded here.
quiz_scores = final_data.filter(regex=r"^Quiz \d$", axis=1)
quiz_max_points = pd.Series(
    {"Quiz 1": 11, "Quiz 2": 15, "Quiz 3": 17, "Quiz 4": 14, "Quiz 5": 12}
)

# First measure: total quiz points earned over total quiz points available.
# This previously divided the *homework* sums, which made "Total Quizzes" a
# copy of "Total Homework" and left the two quiz sums unused.
sum_of_quiz_scores = quiz_scores.sum(axis=1)
sum_of_quiz_max = quiz_max_points.sum()
final_data["Total Quizzes"] = sum_of_quiz_scores / sum_of_quiz_max

# Second measure: the per-quiz fractions, summed and divided by the number of
# quiz columns. Dividing the frame by the Series matches each quiz column to
# its maximum by label.
average_quiz_scores = (quiz_scores / quiz_max_points).sum(axis=1)
final_data["Average Quizzes"] = average_quiz_scores / quiz_scores.shape[1]

# The quiz score is the larger of the two measures for each student.
final_data["Quiz Score"] = final_data[
    ["Total Quizzes", "Average Quizzes"]
].max(axis=1)
99+
100+
# Share of the final score contributed by each component column.
weightings = pd.Series(
    {
        "Exam 1 Score": 0.05,
        "Exam 2 Score": 0.1,
        "Exam 3 Score": 0.15,
        "Quiz Score": 0.30,
        "Homework Score": 0.4,
    }
)

# Scale every component column by its weight (matched by column label) and
# add the weighted components up per student.
weighted_components = final_data[weightings.index].mul(weightings)
final_data["Final Score"] = weighted_components.sum(axis="columns")

# Express the final score out of 100 and round it up to a whole number.
final_data["Ceiling Score"] = np.ceil(final_data["Final Score"] * 100)
114+
115+
# Lowest numerical grade that earns each letter.
grades = {
    90: "A",
    80: "B",
    70: "C",
    60: "D",
    0: "F",
}


def grade_mapping(value):
    """Map numerical grade to letter grade.

    Args:
        value: Numerical grade on the same scale as the keys of ``grades``.

    Returns:
        The letter belonging to the highest threshold in ``grades`` that
        ``value`` meets or exceeds, or ``None`` when it meets none of them
        (for example a negative or NaN value).
    """
    # Walk the thresholds from highest to lowest explicitly instead of
    # relying on the order in which the ``grades`` literal is written.
    for threshold in sorted(grades, reverse=True):
        if value >= threshold:
            return grades[threshold]
    return None
129+
130+
131+
# Convert each ceiling score to its letter, then store the letters as an
# ordered categorical whose categories follow the order of ``grades``.
ceiling_scores = final_data["Ceiling Score"]
letter_grades = ceiling_scores.map(grade_mapping)
grade_dtype = pd.CategoricalDtype(
    categories=list(grades.values()), ordered=True
)
final_data["Final Grade"] = pd.Categorical(letter_grades, dtype=grade_dtype)
135+
136+
# -----------------------
137+
# 04 - GROUPING THE DATA
138+
# -----------------------
139+
140+
# Split the combined table by section. For each section, report how many
# students it holds and where they are written, then write the rows sorted by
# last name and first name to that section's CSV file in the data folder.
for section, table in final_data.groupby("Section"):
    section_file = DATA_FOLDER / f"Section {section} Grades.csv"
    num_students = len(table)
    summary = (
        f"In Section {section} there are {num_students} students saved to "
        f"file {section_file}."
    )
    print(summary)
    sorted_table = table.sort_values(by=["Last Name", "First Name"])
    sorted_table.to_csv(section_file)

0 commit comments

Comments
 (0)