|
1 | 1 | import pandas as pd |
2 | 2 |
|
| 3 | +NUM_CLASSES = 7 |
| 4 | +TOP_SCORE = NUM_CLASSES |
| 5 | +FREEBIES = 1 |
| 6 | + |
3 | 7 | file_path = ( |
4 | | - "/content/attendance_reports_attendance-264e4d14-1765-4396-b311-4d927b59566d.csv" |
| 8 | + "~/Downloads/attendance_reports_attendance-264e4d14-1765-4396-b311-4d927b59566d.csv" |
5 | 9 | ) |
6 | | -df = pd.read_csv(file_path) |
7 | | - |
8 | | -df.columns = df.columns.str.strip() |
9 | | -df = df.rename( |
10 | | - columns={ |
11 | | - "Course ID": "Section Name", |
12 | | - "Student ID": "Student Name", |
13 | | - "Class Date": "Date", |
14 | | - "Attendance": "Status", |
15 | | - } |
| 10 | +entries = pd.read_csv( |
| 11 | + file_path, |
| 12 | + index_col=False, |
| 13 | + usecols=[ |
| 14 | + "Section Name", |
| 15 | + "Student Name", |
| 16 | + "Student ID", |
| 17 | + "Class Date", |
| 18 | + "Attendance", |
| 19 | + ], |
| 20 | + parse_dates=["Class Date"], |
16 | 21 | ) |
17 | 22 |
|
18 | | -df = df[["Section Name", "Student Name", "Date", "Status"]] |
19 | | -df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x) |
20 | | -df = df.reset_index(drop=True) |
21 | | -df["Date"] = pd.to_datetime(df["Date"], format="%m/%d/%y", errors="coerce") |
22 | | -df = df.loc[:, ~df.columns.duplicated()] |
23 | | - |
24 | | -total_classes = df["Date"].nunique() |
25 | | - |
26 | | - |
27 | | -attendance_counts = ( |
28 | | - df.groupby(["Student Name", "Date"])["Status"] |
29 | | - .apply(lambda x: (x == "present").sum()) |
30 | | - .reset_index() |
| 23 | +# pull the section number out |
| 24 | +entries["Section"] = ( |
| 25 | + entries["Section Name"].str.extract(r"INAFU6504_(\d{3})_").astype(int) |
31 | 26 | ) |
32 | 27 |
|
33 | | -total_attended = attendance_counts.groupby("Student Name")["Status"].sum().reset_index() |
34 | | -total_attended.columns = ["Student Name", "Total Attended"] |
| 28 | +# TODO deal with students who switch sections |
35 | 29 |
|
36 | | -students_all_present = total_attended[ |
37 | | - total_attended["Total Attended"] == total_classes |
38 | | -]["Student Name"].tolist() |
39 | | -students_missed_one = total_attended[ |
40 | | - total_attended["Total Attended"] == total_classes - 1 |
41 | | -]["Student Name"].tolist() |
| 30 | +recording_counts = entries.groupby(["Student ID", "Student Name"]).size() |
| 31 | +print("Students missing entries:\n") |
| 32 | +print(recording_counts[recording_counts < NUM_CLASSES]) |
42 | 33 |
|
43 | | -filtered_students = pd.DataFrame( |
44 | | - { |
45 | | - "Student Name": students_all_present + students_missed_one, |
46 | | - "Attendance Status": ["All Present"] * len(students_all_present) |
47 | | - + ["Missed One"] * len(students_missed_one), |
48 | | - } |
49 | | -) |
| 34 | +total_classes = entries["Class Date"].nunique() |
| 35 | +assert total_classes == NUM_CLASSES |
50 | 36 |
|
51 | | -# students who missed more than one class |
52 | | -students_missed_more_than_one = total_attended[ |
53 | | - total_attended["Total Attended"] < total_classes - 1 |
54 | | -]["Student Name"].tolist() |
| 37 | +attended = entries[entries["Attendance"] == "present"] |
| 38 | +attendance_counts = attended.groupby(["Student ID", "Student Name"]).size() |
| 39 | +# print("\n-------------------\nAttendance counts:\n") |
| 40 | +# print(attendance_counts) |
55 | 41 |
|
56 | | -missed_more_than_one_df = pd.DataFrame( |
57 | | - { |
58 | | - "Student Name": students_missed_more_than_one, |
59 | | - "Attendance Status": ["Missed More Than One"] |
60 | | - * len(students_missed_more_than_one), |
61 | | - } |
62 | | -) |
| 42 | +# factor in the freebies |
| 43 | +scores = attendance_counts + FREEBIES |
| 44 | +scores[scores > TOP_SCORE] = TOP_SCORE |
| 45 | +# print(scores) |
63 | 46 |
|
64 | | -pd.set_option("display.max_rows", None) |
65 | | -pd.set_option("display.max_columns", None) |
66 | | -pd.set_option("display.expand_frame_repr", False) |
| 47 | +# TODO write to CSV |
| 48 | +# https://community.canvaslms.com/t5/Instructor-Guide/How-do-I-import-grades-in-the-Gradebook/ta-p/807 |
67 | 49 |
|
68 | | -print("Students who attended all classes or missed only one:") |
69 | | -print(filtered_students) |
70 | | -print("\nStudents who missed more than one class:") |
71 | | -print(missed_more_than_one_df) |
| 50 | +lowered_scores = scores[scores < TOP_SCORE] |
| 51 | +print( |
| 52 | + f"\n-------------------\nScores for students who missed more than {FREEBIES} class(es):\n" |
| 53 | +) |
| 54 | +print(lowered_scores.sort_values()) |
0 commit comments