Skip to content

Commit 0d8cdf9

Browse files
authored
Add unit tests for api-tools/parser/parser.go (#62)
* Update sectionParser.go remove print * Update sectionParser.go unused import * parser tests are basically done. adding go docs is a work in progress * finished docs, t.parrallel spam * update godoc * re-added TestGetCourse * added TestGetInfoRows * added panic on missing fields
1 parent 61ae968 commit 0d8cdf9

36 files changed

+2316
-522
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ deploy_log.sh
4242
.vscode/
4343
.firebase/
4444
/api-tools
45+
/qodana.yaml
4546

4647
# output data and logs
4748
data/

parser/courseParser.go

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,29 @@ import (
1313
)
1414

1515
var (
16-
coursePrefixRexp *regexp.Regexp = utils.Regexpf(`^%s`, utils.R_SUBJ_COURSE_CAP)
17-
contactRegexp *regexp.Regexp = regexp.MustCompile(`\(([0-9]+)-([0-9]+)\)\s+([SUFY]+)`)
16+
// coursePrefixRegexp matches the course prefix and number (e.g., "CS 1337").
17+
coursePrefixRegexp = utils.Regexpf(`^%s`, utils.R_SUBJ_COURSE_CAP)
18+
19+
// contactRegexp matches the contact hours and offering frequency from the course description
20+
// (e.g. "(12-34) SUS")
21+
contactRegexp = regexp.MustCompile(`\(([0-9]+)-([0-9]+)\)\s+([SUFY]+)`)
1822
)
1923

20-
func parseCourse(courseNum string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
24+
// parseCourse returns a pointer to the course specified by the
25+
// provided information. If the associated course is not found in
26+
// Courses, it will run getCourse and add the result to Courses.
27+
func parseCourse(internalCourseNumber string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
2128
// Courses are internally keyed by their internal course number and the catalog year they're part of
2229
catalogYear := getCatalogYear(session)
23-
courseKey := courseNum + catalogYear
30+
courseKey := internalCourseNumber + catalogYear
2431

2532
// Don't recreate the course if it already exists
2633
course, courseExists := Courses[courseKey]
2734
if courseExists {
2835
return course
2936
}
3037

31-
course = getCourse(courseNum, session, rowInfo, classInfo)
38+
course = getCourse(internalCourseNumber, session, rowInfo, classInfo)
3239

3340
// Get closure for parsing course requisites (god help me)
3441
enrollmentReqs, hasEnrollmentReqs := rowInfo["Enrollment Reqs:"]
@@ -39,8 +46,10 @@ func parseCourse(courseNum string, session schema.AcademicSession, rowInfo map[s
3946
return course
4047
}
4148

42-
// no global state is changed
43-
func getCourse(courseNum string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
49+
// getCourse extracts course details from the provided information and creates a schema.Course object.
50+
// This function does not modify any global state.
51+
// Returns a pointer to the newly created schema.Course object.
52+
func getCourse(internalCourseNumber string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
4453
CoursePrefix, CourseNumber := getPrefixAndNumber(classInfo)
4554

4655
course := schema.Course{
@@ -54,7 +63,7 @@ func getCourse(courseNum string, session schema.AcademicSession, rowInfo map[str
5463
Class_level: classInfo["Class Level:"],
5564
Activity_type: classInfo["Activity Type:"],
5665
Grading: classInfo["Grading:"],
57-
Internal_course_number: courseNum,
66+
Internal_course_number: internalCourseNumber,
5867
Catalog_year: getCatalogYear(session),
5968
}
6069

@@ -70,6 +79,10 @@ func getCourse(courseNum string, session schema.AcademicSession, rowInfo map[str
7079
return &course
7180
}
7281

82+
// getCatalogYear determines the catalog year from the academic session information.
83+
// It assumes the session name starts with a 2-digit year and a semester character ('F', 'S', 'U').
84+
// Spring (S) and Summer (U) sessions are associated with the previous calendar year.
85+
// (e.g., 20F = 20, 20S = 19)
7386
func getCatalogYear(session schema.AcademicSession) string {
7487
sessionYear, err := strconv.Atoi(session.Name[0:2])
7588
if err != nil {
@@ -79,22 +92,24 @@ func getCatalogYear(session schema.AcademicSession) string {
7992
switch sessionSemester {
8093
case 'F':
8194
return strconv.Itoa(sessionYear)
82-
case 'S':
83-
return strconv.Itoa(sessionYear - 1)
84-
case 'U':
95+
case 'S', 'U':
8596
return strconv.Itoa(sessionYear - 1)
8697
default:
8798
panic(fmt.Errorf("encountered invalid session semester '%c!'", sessionSemester))
8899
}
89100
}
90101

102+
// getPrefixAndNumber returns the 2nd and 3rd matched values from a coursePrefixRegexp on
103+
// `ClassInfo["Class Section:"]`. It expects ClassInfo to contain "Class Section:" key.
104+
// It panics if the key is missing or if the regexp does not match a prefix and number.
91105
func getPrefixAndNumber(classInfo map[string]string) (string, string) {
92106
if sectionId, ok := classInfo["Class Section:"]; ok {
93107
// Get subject prefix and course number by doing a regexp match on the section id
94-
matches := coursePrefixRexp.FindStringSubmatch(sectionId)
108+
matches := coursePrefixRegexp.FindStringSubmatch(sectionId)
95109
if len(matches) == 3 {
96110
return matches[1], matches[2]
97111
}
112+
panic("failed to course prefix and number")
98113
}
99-
return "", ""
114+
panic("could not find 'Class Section:' in ClassInfo")
100115
}

parser/courseParser_test.go

Lines changed: 83 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
11
package parser
22

33
import (
4+
"github.com/google/go-cmp/cmp"
5+
"github.com/google/go-cmp/cmp/cmpopts"
46
"testing"
57

68
"github.com/UTDNebula/nebula-api/api/schema"
7-
"github.com/google/go-cmp/cmp"
8-
"github.com/google/go-cmp/cmp/cmpopts"
99
)
1010

1111
func TestGetCourse(t *testing.T) {
12-
loadTestData(t)
12+
t.Parallel()
1313

14-
for name, testCase := range testDataCache {
14+
for name, testCase := range testData {
1515
t.Run(name, func(t *testing.T) {
1616
_, courseNum := getInternalClassAndCourseNum(testCase.ClassInfo)
1717
output := *getCourse(courseNum, testCase.Section.Academic_session, testCase.RowInfo, testCase.ClassInfo)
1818
expected := testCase.Course
1919

20-
diff := cmp.Diff(expected, output, cmpopts.IgnoreFields(schema.Course{}, "Id"))
20+
diff := cmp.Diff(expected, output, cmpopts.IgnoreFields(schema.Course{}, "Id", "Sections", "Enrollment_reqs", "Prerequisites"))
2121

2222
if diff != "" {
2323
t.Errorf("Failed (-expected +got)\n %s", diff)
@@ -28,97 +28,131 @@ func TestGetCourse(t *testing.T) {
2828
}
2929

3030
func TestGetCatalogYear(t *testing.T) {
31+
t.Parallel()
32+
3133
testCases := map[string]struct {
3234
Session schema.AcademicSession
3335
Expected string
36+
Panic bool
3437
}{
3538
"Case_001": {
36-
Session: schema.AcademicSession{
37-
Name: "25S",
38-
},
39+
Session: schema.AcademicSession{Name: "25S"},
3940
Expected: "24",
40-
}, "Case_002": {
41-
Session: schema.AcademicSession{
42-
Name: "25F",
43-
},
41+
},
42+
"Case_002": {
43+
Session: schema.AcademicSession{Name: "25F"},
4444
Expected: "25",
45-
}, "Case_003": {
46-
Session: schema.AcademicSession{
47-
Name: "22U",
48-
},
45+
},
46+
"Case_003": {
47+
Session: schema.AcademicSession{Name: "22U"},
4948
Expected: "21",
50-
}, "Case_004": {
51-
Session: schema.AcademicSession{
52-
Name: "20S",
53-
},
49+
},
50+
"Case_004": {
51+
Session: schema.AcademicSession{Name: "20S"},
5452
Expected: "19",
5553
},
54+
"Case_005": {
55+
Session: schema.AcademicSession{Name: "Garbage"},
56+
Panic: true,
57+
},
58+
"Case_006": {
59+
Session: schema.AcademicSession{Name: "20P"},
60+
Panic: true,
61+
},
5662
}
5763

58-
for name, tc := range testCases {
64+
for name, testCase := range testCases {
5965
t.Run(name, func(t *testing.T) {
60-
output := getCatalogYear(tc.Session)
66+
t.Parallel()
67+
68+
defer func() {
69+
// Test fails if we panic when we didn't want to or didn't when we did
70+
if rec := recover(); rec != nil {
71+
if !testCase.Panic {
72+
t.Errorf("unexpected panic for session %q: %v", testCase.Session.Name, rec)
73+
}
74+
} else {
75+
if testCase.Panic {
76+
t.Errorf("expected panic for session %q but got none", testCase.Session.Name)
77+
}
78+
}
79+
}()
6180

62-
if output != tc.Expected {
63-
t.Errorf("expected %s got %s", tc.Expected, output)
81+
// always call; the deferred recover above checks whether a panic was expected
82+
output := getCatalogYear(testCase.Session)
83+
if !testCase.Panic && output != testCase.Expected {
84+
t.Errorf("expected %q, got %q", testCase.Expected, output)
6485
}
6586
})
66-
6787
}
6888
}
6989

7090
func TestGetPrefixAndCourseNum(t *testing.T) {
91+
t.Parallel()
92+
7193
testCases := map[string]struct {
72-
classInfo map[string]string
73-
prefix string
74-
number string
94+
ClassInfo map[string]string
95+
Prefix string
96+
Number string
97+
Panic bool
7598
}{
7699
"Case_001": {
77-
classInfo: map[string]string{
100+
ClassInfo: map[string]string{
78101
"Class Section:": "ACCT2301.001.25S",
79102
},
80-
prefix: "ACCT",
81-
number: "2301",
103+
Prefix: "ACCT",
104+
Number: "2301",
82105
},
83106
"Case_002": {
84-
classInfo: map[string]string{
107+
ClassInfo: map[string]string{
85108
"Class Section:": "ENTP3301.002.24S",
86109
},
87-
prefix: "ENTP",
88-
number: "3301",
110+
Prefix: "ENTP",
111+
Number: "3301",
89112
},
90113
"Case_003": {
91-
classInfo: map[string]string{
114+
ClassInfo: map[string]string{
92115
"Class Section:": "Garbage In, Garbage out",
93116
},
94-
prefix: "",
95-
number: "",
117+
Panic: true,
96118
},
97119
"Case_004": {
98-
classInfo: map[string]string{
120+
ClassInfo: map[string]string{
99121
"Class Section:": "ENTP33S",
100122
},
101-
prefix: "",
102-
number: "",
123+
Panic: true,
103124
},
104125
"Case_005": {
105-
classInfo: map[string]string{
126+
ClassInfo: map[string]string{
106127
"Class Section:": "",
107128
},
108-
prefix: "",
109-
number: "",
129+
Panic: true,
110130
},
111131
}
112132

113133
for name, testCase := range testCases {
114134
t.Run(name, func(t *testing.T) {
115-
prefix, number := getPrefixAndNumber(testCase.classInfo)
135+
defer func() {
136+
if r := recover(); r != nil {
137+
if !testCase.Panic {
138+
t.Errorf("unexpected panic for input %q: %v", name, r)
139+
}
140+
} else {
141+
if testCase.Panic {
142+
t.Errorf("expected panic for input %q but none occurred", name)
143+
}
144+
}
145+
}()
116146

117-
if prefix != testCase.prefix {
118-
t.Errorf("expected %s got %s", testCase.prefix, prefix)
119-
}
120-
if number != testCase.number {
121-
t.Errorf("expected %s got %s", testCase.number, number)
147+
prefix, number := getPrefixAndNumber(testCase.ClassInfo)
148+
149+
if !testCase.Panic {
150+
if prefix != testCase.Prefix {
151+
t.Errorf("expected %q got %q", testCase.Prefix, prefix)
152+
}
153+
if number != testCase.Number {
154+
t.Errorf("expected %q got %q", testCase.Number, number)
155+
}
122156
}
123157
})
124158
}

parser/parser.go

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,25 +13,33 @@ import (
1313
"github.com/UTDNebula/nebula-api/api/schema"
1414
)
1515

16-
// Main dictionaries for mapping unique keys to the actual data
17-
var Sections = make(map[primitive.ObjectID]*schema.Section)
18-
var Courses = make(map[string]*schema.Course)
19-
var Professors = make(map[string]*schema.Professor)
16+
var (
17+
// Sections dictionary for mapping UUIDs to a *schema.Section
18+
Sections = make(map[primitive.ObjectID]*schema.Section)
2019

21-
// Auxilliary dictionaries for mapping the generated ObjectIDs to the keys used in the above maps, used for validation purposes
22-
var CourseIDMap = make(map[primitive.ObjectID]string)
23-
var ProfessorIDMap = make(map[primitive.ObjectID]string)
20+
// Courses dictionary for keys (Internal_course_number + Catalog_year) to a *schema.Course
21+
Courses = make(map[string]*schema.Course)
2422

25-
// Requisite parser closures associated with courses
26-
var ReqParsers = make(map[primitive.ObjectID]func())
23+
// Professors dictionary for keys (First_name + Last_name) to a *schema.Professor
24+
Professors = make(map[string]*schema.Professor)
2725

28-
// Grade mappings for section grade distributions, mapping is MAP[SEMESTER] -> MAP[SUBJECT + NUMBER + SECTION] -> GRADE DISTRIBUTION
29-
var GradeMap map[string]map[string][]int
26+
// CourseIDMap auxiliary dictionary for mapping generated ObjectIDs to the keys used in Courses
27+
CourseIDMap = make(map[primitive.ObjectID]string)
3028

31-
// Time location for dates (uses America/Chicago tz database zone for CDT which accounts for daylight saving)
32-
var timeLocation, timeError = time.LoadLocation("America/Chicago")
29+
// ProfessorIDMap auxiliary dictionary for mapping generated ObjectIDs to the keys used in Professors
30+
ProfessorIDMap = make(map[primitive.ObjectID]string)
3331

34-
// Externally exposed parse function
32+
// ReqParsers dictionary mapping course UUIDs to the func() that parses its Reqs
33+
ReqParsers = make(map[primitive.ObjectID]func())
34+
35+
// GradeMap mappings for section grade distributions, mapping is MAP[SEMESTER] -> MAP[SUBJECT + NUMBER + SECTION] -> GRADE DISTRIBUTION
36+
GradeMap map[string]map[string][]int
37+
38+
// timeLocation Time location for dates (uses America/Chicago tz database zone for CDT which accounts for daylight saving)
39+
timeLocation, timeError = time.LoadLocation("America/Chicago")
40+
)
41+
42+
// Parse is the externally exposed parse function.
3543
func Parse(inDir string, outDir string, csvPath string, skipValidation bool) {
3644

3745
// Panic if timeLocation didn't load properly
@@ -91,7 +99,9 @@ func Parse(inDir string, outDir string, csvPath string, skipValidation bool) {
9199
utils.WriteJSON(fmt.Sprintf("%s/professors.json", outDir), utils.GetMapValues(Professors))
92100
}
93101

94-
// Internal parse function
102+
// parse is an internal helper function that parses a single HTML file.
103+
// It opens the file, creates a goquery document, and calls parseSection to
104+
// extract section data.
95105
func parse(path string) {
96106

97107
utils.VPrintf("Parsing %s...", path)
@@ -109,14 +119,8 @@ func parse(path string) {
109119
panic(err)
110120
}
111121

112-
// Dictionary to hold the row data, keyed by row header
113-
rowInfo := getRowInfo(doc)
114-
// Dictionary to hold the class info, keyed by data label
115-
classInfo := getClassInfo(doc)
116-
117-
// Get the class and course num by splitting classInfo value
122+
parseSection(getRowInfo(doc), getClassInfo(doc))
118123

119-
parseSection(rowInfo, classInfo)
120124
utils.VPrint("Parsed!")
121125
}
122126

0 commit comments

Comments
 (0)