Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ deploy_log.sh
.vscode/
.firebase/
/api-tools
/qodana.yaml

# output data and logs
data/
Expand Down
41 changes: 28 additions & 13 deletions parser/courseParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,29 @@ import (
)

var (
coursePrefixRexp *regexp.Regexp = utils.Regexpf(`^%s`, utils.R_SUBJ_COURSE_CAP)
contactRegexp *regexp.Regexp = regexp.MustCompile(`\(([0-9]+)-([0-9]+)\)\s+([SUFY]+)`)
// coursePrefixRegexp matches the subject prefix and course number at the start of a section ID
// (e.g., "ACCT2301" in "ACCT2301.001.25S").
coursePrefixRegexp = utils.Regexpf(`^%s`, utils.R_SUBJ_COURSE_CAP)

// contactRegexp matches the contact hours and offering frequency from the course description
// (e.g. "(12-34) SUS")
contactRegexp = regexp.MustCompile(`\(([0-9]+)-([0-9]+)\)\s+([SUFY]+)`)
)

func parseCourse(courseNum string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
// parseCourse returns a pointer to the course specified by the
// provided information. If the associated course is not found in
// Courses, it will run getCourse and add the result to Courses.
func parseCourse(internalCourseNumber string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
// Courses are internally keyed by their internal course number and the catalog year they're part of
catalogYear := getCatalogYear(session)
courseKey := courseNum + catalogYear
courseKey := internalCourseNumber + catalogYear

// Don't recreate the course if it already exists
course, courseExists := Courses[courseKey]
if courseExists {
return course
}

course = getCourse(courseNum, session, rowInfo, classInfo)
course = getCourse(internalCourseNumber, session, rowInfo, classInfo)

// Get closure for parsing course requisites (god help me)
enrollmentReqs, hasEnrollmentReqs := rowInfo["Enrollment Reqs:"]
Expand All @@ -39,8 +46,10 @@ func parseCourse(courseNum string, session schema.AcademicSession, rowInfo map[s
return course
}

// no global state is changed
func getCourse(courseNum string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
// getCourse extracts course details from the provided information and creates a schema.Course object.
// This function does not modify any global state.
// Returns a pointer to the newly created schema.Course object.
func getCourse(internalCourseNumber string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
CoursePrefix, CourseNumber := getPrefixAndNumber(classInfo)

course := schema.Course{
Expand All @@ -54,7 +63,7 @@ func getCourse(courseNum string, session schema.AcademicSession, rowInfo map[str
Class_level: classInfo["Class Level:"],
Activity_type: classInfo["Activity Type:"],
Grading: classInfo["Grading:"],
Internal_course_number: courseNum,
Internal_course_number: internalCourseNumber,
Catalog_year: getCatalogYear(session),
}

Expand All @@ -70,6 +79,10 @@ func getCourse(courseNum string, session schema.AcademicSession, rowInfo map[str
return &course
}

// getCatalogYear determines the catalog year from the academic session information.
// It assumes the session name starts with a 2-digit year and a semester character ('F', 'S', or 'U').
// Spring (S) and Summer (U) sessions are associated with the previous calendar year
// (e.g., 20F = 20, 20S = 19). It panics if the year cannot be parsed or the semester character is invalid.
func getCatalogYear(session schema.AcademicSession) string {
sessionYear, err := strconv.Atoi(session.Name[0:2])
if err != nil {
Expand All @@ -79,22 +92,24 @@ func getCatalogYear(session schema.AcademicSession) string {
switch sessionSemester {
case 'F':
return strconv.Itoa(sessionYear)
case 'S':
return strconv.Itoa(sessionYear - 1)
case 'U':
case 'S', 'U':
return strconv.Itoa(sessionYear - 1)
default:
panic(fmt.Errorf("encountered invalid session semester '%c!'", sessionSemester))
}
}

// getPrefixAndNumber returns the subject prefix and course number, i.e. the two capture
// groups of coursePrefixRegexp matched against `classInfo["Class Section:"]`.
// It panics if classInfo has no "Class Section:" key or if the section ID does not match.
func getPrefixAndNumber(classInfo map[string]string) (string, string) {
if sectionId, ok := classInfo["Class Section:"]; ok {
// Get subject prefix and course number by doing a regexp match on the section id
matches := coursePrefixRexp.FindStringSubmatch(sectionId)
matches := coursePrefixRegexp.FindStringSubmatch(sectionId)
if len(matches) == 3 {
return matches[1], matches[2]
}
panic("failed to course prefix and number")
}
return "", ""
panic("could not find 'Class Section:' in ClassInfo")
}
132 changes: 83 additions & 49 deletions parser/courseParser_test.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
package parser

import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"testing"

"github.com/UTDNebula/nebula-api/api/schema"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
)

func TestGetCourse(t *testing.T) {
loadTestData(t)
t.Parallel()

for name, testCase := range testDataCache {
for name, testCase := range testData {
t.Run(name, func(t *testing.T) {
_, courseNum := getInternalClassAndCourseNum(testCase.ClassInfo)
output := *getCourse(courseNum, testCase.Section.Academic_session, testCase.RowInfo, testCase.ClassInfo)
expected := testCase.Course

diff := cmp.Diff(expected, output, cmpopts.IgnoreFields(schema.Course{}, "Id"))
diff := cmp.Diff(expected, output, cmpopts.IgnoreFields(schema.Course{}, "Id", "Sections", "Enrollment_reqs", "Prerequisites"))

if diff != "" {
t.Errorf("Failed (-expected +got)\n %s", diff)
Expand All @@ -28,97 +28,131 @@ func TestGetCourse(t *testing.T) {
}

func TestGetCatalogYear(t *testing.T) {
t.Parallel()

testCases := map[string]struct {
Session schema.AcademicSession
Expected string
Panic bool
}{
"Case_001": {
Session: schema.AcademicSession{
Name: "25S",
},
Session: schema.AcademicSession{Name: "25S"},
Expected: "24",
}, "Case_002": {
Session: schema.AcademicSession{
Name: "25F",
},
},
"Case_002": {
Session: schema.AcademicSession{Name: "25F"},
Expected: "25",
}, "Case_003": {
Session: schema.AcademicSession{
Name: "22U",
},
},
"Case_003": {
Session: schema.AcademicSession{Name: "22U"},
Expected: "21",
}, "Case_004": {
Session: schema.AcademicSession{
Name: "20S",
},
},
"Case_004": {
Session: schema.AcademicSession{Name: "20S"},
Expected: "19",
},
"Case_005": {
Session: schema.AcademicSession{Name: "Garbage"},
Panic: true,
},
"Case_006": {
Session: schema.AcademicSession{Name: "20P"},
Panic: true,
},
}

for name, tc := range testCases {
for name, testCase := range testCases {
t.Run(name, func(t *testing.T) {
output := getCatalogYear(tc.Session)
t.Parallel()

defer func() {
// Test fails if we panic when we didn't want to or didn't when we did
if rec := recover(); rec != nil {
if !testCase.Panic {
t.Errorf("unexpected panic for session %q: %v", testCase.Session.Name, rec)
}
} else {
if testCase.Panic {
t.Errorf("expected panic for session %q but got none", testCase.Session.Name)
}
}
}()

if output != tc.Expected {
t.Errorf("expected %s got %s", tc.Expected, output)
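// Always make the call (a panic is handled by the deferred recover above);
// only compare the output when the case is *expected* to succeed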
output := getCatalogYear(testCase.Session)
if !testCase.Panic && output != testCase.Expected {
t.Errorf("expected %q, got %q", testCase.Expected, output)
}
})

}
}

func TestGetPrefixAndCourseNum(t *testing.T) {
t.Parallel()

testCases := map[string]struct {
classInfo map[string]string
prefix string
number string
ClassInfo map[string]string
Prefix string
Number string
Panic bool
}{
"Case_001": {
classInfo: map[string]string{
ClassInfo: map[string]string{
"Class Section:": "ACCT2301.001.25S",
},
prefix: "ACCT",
number: "2301",
Prefix: "ACCT",
Number: "2301",
},
"Case_002": {
classInfo: map[string]string{
ClassInfo: map[string]string{
"Class Section:": "ENTP3301.002.24S",
},
prefix: "ENTP",
number: "3301",
Prefix: "ENTP",
Number: "3301",
},
"Case_003": {
classInfo: map[string]string{
ClassInfo: map[string]string{
"Class Section:": "Garbage In, Garbage out",
},
prefix: "",
number: "",
Panic: true,
},
"Case_004": {
classInfo: map[string]string{
ClassInfo: map[string]string{
"Class Section:": "ENTP33S",
},
prefix: "",
number: "",
Panic: true,
},
"Case_005": {
classInfo: map[string]string{
ClassInfo: map[string]string{
"Class Section:": "",
},
prefix: "",
number: "",
Panic: true,
},
}

for name, testCase := range testCases {
t.Run(name, func(t *testing.T) {
prefix, number := getPrefixAndNumber(testCase.classInfo)
defer func() {
if r := recover(); r != nil {
if !testCase.Panic {
t.Errorf("unexpected panic for input %q: %v", name, r)
}
} else {
if testCase.Panic {
t.Errorf("expected panic for input %q but none occurred", name)
}
}
}()

if prefix != testCase.prefix {
t.Errorf("expected %s got %s", testCase.prefix, prefix)
}
if number != testCase.number {
t.Errorf("expected %s got %s", testCase.number, number)
prefix, number := getPrefixAndNumber(testCase.ClassInfo)

if !testCase.Panic {
if prefix != testCase.Prefix {
t.Errorf("expected %q got %q", testCase.Prefix, prefix)
}
if number != testCase.Number {
t.Errorf("expected %q got %q", testCase.Number, number)
}
}
})
}
Expand Down
48 changes: 26 additions & 22 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,33 @@ import (
"github.com/UTDNebula/nebula-api/api/schema"
)

// Main dictionaries for mapping unique keys to the actual data
var Sections = make(map[primitive.ObjectID]*schema.Section)
var Courses = make(map[string]*schema.Course)
var Professors = make(map[string]*schema.Professor)
var (
// Sections is a dictionary mapping ObjectIDs to a *schema.Section
Sections = make(map[primitive.ObjectID]*schema.Section)

// Auxilliary dictionaries for mapping the generated ObjectIDs to the keys used in the above maps, used for validation purposes
var CourseIDMap = make(map[primitive.ObjectID]string)
var ProfessorIDMap = make(map[primitive.ObjectID]string)
// Courses dictionary for keys (Internal_course_number + Catalog_year) to a *schema.Course
Courses = make(map[string]*schema.Course)

// Requisite parser closures associated with courses
var ReqParsers = make(map[primitive.ObjectID]func())
// Professors dictionary for keys (First_name + Last_name) to a *schema.Professor
Professors = make(map[string]*schema.Professor)

// Grade mappings for section grade distributions, mapping is MAP[SEMESTER] -> MAP[SUBJECT + NUMBER + SECTION] -> GRADE DISTRIBUTION
var GradeMap map[string]map[string][]int
// CourseIDMap is an auxiliary dictionary mapping generated ObjectIDs to the string keys used in Courses, used for validation purposes
CourseIDMap = make(map[primitive.ObjectID]string)

// Time location for dates (uses America/Chicago tz database zone for CDT which accounts for daylight saving)
var timeLocation, timeError = time.LoadLocation("America/Chicago")
// ProfessorIDMap is an auxiliary dictionary mapping generated ObjectIDs to the string keys used in Professors, used for validation purposes
ProfessorIDMap = make(map[primitive.ObjectID]string)

// Externally exposed parse function
// ReqParsers is a dictionary mapping course ObjectIDs to the func() that parses that course's requisites
ReqParsers = make(map[primitive.ObjectID]func())

// GradeMap mappings for section grade distributions, mapping is MAP[SEMESTER] -> MAP[SUBJECT + NUMBER + SECTION] -> GRADE DISTRIBUTION
GradeMap map[string]map[string][]int

// timeLocation is the time location used for dates (the America/Chicago tz database zone, which accounts for daylight saving time)
timeLocation, timeError = time.LoadLocation("America/Chicago")
)

// Parse is the externally exposed parse function.
func Parse(inDir string, outDir string, csvPath string, skipValidation bool) {

// Panic if timeLocation didn't load properly
Expand Down Expand Up @@ -91,7 +99,9 @@ func Parse(inDir string, outDir string, csvPath string, skipValidation bool) {
utils.WriteJSON(fmt.Sprintf("%s/professors.json", outDir), utils.GetMapValues(Professors))
}

// Internal parse function
// parse is an internal helper function that parses a single HTML file.
// It opens the file, creates a goquery document, and calls parseSection to
// extract section data.
func parse(path string) {

utils.VPrintf("Parsing %s...", path)
Expand All @@ -109,14 +119,8 @@ func parse(path string) {
panic(err)
}

// Dictionary to hold the row data, keyed by row header
rowInfo := getRowInfo(doc)
// Dictionary to hold the class info, keyed by data label
classInfo := getClassInfo(doc)

// Get the class and course num by splitting classInfo value
parseSection(getRowInfo(doc), getClassInfo(doc))

parseSection(rowInfo, classInfo)
utils.VPrint("Parsed!")
}

Expand Down
Loading