diff --git a/.env.template b/.env.template index 377825b..fd04e52 100644 --- a/.env.template +++ b/.env.template @@ -7,4 +7,4 @@ LOGIN_ASTRA_PASSWORD= MAZEVO_API_KEY= #Uploader -MONGODB_URI= \ No newline at end of file +MONGODB_URI= diff --git a/go.mod b/go.mod index 1071abf..d5271f1 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/UTDNebula/nebula-api/api v0.0.0-20250222211052-e8c23b26713c github.com/chromedp/cdproto v0.0.0-20250120090109-d38428e4d9c8 github.com/chromedp/chromedp v0.12.1 + github.com/google/go-cmp v0.7.0 github.com/joho/godotenv v1.5.1 github.com/valyala/fastjson v1.6.4 go.mongodb.org/mongo-driver v1.17.2 diff --git a/go.sum b/go.sum index 5ae99cc..ff16643 100644 --- a/go.sum +++ b/go.sum @@ -48,6 +48,8 @@ github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= diff --git a/parser/courseParser.go b/parser/courseParser.go index bc7ea33..48c7edc 100644 --- a/parser/courseParser.go +++ b/parser/courseParser.go @@ -12,26 +12,10 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" ) -var coursePrefixRexp *regexp.Regexp = utils.Regexpf(`^%s`, utils.R_SUBJ_COURSE_CAP) -var contactRegexp *regexp.Regexp = regexp.MustCompile(`\(([0-9]+)-([0-9]+)\)\s+([SUFY]+)`) - -func getCatalogYear(session schema.AcademicSession) string { - sessionYear, err := 
strconv.Atoi(session.Name[0:2]) - if err != nil { - panic(err) - } - sessionSemester := session.Name[2] - switch sessionSemester { - case 'F': - return strconv.Itoa(sessionYear) - case 'S': - return strconv.Itoa(sessionYear - 1) - case 'U': - return strconv.Itoa(sessionYear - 1) - default: - panic(fmt.Errorf("encountered invalid session semester '%c!'", sessionSemester)) - } -} +var ( + coursePrefixRexp *regexp.Regexp = utils.Regexpf(`^%s`, utils.R_SUBJ_COURSE_CAP) + contactRegexp *regexp.Regexp = regexp.MustCompile(`\(([0-9]+)-([0-9]+)\)\s+([SUFY]+)`) +) func parseCourse(courseNum string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course { // Courses are internally keyed by their internal course number and the catalog year they're part of @@ -44,28 +28,36 @@ func parseCourse(courseNum string, session schema.AcademicSession, rowInfo map[s return course } - // Get subject prefix and course number by doing a regexp match on the section id - sectionId := classInfo["Class Section:"] - idMatches := coursePrefixRexp.FindStringSubmatch(sectionId) - - course = &schema.Course{} - - course.Id = primitive.NewObjectID() - course.Course_number = idMatches[2] - course.Subject_prefix = idMatches[1] - course.Title = utils.TrimWhitespace(rowInfo["Course Title:"].Text()) - course.Description = utils.TrimWhitespace(rowInfo["Description:"].Text()) - course.School = utils.TrimWhitespace(rowInfo["College:"].Text()) - course.Credit_hours = classInfo["Semester Credit Hours:"] - course.Class_level = classInfo["Class Level:"] - course.Activity_type = classInfo["Activity Type:"] - course.Grading = classInfo["Grading:"] - course.Internal_course_number = courseNum + course = getCourse(courseNum, session, rowInfo, classInfo) // Get closure for parsing course requisites (god help me) enrollmentReqs, hasEnrollmentReqs := rowInfo["Enrollment Reqs:"] ReqParsers[course.Id] = getReqParser(course, hasEnrollmentReqs, enrollmentReqs) + 
Courses[courseKey] = course + CourseIDMap[course.Id] = courseKey + return course +} + +// no global state is changed +func getCourse(courseNum string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course { + CoursePrefix, CourseNumber := getPrefixAndNumber(classInfo) + + course := schema.Course{ + Id: primitive.NewObjectID(), + Course_number: CourseNumber, + Subject_prefix: CoursePrefix, + Title: utils.TrimWhitespace(rowInfo["Course Title:"].Text()), + Description: utils.TrimWhitespace(rowInfo["Description:"].Text()), + School: utils.TrimWhitespace(rowInfo["College:"].Text()), + Credit_hours: classInfo["Semester Credit Hours:"], + Class_level: classInfo["Class Level:"], + Activity_type: classInfo["Activity Type:"], + Grading: classInfo["Grading:"], + Internal_course_number: courseNum, + Catalog_year: getCatalogYear(session), + } + // Try to get lecture/lab contact hours and offering frequency from course description contactMatches := contactRegexp.FindStringSubmatch(course.Description) // Length of contactMatches should be 4 upon successful match @@ -75,10 +67,34 @@ func parseCourse(courseNum string, session schema.AcademicSession, rowInfo map[s course.Offering_frequency = contactMatches[3] } - // Set the catalog year - course.Catalog_year = catalogYear + return &course +} - Courses[courseKey] = course - CourseIDMap[course.Id] = courseKey - return course +func getCatalogYear(session schema.AcademicSession) string { + sessionYear, err := strconv.Atoi(session.Name[0:2]) + if err != nil { + panic(err) + } + sessionSemester := session.Name[2] + switch sessionSemester { + case 'F': + return strconv.Itoa(sessionYear) + case 'S': + return strconv.Itoa(sessionYear - 1) + case 'U': + return strconv.Itoa(sessionYear - 1) + default: + panic(fmt.Errorf("encountered invalid session semester '%c!'", sessionSemester)) + } +} + +func getPrefixAndNumber(classInfo map[string]string) (string, string) { + if sectionId, ok 
:= classInfo["Class Section:"]; ok { + // Get subject prefix and course number by doing a regexp match on the section id + matches := coursePrefixRexp.FindStringSubmatch(sectionId) + if len(matches) == 3 { + return matches[1], matches[2] + } + } + return "", "" } diff --git a/parser/courseParser_test.go b/parser/courseParser_test.go new file mode 100644 index 0000000..d573b69 --- /dev/null +++ b/parser/courseParser_test.go @@ -0,0 +1,125 @@ +package parser + +import ( + "testing" + + "github.com/UTDNebula/nebula-api/api/schema" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" +) + +func TestGetCourse(t *testing.T) { + loadTestData(t) + + for name, testCase := range testDataCache { + t.Run(name, func(t *testing.T) { + _, courseNum := getInternalClassAndCourseNum(testCase.ClassInfo) + output := *getCourse(courseNum, testCase.Section.Academic_session, testCase.RowInfo, testCase.ClassInfo) + expected := testCase.Course + + diff := cmp.Diff(expected, output, cmpopts.IgnoreFields(schema.Course{}, "Id")) + + if diff != "" { + t.Errorf("Failed (-expected +got)\n %s", diff) + } + + }) + } +} + +func TestGetCatalogYear(t *testing.T) { + testCases := map[string]struct { + Session schema.AcademicSession + Expected string + }{ + "Case_001": { + Session: schema.AcademicSession{ + Name: "25S", + }, + Expected: "24", + }, "Case_002": { + Session: schema.AcademicSession{ + Name: "25F", + }, + Expected: "25", + }, "Case_003": { + Session: schema.AcademicSession{ + Name: "22U", + }, + Expected: "21", + }, "Case_004": { + Session: schema.AcademicSession{ + Name: "20S", + }, + Expected: "19", + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + output := getCatalogYear(tc.Session) + + if output != tc.Expected { + t.Errorf("expected %s got %s", tc.Expected, output) + } + }) + + } +} + +func TestGetPrefixAndCourseNum(t *testing.T) { + testCases := map[string]struct { + classInfo map[string]string + prefix string + number string + }{ 
+ "Case_001": { + classInfo: map[string]string{ + "Class Section:": "ACCT2301.001.25S", + }, + prefix: "ACCT", + number: "2301", + }, + "Case_002": { + classInfo: map[string]string{ + "Class Section:": "ENTP3301.002.24S", + }, + prefix: "ENTP", + number: "3301", + }, + "Case_003": { + classInfo: map[string]string{ + "Class Section:": "Garbage In, Garbage out", + }, + prefix: "", + number: "", + }, + "Case_004": { + classInfo: map[string]string{ + "Class Section:": "ENTP33S", + }, + prefix: "", + number: "", + }, + "Case_005": { + classInfo: map[string]string{ + "Class Section:": "", + }, + prefix: "", + number: "", + }, + } + + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + prefix, number := getPrefixAndNumber(testCase.classInfo) + + if prefix != testCase.prefix { + t.Errorf("expected %s got %s", testCase.prefix, prefix) + } + if number != testCase.number { + t.Errorf("expected %s got %s", testCase.number, number) + } + }) + } +} diff --git a/parser/parser.go b/parser/parser.go index 421f5eb..b1dafb5 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -4,7 +4,6 @@ import ( "fmt" "log" "os" - "strings" "time" "github.com/UTDNebula/api-tools/utils" @@ -110,35 +109,33 @@ func parse(path string) { panic(err) } - // Get the rows of the info table - infoTable := doc.FindMatcher(goquery.Single("table.courseinfo__overviewtable > tbody")) - infoRows := infoTable.ChildrenFiltered("tr") + // Dictionary to hold the row data, keyed by row header + rowInfo := getRowInfo(doc) + // Dictionary to hold the class info, keyed by data label + classInfo := getClassInfo(doc) - var syllabusURI string + // Get the class and course num by splitting classInfo value - // Dictionary to hold the row data, keyed by row header + parseSection(rowInfo, classInfo) + utils.VPrint("Parsed!") +} + +func getRowInfo(doc *goquery.Document) map[string]*goquery.Selection { + infoRows := doc.FindMatcher(goquery.Single("table.courseinfo__overviewtable > 
tbody")).ChildrenFiltered("tr") rowInfo := make(map[string]*goquery.Selection, len(infoRows.Nodes)) - // Populate rowInfo infoRows.Each(func(_ int, row *goquery.Selection) { rowHeader := utils.TrimWhitespace(row.FindMatcher(goquery.Single("th")).Text()) rowInfo[rowHeader] = row.FindMatcher(goquery.Single("td")) }) + return rowInfo +} - // Get syllabusURI from syllabus row link - if syllabus, ok := rowInfo["syllabus"]; ok { - syllabusURI, _ = syllabus.FindMatcher(goquery.Single("a")).Attr("href") - } - - // Get the rows of the class info subtable - infoSubTable := infoTable.FindMatcher(goquery.Single("table.courseinfo__classsubtable > tbody")) - infoRows = infoSubTable.ChildrenFiltered("tr") - - // Dictionary to hold the class info, keyed by data label - classInfo := make(map[string]string) +func getClassInfo(doc *goquery.Document) map[string]string { + infoRows := doc.FindMatcher(goquery.Single("table.courseinfo__classsubtable > tbody")).ChildrenFiltered("tr") + classInfo := make(map[string]string, len(infoRows.Nodes)) - // Populate classInfo infoRows.Each(func(_ int, row *goquery.Selection) { rowHeaders := row.Find("td.courseinfo__classsubtable__th") rowHeaders.Each(func(_ int, header *goquery.Selection) { @@ -147,17 +144,5 @@ func parse(path string) { classInfo[headerText] = dataText }) }) - - // Get the class and course num by splitting classInfo value - classAndCourseNum := strings.Split(classInfo["Class/Course Number:"], " / ") - classNum := classAndCourseNum[0] - courseNum := utils.TrimWhitespace(classAndCourseNum[1]) - - // Figure out the academic session associated with this specific course/Section - session := getAcademicSession(rowInfo) - - // Try to create the course and section based on collected info - courseRef := parseCourse(courseNum, session, rowInfo, classInfo) - parseSection(courseRef, classNum, syllabusURI, session, rowInfo, classInfo) - utils.VPrint("Parsed!") + return classInfo } diff --git a/parser/sectionParser.go b/parser/sectionParser.go 
index b056164..1d5dd04 100644 --- a/parser/sectionParser.go +++ b/parser/sectionParser.go @@ -1,6 +1,7 @@ package parser import ( + "encoding/json" "regexp" "strings" "time" @@ -12,73 +13,65 @@ import ( "golang.org/x/net/html/atom" ) -var sectionPrefixRegexp *regexp.Regexp = utils.Regexpf(`^(?i)%s\.(%s)`, utils.R_SUBJ_COURSE, utils.R_SECTION_CODE) -var coreRegexp *regexp.Regexp = regexp.MustCompile(`[0-9]{3}`) -var personRegexp *regexp.Regexp = regexp.MustCompile(`(.+)・(.+)・(.+)`) - -func parseSection(courseRef *schema.Course, classNum string, syllabusURI string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) { - // Get subject prefix and course number by doing a regexp match on the section id - sectionId := classInfo["Class Section:"] - idMatches := sectionPrefixRegexp.FindStringSubmatch(sectionId) - - section := &schema.Section{} - - section.Id = primitive.NewObjectID() - section.Section_number = idMatches[1] - section.Course_reference = courseRef.Id - - //TODO: section requisites? 
- - // Set academic session - section.Academic_session = session - // Add professors - section.Professors = parseProfessors(section.Id, rowInfo, classInfo) - - // Get all TA/RA info - assistantText := utils.TrimWhitespace(rowInfo["TA/RA(s):"].Text()) - assistantMatches := personRegexp.FindAllStringSubmatch(assistantText, -1) - section.Teaching_assistants = make([]schema.Assistant, 0, len(assistantMatches)) - for _, match := range assistantMatches { - assistant := schema.Assistant{} - nameStr := utils.TrimWhitespace(match[1]) - names := strings.Split(nameStr, " ") - assistant.First_name = strings.Join(names[:len(names)-1], " ") - assistant.Last_name = names[len(names)-1] - assistant.Role = utils.TrimWhitespace(match[2]) - assistant.Email = utils.TrimWhitespace(match[3]) - section.Teaching_assistants = append(section.Teaching_assistants, assistant) - } - - section.Internal_class_number = classNum - section.Instruction_mode = classInfo["Instruction Mode:"] - section.Meetings = getMeetings(rowInfo) +const timeLayout = "January 2, 2006" - // Parse core flags (may or may not exist) +var ( + sectionPrefixRegexp *regexp.Regexp = utils.Regexpf(`^(?i)%s\.(%s)`, utils.R_SUBJ_COURSE, utils.R_SECTION_CODE) + coreRegexp *regexp.Regexp = regexp.MustCompile(`[0-9]{3}`) + personRegexp *regexp.Regexp = regexp.MustCompile(`(.+)・(.+)・(.+)`) - if coreText, hasCore := rowInfo["Core:"]; hasCore { - section.Core_flags = coreRegexp.FindAllString(utils.TrimWhitespace(coreText.Text()), -1) - } - - section.Syllabus_uri = syllabusURI + meetingDatesRegexp = regexp.MustCompile(utils.R_DATE_MDY) + meetingDaysRegexp = regexp.MustCompile(utils.R_WEEKDAY) + meetingTimesRegexp = regexp.MustCompile(utils.R_TIME_AM_PM) +) - if semesterGrades, ok := GradeMap[session.Name]; ok { - // We have to trim leading zeroes from the section number in order to match properly, since the grade data does not use leading zeroes - trimmedSectionNumber := strings.TrimLeft(section.Section_number, "0") - // Key into 
grademap should be uppercased like the grade data - gradeKey := strings.ToUpper(courseRef.Subject_prefix + courseRef.Course_number + trimmedSectionNumber) - sectionGrades, exists := semesterGrades[gradeKey] - if exists { - section.Grade_distribution = sectionGrades - } +// TODO: section requisites? +func parseSection(rowInfo map[string]*goquery.Selection, classInfo map[string]string) { + classNum, courseNum := getInternalClassAndCourseNum(classInfo) + session := getAcademicSession(rowInfo) + courseRef := parseCourse(courseNum, session, rowInfo, classInfo) + + sectionNumber := getSectionNumber(classInfo) + + id := primitive.NewObjectID() + + section := schema.Section{ + Id: id, + Section_number: sectionNumber, + Course_reference: courseRef.Id, + Academic_session: session, + Professors: parseProfessors(id, rowInfo, classInfo), + Teaching_assistants: getTeachingAssistants(rowInfo), + Internal_class_number: classNum, + Instruction_mode: getInstructionMode(classInfo), + Meetings: getMeetings(rowInfo), + Core_flags: getCoreFlags(rowInfo), + Syllabus_uri: getSyllabusUri(rowInfo), + Grade_distribution: getGradeDistribution(session, sectionNumber, courseRef), } + a, _ := json.Marshal(section) + println(string(a)) + a, _ = json.Marshal(*courseRef) + println(string(a)) // Add new section to section map - Sections[section.Id] = section + Sections[section.Id] = &section // Append new section to course's section listing courseRef.Sections = append(courseRef.Sections, section.Id) } +// todo add logging for failing to get fields? 
probably only max verbosity +func getInternalClassAndCourseNum(classInfo map[string]string) (string, string) { + if numbers, ok := classInfo["Class/Course Number:"]; ok { + classAndCourseNum := strings.Split(numbers, " / ") + if len(classAndCourseNum) == 2 { + return classAndCourseNum[0], classAndCourseNum[1] + } + } + return "", "" +} + func getAcademicSession(rowInfo map[string]*goquery.Selection) schema.AcademicSession { session := schema.AcademicSession{} @@ -102,9 +95,40 @@ func getAcademicSession(rowInfo map[string]*goquery.Selection) schema.AcademicSe return session } -var meetingDatesRegexp = regexp.MustCompile(utils.R_DATE_MDY) -var meetingDaysRegexp = regexp.MustCompile(utils.R_WEEKDAY) -var meetingTimesRegexp = regexp.MustCompile(utils.R_TIME_AM_PM) +func getSectionNumber(classInfo map[string]string) string { + if syllabus, ok := classInfo["Class Section:"]; ok { + matches := sectionPrefixRegexp.FindStringSubmatch(syllabus) + if len(matches) == 2 { + return matches[1] + } + } + return "" +} + +func getTeachingAssistants(rowInfo map[string]*goquery.Selection) []schema.Assistant { + assistantMatches := personRegexp.FindAllStringSubmatch(utils.TrimWhitespace(rowInfo["TA/RA(s):"].Text()), -1) + assistants := make([]schema.Assistant, 0, len(assistantMatches)) + + for _, match := range assistantMatches { + names := strings.Split(utils.TrimWhitespace(match[1]), " ") + + assistant := schema.Assistant{ + First_name: strings.Join(names[:len(names)-1], " "), + Last_name: names[len(names)-1], + Role: utils.TrimWhitespace(match[2]), + Email: utils.TrimWhitespace(match[3]), + } + assistants = append(assistants, assistant) + } + return assistants +} + +func getInstructionMode(classInfo map[string]string) string { + if mode, ok := classInfo["Instruction Mode:"]; ok { + return mode + } + return "" +} func getMeetings(rowInfo map[string]*goquery.Selection) []schema.Meeting { meetingItems := rowInfo["Schedule:"].Find("div.courseinfo__meeting-item--multiple") @@ -160,7 
+184,40 @@ func getMeetings(rowInfo map[string]*goquery.Selection) []schema.Meeting { return meetings } -const timeLayout = "January 2, 2006" +func getCoreFlags(rowInfo map[string]*goquery.Selection) []string { + if core, ok := rowInfo["Core:"]; ok { + flags := coreRegexp.FindAllString(utils.TrimWhitespace(core.Text()), -1) + + if flags != nil { + return flags + } + } + return []string{} +} + +func getSyllabusUri(rowInfo map[string]*goquery.Selection) string { + if syllabus, ok := rowInfo["Syllabus:"]; ok { + link := syllabus.FindMatcher(goquery.Single("a")) + if link.Length() == 1 { + return link.AttrOr("href", "") + } + } + return "" +} + +func getGradeDistribution(session schema.AcademicSession, sectionNumber string, courseRef *schema.Course) []int { + if semesterGrades, ok := GradeMap[session.Name]; ok { + // We have to trim leading zeroes from the section number in order to match properly, since the grade data does not use leading zeroes + trimmedSectionNumber := strings.TrimLeft(sectionNumber, "0") + // Key into grademap should be uppercased like the grade data + gradeKey := strings.ToUpper(courseRef.Subject_prefix + courseRef.Course_number + trimmedSectionNumber) + sectionGrades, exists := semesterGrades[gradeKey] + if exists { + return sectionGrades + } + } + return []int{} +} func parseTimeOrPanic(value string) time.Time { date, err := time.ParseInLocation(timeLayout, value, timeLocation) diff --git a/parser/sectionParser_test.go b/parser/sectionParser_test.go new file mode 100644 index 0000000..d6d7d25 --- /dev/null +++ b/parser/sectionParser_test.go @@ -0,0 +1,144 @@ +package parser + +import ( + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestGetInternalClassAndCourseNum(t *testing.T) { + loadTestData(t) + + for name, testCase := range testDataCache { + t.Run(name, func(t *testing.T) { + classNum, courseNum := getInternalClassAndCourseNum(testCase.ClassInfo) + expectedClassNum := testCase.Section.Internal_class_number + expectedCourseNumber 
:= testCase.Course.Internal_course_number + + if classNum != expectedClassNum { + t.Errorf("Class Number: expected %s got %s", expectedClassNum, classNum) + } + + if courseNum != expectedCourseNumber { + t.Errorf("Class Number: expected %s got %s", expectedCourseNumber, courseNum) + } + + }) + } +} + +func TestGetAcademicSession(t *testing.T) { + loadTestData(t) + + for name, testCase := range testDataCache { + t.Run(name, func(t *testing.T) { + output := getAcademicSession(testCase.RowInfo) + expected := testCase.Section.Academic_session + + diff := cmp.Diff(expected, output) + + if diff != "" { + t.Errorf("Failed (-expected +got)\n %s", diff) + } + }) + } +} + +func TestGetSectionNumber(t *testing.T) { + loadTestData(t) + + for name, testCase := range testDataCache { + t.Run(name, func(t *testing.T) { + output := getSectionNumber(testCase.ClassInfo) + expected := testCase.Section.Section_number + + if output != expected { + t.Errorf("expected %s got %s", expected, output) + } + }) + + } +} + +func TestGetTeachingAssistants(t *testing.T) { + loadTestData(t) + + for name, testCase := range testDataCache { + t.Run(name, func(t *testing.T) { + output := getTeachingAssistants(testCase.RowInfo) + expected := testCase.Section.Teaching_assistants + + diff := cmp.Diff(expected, output) + + if diff != "" { + t.Errorf("Failed (-expected +got)\n %s", diff) + } + }) + } +} + +func TestGetInstructionMode(t *testing.T) { + loadTestData(t) + + for name, testCase := range testDataCache { + t.Run(name, func(t *testing.T) { + output := getInstructionMode(testCase.ClassInfo) + expected := testCase.Section.Instruction_mode + + if output != expected { + t.Errorf("expected %s got %s", expected, output) + } + }) + + } +} + +func TestGetMeetings(t *testing.T) { + loadTestData(t) + + for name, testCase := range testDataCache { + t.Run(name, func(t *testing.T) { + output := getMeetings(testCase.RowInfo) + expected := testCase.Section.Meetings + + diff := cmp.Diff(expected, output) + + if 
diff != "" { + t.Errorf("Failed (-expected +got)\n %s", diff) + } + }) + } +} + +func TestGetCoreFlags(t *testing.T) { + loadTestData(t) + + for name, testCase := range testDataCache { + t.Run(name, func(t *testing.T) { + output := getCoreFlags(testCase.RowInfo) + expected := testCase.Section.Core_flags + + diff := cmp.Diff(expected, output) + + if diff != "" { + t.Errorf("Failed (-expected +got)\n %s", diff) + } + }) + } +} + +func TestGetSyllabusUri(t *testing.T) { + loadTestData(t) + + for name, testCase := range testDataCache { + t.Run(name, func(t *testing.T) { + output := getSyllabusUri(testCase.RowInfo) + expected := testCase.Section.Syllabus_uri + + if output != expected { + t.Errorf("expected %s got %s", expected, output) + } + }) + + } +} diff --git a/parser/test_helper_test.go b/parser/test_helper_test.go new file mode 100644 index 0000000..6fb14e3 --- /dev/null +++ b/parser/test_helper_test.go @@ -0,0 +1,83 @@ +package parser + +import ( + "bytes" + "encoding/json" + "fmt" + "os" + "testing" + + "github.com/PuerkitoBio/goquery" + "github.com/UTDNebula/nebula-api/api/schema" +) + +type TestData struct { + RowInfo map[string]*goquery.Selection + ClassInfo map[string]string + Section schema.Section + Course schema.Course +} + +var testDataCache map[string]TestData + +func loadTestData(t *testing.T) { + t.Helper() + if testDataCache != nil { + return + } + + testDataCache = make(map[string]TestData) + dir, err := os.ReadDir("testdata") + if err != nil { + t.Fatalf("Failed to load testdata: %v", err) + } + + for _, file := range dir { + if !file.IsDir() { + continue + } + testCase, err := loadTest(file.Name()) + if err != nil { + t.Fatalf("Failed to load %s: %v", file.Name(), err) + } + testDataCache[file.Name()] = testCase + } +} + +func loadTest(dir string) (TestData, error) { + + htmlBytes, err := os.ReadFile(fmt.Sprintf("testdata/%s/input.html", dir)) + if err != nil { + return TestData{}, err + } + + doc, err := 
goquery.NewDocumentFromReader(bytes.NewReader(htmlBytes)) + if err != nil { + return TestData{}, err + } + + result := TestData{ + RowInfo: getRowInfo(doc), + ClassInfo: getClassInfo(doc), + } + + jsonBytes, err := os.ReadFile(fmt.Sprintf("testdata/%s/section.json", dir)) + if err != nil { + return result, err + } + err = json.Unmarshal(jsonBytes, &result.Section) + if err != nil { + return TestData{}, err + } + + jsonBytes, err = os.ReadFile(fmt.Sprintf("testdata/%s/course.json", dir)) + if err != nil { + return result, err + } + err = json.Unmarshal(jsonBytes, &result.Course) + if err != nil { + return TestData{}, err + } + + return result, nil +} diff --git a/parser/testdata/case_001/course.json b/parser/testdata/case_001/course.json new file mode 100644 index 0000000..65a0dc3 --- /dev/null +++ b/parser/testdata/case_001/course.json @@ -0,0 +1,23 @@ +{ + "_id": "67bd14d7b35a4cd7c0446f3c", + "subject_prefix": "ACCT", + "course_number": "2301", + "title": "Introductory Financial Accounting", + "description": "ACCT 2301 - Introductory Financial Accounting (3 semester credit hours) An introduction to financial reporting designed to create an awareness of the accounting concepts and principles for preparing the three basic financial statements: the income statement, balance sheet, and statement of cash flows. A minimum grade of C is required to take upper-division ACCT courses. 
(3-0) S", + "enrollment_reqs": "", + "school": "Naveen Jindal School of Management", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "grading": "Graded - Undergraduate", + "internal_course_number": "000061", + "prerequisites": null, + "corequisites": null, + "co_or_pre_requisites": null, + "sections": null, + "lecture_contact_hours": "3", + "laboratory_contact_hours": "0", + "offering_frequency": "S", + "catalog_year": "24", + "attributes": null +} diff --git a/parser/testdata/case_001/input.html b/parser/testdata/case_001/input.html new file mode 100644 index 0000000..29a0d89 --- /dev/null +++ b/parser/testdata/case_001/input.html @@ -0,0 +1,221 @@ +