Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ LOGIN_ASTRA_PASSWORD=
MAZEVO_API_KEY=

#Uploader
MONGODB_URI=
MONGODB_URI=
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/UTDNebula/nebula-api/api v0.0.0-20250222211052-e8c23b26713c
github.com/chromedp/cdproto v0.0.0-20250120090109-d38428e4d9c8
github.com/chromedp/chromedp v0.12.1
github.com/google/go-cmp v0.7.0
github.com/joho/godotenv v1.5.1
github.com/valyala/fastjson v1.6.4
go.mongodb.org/mongo-driver v1.17.2
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
Expand Down
100 changes: 58 additions & 42 deletions parser/courseParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,10 @@ import (
"go.mongodb.org/mongo-driver/bson/primitive"
)

// coursePrefixRexp is the utils.R_SUBJ_COURSE_CAP pattern anchored to the
// start of the input.
var coursePrefixRexp = utils.Regexpf(`^%s`, utils.R_SUBJ_COURSE_CAP)

// contactRegexp matches a parenthesized "<digits>-<digits>" pair followed by
// whitespace and one or more of the letters S, U, F, Y. The two numbers and
// the letter run are captured as groups 1, 2 and 3.
var contactRegexp = regexp.MustCompile(`\(([0-9]+)-([0-9]+)\)\s+([SUFY]+)`)

// getCatalogYear derives the catalog year from an academic session name whose
// first two bytes are a numeric year and whose third byte is a semester code,
// e.g. "25S".
//
// A fall ('F') session maps to its own year; spring ('S') and summer ('U')
// sessions map to the previous year.
//
// It panics if the name is shorter than three bytes, if the first two bytes
// are not a number, or if the semester code is not one of F, S or U.
func getCatalogYear(session schema.AcademicSession) string {
	// Guard the fixed-offset accesses below so that a malformed name produces
	// a descriptive panic rather than a bare index-out-of-range.
	if len(session.Name) < 3 {
		panic(fmt.Errorf("session name %q is too short to hold a year and semester", session.Name))
	}
	sessionYear, err := strconv.Atoi(session.Name[0:2])
	if err != nil {
		panic(err)
	}
	switch sessionSemester := session.Name[2]; sessionSemester {
	case 'F':
		return strconv.Itoa(sessionYear)
	case 'S', 'U':
		return strconv.Itoa(sessionYear - 1)
	default:
		// %q renders the byte as a single-quoted character literal.
		panic(fmt.Errorf("encountered invalid session semester %q", sessionSemester))
	}
}
// Package-level patterns used while parsing courses.
var (
	// coursePrefixRexp is the utils.R_SUBJ_COURSE_CAP pattern anchored to the
	// start of the input.
	coursePrefixRexp = utils.Regexpf(`^%s`, utils.R_SUBJ_COURSE_CAP)

	// contactRegexp matches a parenthesized "<digits>-<digits>" pair followed
	// by whitespace and one or more of the letters S, U, F, Y. The two numbers
	// and the letter run are captured as groups 1, 2 and 3.
	contactRegexp = regexp.MustCompile(`\(([0-9]+)-([0-9]+)\)\s+([SUFY]+)`)
)

func parseCourse(courseNum string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
// Courses are internally keyed by their internal course number and the catalog year they're part of
Expand All @@ -44,28 +28,36 @@ func parseCourse(courseNum string, session schema.AcademicSession, rowInfo map[s
return course
}

// Get subject prefix and course number by doing a regexp match on the section id
sectionId := classInfo["Class Section:"]
idMatches := coursePrefixRexp.FindStringSubmatch(sectionId)

course = &schema.Course{}

course.Id = primitive.NewObjectID()
course.Course_number = idMatches[2]
course.Subject_prefix = idMatches[1]
course.Title = utils.TrimWhitespace(rowInfo["Course Title:"].Text())
course.Description = utils.TrimWhitespace(rowInfo["Description:"].Text())
course.School = utils.TrimWhitespace(rowInfo["College:"].Text())
course.Credit_hours = classInfo["Semester Credit Hours:"]
course.Class_level = classInfo["Class Level:"]
course.Activity_type = classInfo["Activity Type:"]
course.Grading = classInfo["Grading:"]
course.Internal_course_number = courseNum
course = getCourse(courseNum, session, rowInfo, classInfo)

// Get closure for parsing course requisites (god help me)
enrollmentReqs, hasEnrollmentReqs := rowInfo["Enrollment Reqs:"]
ReqParsers[course.Id] = getReqParser(course, hasEnrollmentReqs, enrollmentReqs)

Courses[courseKey] = course
CourseIDMap[course.Id] = courseKey
return course
}

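// getCourse builds a new schema.Course from the scraped row and class info; unlike parseCourse, it does not change any global state.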
func getCourse(courseNum string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course {
CoursePrefix, CourseNumber := getPrefixAndNumber(classInfo)

course := schema.Course{
Id: primitive.NewObjectID(),
Course_number: CourseNumber,
Subject_prefix: CoursePrefix,
Title: utils.TrimWhitespace(rowInfo["Course Title:"].Text()),
Description: utils.TrimWhitespace(rowInfo["Description:"].Text()),
School: utils.TrimWhitespace(rowInfo["College:"].Text()),
Credit_hours: classInfo["Semester Credit Hours:"],
Class_level: classInfo["Class Level:"],
Activity_type: classInfo["Activity Type:"],
Grading: classInfo["Grading:"],
Internal_course_number: courseNum,
Catalog_year: getCatalogYear(session),
}

// Try to get lecture/lab contact hours and offering frequency from course description
contactMatches := contactRegexp.FindStringSubmatch(course.Description)
// Length of contactMatches should be 4 upon successful match
Expand All @@ -75,10 +67,34 @@ func parseCourse(courseNum string, session schema.AcademicSession, rowInfo map[s
course.Offering_frequency = contactMatches[3]
}

// Set the catalog year
course.Catalog_year = catalogYear
return &course
}

Courses[courseKey] = course
CourseIDMap[course.Id] = courseKey
return course
// getCatalogYear derives the catalog year from an academic session name whose
// first two bytes are a numeric year and whose third byte is a semester code,
// e.g. "25S".
//
// A fall ('F') session maps to its own year; spring ('S') and summer ('U')
// sessions map to the previous year.
//
// It panics if the name is shorter than three bytes, if the first two bytes
// are not a number, or if the semester code is not one of F, S or U.
func getCatalogYear(session schema.AcademicSession) string {
	// Guard the fixed-offset accesses below so that a malformed name produces
	// a descriptive panic rather than a bare index-out-of-range.
	if len(session.Name) < 3 {
		panic(fmt.Errorf("session name %q is too short to hold a year and semester", session.Name))
	}
	sessionYear, err := strconv.Atoi(session.Name[0:2])
	if err != nil {
		panic(err)
	}
	switch sessionSemester := session.Name[2]; sessionSemester {
	case 'F':
		return strconv.Itoa(sessionYear)
	case 'S', 'U':
		return strconv.Itoa(sessionYear - 1)
	default:
		// %q renders the byte as a single-quoted character literal.
		panic(fmt.Errorf("encountered invalid session semester %q", sessionSemester))
	}
}

// getPrefixAndNumber extracts the subject prefix and course number from the
// "Class Section:" entry of classInfo by matching it against coursePrefixRexp.
//
// It returns two empty strings when the entry is absent, or when the match
// does not yield exactly the full match plus two capture groups.
func getPrefixAndNumber(classInfo map[string]string) (string, string) {
	sectionID, found := classInfo["Class Section:"]
	if !found {
		return "", ""
	}

	groups := coursePrefixRexp.FindStringSubmatch(sectionID)
	if len(groups) != 3 {
		return "", ""
	}

	// groups[0] is the whole match; the prefix and number are the captures.
	return groups[1], groups[2]
}
125 changes: 125 additions & 0 deletions parser/courseParser_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package parser

import (
"testing"

"github.com/UTDNebula/nebula-api/api/schema"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
)

// TestGetCourse runs getCourse against every entry of testDataCache and
// compares the result with that entry's expected Course.
func TestGetCourse(t *testing.T) {
	loadTestData(t)

	for name, testCase := range testDataCache {
		t.Run(name, func(t *testing.T) {
			_, courseNum := getInternalClassAndCourseNum(testCase.ClassInfo)
			got := *getCourse(courseNum, testCase.Section.Academic_session, testCase.RowInfo, testCase.ClassInfo)

			// getCourse assigns a fresh object ID on every call, so Id is
			// left out of the comparison.
			ignoreID := cmpopts.IgnoreFields(schema.Course{}, "Id")

			if diff := cmp.Diff(testCase.Course, got, ignoreID); diff != "" {
				t.Errorf("Failed (-expected +got)\n %s", diff)
			}
		})
	}
}

// TestGetCatalogYear checks getCatalogYear against a table of session names
// covering the spring, fall and summer semester codes.
func TestGetCatalogYear(t *testing.T) {
	type catalogYearCase struct {
		Session  schema.AcademicSession
		Expected string
	}

	testCases := map[string]catalogYearCase{
		"Case_001": {Session: schema.AcademicSession{Name: "25S"}, Expected: "24"},
		"Case_002": {Session: schema.AcademicSession{Name: "25F"}, Expected: "25"},
		"Case_003": {Session: schema.AcademicSession{Name: "22U"}, Expected: "21"},
		"Case_004": {Session: schema.AcademicSession{Name: "20S"}, Expected: "19"},
	}

	for name, tc := range testCases {
		t.Run(name, func(t *testing.T) {
			if output := getCatalogYear(tc.Session); output != tc.Expected {
				t.Errorf("expected %s got %s", tc.Expected, output)
			}
		})
	}
}

// TestGetPrefixAndCourseNum checks getPrefixAndNumber against well-formed
// section ids, malformed or empty ids, and inputs where the "Class Section:"
// key is absent altogether (an empty map and a nil map).
func TestGetPrefixAndCourseNum(t *testing.T) {
	testCases := map[string]struct {
		classInfo map[string]string
		prefix    string
		number    string
	}{
		"Case_001": {
			classInfo: map[string]string{
				"Class Section:": "ACCT2301.001.25S",
			},
			prefix: "ACCT",
			number: "2301",
		},
		"Case_002": {
			classInfo: map[string]string{
				"Class Section:": "ENTP3301.002.24S",
			},
			prefix: "ENTP",
			number: "3301",
		},
		"Case_003": {
			classInfo: map[string]string{
				"Class Section:": "Garbage In, Garbage out",
			},
			prefix: "",
			number: "",
		},
		"Case_004": {
			classInfo: map[string]string{
				"Class Section:": "ENTP33S",
			},
			prefix: "",
			number: "",
		},
		"Case_005": {
			classInfo: map[string]string{
				"Class Section:": "",
			},
			prefix: "",
			number: "",
		},
		// The key is missing entirely: exercises the lookup-failed branch.
		"Case_006": {
			classInfo: map[string]string{},
			prefix:    "",
			number:    "",
		},
		// Reading from a nil map is legal in Go and behaves like a missing key.
		"Case_007": {
			classInfo: nil,
			prefix:    "",
			number:    "",
		},
	}

	for name, testCase := range testCases {
		t.Run(name, func(t *testing.T) {
			prefix, number := getPrefixAndNumber(testCase.classInfo)

			// %q keeps empty strings visible in the failure output.
			if prefix != testCase.prefix {
				t.Errorf("prefix: expected %q got %q", testCase.prefix, prefix)
			}
			if number != testCase.number {
				t.Errorf("number: expected %q got %q", testCase.number, number)
			}
		})
	}
}
49 changes: 17 additions & 32 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"fmt"
"log"
"os"
"strings"
"time"

"github.com/UTDNebula/api-tools/utils"
Expand Down Expand Up @@ -110,35 +109,33 @@ func parse(path string) {
panic(err)
}

// Get the rows of the info table
infoTable := doc.FindMatcher(goquery.Single("table.courseinfo__overviewtable > tbody"))
infoRows := infoTable.ChildrenFiltered("tr")
// Dictionary to hold the row data, keyed by row header
rowInfo := getRowInfo(doc)
// Dictionary to hold the class info, keyed by data label
classInfo := getClassInfo(doc)

var syllabusURI string
// Get the class and course num by splitting classInfo value

// Dictionary to hold the row data, keyed by row header
parseSection(rowInfo, classInfo)
utils.VPrint("Parsed!")
}

// getRowInfo collects the <tr> children of the course overview table body
// into a map. Each key is the row's whitespace-trimmed <th> text and each
// value is the selection for that row's <td>. A later row with the same
// header replaces an earlier one.
func getRowInfo(doc *goquery.Document) map[string]*goquery.Selection {
	overviewBody := doc.FindMatcher(goquery.Single("table.courseinfo__overviewtable > tbody"))
	rows := overviewBody.ChildrenFiltered("tr")

	// Size the map up front from the number of matched rows.
	result := make(map[string]*goquery.Selection, len(rows.Nodes))

	rows.Each(func(_ int, tr *goquery.Selection) {
		headerCell := tr.FindMatcher(goquery.Single("th"))
		key := utils.TrimWhitespace(headerCell.Text())
		result[key] = tr.FindMatcher(goquery.Single("td"))
	})

	return result
}

// Get syllabusURI from syllabus row link
if syllabus, ok := rowInfo["syllabus"]; ok {
syllabusURI, _ = syllabus.FindMatcher(goquery.Single("a")).Attr("href")
}

// Get the rows of the class info subtable
infoSubTable := infoTable.FindMatcher(goquery.Single("table.courseinfo__classsubtable > tbody"))
infoRows = infoSubTable.ChildrenFiltered("tr")

// Dictionary to hold the class info, keyed by data label
classInfo := make(map[string]string)
func getClassInfo(doc *goquery.Document) map[string]string {
infoRows := doc.FindMatcher(goquery.Single("table.courseinfo__classsubtable > tbody")).ChildrenFiltered("tr")
classInfo := make(map[string]string, len(infoRows.Nodes))

// Populate classInfo
infoRows.Each(func(_ int, row *goquery.Selection) {
rowHeaders := row.Find("td.courseinfo__classsubtable__th")
rowHeaders.Each(func(_ int, header *goquery.Selection) {
Expand All @@ -147,17 +144,5 @@ func parse(path string) {
classInfo[headerText] = dataText
})
})

// Get the class and course num by splitting classInfo value
classAndCourseNum := strings.Split(classInfo["Class/Course Number:"], " / ")
classNum := classAndCourseNum[0]
courseNum := utils.TrimWhitespace(classAndCourseNum[1])

// Figure out the academic session associated with this specific course/Section
session := getAcademicSession(rowInfo)

// Try to create the course and section based on collected info
courseRef := parseCourse(courseNum, session, rowInfo, classInfo)
parseSection(courseRef, classNum, syllabusURI, session, rowInfo, classInfo)
utils.VPrint("Parsed!")
return classInfo
}
Loading
Loading