11package parser
22
33import (
4- "fmt"
54 "regexp"
65 "strings"
76 "time"
87
8+ "github.com/PuerkitoBio/goquery"
99 "github.com/UTDNebula/api-tools/utils"
1010 "github.com/UTDNebula/nebula-api/api/schema"
1111 "go.mongodb.org/mongo-driver/bson/primitive"
12+ "golang.org/x/net/html/atom"
1213)
1314
1415var sectionPrefixRegexp * regexp.Regexp = utils .Regexpf (`^(?i)%s\.(%s)` , utils .R_SUBJ_COURSE , utils .R_SECTION_CODE )
1516var coreRegexp * regexp.Regexp = regexp .MustCompile (`[0-9]{3}` )
1617var personRegexp * regexp.Regexp = regexp .MustCompile (`(.+)・(.+)・(.+)` )
1718
18- func parseSection (courseRef * schema.Course , classNum string , syllabusURI string , session schema.AcademicSession , rowInfo map [string ]string , classInfo map [string ]string ) {
19+ func parseSection (courseRef * schema.Course , classNum string , syllabusURI string , session schema.AcademicSession , rowInfo map [string ]* goquery. Selection , classInfo map [string ]string ) {
1920 // Get subject prefix and course number by doing a regexp match on the section id
2021 sectionId := classInfo ["Class Section:" ]
2122 idMatches := sectionPrefixRegexp .FindStringSubmatch (sectionId )
@@ -34,7 +35,7 @@ func parseSection(courseRef *schema.Course, classNum string, syllabusURI string,
3435 section .Professors = parseProfessors (section .Id , rowInfo , classInfo )
3536
3637 // Get all TA/RA info
37- assistantText := rowInfo ["TA/RA(s):" ]
38+ assistantText := utils . TrimWhitespace ( rowInfo ["TA/RA(s):" ]. Text ())
3839 assistantMatches := personRegexp .FindAllStringSubmatch (assistantText , - 1 )
3940 section .Teaching_assistants = make ([]schema.Assistant , 0 , len (assistantMatches ))
4041 for _ , match := range assistantMatches {
@@ -50,18 +51,17 @@ func parseSection(courseRef *schema.Course, classNum string, syllabusURI string,
5051
5152 section .Internal_class_number = classNum
5253 section .Instruction_mode = classInfo ["Instruction Mode:" ]
53- section .Meetings = getMeetings (rowInfo , classInfo )
54+ section .Meetings = getMeetings (rowInfo )
5455
5556 // Parse core flags (may or may not exist)
56- coreText , hasCore := rowInfo [ "Core:" ]
57- if hasCore {
58- section .Core_flags = coreRegexp .FindAllString (coreText , - 1 )
57+
58+ if coreText , hasCore := rowInfo [ "Core:" ]; hasCore {
59+ section .Core_flags = coreRegexp .FindAllString (utils . TrimWhitespace ( coreText . Text ()) , - 1 )
5960 }
6061
6162 section .Syllabus_uri = syllabusURI
6263
63- semesterGrades , exists := GradeMap [session .Name ]
64- if exists {
64+ if semesterGrades , ok := GradeMap [session .Name ]; ok {
6565 // We have to trim leading zeroes from the section number in order to match properly, since the grade data does not use leading zeroes
6666 trimmedSectionNumber := strings .TrimLeft (section .Section_number , "0" )
6767 // Key into grademap should be uppercased like the grade data
@@ -79,76 +79,93 @@ func parseSection(courseRef *schema.Course, classNum string, syllabusURI string,
7979 courseRef .Sections = append (courseRef .Sections , section .Id )
8080}
8181
82- var termRegexp * regexp.Regexp = utils .Regexpf (`(?i)Term: (%s)` , utils .R_TERM_CODE )
83- var datesRegexp * regexp.Regexp = utils .Regexpf (`(?:Start|End)s: (%s)` , utils .R_DATE_MDY )
84-
85- func getAcademicSession (rowInfo map [string ]string ) schema.AcademicSession {
82+ func getAcademicSession (rowInfo map [string ]* goquery.Selection ) schema.AcademicSession {
8683 session := schema.AcademicSession {}
87- scheduleText := rowInfo ["Schedule:" ]
8884
89- session . Name = termRegexp . FindStringSubmatch ( scheduleText )[ 1 ]
90- dateMatches := datesRegexp . FindAllStringSubmatch ( scheduleText , - 1 )
91-
92- datesFound := len ( dateMatches )
93- switch {
94- case datesFound == 1 :
95- startDate , err := time . ParseInLocation ( "January 2, 2006" , dateMatches [ 0 ][ 1 ], timeLocation )
96- if err != nil {
97- panic ( err )
98- }
99- session . Start_date = startDate
100- case datesFound == 2 :
101- startDate , err := time . ParseInLocation ( "January 2, 2006" , dateMatches [ 0 ][ 1 ], timeLocation )
102- if err != nil {
103- panic ( err )
85+ infoNodes := rowInfo [ "Schedule:" ]. FindMatcher ( goquery . Single ( "p.courseinfo__sectionterm" )). Contents (). Nodes
86+ for _ , node := range infoNodes {
87+ if node . DataAtom == atom . B {
88+ //since the key is not a TextElement, the Text is stored in it's first child, a TextElement
89+ key := utils . TrimWhitespace ( node . FirstChild . Data )
90+ value := utils . TrimWhitespace ( node . NextSibling . Data )
91+
92+ switch key {
93+ case "Term:" :
94+ session . Name = value
95+ case "Starts:" :
96+ session . Start_date = parseTimeOrPanic ( value )
97+ case "Ends:" :
98+ session . End_date = parseTimeOrPanic ( value )
99+ }
104100 }
105- endDate , err := time .ParseInLocation ("January 2, 2006" , dateMatches [1 ][1 ], timeLocation )
106- if err != nil {
107- panic (err )
108- }
109- session .Start_date = startDate
110- session .End_date = endDate
111101 }
112102 return session
113103}
114104
115- var meetingsRegexp * regexp.Regexp = utils .Regexpf (`(%s)-(%s)\W+((?:%s(?:, )?)+)\W+(%s)-(%s)(?:\W+(?:(\S+)\s+(\S+)))` , utils .R_DATE_MDY , utils .R_DATE_MDY , utils .R_WEEKDAY , utils .R_TIME_AM_PM , utils .R_TIME_AM_PM )
105+ var meetingDatesRegexp = regexp .MustCompile (utils .R_DATE_MDY )
106+ var meetingDaysRegexp = regexp .MustCompile (utils .R_WEEKDAY )
107+ var meetingTimesRegexp = regexp .MustCompile (utils .R_TIME_AM_PM )
116108
117- func getMeetings (rowInfo map [string ]string , classInfo map [string ]string ) []schema.Meeting {
118- scheduleText := rowInfo ["Schedule:" ]
119- meetingMatches := meetingsRegexp .FindAllStringSubmatch (scheduleText , - 1 )
120- var meetings []schema.Meeting = make ([]schema.Meeting , 0 , len (meetingMatches ))
121- for _ , match := range meetingMatches {
122- meeting := schema.Meeting {}
109+ func getMeetings (rowInfo map [string ]* goquery.Selection ) []schema.Meeting {
110+ meetingItems := rowInfo ["Schedule:" ].Find ("div.courseinfo__meeting-item--multiple" )
111+ var meetings []schema.Meeting = make ([]schema.Meeting , 0 , meetingItems .Length ())
123112
124- startDate , err := time .ParseInLocation ("January 2, 2006" , match [1 ], timeLocation )
125- if err != nil {
126- panic (err )
113+ meetingItems .Each (func (i int , s * goquery.Selection ) {
114+ meeting := schema.Meeting {}
115+ meetingInfo := s .FindMatcher (goquery .Single ("p.courseinfo__meeting-time" ))
116+
117+ dates := meetingDatesRegexp .FindAllString (meetingInfo .Text (), - 1 )
118+ if len (dates ) == 2 {
119+ meeting .Start_date = parseTimeOrPanic (dates [0 ])
120+ meeting .End_date = parseTimeOrPanic (dates [1 ])
121+ } else if len (dates ) == 1 {
122+ meeting .Start_date = parseTimeOrPanic (dates [0 ])
123+ meeting .End_date = meeting .Start_date
127124 }
128- meeting .Start_date = startDate
129125
130- endDate , err := time .ParseInLocation ("January 2, 2006" , match [2 ], timeLocation )
131- if err != nil {
132- panic (err )
126+ days := meetingDaysRegexp .FindAllString (meetingInfo .Text (), - 1 )
127+ if days != nil {
128+ meeting .Meeting_days = days
129+ } else {
130+ meeting .Meeting_days = []string {} //avoid null in the json
133131 }
134- meeting .End_date = endDate
135-
136- meeting .Meeting_days = strings .Split (match [3 ], ", " )
137132
138- // Don't parse time into time object, adds unnecessary extra data
139- meeting .Start_time = match [4 ]
140- meeting .End_time = match [5 ]
141-
142- // Only add location data if it's available
143- if len (match ) > 6 {
144- location := schema.Location {}
145- location .Building = match [6 ]
146- location .Room = match [7 ]
147- location .Map_uri = fmt .Sprintf ("https://locator.utdallas.edu/%s_%s" , location .Building , location .Room )
148- meeting .Location = location
133+ times := meetingTimesRegexp .FindAllString (meetingInfo .Text (), - 1 )
134+ if len (times ) == 2 {
135+ meeting .Start_time = times [0 ]
136+ meeting .End_time = times [1 ]
137+ } else if len (times ) == 1 {
138+ meeting .Start_time = times [0 ]
139+ meeting .End_time = meeting .Start_time
149140 }
150141
142+ if locationInfo := meetingInfo .FindMatcher (goquery .Single ("a" )); locationInfo != nil {
143+ mapUri := locationInfo .AttrOr ("href" , "" )
144+
145+ //only add locations for meetings that have actual data, all meetings have a link some are not visible or empty
146+ if mapUri != "" && mapUri != "https://locator.utdallas.edu/" && mapUri != "https://locator.utdallas.edu/ONLINE" {
147+ splitText := strings .Split (utils .TrimWhitespace (locationInfo .Text ()), " " )
148+
149+ if len (splitText ) == 2 {
150+ meeting .Location = schema.Location {
151+ Building : splitText [0 ],
152+ Room : splitText [1 ],
153+ Map_uri : mapUri ,
154+ }
155+ }
156+ }
157+ }
151158 meetings = append (meetings , meeting )
152- }
159+ })
153160 return meetings
154161}
162+
163+ const timeLayout = "January 2, 2006"
164+
165+ func parseTimeOrPanic (value string ) time.Time {
166+ date , err := time .ParseInLocation (timeLayout , value , timeLocation )
167+ if err != nil {
168+ panic (err )
169+ }
170+ return date
171+ }
0 commit comments