Skip to content

Commit 8aafbbe

Browse files
authored
Merge branch 'develop' into issue-40
2 parents c4908c1 + 68f62cd commit 8aafbbe

File tree

7 files changed

+237
-165
lines changed

7 files changed

+237
-165
lines changed

parser/cometCalendarParser.go

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
/*
2+
This file contains the code for the comet calendar events parser.
3+
*/
4+
15
package parser
26

37
import (
@@ -8,14 +12,16 @@ import (
812
"regexp"
913
"slices"
1014
"strings"
15+
"time"
1116

17+
"github.com/UTDNebula/api-tools/scrapers"
1218
"github.com/UTDNebula/api-tools/utils"
1319
"github.com/UTDNebula/nebula-api/api/schema"
1420
)
1521

1622
// Some events have only the building name, not the abbreviation
1723
// Maps building names to their abbreviations
18-
var buildingAbbreviations = map[string]string{
24+
var DefaultBuildings = map[string]string{
1925
"Activity Center": "AB",
2026
"Activity Center Bookstore": "ACB",
2127
"Administration": "AD",
@@ -74,7 +80,7 @@ var buildingAbbreviations = map[string]string{
7480
}
7581

7682
// Valid building abbreviations for checking
77-
var validAbbreviations []string = []string{
83+
var DefaultValid []string = []string{
7884
"AB",
7985
"ACB",
8086
"AD",
@@ -146,6 +152,11 @@ func ParseCometCalendar(inDir string, outDir string) {
146152
}
147153

148154
multiBuildingMap := make(map[string]map[string]map[string][]schema.Event)
155+
// Some events have only the building name, not the abbreviation
156+
buildingAbbreviations, validAbbreviations, err := getLocationAbbreviations(inDir)
157+
if err != nil {
158+
panic(err)
159+
}
149160

150161
for _, event := range allEvents {
151162

@@ -181,7 +192,7 @@ func ParseCometCalendar(inDir string, outDir string) {
181192

182193
// If location doesn't have room number, check to see if location included a room
183194
if room == "" && isValidBuilding {
184-
locationParts := strings.SplitN(*location, ",", 2)
195+
locationParts := strings.SplitN(*location, ", ", 2)
185196
if len(locationParts) == 2 {
186197
room = locationParts[1]
187198
}
@@ -239,3 +250,52 @@ func ParseCometCalendar(inDir string, outDir string) {
239250

240251
utils.WriteJSON(fmt.Sprintf("%s/cometCalendar.json", outDir), result)
241252
}
253+
254+
// getAbbreviations dynamically retrieves the all of the locations abbreviations
255+
func getLocationAbbreviations(inDir string) (map[string]string, []string, error) {
256+
// Get the locations from the map scraper
257+
var mapFile []byte
258+
259+
mapFile, err := os.ReadFile(inDir + "/mapLocations.json")
260+
if err != nil {
261+
if os.IsNotExist(err) {
262+
// Scrape the data if the it doesn't exist yet and then get the map file
263+
scrapers.ScrapeMapLocations(inDir)
264+
time.Sleep(2 * time.Second)
265+
ParseMapLocations(inDir, inDir)
266+
time.Sleep(2 * time.Second)
267+
268+
// If fail to get the locations again, it's not because location is unscraped
269+
mapFile, err = os.ReadFile(inDir + "/mapLocations.json")
270+
if err != nil {
271+
return nil, nil, err
272+
}
273+
} else {
274+
return nil, nil, err
275+
}
276+
}
277+
278+
var locations []schema.MapBuilding
279+
if err = json.Unmarshal(mapFile, &locations); err != nil {
280+
return nil, nil, err
281+
}
282+
283+
// Process the abbreviations
284+
buildingsAbbreviations := make(map[string]string, 0) // Maps building names to their abbreviations
285+
validAbbreviations := make([]string, 0) // Valid building abreviations for checking
286+
287+
for _, location := range locations {
288+
// Trim the following acronym in the name
289+
trimmedName := strings.Split(*location.Name, " (")[0]
290+
// Fallback on the locations that have no acronyms
291+
abbreviation := ""
292+
if location.Acronym != nil {
293+
abbreviation = *location.Acronym
294+
}
295+
296+
buildingsAbbreviations[trimmedName] = abbreviation
297+
validAbbreviations = append(validAbbreviations, abbreviation)
298+
}
299+
300+
return buildingsAbbreviations, validAbbreviations, nil
301+
}

parser/gradeLoader.go

Lines changed: 35 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010
"strings"
1111
)
1212

13+
// grades lists the grade column names in the order their counts are stored
// in each grade distribution slice built by csvToMap.
var grades = []string{
	"A+", "A", "A-",
	"B+", "B", "B-",
	"C+", "C", "C-",
	"D+", "D", "D-",
	"F", "W", "P", "CR", "NC", "I",
}
14+
1315
func loadGrades(csvDir string) map[string]map[string][]int {
1416

1517
// MAP[SEMESTER] -> MAP[SUBJECT + NUMBER + SECTION] -> GRADE DISTRIBUTION
@@ -73,68 +75,48 @@ func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int {
7375
if err != nil {
7476
log.Panicf("Error parsing %s: %s", csvFile.Name(), err.Error())
7577
}
76-
// look for the subject column and w column
77-
subjectCol := -1
78-
catalogNumberCol := -1
79-
sectionCol := -1
80-
wCol := -1
81-
aPlusCol := -1
82-
83-
headerRow := records[0]
84-
85-
for j := 0; j < len(headerRow); j++ {
86-
switch {
87-
case headerRow[j] == "Subject":
88-
subjectCol = j
89-
case headerRow[j] == "Catalog Number" || headerRow[j] == "Catalog Nbr":
90-
catalogNumberCol = j
91-
case headerRow[j] == "Section":
92-
sectionCol = j
93-
case headerRow[j] == "W" || headerRow[j] == "Total W" || headerRow[j] == "W Total":
94-
wCol = j
95-
case headerRow[j] == "A+":
96-
aPlusCol = j
97-
}
98-
if wCol == -1 || subjectCol == -1 || catalogNumberCol == -1 || sectionCol == -1 || aPlusCol == -1 {
99-
continue
100-
} else {
101-
break
78+
79+
indexMap := make(map[string]int)
80+
81+
for j, col := range records[0] {
82+
switch col {
83+
case "Catalog Number", "Catalog Nbr":
84+
indexMap["Catalog Number"] = j
85+
case "W", "Total W", "W Total":
86+
indexMap["W"] = j
87+
default:
88+
indexMap[col] = j
10289
}
10390
}
10491

105-
if wCol == -1 {
106-
logFile.WriteString("could not find W column")
107-
//log.Panicf("could not find W column")
108-
}
109-
if sectionCol == -1 {
110-
logFile.WriteString("could not find Section column")
111-
log.Panicf("could not find Section column")
112-
}
113-
if subjectCol == -1 {
114-
logFile.WriteString("could not find Subject column")
115-
log.Panicf("could not find Subject column")
116-
}
117-
if catalogNumberCol == -1 {
118-
logFile.WriteString("could not find catalog # column")
119-
log.Panicf("could not find catalog # column")
92+
// required columns
93+
for _, name := range []string{"Section", "Subject", "Catalog Number", "A+"} {
94+
if _, ok := indexMap[name]; !ok {
95+
fmt.Fprintf(logFile, "could not find %s column", name)
96+
log.Panicf("could not find %s column", name)
97+
}
12098
}
121-
if aPlusCol == -1 {
122-
logFile.WriteString("could not find A+ column")
123-
log.Panicf("could not find A+ column")
99+
100+
// optional columns
101+
for _, name := range []string{"W", "P", "CR", "NC", "I"} {
102+
if _, ok := indexMap[name]; !ok {
103+
logFile.WriteString(fmt.Sprintf("could not find %s column\n", name))
104+
}
124105
}
125106

107+
sectionCol := indexMap["Section"]
108+
subjectCol := indexMap["Subject"]
109+
catalogNumberCol := indexMap["Catalog Number"]
110+
126111
distroMap := make(map[string][]int)
127112

128-
for _, record := range records {
113+
for _, record := range records[1:] {
129114
// convert grade distribution from string to int
130-
intSlice := [14]int{}
131-
132-
for j := 0; j < 13; j++ {
133-
intSlice[j], _ = strconv.Atoi(record[aPlusCol+j])
134-
}
135-
// add w number to the grade_distribution slice
136-
if wCol != -1 {
137-
intSlice[13], _ = strconv.Atoi(record[wCol])
115+
intSlice := make([]int, len(grades))
116+
for i, col := range grades {
117+
if pos, ok := indexMap[col]; ok {
118+
intSlice[i], _ = strconv.Atoi(record[pos])
119+
}
138120
}
139121

140122
// add new grade distribution to map, keyed by SUBJECT + NUMBER + SECTION

parser/professorParser.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010
"go.mongodb.org/mongo-driver/bson/primitive"
1111
)
1212

13-
func parseProfessors(sectionId primitive.ObjectID, rowInfo map[string]*goquery.Selection, classInfo map[string]string) []primitive.ObjectID {
13+
func parseProfessors(sectionId primitive.ObjectID, rowInfo map[string]*goquery.Selection) []primitive.ObjectID {
1414
professorText := utils.TrimWhitespace(rowInfo["Instructor(s):"].Text())
1515
professorMatches := personRegexp.FindAllStringSubmatch(professorText, -1)
1616
var profRefs []primitive.ObjectID = make([]primitive.ObjectID, 0, len(professorMatches))

parser/sectionParser.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func parseSection(rowInfo map[string]*goquery.Selection, classInfo map[string]st
5252
Section_number: sectionNumber,
5353
Course_reference: courseRef.Id,
5454
Academic_session: session,
55-
Professors: parseProfessors(id, rowInfo, classInfo),
55+
Professors: parseProfessors(id, rowInfo),
5656
Teaching_assistants: getTeachingAssistants(rowInfo),
5757
Internal_class_number: classNum,
5858
Instruction_mode: getInstructionMode(classInfo),

parser/validator_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,8 @@ func testCourseReferenceFail(fail string, courseIx int, sectionIx int, t *testin
381381
var sectionID, originalID primitive.ObjectID // used to store IDs of modified sections
382382

383383
// Build the failed section map based on fail type
384-
if fail == "missing" {
384+
switch fail {
385+
case "missing":
385386
// Misses a section
386387
for i, section := range testSections {
387388
if sectionIx != i {
@@ -390,7 +391,7 @@ func testCourseReferenceFail(fail string, courseIx int, sectionIx int, t *testin
390391
sectionID = section.Id // Nonexistent ID referenced by course
391392
}
392393
}
393-
} else if fail == "modified" {
394+
case "modified":
394395
// One section doesn't reference to correct courses
395396
for i, section := range testSections {
396397
sectionMap[section.Id] = section

scrapers/adacemicCalendars.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func ScrapeAcademicCalendars(outDir string) {
4747

4848
// Extract data from links
4949
// Current
50-
academicCalendars := []AcademicCalendar{AcademicCalendar{"", "", "current"}}
50+
academicCalendars := []AcademicCalendar{{"", "", "current"}}
5151
chromedp.Run(chromedpCtx, chromedp.TextContent("h2.wp-block-heading", &academicCalendars[0].Title, chromedp.ByQuery))
5252
var currentNode []*cdp.Node
5353
chromedp.Run(chromedpCtx, chromedp.Nodes("a.wp-block-button__link", &currentNode, chromedp.ByQuery))

0 commit comments

Comments
 (0)