Skip to content

Commit 321f9f3

Browse files
authored
Merge pull request #97 from NewdlDewdl/develop
Resolved conflicts after adding Calendar Parser (Issue #78)
2 parents 407f8b1 + 242d414 commit 321f9f3

File tree

6 files changed

+266
-5
lines changed

6 files changed

+266
-5
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ Run the tool by changing directory using `cd` to the `api-tools` directory and r
7575
| Command | Description |
7676
|---------|-------------|
7777
| `./api-tools -parse -astra` | Parses Astra data. |
78+
| `./api-tools -parse -calendar` | Parses calendar data. |
7879
| `./api-tools -parse -csv [directory]` | Outputs grade data CSVs (default: `./grade-data`). |
7980
| `./api-tools -parse -map` | Parses UTD Map data. |
8081
| `./api-tools -parse -mazevo` | Parses Mazevo data. |

go.mod

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ require (
1111
github.com/joho/godotenv v1.5.1
1212
github.com/valyala/fastjson v1.6.4
1313
go.mongodb.org/mongo-driver v1.17.3
14-
golang.org/x/net v0.43.0
14+
golang.org/x/net v0.36.0
15+
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
1516
)
1617

1718
require (
@@ -57,6 +58,9 @@ require (
5758
github.com/josharian/intern v1.0.0 // indirect
5859
github.com/json-iterator/go v1.1.12 // indirect
5960
github.com/klauspost/compress v1.17.8 // indirect
61+
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
62+
github.com/kr/pretty v0.3.1 // indirect
63+
github.com/kr/text v0.2.0 // indirect
6064
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
6165
github.com/leodido/go-urn v1.4.0 // indirect
6266
github.com/mailru/easyjson v0.9.0 // indirect
@@ -66,6 +70,7 @@ require (
6670
github.com/montanaflynn/stats v0.7.1 // indirect
6771
github.com/pelletier/go-toml/v2 v2.2.4 // indirect
6872
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
73+
github.com/rogpeppe/go-internal v1.13.1 // indirect
6974
github.com/stretchr/testify v1.11.1 // indirect
7075
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
7176
github.com/ugorji/go/codec v1.3.0 // indirect

go.sum

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI
5252
github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
5353
github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 h1:Om6kYQYDUk5wWbT0t0q6pvyM49i9XZAv9dDrkDA7gjk=
5454
github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
55+
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
5556
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
5657
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
5758
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -117,10 +118,17 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr
117118
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
118119
github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=
119120
github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
121+
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
122+
github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY=
123+
github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8=
124+
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
125+
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
120126
github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
121127
github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
122128
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
123129
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
130+
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
131+
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
124132
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
125133
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
126134
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
@@ -140,12 +148,16 @@ github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8
140148
github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
141149
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw=
142150
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
151+
github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
152+
github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc=
153+
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
143154
github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
144155
github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
145156
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
146157
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
147158
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
148159
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
160+
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
149161
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
150162
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
151163
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=

main.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ func main() {
3838
scrapeProfiles := flag.Bool("profiles", false, "Alongside -scrape, signifies that professor profiles should be scraped.")
3939
// Flag for soc scraping
4040
scrapeOrganizations := flag.Bool("organizations", false, "Alongside -scrape, signifies that SOC organizations should be scraped.")
41-
// Flag for calendar scraping
42-
scrapeCalendar := flag.Bool("calendar", false, "Alongside -scrape, signifies that calendar should be scraped.")
41+
// Flag for calendar scraping and parsing
42+
calendar := flag.Bool("calendar", false, "Alongside -scrape or -parse, signifies that calendar should be scraped.")
4343
// Flag for astra scraping and parsing
4444
astra := flag.Bool("astra", false, "Alongside -scrape or -parse, signifies that Astra should be scraped/parsed.")
4545
// Flag for mazevo scraping and parsing
@@ -106,7 +106,7 @@ func main() {
106106
scrapers.ScrapeCoursebook(*term, *startPrefix, *outDir, *resume)
107107
case *scrapeOrganizations:
108108
scrapers.ScrapeOrganizations(*outDir)
109-
case *scrapeCalendar:
109+
case *calendar:
110110
scrapers.ScrapeCalendar(*outDir)
111111
case *astra:
112112
scrapers.ScrapeAstra(*outDir)
@@ -119,6 +119,8 @@ func main() {
119119
}
120120
case *parse:
121121
switch {
122+
case *calendar:
123+
parser.ParseCalendar(*inDir, *outDir)
122124
case *astra:
123125
parser.ParseAstra(*inDir, *outDir)
124126
case *mazevo:

parser/calendarParser.go

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
package parser
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"log"
7+
"os"
8+
"regexp"
9+
"slices"
10+
"strings"
11+
12+
"github.com/UTDNebula/api-tools/utils"
13+
"github.com/UTDNebula/nebula-api/api/schema"
14+
)
15+
16+
// buildingAbbreviations maps full building names to their abbreviations.
// Some events list only the building name, not the abbreviation, so the
// parser falls back to this table when no valid abbreviation is present.
//
// Several names are prefixes of longer names (for example "Student Union" and
// "Student Union Food Court"), so a substring lookup must prefer the longest
// matching name.
//
// Names containing an apostrophe are listed with both the ASCII (') and the
// typographic (’) form, because either may appear in a location string.
var buildingAbbreviations = map[string]string{
	"Activity Center": "AB",
	"Activity Center Bookstore": "ACB",
	"Administration": "AD",
	"Edith and Peter O’Donnell Jr. Athenaeum": "APC",
	"Edith and Peter O'Donnell Jr. Athenaeum": "APC",
	"Edith O'Donnell Arts and Technology Building": "ATC",
	"Edith O’Donnell Arts and Technology Building": "ATC",
	"Lloyd V. Berkner Hall": "BE",
	"Bioengineering and Sciences Building": "BSB",
	"Classroom Building": "CB",
	"Callier Center Richardson": "CR",
	"Callier Center Addition": "CRA",
	"Davidson-Gundy Alumni Center": "DGA",
	"Dining Hall West": "DHW",
	"Engineering and Computer Science North": "ECSN",
	"Engineering and Computer Science South": "ECSS",
	"Engineering and Computer Science West": "ECSW",
	"Energy Plant": "EP",
	"Founders Annex": "FA",
	"Facilities Management": "FM",
	"Founders North": "FN",
	"Founders Building": "FO",
	"Cecil H. Green Hall": "GR",
	"Karl Hoblitzelle Hall": "HH",
	"Erik Jonsson Academic Center": "JO",
	"Naveen Jindal School of Management": "JSOM",
	"Eugene McDermott Library": "MC",
	"Modular Lab 1": "ML1",
	"Modular Lab 2": "ML2",
	"North Office Building": "NB",
	"North Lab": "NL",
	"Police": "PD",
	"Physics Annex": "PHA",
	"Physics Building": "PHY",
	"Natural Science and Engineering Research Lab": "RL",
	"Research and Operations Center": "ROC",
	"Research and Operations Center West": "ROW",
	"Service Building": "SB",
	"Sciences Building": "SCI",
	"Safety and Grounds": "SG",
	"Student Learning Center": "SLC",
	"Student Services Building Addition": "SSA",
	"Student Services Building": "SSB",
	"Student Union": "SU",
	"Student Union Food Court": "SUFC",
	"Synergy Park North": "SPN",
	"Synergy Park North 2": "SP2",
	"University Theatre": "TH",
	"Visitor Center": "VC",
	"Waterview Science and Technology Center": "WSTC",
	"Andromeda Hall & University Housing Office": "RHA",
	"Capella Hall": "RHC",
	"Helix Hall": "RHH",
	"Sirius Hall": "RHS",
	"Vega Hall": "RHV",
	"Recreation Center West": "RCW",
	"SP/N Gallery": "SP2",
}
75+
76+
// validAbbreviations lists every building abbreviation accepted as a valid
// building code. It is used to reject upper-case tokens that look like an
// abbreviation but are not one (for example "UTD").
var validAbbreviations = []string{
	"AB",
	"ACB",
	"AD",
	"APC",
	"ATC",
	"BE",
	"BSB",
	"CB",
	"CR",
	"CRA",
	"DGA",
	"DHW",
	"ECSN",
	"ECSS",
	"ECSW",
	"EP",
	"FA",
	"FM",
	"FN",
	"FO",
	"GR",
	"HH",
	"JO",
	"JSOM",
	"MC",
	"ML1",
	"ML2",
	"NB",
	"NL",
	"PD",
	"PHA",
	"PHY",
	"RL",
	"ROC",
	"ROW",
	"SB",
	"SCI",
	"SG",
	"SLC",
	"SSA",
	"SSB",
	"SU",
	"SUFC",
	"SPN",
	"SP2",
	"TH",
	"VC",
	"WSTC",
	"RHA",
	"RHC",
	"RHH",
	"RHS",
	"RHV",
	"RCW",
}
133+
134+
func ParseCalendar(inDir string, outDir string) {
135+
136+
calendarFile, err := os.ReadFile(inDir + "/eventScraped.json")
137+
if err != nil {
138+
panic(err)
139+
}
140+
141+
var allEvents []schema.Event
142+
143+
err = json.Unmarshal(calendarFile, &allEvents)
144+
if err != nil {
145+
panic(err)
146+
}
147+
148+
multiBuildingMap := make(map[string]map[string]map[string][]schema.Event)
149+
150+
for _, event := range(allEvents) {
151+
152+
// Get date
153+
dateTime := event.StartTime
154+
dateTimeString := dateTime.String()
155+
date := dateTimeString[:10]
156+
157+
// Get building and room
158+
location := utils.ConvertFromInterface[string](event.Location)
159+
160+
// Regexp to match building abbreviations and room numbers
161+
buildingRegexp := regexp.MustCompile(`[A-Z]{2,4}`)
162+
roomRegexp := regexp.MustCompile(`([0-9]{1,2}\.[0-9]{3})([A-Z])?`)
163+
164+
building := buildingRegexp.FindString(*location)
165+
room := roomRegexp.FindString(*location)
166+
167+
// buildingRegexp might capture something that isn't a valid building abbreviation (e.g., UTD)
168+
isValidBuilding := slices.Contains(validAbbreviations, building)
169+
170+
// If location doesn't have building abbreviation or buildingRegexp captured an invalid abbreviation,
171+
// check for the full building name
172+
lowercaseLocation := strings.ToLower(*location)
173+
if building == "" || !isValidBuilding {
174+
for key := range buildingAbbreviations {
175+
if strings.Contains(lowercaseLocation, strings.ToLower(key)) {
176+
building = buildingAbbreviations[key]
177+
isValidBuilding = true
178+
}
179+
}
180+
}
181+
182+
// If location doesn't have room number, check to see if location included a room
183+
if room == "" && isValidBuilding {
184+
locationParts := strings.SplitN(*location, ",", 2)
185+
if len(locationParts) == 2 {
186+
room = locationParts[1]
187+
}
188+
}
189+
190+
// If building is still empty string, then location was initally an empty string
191+
// or location was a place off campus
192+
if building == "" {
193+
building = "Other"
194+
}
195+
196+
// If room is still empty string, then location was initally an empty string, or
197+
// location did not include a room, or location was a place off campus
198+
if room == "" {
199+
room = "Other"
200+
}
201+
202+
if _, exists := multiBuildingMap[date]; !exists {
203+
multiBuildingMap[date] = make(map[string]map[string][]schema.Event)
204+
}
205+
206+
if _, exists := multiBuildingMap[date][building]; !exists {
207+
multiBuildingMap[date][building] = make(map[string][]schema.Event)
208+
}
209+
210+
multiBuildingMap[date][building][room] = append(multiBuildingMap[date][building][room], event)
211+
}
212+
213+
var result []schema.MultiBuildingEvents[schema.Event]
214+
215+
for date, buildings := range multiBuildingMap {
216+
var singleBuildings []schema.SingleBuildingEvents[schema.Event]
217+
for building, rooms := range buildings {
218+
var roomEvents []schema.RoomEvents[schema.Event]
219+
for room, events := range rooms {
220+
roomEvents = append(roomEvents, schema.RoomEvents[schema.Event]{
221+
Room: room,
222+
Events: events,
223+
})
224+
}
225+
226+
singleBuildings = append(singleBuildings, schema.SingleBuildingEvents[schema.Event]{
227+
Building: building,
228+
Rooms: roomEvents,
229+
})
230+
}
231+
232+
result = append(result, schema.MultiBuildingEvents[schema.Event]{
233+
Date: date,
234+
Buildings: singleBuildings,
235+
})
236+
}
237+
238+
log.Print("Parsed Calendar!")
239+
240+
utils.WriteJSON(fmt.Sprintf("%s/events.json", outDir), result)
241+
}

scrapers/calendar.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ func ScrapeCalendar(outDir string) {
134134
log.Printf("Parsed the events of page %d successfully!\n\n", page+1)
135135
}
136136

137-
if err := utils.WriteJSON(fmt.Sprintf("%s/events.json", outDir), events); err != nil {
137+
if err := utils.WriteJSON(fmt.Sprintf("%s/eventScraped.json", outDir), events); err != nil {
138138
panic(err)
139139
}
140140
log.Printf("Finished parsing %d events successfully!\n\n", len(events))

0 commit comments

Comments
 (0)