Skip to content

Commit 9a0c058

Browse files
committed
Dynamically retrieve valid abbrs
1 parent dcff73e commit 9a0c058

File tree

2 files changed

+88
-36
lines changed

2 files changed

+88
-36
lines changed

parser/cometCalendarParser.go

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
/*
2+
This file contains the code for the comet calendar events parser.
3+
*/
4+
15
package parser
26

37
import (
@@ -17,7 +21,7 @@ import (
1721

1822
// Some events have only the building name, not the abbreviation
1923
// Maps building names to their abbreviations
20-
var buildingAbbreviations = map[string]string{
24+
var DefaultBuildings = map[string]string{
2125
"Activity Center": "AB",
2226
"Activity Center Bookstore": "ACB",
2327
"Administration": "AD",
@@ -76,7 +80,7 @@ var buildingAbbreviations = map[string]string{
7680
}
7781

7882
// Valid building abbreviations for checking
79-
var validAbbreviations []string = []string{
83+
var DefaultValid []string = []string{
8084
"AB",
8185
"ACB",
8286
"AD",
@@ -148,6 +152,7 @@ func ParseCometCalendar(inDir string, outDir string) {
148152
}
149153

150154
multiBuildingMap := make(map[string]map[string]map[string][]schema.Event)
155+
buildingAbbreviations, validAbbreviations := getAbbreviations(inDir)
151156

152157
for _, event := range allEvents {
153158

@@ -241,3 +246,34 @@ func ParseCometCalendar(inDir string, outDir string) {
241246

242247
utils.WriteJSON(fmt.Sprintf("%s/cometCalendar.json", outDir), result)
243248
}
249+
250+
// getAbbreviations dynamically retrieves the all of the locations abbreviations
251+
func getAbbreviations(inDir string) (map[string]string, []string) {
252+
// Get the locations from the map scraper
253+
mapFile, err := os.ReadFile(inDir + "/mapLocations.json")
254+
if err != nil {
255+
// Fall back if we haven't scraped the locations yet
256+
return DefaultBuildings, DefaultValid
257+
}
258+
var locations []map[string]any
259+
if err = json.Unmarshal(mapFile, &locations); err != nil {
260+
panic(err)
261+
}
262+
263+
// Process the abbreviations
264+
buildingsAbbrs := make(map[string]string, 0)
265+
validAbbrs := make([]string, 0)
266+
267+
for _, location := range locations {
268+
name := *utils.ConvertFromInterface[string](location["name"])
269+
acronym := *utils.ConvertFromInterface[string](location["acronym"])
270+
271+
// Trim the tailing acronym in the name
272+
trimmedName := strings.Split(name, " (")[0]
273+
buildingsAbbrs[trimmedName] = acronym
274+
275+
validAbbrs = append(validAbbrs, acronym)
276+
}
277+
278+
return buildingsAbbrs, validAbbrs
279+
}

scrapers/cometCalendar.go

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
This file contains the code for the events scraper.
2+
This file contains the code for the comet calendar events scraper.
33
*/
44

55
package scrapers
@@ -19,6 +19,8 @@ import (
1919
"go.mongodb.org/mongo-driver/bson/primitive"
2020
)
2121

22+
// BASE_CAL_URL is the events endpoint of the calendar API; the paging query
// parameters are appended to it for each request.
const BASE_CAL_URL string = "https://calendar.utdallas.edu/api/2/events"
23+
2224
// RawEvent mirrors the nested event payload returned by the calendar API.
2325
type RawEvent struct {
2426
Event map[string]any `json:"event"`
@@ -31,7 +33,7 @@ type APICalendarResponse struct {
3133
Date map[string]string `json:"date"`
3234
}
3335

34-
// ScrapeCometCalendar retrieves calendar events through the API and writes normalized JSON output.
36+
// ScrapeCometCalendar retrieves calendar events through the API
3537
func ScrapeCometCalendar(outDir string) {
3638
err := os.MkdirAll(outDir, 0777)
3739
if err != nil {
@@ -42,7 +44,7 @@ func ScrapeCometCalendar(outDir string) {
4244

4345
// Get the total number of pages
4446
log.Printf("Getting the number of pages...")
45-
if err := scrapeAndUnmarshal(&client, 0, &calendarData); err != nil {
47+
if err := callAPIAndUnmarshal(&client, 0, &calendarData); err != nil {
4648
panic(err)
4749
}
4850
numPages := calendarData.Page["total"]
@@ -51,20 +53,20 @@ func ScrapeCometCalendar(outDir string) {
5153
var calendarEvents []schema.Event
5254
for page := range numPages {
5355
log.Printf("Scraping events of page %d...", page+1)
54-
if err := scrapeAndUnmarshal(&client, page+1, &calendarData); err != nil {
56+
if err := callAPIAndUnmarshal(&client, page+1, &calendarData); err != nil {
5557
panic(err)
5658
}
57-
5859
for _, rawEvent := range calendarData.Events {
59-
startTime, endTime := parseStartAndEndTime(rawEvent)
60-
eventTypes, targetAudiences, eventTopics := parseFilters(rawEvent)
61-
departments, tags := parseDepartmentsAndTags(rawEvent)
62-
contactInfo := parseContactInfo(rawEvent)
60+
// Parse all necessary info
61+
startTime, endTime := getTime(rawEvent)
62+
eventTypes, targetAudiences, eventTopics := getFilters(rawEvent)
63+
departments, tags := getDepartmentsAndTags(rawEvent)
64+
contactInfo := getContactInfo(rawEvent)
6365

6466
calendarEvents = append(calendarEvents, schema.Event{
6567
Id: primitive.NewObjectID(),
6668
Summary: convert[string](rawEvent.Event["title"]),
67-
Location: parseEventLocation(rawEvent),
69+
Location: getEventLocation(rawEvent),
6870
StartTime: startTime,
6971
EndTime: endTime,
7072
Description: convert[string](rawEvent.Event["description_text"]),
@@ -90,44 +92,51 @@ func ScrapeCometCalendar(outDir string) {
9092
}
9193

9294
// scrapeAndUnmarshal fetches a calendar page and decodes it into data.
93-
func scrapeAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error {
95+
func callAPIAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error {
9496
// Call API to get the byte data
95-
calendarUrl := fmt.Sprintf("https://calendar.utdallas.edu/api/2/events?days=365&pp=100&page=%d", page)
97+
calendarUrl := fmt.Sprintf("%s?days=365&pp=100&page=%d", BASE_CAL_URL, page)
9698
request, err := http.NewRequest("GET", calendarUrl, nil)
9799
if err != nil {
98100
return err
99101
}
102+
request.Header = http.Header{
103+
"Content-type": {"application/json"},
104+
"Accept": {"application/json"},
105+
}
106+
100107
response, err := client.Do(request)
101108
if err != nil {
102109
return err
103110
}
104111
if response != nil && response.StatusCode != 200 {
105112
return fmt.Errorf("ERROR: Non-200 status is returned, %s", response.Status)
106113
}
114+
defer response.Body.Close()
107115

108116
// Unmarshal bytes to the response data
109117
buffer := bytes.Buffer{}
110118
if _, err = buffer.ReadFrom(response.Body); err != nil {
111119
return err
112120
}
113-
response.Body.Close()
114121
if err = json.Unmarshal(buffer.Bytes(), &data); err != nil {
115122
return err
116123
}
124+
117125
return nil
118126
}
119127

120-
// parseStartAndEndTime parses the start and end time of the event
121-
func parseStartAndEndTime(event RawEvent) (time.Time, time.Time) {
128+
// getTime parses the start and end time of the event
129+
func getTime(event RawEvent) (time.Time, time.Time) {
122130
instance := convert[map[string]any](
123-
convert[map[string]any](convert[[]any](event.Event["event_instances"])[0])["event_instance"],
124-
)
131+
convert[map[string]any](
132+
convert[[]any](event.Event["event_instances"])[0])["event_instance"])
125133

126134
// Converts RFC3339 timestamp string to time.Time
127135
startTime, err := time.Parse(time.RFC3339, convert[string](instance["start"]))
128136
if err != nil {
129137
panic(err)
130138
}
139+
131140
var endTime time.Time
132141
if convert[string](instance["end"]) != "" {
133142
endTime, err = time.Parse(time.RFC3339, convert[string](instance["end"]))
@@ -137,27 +146,30 @@ func parseStartAndEndTime(event RawEvent) (time.Time, time.Time) {
137146
} else {
138147
endTime = startTime
139148
}
149+
140150
return startTime, endTime
141151
}
142152

143-
func parseEventLocation(event RawEvent) string {
153+
// getEventLocation parses the location of the event
154+
func getEventLocation(event RawEvent) string {
144155
building := convert[string](event.Event["location_name"])
145-
room_num := convert[string](event.Event["room_number"])
146-
location := strings.Trim(fmt.Sprintf("%s, %s", building, room_num), " ,")
156+
room := convert[string](event.Event["room_number"])
157+
location := strings.Trim(fmt.Sprintf("%s, %s", building, room), " ,")
158+
147159
return location
148160
}
149161

150-
// Parse the event types, event topic, and event target audience
151-
func parseFilters(event RawEvent) ([]string, []string, []string) {
152-
eventTypes := []string{}
162+
// getFilters parses the types, topics, and target audiences
163+
func getFilters(event RawEvent) ([]string, []string, []string) {
164+
types := []string{}
153165
targetAudiences := []string{}
154-
eventTopics := []string{}
166+
topics := []string{}
155167

156168
filters := convert[map[string]any](event.Event["filters"])
157169

158170
rawTypes := convert[[]any](filters["event_types"])
159171
for _, rawType := range rawTypes {
160-
eventTypes = append(eventTypes, convert[string](convert[map[string]any](rawType)["name"]))
172+
types = append(types, convert[string](convert[map[string]any](rawType)["name"]))
161173
}
162174

163175
rawAudiences := convert[[]any](filters["event_target_audience"])
@@ -167,13 +179,14 @@ func parseFilters(event RawEvent) ([]string, []string, []string) {
167179

168180
rawTopics := convert[[]any](filters["event_topic"])
169181
for _, topic := range rawTopics {
170-
eventTopics = append(eventTopics, convert[string](convert[map[string]any](topic)["name"]))
182+
topics = append(topics, convert[string](convert[map[string]any](topic)["name"]))
171183
}
172-
return eventTypes, targetAudiences, eventTopics
184+
185+
return types, targetAudiences, topics
173186
}
174187

175-
// Parse the event departments, and event tags
176-
func parseDepartmentsAndTags(event RawEvent) ([]string, []string) {
188+
// getDepartmentsAndTags parses the departments, and tags
189+
func getDepartmentsAndTags(event RawEvent) ([]string, []string) {
177190
departments := []string{}
178191
tags := []string{}
179192

@@ -186,28 +199,31 @@ func parseDepartmentsAndTags(event RawEvent) ([]string, []string) {
186199
for _, deparment := range rawDeparments {
187200
departments = append(departments, convert[string](convert[map[string]any](deparment)["name"]))
188201
}
202+
189203
return departments, tags
190204
}
191205

192-
// Parse the contact info.
193-
func parseContactInfo(event RawEvent) [3]string {
206+
// getContactInfo parses the contact info.
207+
func getContactInfo(event RawEvent) [3]string {
194208
// Note that some events won't have contact phone number
195-
rawContactInfo := convert[map[string]any](event.Event["custom_fields"])
196209
contactInfo := [3]string{}
210+
211+
rawContactInfo := convert[map[string]any](event.Event["custom_fields"])
197212
for i, infoField := range []string{
198213
"contact_information_name",
199214
"contact_information_email",
200215
"contact_information_phone",
201216
} {
202217
contactInfo[i] = convert[string](rawContactInfo[infoField])
203218
}
219+
204220
return contactInfo
205221
}
206222

207223
// convert() attempts to convert data into types for this scraper
208224
func convert[T []any | map[string]any | string](data any) T {
209-
if newTypeData, ok := data.(T); ok {
210-
return newTypeData
225+
if newTypedData, ok := data.(T); ok {
226+
return newTypedData
211227
}
212228
var zeroValue T
213229
return zeroValue

0 commit comments

Comments
 (0)