-
Notifications
You must be signed in to change notification settings - Fork 46
Expand file tree
/
Copy pathcometCalendar.go
More file actions
204 lines (178 loc) · 6.03 KB
/
cometCalendar.go
File metadata and controls
204 lines (178 loc) · 6.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
/*
This file contains the code for the events scraper.
*/
package scrapers
import (
"bytes"
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"strings"
"time"
"github.com/UTDNebula/api-tools/utils"
"github.com/UTDNebula/nebula-api/api/schema"
"go.mongodb.org/mongo-driver/bson/primitive"
)
// RawEvent wraps a single entry of the calendar API's "events" array. Each
// entry nests the actual payload under an "event" key.
type RawEvent struct {
	// Event holds the decoded "event" object as a generic JSON map.
	Event map[string]any `json:"event"`
}
// APICalendarResponse models the envelope the calendar API wraps around each
// page of results.
type APICalendarResponse struct {
	// Events is the decoded "events" array of the requested page.
	Events []RawEvent `json:"events"`
	// Page is the decoded "page" object; ScrapeCometCalendar reads its "total"
	// entry as the number of pages.
	Page map[string]int `json:"page"`
	// Date is the decoded "date" object. It is not read by ScrapeCometCalendar.
	Date map[string]string `json:"date"`
}
// ScrapeCometCalendar retrieves calendar events through the API and writes normalized JSON output.
func ScrapeCometCalendar(outDir string) {
err := os.MkdirAll(outDir, 0777)
if err != nil {
panic(err)
}
cli := http.Client{Timeout: 15 * time.Second}
var calendarData APICalendarResponse
// Get the total number of pages
log.Printf("Getting the number of pages...")
if err := scrapeAndUnmarshal(&cli, 0, &calendarData); err != nil {
panic(err)
}
numPages := calendarData.Page["total"]
log.Printf("The number of pages is %d!\n\n", numPages)
var events []schema.Event
for page := range numPages {
log.Printf("Scraping events of page %d...", page+1)
if err := scrapeAndUnmarshal(&cli, page+1, &calendarData); err != nil {
panic(err)
}
log.Printf("Scraped events of page %d successfully!\n", page+1)
log.Printf("Parsing the events of page %d...", page+1)
for _, rawEvent := range calendarData.Events {
// Parse the time
eventInstance := toMap(toMap(toSlice(rawEvent.Event["event_instances"])[0])["event_instance"])
startTime := parseTime(toString(eventInstance["start"]))
endTime := startTime
if toString(eventInstance["end"]) != "" {
endTime = parseTime(toString(eventInstance["end"]))
}
// Parse location
location := strings.Trim(fmt.Sprintf("%s, %s", toString(rawEvent.Event["location_name"]), toString(rawEvent.Event["room_number"])), " ,")
// Parse the event types, event topic, and event target audience
filters := toMap(rawEvent.Event["filters"])
eventTypes := []string{}
eventTopics := []string{}
targetAudiences := []string{}
rawTypes := toSlice(filters["event_types"])
for _, rawType := range rawTypes {
eventTypes = append(eventTypes, toString(toMap(rawType)["name"]))
}
rawAudiences := toSlice(filters["event_target_audience"])
for _, audience := range rawAudiences {
targetAudiences = append(targetAudiences, toString(toMap(audience)["name"]))
}
rawTopics := toSlice(filters["event_topic"])
for _, topic := range rawTopics {
eventTopics = append(eventTopics, toString(toMap(topic)["name"]))
}
// Parse the event departments, and tags
departments := []string{}
tags := []string{}
rawTags := toSlice(rawEvent.Event["tags"])
for _, tag := range rawTags {
tags = append(tags, tag.(string))
}
rawDeparments := toSlice(rawEvent.Event["departments"])
for _, deparment := range rawDeparments {
departments = append(departments, toMap(deparment)["name"].(string))
}
// Parse the contact info, =ote that some events won't have contact phone number
rawContactInfo := toMap(rawEvent.Event["custom_fields"])
contactInfo := [3]string{}
for i, infoField := range []string{
"contact_information_name", "contact_information_email", "contact_information_phone",
} {
contactInfo[i] = toString(rawContactInfo[infoField])
}
events = append(events, schema.Event{
Id: primitive.NewObjectID(),
Summary: toString(rawEvent.Event["title"]),
Location: location,
StartTime: startTime,
EndTime: endTime,
Description: toString(rawEvent.Event["description_text"]),
EventType: eventTypes,
TargetAudience: targetAudiences,
Topic: eventTopics,
EventTags: tags,
EventWebsite: toString(rawEvent.Event["url"]),
Department: departments,
ContactName: contactInfo[0],
ContactEmail: contactInfo[1],
ContactPhoneNumber: contactInfo[2],
})
}
log.Printf("Parsed the events of page %d successfully!\n\n", page+1)
}
if err := utils.WriteJSON(fmt.Sprintf("%s/cometCalendarScraped.json", outDir), events); err != nil {
panic(err)
}
log.Printf("Finished parsing %d events successfully!\n\n", len(events))
}
// scrapeAndUnmarshal fetches one page of the calendar API with client and
// decodes the JSON body into data.
//
// It returns an error when the request cannot be built or sent, when the
// server answers with a status other than 200, when the body cannot be read,
// or when the body cannot be decoded into data (including a nil data pointer).
// The response body is closed on every return path.
func scrapeAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error {
	// Call API to get the byte data
	calendarURL := fmt.Sprintf("https://calendar.utdallas.edu/api/2/events?days=365&pp=100&page=%d", page)
	req, err := http.NewRequest(http.MethodGet, calendarURL, nil)
	if err != nil {
		return fmt.Errorf("building request for calendar page %d: %w", page, err)
	}
	res, err := client.Do(req)
	if err != nil {
		return fmt.Errorf("requesting calendar page %d: %w", page, err)
	}
	// A nil error from Do guarantees a non-nil response. Close its body on every
	// path below, including the error returns, so the connection is not leaked.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("ERROR: Non-200 status is returned, %s", res.Status)
	}

	// Unmarshal bytes to the response data
	var buffer bytes.Buffer
	if _, err := buffer.ReadFrom(res.Body); err != nil {
		return fmt.Errorf("reading calendar page %d: %w", page, err)
	}
	// data is already a pointer, so it is passed as-is; taking its address
	// again would hide a nil argument instead of reporting it.
	if err := json.Unmarshal(buffer.Bytes(), data); err != nil {
		return fmt.Errorf("decoding calendar page %d: %w", page, err)
	}
	return nil
}
// toSlice returns data as a []any when that is its dynamic type, and nil for
// anything else (including a nil interface or a slice of another element type).
func toSlice(data any) []any {
	switch items := data.(type) {
	case []any:
		return items
	default:
		return nil
	}
}
// toMap returns data as a map[string]any when that is its dynamic type, and
// nil for anything else. Reading a key from the nil result is safe, which lets
// callers chain lookups without checking each level.
func toMap(data any) map[string]any {
	// A failed assertion leaves object as the zero value, i.e. a nil map.
	object, _ := data.(map[string]any)
	return object
}
// toString returns data unchanged when it holds a string, and the empty string
// for nil or for a value of any other type.
func toString(data any) string {
	// A failed assertion (including on a nil interface) leaves text as "".
	text, _ := data.(string)
	return text
}
// parseTime parses stringTime as an RFC3339 timestamp and returns the result.
// It panics with the parse error when stringTime is not valid RFC3339, which
// includes the empty string.
func parseTime(stringTime string) time.Time {
	timestamp, err := time.Parse(time.RFC3339, stringTime)
	if err == nil {
		return timestamp
	}
	panic(err)
}