Skip to content

Commit 3dac5c9

Browse files
committed
minor clean up
1 parent 9adb01a commit 3dac5c9

File tree

1 file changed

+22
-25
lines changed

1 file changed

+22
-25
lines changed

scrapers/coursebook.go

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,13 @@ var (
2929
termRegex = regexp.MustCompile("[0-9]{1,2}[sfu]")
3030
)
3131

32-
const reqThrottle = 400 * time.Millisecond
33-
const prefixThrottle = 5 * time.Second
32+
const (
33+
reqThrottle = 400 * time.Millisecond
34+
prefixThrottle = 5 * time.Second
35+
httpTimeout = 10 * time.Second
36+
)
3437

3538
// ScrapeCoursebook scrapes the UTD Coursebook for the provided term (semester).
36-
// if resume flag is true then
3739
func ScrapeCoursebook(term string, startPrefix string, outDir string, resume bool) {
3840
if startPrefix != "" && !prefixRegex.MatchString(startPrefix) {
3941
log.Fatalf("Invalid starting prefix %s, must match format cp_{abcde}", startPrefix)
@@ -43,7 +45,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string, resume boo
4345
}
4446

4547
scraper := newCoursebookScraper(term, outDir)
46-
defer scraper.cancel()
48+
defer scraper.chromedpCancel()
4749

4850
if resume && startPrefix == "" {
4951
// providing a starting prefix overrides the resume flag
@@ -125,8 +127,7 @@ type coursebookScraper struct {
125127
func newCoursebookScraper(term string, outDir string) *coursebookScraper {
126128
ctx, cancel := utils.InitChromeDp()
127129
httpClient := &http.Client{
128-
// longer than 10 seconds is probably a rate limit
129-
Timeout: 10 * time.Second,
130+
Timeout: httpTimeout,
130131
}
131132

132133
//prefixes in alphabetical order for skip prefix flag
@@ -268,8 +269,8 @@ func (s *coursebookScraper) getSectionIdsForPrefix(prefix string) ([]string, err
268269
return nil, fmt.Errorf("failed to fetch sections: %s", err)
269270
}
270271
sectionRegexp := utils.Regexpf(`View details for section (%s%s\.\w+\.%s)`, prefix[3:], utils.R_COURSE_CODE, utils.R_TERM_CODE)
271-
smatches := sectionRegexp.FindAllStringSubmatch(content, -1)
272-
for _, match := range smatches {
272+
matches := sectionRegexp.FindAllStringSubmatch(content, -1)
273+
for _, match := range matches {
273274
sections = append(sections, match[1])
274275
}
275276
}
@@ -292,21 +293,27 @@ func (s *coursebookScraper) req(queryStr string, retries int, reqName string) (s
292293
res, err = s.httpClient.Do(req)
293294
dur := time.Since(start)
294295

295-
if res != nil && res.StatusCode != 200 {
296-
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
296+
if res != nil {
297+
if res.StatusCode != 200 {
298+
return errors.New("non-200 response status code")
299+
}
300+
utils.VPrintf("[Request Success] Request for [%s] took %v", reqName, dur)
301+
} else if err != nil {
302+
var netErr net.Error
303+
if errors.As(err, &netErr) && netErr.Timeout() {
297304
utils.VPrintf("[Timeout] Request for [%s] timed out", reqName)
298-
return netErr // Return the error to trigger a retry
305+
} else {
306+
utils.VPrintf("[Request Error] Request for %s failed: %v", reqName, err)
299307
}
300-
301-
return errors.New("non-200 response status code")
302308
}
303309

304-
utils.VPrintf("[Success] Request for [%s] took %v", reqName, dur)
305310
return err
306311
}, retries, func(numRetries int) {
307-
utils.VPrintf("[Retry] Attempt %d of %d for request [%s]", numRetries, retries, reqName)
312+
utils.VPrintf("[Request Retry] Attempt %d of %d for request %s", numRetries, retries, reqName)
308313
s.coursebookHeaders = utils.RefreshToken(s.chromedpCtx)
309314
s.reqRetries++
315+
316+
//back off exponentially
310317
time.Sleep(time.Duration(math.Pow(2, float64(numRetries))) * time.Second)
311318
})
312319
if err != nil {
@@ -320,16 +327,6 @@ func (s *coursebookScraper) req(queryStr string, retries int, reqName string) (s
320327
return string(content), nil
321328
}
322329

323-
// refreshToken token using login info
324-
func (s *coursebookScraper) refreshToken() {
325-
s.coursebookHeaders = utils.RefreshToken(s.chromedpCtx)
326-
}
327-
328-
// cancel cancels chromedp context
329-
func (s *coursebookScraper) cancel() {
330-
s.chromedpCancel()
331-
}
332-
333330
// validate returns true if each prefix contains all required ids
334331
// if it does not it will re-scrape all missing sections
335332
func (s *coursebookScraper) validate() error {

0 commit comments

Comments
 (0)