@@ -29,11 +29,13 @@ var (
2929 termRegex = regexp .MustCompile ("[0-9]{1,2}[sfu]" )
3030)
3131
32- const reqThrottle = 400 * time .Millisecond
33- const prefixThrottle = 5 * time .Second
32+ const (
33+ reqThrottle = 400 * time .Millisecond // NOTE(review): presumably the pause between individual requests — confirm at the use site
34+ prefixThrottle = 5 * time .Second // NOTE(review): presumably the pause between course prefixes — confirm at the use site
35+ httpTimeout = 10 * time .Second // http.Client timeout; a request taking longer than this is probably a rate limit
36+ )
3437
3538// ScrapeCoursebook scrapes UTD Coursebook for the provided term (semester).
36- // if resume flag is true then
3739func ScrapeCoursebook (term string , startPrefix string , outDir string , resume bool ) {
3840 if startPrefix != "" && ! prefixRegex .MatchString (startPrefix ) {
3941 log .Fatalf ("Invalid starting prefix %s, must match format cp_{abcde}" , startPrefix )
@@ -43,7 +45,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string, resume boo
4345 }
4446
4547 scraper := newCoursebookScraper (term , outDir )
46- defer scraper .cancel ()
48+ defer scraper .chromedpCancel ()
4749
4850 if resume && startPrefix == "" {
4951 // providing a starting prefix overrides the resume flag
@@ -125,8 +127,7 @@ type coursebookScraper struct {
125127func newCoursebookScraper (term string , outDir string ) * coursebookScraper {
126128 ctx , cancel := utils .InitChromeDp ()
127129 httpClient := & http.Client {
128- // longer than 10 seconds is probably a rate limit
129- Timeout : 10 * time .Second ,
130+ Timeout : httpTimeout ,
130131 }
131132
132133 //prefixes in alphabetical order for skip prefix flag
@@ -268,8 +269,8 @@ func (s *coursebookScraper) getSectionIdsForPrefix(prefix string) ([]string, err
268269 return nil , fmt .Errorf ("failed to fetch sections: %s" , err )
269270 }
270271 sectionRegexp := utils .Regexpf (`View details for section (%s%s\.\w+\.%s)` , prefix [3 :], utils .R_COURSE_CODE , utils .R_TERM_CODE )
271- smatches := sectionRegexp .FindAllStringSubmatch (content , - 1 )
272- for _ , match := range smatches {
272+ matches := sectionRegexp .FindAllStringSubmatch (content , - 1 )
273+ for _ , match := range matches {
273274 sections = append (sections , match [1 ])
274275 }
275276 }
@@ -292,21 +293,27 @@ func (s *coursebookScraper) req(queryStr string, retries int, reqName string) (s
292293 res , err = s .httpClient .Do (req )
293294 dur := time .Since (start )
294295
295- if res != nil && res .StatusCode != 200 {
296- if netErr , ok := err .(net.Error ); ok && netErr .Timeout () {
296+ if res != nil {
297+ if res .StatusCode != 200 {
298+ return errors .New ("non-200 response status code" )
299+ }
300+ utils .VPrintf ("[Request Success] Request for [%s] took %v" , reqName , dur )
301+ } else if err != nil {
302+ var netErr net.Error
303+ if errors .As (err , & netErr ) && netErr .Timeout () {
297304 utils .VPrintf ("[Timeout] Request for [%s] timed out" , reqName )
298- return netErr // Return the error to trigger a retry
305+ } else {
306+ utils .VPrintf ("[Request Error] Request for %s failed: %v" , reqName , err )
299307 }
300-
301- return errors .New ("non-200 response status code" )
302308 }
303309
304- utils .VPrintf ("[Success] Request for [%s] took %v" , reqName , dur )
305310 return err
306311 }, retries , func (numRetries int ) {
307- utils .VPrintf ("[Retry] Attempt %d of %d for request [%s] " , numRetries , retries , reqName )
312+ utils .VPrintf ("[Request Retry] Attempt %d of %d for request %s " , numRetries , retries , reqName )
308313 s .coursebookHeaders = utils .RefreshToken (s .chromedpCtx )
309314 s .reqRetries ++
315+
316+ //back off exponentially
310317 time .Sleep (time .Duration (math .Pow (2 , float64 (numRetries ))) * time .Second )
311318 })
312319 if err != nil {
@@ -320,16 +327,6 @@ func (s *coursebookScraper) req(queryStr string, retries int, reqName string) (s
320327 return string (content ), nil
321328}
322329
323- // refreshToken token using login info
324- func (s * coursebookScraper ) refreshToken () {
325- s .coursebookHeaders = utils .RefreshToken (s .chromedpCtx )
326- }
327-
328- // cancel cancels chromedp context
329- func (s * coursebookScraper ) cancel () {
330- s .chromedpCancel ()
331- }
332-
333330// validate checks that each prefix contains all required ids;
334331// if one does not, it will re-scrape all missing sections
335332func (s * coursebookScraper ) validate () error {
0 commit comments