@@ -6,6 +6,7 @@ package scrapers
66
77import (
88 "bytes"
9+ "errors"
910 "fmt"
1011 "log"
1112 "net/http"
@@ -84,20 +85,15 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
8485 }
8586 req .Header = coursebookHeaders
8687 res , err = cli .Do (req )
88+ if res .StatusCode != 200 {
89+ return errors .New ("Non-200 response status code" )
90+ }
8791 return err
8892 }, 10 , func (numRetries int ) {
89- log .Printf ("ERROR: Section find for course prefix %s failed! Response code was: %s" , coursePrefix , res .Status )
90- // Wait longer if 3 retries fail; we've probably been IP ratelimited...
91- if numRetries >= 3 {
92- log .Printf ("WARNING: More than 3 retries have failed. Waiting for 5 minutes before attempting further retries." )
93- time .Sleep (5 * time .Minute )
94- } else {
95- log .Printf ("Getting new token and retrying in 3 seconds..." )
96- time .Sleep (3 * time .Second )
97- }
93+ log .Printf ("WARN: Section find for course prefix %s failed! Response code was: %s" , coursePrefix , res .Status )
9894 coursebookHeaders = utils .RefreshToken (chromedpCtx )
99- // Give coursebook some time to recognize the new token
100- time .Sleep (500 * time .Millisecond )
95+ // Back off linearly: wait 500ms multiplied by the number of retries so far; generally works pretty well
96+ time .Sleep (500 * time .Millisecond * time . Duration ( numRetries ) )
10197 })
10298
10399 if err != nil {
@@ -128,7 +124,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
128124
129125 // Get section info
130126 // Worth noting that the "req" and "div" params in the request below don't actually seem to matter... consider them filler to make sure the request goes through
131- queryStr := fmt .Sprintf ("id=%s&req=0bd73666091d3d1da057c5eeb6ef20a7df3CTp0iTMYFuu9paDeUptMzLYUiW4BIk9i8LIFcBahX2E2b18WWXkUUJ1Y7Xq6j3WZAKPbREfGX7lZY96lI7btfpVS95YAprdJHX9dc5wM= &action=section&div=r-62childcontent " , id )
127+ queryStr := fmt .Sprintf ("id=%s&req=b30da8ab21637dbef35fd7682f48e1c1W0ypMhaj%%2FdsnYn3Wa03BrxSNgCeyvLfvucSTobcSXRf38SWaUaNfMjJQn%%2BdcabF%%2F7ZuG%%2BdKqHAqmrxEKyg8AdB0FqVGcz4rkff3%%2B3SIUIt8%%3D &action=info " , id )
132128
133129 // Try HTTP request, retrying if necessary
134130 var res * http.Response
@@ -139,20 +135,15 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
139135 }
140136 req .Header = coursebookHeaders
141137 res , err = cli .Do (req )
138+ if res .StatusCode != 200 {
139+ return errors .New ("Non-200 response status code" )
140+ }
142141 return err
143142 }, 10 , func (numRetries int ) {
144- log .Printf ("ERROR: Section id lookup for id %s failed! Response code was: %s" , id , res .Status )
145- // Wait longer if 3 retries fail; we've probably been IP ratelimited...
146- if numRetries >= 3 {
147- log .Printf ("WARNING: More than 3 retries have failed. Waiting for 5 minutes before attempting further retries." )
148- time .Sleep (5 * time .Minute )
149- } else {
150- log .Printf ("Getting new token and retrying in 3 seconds..." )
151- time .Sleep (3 * time .Second )
152- }
143+ log .Printf ("WARN: Section id lookup for id %s failed! Response code was: %s" , id , res .Status )
153144 coursebookHeaders = utils .RefreshToken (chromedpCtx )
154- // Give coursebook some time to recognize the new token
155- time .Sleep (500 * time .Millisecond )
145+ // Back off linearly: wait 500ms multiplied by the number of retries so far; generally works pretty well
146+ time .Sleep (500 * time .Millisecond * time . Duration ( numRetries ) )
156147 })
157148
158149 if err != nil {
@@ -181,6 +172,10 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
181172 sectionsInCoursePrefix ++
182173 }
183174 log .Printf ("\n Finished scraping course prefix %s. Got %d sections." , coursePrefix , sectionsInCoursePrefix )
175+ // Panic if the number of sections we scraped doesn't match the number of section IDs we found
176+ if sectionsInCoursePrefix != len (sectionIDs ) {
177+ log .Panicf ("Section count mismatch! Coursebook has %d sections for course prefix %s but we only got %d" , sectionsInCoursePrefix , coursePrefix , sectionsInCoursePrefix )
178+ }
184179 totalSections += sectionsInCoursePrefix
185180 }
186181 log .Printf ("\n Done scraping term! Scraped a total of %d sections." , totalSections )
0 commit comments