Skip to content

Commit 26ae762

Browse files
committed
Replace RetryHTTP util with more generic Retry util; improve RefreshToken
1 parent f298b1c commit 26ae762

File tree

2 files changed

+64
-52
lines changed

2 files changed

+64
-52
lines changed

scrapers/coursebook.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,16 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
7676
queryStr := fmt.Sprintf("action=search&s%%5B%%5D=term_%s&s%%5B%%5D=%s&s%%5B%%5D=%s", term, coursePrefix, clevel)
7777

7878
// Try HTTP request, retrying if necessary
79-
res, err := utils.RetryHTTP(func() *http.Request {
79+
var res *http.Response
80+
err := utils.Retry(func() error {
8081
req, err := http.NewRequest("POST", "https://coursebook.utdallas.edu/clips/clip-cb11-hat.zog", strings.NewReader(queryStr))
8182
if err != nil {
8283
panic(err)
8384
}
8485
req.Header = coursebookHeaders
85-
return req
86-
}, cli, func(res *http.Response, numRetries int) {
86+
res, err = cli.Do(req)
87+
return err
88+
}, 10, func(numRetries int) {
8789
log.Printf("ERROR: Section find for course prefix %s failed! Response code was: %s", coursePrefix, res.Status)
8890
// Wait longer if 3 retries fail; we've probably been IP ratelimited...
8991
if numRetries >= 3 {
@@ -97,6 +99,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
9799
// Give coursebook some time to recognize the new token
98100
time.Sleep(500 * time.Millisecond)
99101
})
102+
100103
if err != nil {
101104
panic(err)
102105
}
@@ -128,14 +131,16 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
128131
queryStr := fmt.Sprintf("id=%s&req=0bd73666091d3d1da057c5eeb6ef20a7df3CTp0iTMYFuu9paDeUptMzLYUiW4BIk9i8LIFcBahX2E2b18WWXkUUJ1Y7Xq6j3WZAKPbREfGX7lZY96lI7btfpVS95YAprdJHX9dc5wM=&action=section&div=r-62childcontent", id)
129132

130133
// Try HTTP request, retrying if necessary
131-
res, err := utils.RetryHTTP(func() *http.Request {
134+
var res *http.Response
135+
err := utils.Retry(func() error {
132136
req, err := http.NewRequest("POST", "https://coursebook.utdallas.edu/clips/clip-cb11-hat.zog", strings.NewReader(queryStr))
133137
if err != nil {
134138
panic(err)
135139
}
136140
req.Header = coursebookHeaders
137-
return req
138-
}, cli, func(res *http.Response, numRetries int) {
141+
res, err = cli.Do(req)
142+
return err
143+
}, 10, func(numRetries int) {
139144
log.Printf("ERROR: Section id lookup for id %s failed! Response code was: %s", id, res.Status)
140145
// Wait longer if 3 retries fail; we've probably been IP ratelimited...
141146
if numRetries >= 3 {
@@ -149,6 +154,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
149154
// Give coursebook some time to recognize the new token
150155
time.Sleep(500 * time.Millisecond)
151156
})
157+
152158
if err != nil {
153159
panic(err)
154160
}

utils/methods.go

Lines changed: 52 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"fmt"
1212
"io/fs"
1313
"log"
14-
"net/http"
1514
"os"
1615
"path/filepath"
1716
"regexp"
@@ -59,46 +58,59 @@ func RefreshToken(chromedpCtx context.Context) map[string][]string {
5958
panic(err)
6059
}
6160

61+
delayedRetryCallback := func(numRetries int) {
62+
time.Sleep(250 * time.Millisecond * time.Duration(numRetries))
63+
}
64+
6265
VPrintf("Getting new token...")
63-
_, err = chromedp.RunResponse(chromedpCtx,
64-
chromedp.ActionFunc(func(ctx context.Context) error {
65-
err := network.ClearBrowserCookies().Do(ctx)
66-
return err
67-
}),
68-
chromedp.Navigate(`https://wat.utdallas.edu/login`),
69-
chromedp.WaitVisible(`form#login-form`),
70-
chromedp.SendKeys(`input#netid`, netID),
71-
chromedp.SendKeys(`input#password`, password),
72-
chromedp.WaitVisible(`button#login-button`),
73-
chromedp.Click(`button#login-button`),
74-
chromedp.WaitVisible(`body`),
75-
)
66+
err = Retry(func() error {
67+
_, err = chromedp.RunResponse(chromedpCtx,
68+
chromedp.ActionFunc(func(ctx context.Context) error {
69+
err := network.ClearBrowserCookies().Do(ctx)
70+
return err
71+
}),
72+
chromedp.Navigate(`https://wat.utdallas.edu/login`),
73+
chromedp.WaitVisible(`form#login-form`),
74+
chromedp.SendKeys(`input#netid`, netID),
75+
chromedp.SendKeys(`input#password`, password),
76+
chromedp.WaitVisible(`button#login-button`),
77+
chromedp.Click(`button#login-button`),
78+
chromedp.WaitVisible(`body`),
79+
)
80+
return err
81+
}, 3, delayedRetryCallback)
82+
7683
if err != nil {
7784
panic(err)
7885
}
7986

8087
time.Sleep(250 * time.Millisecond)
8188

8289
var cookieStrs []string
83-
_, err = chromedp.RunResponse(chromedpCtx,
84-
chromedp.Navigate(`https://coursebook.utdallas.edu/`),
85-
chromedp.ActionFunc(func(ctx context.Context) error {
86-
cookies, err := network.GetCookies().Do(ctx)
87-
cookieStrs = make([]string, len(cookies))
88-
gotToken := false
89-
for i, cookie := range cookies {
90-
cookieStrs[i] = fmt.Sprintf("%s=%s", cookie.Name, cookie.Value)
91-
if cookie.Name == "PTGSESSID" {
92-
VPrintf("Got new token: PTGSESSID = %s", cookie.Value)
93-
gotToken = true
90+
91+
err = Retry(func() error {
92+
_, err = chromedp.RunResponse(chromedpCtx,
93+
chromedp.Navigate(`https://coursebook.utdallas.edu/`),
94+
chromedp.ActionFunc(func(ctx context.Context) error {
95+
cookies, err := network.GetCookies().Do(ctx)
96+
cookieStrs = make([]string, len(cookies))
97+
gotToken := false
98+
for i, cookie := range cookies {
99+
cookieStrs[i] = fmt.Sprintf("%s=%s", cookie.Name, cookie.Value)
100+
if cookie.Name == "PTGSESSID" {
101+
VPrintf("Got new token: PTGSESSID = %s", cookie.Value)
102+
gotToken = true
103+
}
94104
}
95-
}
96-
if !gotToken {
97-
return errors.New("failed to get a new token")
98-
}
99-
return err
100-
}),
101-
)
105+
if !gotToken {
106+
return errors.New("failed to get a new token")
107+
}
108+
return err
109+
}),
110+
)
111+
return err
112+
}, 3, delayedRetryCallback)
113+
102114
if err != nil {
103115
panic(err)
104116
}
@@ -249,22 +261,16 @@ func Regexpf(format string, vars ...interface{}) *regexp.Regexp {
249261
return regexp.MustCompile(fmt.Sprintf(format, vars...))
250262
}
251263

252-
// Attempts to run the given HTTP request with the given HTTP client, wrapping the request with a retry callback
253-
func RetryHTTP(requestCreator func() *http.Request, client *http.Client, retryCallback func(res *http.Response, numRetries int)) (res *http.Response, err error) {
254-
// Retry loop for requests
255-
numRetries := 0
256-
for {
257-
// Perform HTTP request, retrying if we get a non-200 response code
258-
res, err = client.Do(requestCreator())
259-
// Retry handling
260-
if res.StatusCode != 200 {
261-
retryCallback(res, numRetries)
262-
numRetries++
263-
continue
264+
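// Retry runs action until it returns nil or maxRetries retries have been used up, so action is invoked at most maxRetries+1 times (exactly once if maxRetries is zero or negative). It returns nil on success and the last error from action otherwise. Before each retry, retryCallback is called with the 1-based number of the upcoming retry.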
265+
func Retry(action func() error, maxRetries int, retryCallback func(numRetries int)) error {
266+
for retries := 1; ; retries++ {
267+
// Perform the action
268+
err := action()
269+
if err == nil || retries > maxRetries {
270+
return err
264271
}
265-
break
272+
retryCallback(retries)
266273
}
267-
return res, err
268274
}
269275

270276
func GetCoursePrefixes(chromedpCtx context.Context) []string {

0 commit comments

Comments
 (0)