Skip to content

Commit 098ed8e

Browse files
committed
Minor coursebook scraper improvements
1 parent ac1396f commit 098ed8e

File tree

3 files changed

+13
-7
lines changed

3 files changed

+13
-7
lines changed

parser/courseParser_test.go

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
package parser
22

33
import (
4+
"testing"
5+
46
"github.com/google/go-cmp/cmp"
57
"github.com/google/go-cmp/cmp/cmpopts"
6-
"testing"
78

89
"github.com/UTDNebula/nebula-api/api/schema"
910
)

scrapers/coursebook.go

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -38,12 +38,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
3838
}
3939

4040
// Init http client
41-
tr := &http.Transport{
42-
MaxIdleConns: 10,
43-
IdleConnTimeout: 30 * time.Second,
44-
DisableCompression: true,
45-
}
46-
cli := &http.Client{Transport: tr}
41+
cli := &http.Client{}
4742

4843
// Make the output directory for this term
4944
termDir := fmt.Sprintf("%s/%s", outDir, term)
@@ -119,6 +114,11 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
119114
}
120115
log.Printf("Found %d sections for course prefix %s", len(sectionIDs), coursePrefix)
121116

117+
// Get a new token before starting the section lookup
118+
coursebookHeaders = utils.RefreshToken(chromedpCtx)
119+
// Give coursebook some time to recognize the new token
120+
time.Sleep(500 * time.Millisecond)
121+
122122
// Get HTML data for all section IDs
123123
sectionsInCoursePrefix := 0
124124
for sectionIndex, id := range sectionIDs {

utils/methods.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ import (
1515
"os"
1616
"path/filepath"
1717
"regexp"
18+
"time"
1819

1920
"strings"
2021

@@ -76,6 +77,8 @@ func RefreshToken(chromedpCtx context.Context) map[string][]string {
7677
panic(err)
7778
}
7879

80+
time.Sleep(250 * time.Millisecond)
81+
7982
var cookieStrs []string
8083
_, err = chromedp.RunResponse(chromedpCtx,
8184
chromedp.Navigate(`https://coursebook.utdallas.edu/`),
@@ -142,6 +145,8 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string {
142145
panic(err)
143146
}
144147

148+
time.Sleep(250 * time.Millisecond)
149+
145150
// Save all cookies to string
146151
cookieStr := ""
147152
_, err = chromedp.RunResponse(chromedpCtx,

0 commit comments

Comments
 (0)