Skip to content

Commit 946dfad

Browse files
committed
Add a future integration layer to speed up the download process and a progressr layer to announce progress.
1 parent 2a4a0ac commit 946dfad

16 files changed

+243
-74
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
.Rproj.user
22
.Rhistory
33
docs
4+
cache

DESCRIPTION

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,16 @@ Roxygen: list(markdown = TRUE)
1414
RoxygenNote: 7.3.2
1515
Imports:
1616
httr2,
17-
jsonlite,
1817
xml2,
1918
dplyr,
20-
fs,
21-
glue,
19+
jsonlite,
2220
purrr,
23-
readr,
24-
tibble
21+
glue,
22+
fs,
23+
tibble,
24+
future.apply,
25+
progressr,
26+
readr
2527
Collate:
2628
'cache.R'
2729
'constants.R'

NAMESPACE

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,3 @@ export(fetch_department_courses)
55
export(fetch_departments)
66
export(parse_courses)
77
export(read_cache)
8-
export(write_cache)

R/cache.R

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,71 @@
1-
#' Write course data to cache
1+
#' Initialize cache directories
22
#'
3-
#' @param data Course data frame
4-
#' @param cache_dir Directory to cache results
3+
#' @param cache_dir Base cache directory
4+
#' @return List of cache paths
5+
#' @keywords internal
6+
init_cache_dirs <- function(cache_dir) {
7+
if (is.null(cache_dir)) {
8+
return(NULL)
9+
}
10+
11+
paths <- list(
12+
base = cache_dir,
13+
json = file.path(cache_dir, "json"),
14+
xml = file.path(cache_dir, "xml")
15+
)
16+
17+
fs::dir_create(paths$json)
18+
fs::dir_create(paths$xml)
19+
20+
paths
21+
}
22+
23+
#' Write XML data to cache
24+
#'
25+
#' @param content XML content
26+
#' @param cache_dir Base cache directory
527
#' @param dept Department code
6-
#' @export
7-
write_cache <- function(data, cache_dir, dept) {
8-
fs::dir_create(cache_dir)
28+
#' @keywords internal
29+
write_xml_cache <- function(content, cache_dir, dept) {
30+
if (is.null(cache_dir)) return(NULL)
31+
32+
paths <- init_cache_dirs(cache_dir)
33+
xml_path <- fs::path(paths$xml, paste0(dept, ".xml"))
34+
readr::write_file(content, xml_path)
35+
}
36+
37+
#' Write JSON data to cache
38+
#'
39+
#' @param data Data frame or list to cache
40+
#' @param cache_dir Base cache directory
41+
#' @param filename Filename without extension
42+
#' @keywords internal
43+
write_json_cache <- function(data, cache_dir, filename) {
44+
if (is.null(cache_dir)) return(NULL)
45+
46+
paths <- init_cache_dirs(cache_dir)
947
jsonlite::write_json(
1048
data,
11-
fs::path(cache_dir, paste0(dept, ".json")),
49+
fs::path(paths$json, paste0(filename, ".json")),
1250
pretty = TRUE
1351
)
1452
}
1553

1654
#' Read course data from cache
1755
#'
18-
#' @param cache_dir Directory containing cached files
56+
#' @param cache_dir Base cache directory
1957
#' @param dept Department code (optional)
2058
#' @return Data frame of course data
2159
#' @export
2260
read_cache <- function(cache_dir, dept = NULL) {
61+
paths <- init_cache_dirs(cache_dir)
62+
2363
if (is.null(dept)) {
24-
files <- fs::dir_ls(cache_dir, glob = "*.json")
25-
purrr::map_dfr(files, jsonlite::read_json, simplifyVector = TRUE)
64+
files <- fs::dir_ls(paths$json, glob = "*.json")
65+
results <- lapply(files, jsonlite::read_json, simplifyVector = TRUE)
66+
dplyr::bind_rows(results)
2667
} else {
27-
file <- fs::path(cache_dir, paste0(dept, ".json"))
68+
file <- fs::path(paths$json, paste0(dept, ".json"))
2869
if (fs::file_exists(file)) {
2970
jsonlite::read_json(file, simplifyVector = TRUE)
3071
} else {

R/fetch.R

Lines changed: 72 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#' Fetch department list from Stanford ExploreCourses
22
#'
3-
#' @param cache_dir Directory to cache results
3+
#' @param cache_dir Base cache directory
44
#' @return A data frame containing department information
55
#' @export
66
#' @include constants.R
@@ -28,24 +28,23 @@ fetch_departments <- function(cache_dir = NULL) {
2828
})
2929

3030
if (!is.null(cache_dir)) {
31-
fs::dir_create(cache_dir)
32-
jsonlite::write_json(
33-
departments,
34-
fs::path(cache_dir, "departments.json"),
35-
pretty = TRUE
36-
)
31+
write_json_cache(departments, cache_dir, "departments")
3732
}
3833

3934
departments
4035
}
4136

42-
#' Fetch courses for a specific department
37+
38+
#' Fetch courses for a specific department with progress reporting
4339
#'
4440
#' @param name Department code
45-
#' @param cache_dir Directory to cache results
46-
#' @return XML content of courses
41+
#' @param cache_dir Base cache directory
42+
#' @param p Progress handler
43+
#' @return Data frame of parsed course information
4744
#' @export
48-
fetch_department_courses <- function(name, cache_dir = NULL) {
45+
fetch_department_courses <- function(name, cache_dir = NULL, p = NULL) {
46+
if (!is.null(p)) p(message = sprintf("Fetching %s", name))
47+
4948
url <- glue::glue(COURSE_ENDPOINT, name = name)
5049

5150
req <- httr2::request(url) |>
@@ -54,19 +53,50 @@ fetch_department_courses <- function(name, cache_dir = NULL) {
5453
content <- httr2::resp_body_string(req)
5554

5655
if (!is.null(cache_dir)) {
57-
fs::dir_create(cache_dir)
58-
59-
xml_path <- fs::path(cache_dir, paste0(name, ".xml"))
60-
readr::write_file(content, xml_path)
56+
write_xml_cache(content, cache_dir, name)
6157
}
6258

63-
59+
# Parse XML directly to data frame
6460
courses <- parse_courses(content)
6561
courses$department <- name
6662

6763
courses
6864
}
6965

66+
#' Process a single department
67+
#'
68+
#' @param name Department code
69+
#' @param cache_dir Base cache directory
70+
#' @param p Progress handler
71+
#' @return Data frame of parsed course information
72+
#' @keywords internal
73+
process_department <- function(name, cache_dir = NULL, p = NULL) {
74+
if (!is.null(p)) p(message = sprintf("Fetching %s", name))
75+
76+
url <- glue::glue(COURSE_ENDPOINT, name = name)
77+
78+
# Try to fetch and parse the data
79+
tryCatch({
80+
req <- httr2::request(url) |>
81+
httr2::req_perform()
82+
83+
content <- httr2::resp_body_string(req)
84+
85+
if (!is.null(cache_dir)) {
86+
write_xml_cache(content, cache_dir, name)
87+
}
88+
89+
# Parse XML directly to data frame
90+
courses <- parse_courses(content)
91+
courses$department <- name
92+
93+
courses
94+
}, error = function(e) {
95+
warning(sprintf("Error processing department %s: %s", name, e$message))
96+
NULL
97+
})
98+
}
99+
70100
#' Parse course XML into a data frame
71101
#'
72102
#' @param xml_content XML content from fetch_department_courses
@@ -142,20 +172,38 @@ parse_courses <- function(xml_content) {
142172
course_data
143173
}
144174

145-
#' Fetch and process courses for multiple departments
175+
#' Fetch courses for a specific department with progress reporting
176+
#'
177+
#' @param name Department code
178+
#' @param cache_dir Base cache directory
179+
#' @param p Progress handler
180+
#' @return Data frame of parsed course information
181+
#' @export
182+
fetch_department_courses <- function(name, cache_dir = NULL, p = NULL) {
183+
process_department(name, cache_dir, p)
184+
}
185+
186+
#' Fetch and process courses for multiple departments in parallel
146187
#'
147188
#' @param departments Character vector of department codes
148-
#' @param cache_dir Directory to cache results
149-
#' @return A list of data frames containing course information
189+
#' @param cache_dir Base cache directory
190+
#' @return A data frame containing course information
150191
#' @export
151192
fetch_all_courses <- function(departments = NULL, cache_dir = NULL) {
152193
if (is.null(departments)) {
153194
departments <- fetch_departments(cache_dir)$name
154195
}
155196

156-
purrr::map_dfr(departments, function(dept) {
157-
message("Fetching department: ", dept)
158-
courses <- fetch_department_courses(dept, cache_dir)
159-
courses
160-
})
197+
p <- progressr::progressor(steps = length(departments))
198+
199+
results <- future.apply::future_lapply(
200+
departments,
201+
fetch_department_courses,
202+
cache_dir = cache_dir,
203+
p = p,
204+
future.seed = TRUE
205+
)
206+
207+
# Combine results
208+
dplyr::bind_rows(results)
161209
}

README.Rmd

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ remotes::remotes("coatless-rpkg/explorecourses")
4040

4141
First, load the package:
4242

43-
```r
43+
```{r}
44+
#| eval: false
4445
library(explorecourses)
4546
```
4647

@@ -53,7 +54,8 @@ The package contains three main functions:
5354
By default, we'll retrieve all courses across all departments for the current
5455
academic year using:
5556

56-
```r
57+
```{r}
58+
#| eval: false
5759
all_courses <- fetch_all_courses()
5860
```
5961

@@ -62,16 +64,21 @@ This information is stored in the `schedule_ay24_25` data frame.
6264
For just a single department, we can use it's code to retrieve a list of all
6365
classes:
6466

65-
```r
67+
```{r}
68+
#| eval: false
6669
department_courses <- fetch_department_courses("STATS")
6770
```
6871

6972
To determine possible department shortcodes, we can use:
7073

71-
```r
74+
```{r}
75+
#| eval: false
7276
departments <- fetch_departments()
7377
```
7478

79+
This will return a data frame with the department short name, long name, and school
80+
the department is associated with.
81+
7582
## License
7683

7784
AGPL (>= 3)

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ To determine possible department shortcodes, we can use:
6565
departments <- fetch_departments()
6666
```
6767

68+
This will return a data frame with the department short name, long name,
69+
and school the department is associated with.
70+
6871
## License
6972

7073
AGPL (\>= 3)

man/fetch_all_courses.Rd

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/fetch_department_courses.Rd

Lines changed: 13 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/fetch_departments.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)