11# ' Fetch department list from Stanford ExploreCourses
22# '
3- # ' @param cache_dir Directory to cache results
3+ # ' @param cache_dir Base cache directory
44# ' @return A data frame containing department information
55# ' @export
66# ' @include constants.R
@@ -28,24 +28,23 @@ fetch_departments <- function(cache_dir = NULL) {
2828 })
2929
3030 if (! is.null(cache_dir )) {
31- fs :: dir_create(cache_dir )
32- jsonlite :: write_json(
33- departments ,
34- fs :: path(cache_dir , " departments.json" ),
35- pretty = TRUE
36- )
31+ write_json_cache(departments , cache_dir , " departments" )
3732 }
3833
3934 departments
4035}
4136
42- # ' Fetch courses for a specific department
37+
38+ # ' Fetch courses for a specific department with progress reporting
4339# '
4440# ' @param name Department code
45- # ' @param cache_dir Directory to cache results
46- # ' @return XML content of courses
41+ # ' @param cache_dir Base cache directory
42+ # ' @param p Progress handler
43+ # ' @return Data frame of parsed course information
4744# ' @export
48- fetch_department_courses <- function (name , cache_dir = NULL ) {
45+ fetch_department_courses <- function (name , cache_dir = NULL , p = NULL ) {
46+ if (! is.null(p )) p(message = sprintf(" Fetching %s" , name ))
47+
4948 url <- glue :: glue(COURSE_ENDPOINT , name = name )
5049
5150 req <- httr2 :: request(url ) | >
@@ -54,19 +53,50 @@ fetch_department_courses <- function(name, cache_dir = NULL) {
5453 content <- httr2 :: resp_body_string(req )
5554
5655 if (! is.null(cache_dir )) {
57- fs :: dir_create(cache_dir )
58-
59- xml_path <- fs :: path(cache_dir , paste0(name , " .xml" ))
60- readr :: write_file(content , xml_path )
56+ write_xml_cache(content , cache_dir , name )
6157 }
6258
63-
59+ # Parse XML directly to data frame
6460 courses <- parse_courses(content )
6561 courses $ department <- name
6662
6763 courses
6864}
6965
66+ # ' Process a single department
67+ # '
68+ # ' @param name Department code
69+ # ' @param cache_dir Base cache directory
70+ # ' @param p Progress handler
71+ # ' @return Data frame of parsed course information
72+ # ' @keywords internal
73+ process_department <- function (name , cache_dir = NULL , p = NULL ) {
74+ if (! is.null(p )) p(message = sprintf(" Fetching %s" , name ))
75+
76+ url <- glue :: glue(COURSE_ENDPOINT , name = name )
77+
78+ # Try to fetch and parse the data
79+ tryCatch({
80+ req <- httr2 :: request(url ) | >
81+ httr2 :: req_perform()
82+
83+ content <- httr2 :: resp_body_string(req )
84+
85+ if (! is.null(cache_dir )) {
86+ write_xml_cache(content , cache_dir , name )
87+ }
88+
89+ # Parse XML directly to data frame
90+ courses <- parse_courses(content )
91+ courses $ department <- name
92+
93+ courses
94+ }, error = function (e ) {
95+ warning(sprintf(" Error processing department %s: %s" , name , e $ message ))
96+ NULL
97+ })
98+ }
99+
70100# ' Parse course XML into a data frame
71101# '
72102# ' @param xml_content XML content from fetch_department_courses
@@ -142,20 +172,38 @@ parse_courses <- function(xml_content) {
142172 course_data
143173}
144174
145- # ' Fetch and process courses for multiple departments
175+ # ' Fetch courses for a specific department with progress reporting
176+ # '
177+ # ' @param name Department code
178+ # ' @param cache_dir Base cache directory
179+ # ' @param p Progress handler
180+ # ' @return Data frame of parsed course information
181+ # ' @export
182+ fetch_department_courses <- function (name , cache_dir = NULL , p = NULL ) {
183+ process_department(name , cache_dir , p )
184+ }
185+
186+ # ' Fetch and process courses for multiple departments in parallel
146187# '
147188# ' @param departments Character vector of department codes
148- # ' @param cache_dir Directory to cache results
149- # ' @return A list of data frames containing course information
189+ # ' @param cache_dir Base cache directory
190+ # ' @return A data frame containing course information
150191# ' @export
151192fetch_all_courses <- function (departments = NULL , cache_dir = NULL ) {
152193 if (is.null(departments )) {
153194 departments <- fetch_departments(cache_dir )$ name
154195 }
155196
156- purrr :: map_dfr(departments , function (dept ) {
157- message(" Fetching department: " , dept )
158- courses <- fetch_department_courses(dept , cache_dir )
159- courses
160- })
197+ p <- progressr :: progressor(steps = length(departments ))
198+
199+ results <- future.apply :: future_lapply(
200+ departments ,
201+ fetch_department_courses ,
202+ cache_dir = cache_dir ,
203+ p = p ,
204+ future.seed = TRUE
205+ )
206+
207+ # Combine results
208+ dplyr :: bind_rows(results )
161209}
0 commit comments