|
20 | 20 | #' \item{file_extension}{\code{character}. The file extension of the data file (e.g., 'tar', 'gz').} |
21 | 21 | #' \item{data_ym}{\code{Date}. The year and month of the data coverage, if available.} |
22 | 22 | #' \item{data_ymd}{\code{Date}. The specific date of the data coverage, if available.} |
| 23 | +#' \item{study}{\code{factor}. Study category derived from the URL (e.g., 'basic', 'complete', 'routes').} |
| 24 | +#' \item{type}{\code{factor}. Data type category derived from the URL (e.g., 'number_of_trips', 'origin-destination', 'overnight_stays', 'data_quality', 'metadata').} |
| 25 | +#' \item{period}{\code{factor}. Temporal granularity category derived from the URL (e.g., 'day', 'month').} |
| 26 | +#' \item{zones}{\code{factor}. Geographic zone classification derived from the URL (e.g., 'districts', 'municipalities', 'large_urban_areas').} |
23 | 27 | #' \item{local_path}{\code{character}. The local file path where the data is (or will be) stored.} |
24 | 28 | #' \item{downloaded}{\code{logical}. Indicator of whether the data file has been downloaded locally. This is only available if `check_local_files` is `TRUE`.} |
25 | 29 | #' } |
@@ -252,32 +256,52 @@ spod_available_data_v1 <- function( |
252 | 256 |
|
253 | 257 | files_table <- files_table |> |
254 | 258 | dplyr::mutate( |
255 | | - study = dplyr::case_when( |
256 | | - grepl("maestra", .data$target_url) ~ "basic", |
257 | | - TRUE ~ "" |
| 259 | + study = factor( |
| 260 | + dplyr::case_when( |
| 261 | + grepl("maestra", .data$target_url) ~ "basic", |
| 262 | + TRUE ~ NA_character_ |
| 263 | + ), |
| 264 | + levels = c("basic") |
258 | 265 | ), |
259 | 266 |
|
260 | | - type = dplyr::case_when( |
261 | | - grepl("maestra2", .data$target_url) ~ "number_of_trips", |
262 | | - grepl("maestra1", .data$target_url) ~ "origin-destination", |
263 | | - grepl("RSS\\.xml", .data$target_url) ~ "metadata", |
264 | | - grepl("zonificacion", .data$target_url) ~ "zones", |
265 | | - grepl("relacion", .data$target_url) ~ "relations", |
266 | | - grepl("index\\.html", .data$target_url) ~ "index", |
267 | | - grepl(".\\pdf", .data$target_url) ~ "documentation", |
268 | | - TRUE ~ "" |
| 267 | + type = factor( |
| 268 | + dplyr::case_when( |
| 269 | + grepl("maestra2", .data$target_url) ~ "number_of_trips", |
| 270 | + grepl("maestra1", .data$target_url) ~ "origin-destination", |
| 271 | + grepl("RSS\\.xml", .data$target_url) ~ "metadata", |
| 272 | + grepl("zonificacion", .data$target_url) ~ "zones", |
| 273 | + grepl("relacion", .data$target_url) ~ "relations", |
| 274 | + grepl("index\\.html", .data$target_url) ~ "index", |
| 275 | + grepl("\\.pdf", .data$target_url) ~ "documentation", |
| 276 | + TRUE ~ NA_character_ |
| 277 | + ), |
| 278 | + levels = c( |
| 279 | + "number_of_trips", |
| 280 | + "origin-destination", |
| 281 | + "metadata", |
| 282 | + "zones", |
| 283 | + "relations", |
| 284 | + "index", |
| 285 | + "documentation" |
| 286 | + ) |
269 | 287 | ), |
270 | 288 |
|
271 | | - period = dplyr::case_when( |
272 | | - grepl("ficheros-diarios", .data$target_url) ~ "day", |
273 | | - grepl("meses-completos|mensual", .data$target_url) ~ "month", |
274 | | - TRUE ~ "" |
| 289 | + period = factor( |
| 290 | + dplyr::case_when( |
| 291 | + grepl("ficheros-diarios", .data$target_url) ~ "day", |
| 292 | + grepl("meses-completos|mensual", .data$target_url) ~ "month", |
| 293 | + TRUE ~ NA_character_ |
| 294 | + ), |
| 295 | + levels = c("day", "month") |
275 | 296 | ), |
276 | 297 |
|
277 | | - zones = dplyr::case_when( |
278 | | - grepl("distrito", .data$target_url) ~ "district", |
279 | | - grepl("municipio", .data$target_url) ~ "municipality", |
280 | | - TRUE ~ "" |
| 298 | + zones = factor( |
| 299 | + dplyr::case_when( |
| 300 | + grepl("distrito", .data$target_url) ~ "districts", |
| 301 | + grepl("municipio", .data$target_url) ~ "municipalities", |
| 302 | + TRUE ~ NA_character_ |
| 303 | + ), |
| 304 | + levels = c("districts", "municipalities") |
281 | 305 | ) |
282 | 306 | ) |
283 | 307 |
|
@@ -557,33 +581,51 @@ spod_available_data_v2 <- function( |
557 | 581 |
|
558 | 582 | files_table <- files_table |> |
559 | 583 | dplyr::mutate( |
560 | | - study = dplyr::case_when( |
561 | | - grepl("estudios_basicos", .data$target_url) ~ "basic", |
562 | | - grepl("estudios_completos", .data$target_url) ~ "complete", |
563 | | - grepl("rutas", .data$target_url) ~ "routes", |
564 | | - TRUE ~ "" |
| 584 | + study = factor( |
| 585 | + dplyr::case_when( |
| 586 | + grepl("estudios_basicos", .data$target_url) ~ "basic", |
| 587 | + grepl("estudios_completos", .data$target_url) ~ "complete", |
| 588 | + grepl("rutas", .data$target_url) ~ "routes", |
| 589 | + TRUE ~ NA_character_ |
| 590 | + ), |
| 591 | + levels = c("basic", "complete", "routes") |
565 | 592 | ), |
566 | 593 |
|
567 | | - type = dplyr::case_when( |
568 | | - grepl("personas", .data$target_url) ~ "number_of_trips", |
569 | | - grepl("viajes", .data$target_url) ~ "origin-destination", |
570 | | - grepl("pernoctaciones", .data$target_url) ~ "overnight_stays", |
571 | | - grepl("calidad", .data$target_url) ~ "data_quality", |
572 | | - grepl("RSS\\.xml", .data$target_url) ~ "metadata", |
573 | | - TRUE ~ "" |
| 594 | + type = factor( |
| 595 | + dplyr::case_when( |
| 596 | + grepl("personas", .data$target_url) ~ "number_of_trips", |
| 597 | + grepl("viajes", .data$target_url) ~ "origin-destination", |
| 598 | + grepl("pernoctaciones", .data$target_url) ~ "overnight_stays", |
| 599 | + grepl("calidad", .data$target_url) ~ "data_quality", |
| 600 | + grepl("RSS\\.xml", .data$target_url) ~ "metadata", |
| 601 | + TRUE ~ NA_character_ |
| 602 | + ), |
| 603 | + levels = c( |
| 604 | + "origin-destination", |
| 605 | + "number_of_trips", |
| 606 | + "overnight_stays", |
| 607 | + "data_quality", |
| 608 | + "metadata" |
| 609 | + ) |
574 | 610 | ), |
575 | 611 |
|
576 | | - period = dplyr::case_when( |
577 | | - grepl("ficheros-diarios", .data$target_url) ~ "day", |
578 | | - grepl("meses-completos|mensual", .data$target_url) ~ "month", |
579 | | - TRUE ~ "" |
| 612 | + period = factor( |
| 613 | + dplyr::case_when( |
| 614 | + grepl("ficheros-diarios", .data$target_url) ~ "day", |
| 615 | + grepl("meses-completos|mensual", .data$target_url) ~ "month", |
| 616 | + TRUE ~ NA_character_ |
| 617 | + ), |
| 618 | + levels = c("day", "month") |
580 | 619 | ), |
581 | 620 |
|
582 | | - zones = dplyr::case_when( |
583 | | - grepl("distritos", .data$target_url) ~ "district", |
584 | | - grepl("municipios", .data$target_url) ~ "municipality", |
585 | | - grepl("GAU", .data$target_url) ~ "gau", |
586 | | - TRUE ~ "" |
| 621 | + zones = factor( |
| 622 | + dplyr::case_when( |
| 623 | + grepl("distritos", .data$target_url) ~ "districts", |
| 624 | + grepl("municipios", .data$target_url) ~ "municipalities", |
| 625 | + grepl("GAU", .data$target_url) ~ "large_urban_areas", |
| 626 | + TRUE ~ NA_character_ |
| 627 | + ), |
| 628 | + levels = c("districts", "municipalities", "large_urban_areas") |
587 | 629 | ) |
588 | 630 | ) |
589 | 631 |
|
@@ -728,12 +770,16 @@ read_data_links_xml <- function( |
728 | 770 | Sys.Date() |
729 | 771 |
|
730 | 772 | if (needs_update) { |
731 | | - if (!quiet) message("Fetching latest data links xml") |
| 773 | + if (!quiet) { |
| 774 | + message("Fetching latest data links xml") |
| 775 | + } |
732 | 776 | latest_data_links_xml_path <- latest_file_function( |
733 | 777 | data_dir = data_dir |
734 | 778 | ) |
735 | 779 | } else { |
736 | | - if (!quiet) message("Using existing data links xml: ", latest_file) |
| 780 | + if (!quiet) { |
| 781 | + message("Using existing data links xml: ", latest_file) |
| 782 | + } |
737 | 783 | latest_data_links_xml_path <- latest_file |
738 | 784 | } |
739 | 785 |
|
|
0 commit comments