|
| 1 | +## |------------------------------------------------------------------------| # |
| 2 | +# BioReg_Process ---- |
| 3 | +## |------------------------------------------------------------------------| # |
| 4 | + |
| 5 | +#' Process biogeographical regions |
| 6 | +#' |
| 7 | +#' Downloads and processes the Biogeographical Regions dataset (Europe 2016, |
| 8 | +#' version 1) from the [European Environment |
| 9 | +#' Agency](https://www.eea.europa.eu/en/datahub/datahubitem-view/11db8d14-f167-4cd5-9205-95638dfd9618). |
| 10 | +#' The dataset is processed to extract region names per reference grid cell in |
| 11 | +#' EPSG:3035 projection. Outputs include raster (.tif), spatial vector (.shp), |
| 12 | +#' and RData files. The outputs of this function are used to count presence grid |
| 13 | +#' cells for each species per biogeographical region. |
| 14 | +#' @return Invisible `NULL`. Processed data is saved to disk as raster, vector, |
| 15 | +#' and RData files. |
| 16 | +#' @details |
| 17 | +#' - Temporal coverage: 2011-2015 |
| 18 | +#' - Spatial coverage: 28°E-81°E, 31.27°W-62°E |
| 19 | +#' - CRS: EPSG:3035; read as: ETRS_1989_LAEA_L52_M10 |
| 20 | +#' - file format: shapefile (compressed in zip file) |
| 21 | +#' - Requires external tools: `curl` (for downloading) and `unzip` |
| 22 | +#' (for extraction) |
| 23 | +#' @param EnvFile Character. Path to the environment file containing paths to |
| 24 | +#' data sources. Defaults to `.env`. |
| 25 | +#' @param FromHPC Logical. Whether the processing is being done on an |
| 26 | +#' High-Performance Computing (HPC) environment, to adjust file paths |
| 27 | +#' accordingly. Default: `TRUE`. |
| 28 | +#' @export |
| 29 | +#' @author Ahmed El-Gabbas |
| 30 | +#' @name BioReg_Process |
| 31 | + |
| 32 | +BioReg_Process <- function(FromHPC = TRUE, EnvFile = ".env") { |
| 33 | + |
| 34 | + # # ..................................................................... ### |
| 35 | + |
| 36 | + .StartTime <- lubridate::now(tzone = "CET") |
| 37 | + |
| 38 | + # Avoid "no visible binding for global variable" message |
| 39 | + # https://www.r-bloggers.com/2019/08/no-visible-binding-for-global-variable/ |
| 40 | + Path_BioReg <- Path_Raw <- Path_Interim <- BioReg_URL <- Path_Grid <- |
| 41 | + Rast <- Path_Grid_Ref <- geometry <- ID <- NULL |
| 42 | + |
| 43 | + # # ..................................................................... ### |
| 44 | + |
| 45 | + # Check input arguments ------ |
| 46 | + IASDT.R::CatTime("Check input arguments") |
| 47 | + |
| 48 | + AllArgs <- ls(envir = environment()) |
| 49 | + AllArgs <- purrr::map( |
| 50 | + AllArgs, |
| 51 | + function(x) get(x, envir = parent.env(env = environment()))) %>% |
| 52 | + stats::setNames(AllArgs) |
| 53 | + IASDT.R::CheckArgs(AllArgs = AllArgs, Type = "character", Args = "EnvFile") |
| 54 | + IASDT.R::CheckArgs(AllArgs = AllArgs, Type = "logical", Args = "FromHPC") |
| 55 | + rm(AllArgs, envir = environment()) |
| 56 | + |
| 57 | + if (isFALSE(IASDT.R::CheckCommands("curl"))) { |
| 58 | + stop( |
| 59 | + "`curl` is required for downloading data but was not found.", |
| 60 | + call. = FALSE) |
| 61 | + } |
| 62 | + if (isFALSE(IASDT.R::CheckCommands("unzip"))) { |
| 63 | + stop( |
| 64 | + "`unzip` is required for extracting data but was not found.", |
| 65 | + call. = FALSE) |
| 66 | + } |
| 67 | + |
| 68 | + # # ..................................................................... ### |
| 69 | + |
| 70 | + # Environment variables |
| 71 | + if (FromHPC) { |
| 72 | + EnvVars2Read <- tibble::tribble( |
| 73 | + ~VarName, ~Value, ~CheckDir, ~CheckFile, |
| 74 | + "Path_Grid", "DP_R_Grid", TRUE, FALSE, |
| 75 | + "Path_Grid_Ref", "DP_R_Grid_Ref", TRUE, FALSE, |
| 76 | + "Path_Raw", "DP_R_BioReg_Raw", FALSE, FALSE, |
| 77 | + "Path_Interim", "DP_R_BioReg_Interim", FALSE, FALSE, |
| 78 | + "Path_BioReg", "DP_R_BioReg", FALSE, FALSE, |
| 79 | + "BioReg_URL", "DP_R_BioReg_URL", FALSE, FALSE) |
| 80 | + } else { |
| 81 | + EnvVars2Read <- tibble::tribble( |
| 82 | + ~VarName, ~Value, ~CheckDir, ~CheckFile, |
| 83 | + "Path_Grid", "DP_R_Grid_Local", TRUE, FALSE, |
| 84 | + "Path_Grid_Ref", "DP_R_Grid_Ref_Local", TRUE, FALSE, |
| 85 | + "Path_Raw", "DP_R_BioReg_Raw_Local", FALSE, FALSE, |
| 86 | + "Path_Interim", "DP_R_BioReg_Interim_Local", FALSE, FALSE, |
| 87 | + "Path_BioReg", "DP_R_BioReg_Local", FALSE, FALSE, |
| 88 | + "BioReg_URL", "DP_R_BioReg_URL", FALSE, FALSE) |
| 89 | + } |
| 90 | + |
| 91 | + # Assign environment variables and check file and paths |
| 92 | + IASDT.R::AssignEnvVars(EnvFile = EnvFile, EnvVarDT = EnvVars2Read) |
| 93 | + |
| 94 | + # # ..................................................................... ### |
| 95 | + |
| 96 | + # Ensure necessary directories exist |
| 97 | + fs::dir_create(c(Path_BioReg, Path_Raw, Path_Interim)) |
| 98 | + |
| 99 | + # # ..................................................................... ### |
| 100 | + |
| 101 | + # Download biogeographical regions dataset ------ |
| 102 | + |
| 103 | + IASDT.R::CatTime("Downloading biogeographical regions dataset") |
| 104 | + |
| 105 | + # name of downloaded zip file |
| 106 | + ZipFileName <- "Biogeog_regions_original.zip" |
| 107 | + |
| 108 | + # Extract download link |
| 109 | + IASDT.R::CatTime("Extract download link", Level = 1) |
| 110 | + BioReg_URL2 <- tryCatch({ |
| 111 | + BioReg_URL %>% |
| 112 | + # extract download link |
| 113 | + httr::GET(config = httr::timeout(100)) %>% |
| 114 | + rvest::read_html() %>% |
| 115 | + rvest::html_elements(css = "#334349 .list:nth-child(2) .content") %>% |
| 116 | + rvest::html_nodes("a") %>% |
| 117 | + rvest::html_attr("href") %>% |
| 118 | + # extract direct download link |
| 119 | + httr::GET(config = httr::timeout(100)) %>% |
| 120 | + rvest::read_html() %>% |
| 121 | + rvest::html_elements(css = "#header-primary-action .button") %>% |
| 122 | + rvest::html_attr("href") |
| 123 | + }, error = function(e) { |
| 124 | + stop("Failed to extract download URL: ", e$message, call. = FALSE) |
| 125 | + }) |
| 126 | + |
| 127 | + if (length(BioReg_URL2) != 1) { |
| 128 | + stop( |
| 129 | + "Download link extraction failed. Found: ", length(BioReg_URL2), |
| 130 | + call. = FALSE) |
| 131 | + } |
| 132 | + IASDT.R::CatTime(BioReg_URL2, Level = 2, Time = FALSE) |
| 133 | + |
| 134 | + # # |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ## |
| 135 | + |
| 136 | + # Downloading using `curl` |
| 137 | + IASDT.R::CatTime("Download using `curl`", Level = 1) |
| 138 | + Zip_file <- IASDT.R::Path(Path_Raw, ZipFileName) |
| 139 | + DownCommand <- stringr::str_glue( |
| 140 | + 'curl -J --create-dirs --output-dir {Path_Raw} -o\\ |
| 141 | + "{ZipFileName}" -L {BioReg_URL2} --silent --max-time 300') |
| 142 | + |
| 143 | + attempt <- 1 |
| 144 | + repeat { |
| 145 | + IASDT.R::CatTime(paste0("Attempt ", attempt), Level = 2, Time = FALSE) |
| 146 | + |
| 147 | + invisible(IASDT.R::System(DownCommand)) |
| 148 | + |
| 149 | + if (IASDT.R::CheckZip(Zip_file)) { |
| 150 | + break |
| 151 | + } |
| 152 | + |
| 153 | + if (attempt >= 5) { |
| 154 | + stop( |
| 155 | + "Error: Maximum download attempts reached. Zip file check failed.", |
| 156 | + call. = FALSE) |
| 157 | + } |
| 158 | + attempt <- attempt + 1 |
| 159 | + } |
| 160 | + |
| 161 | + # # |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ## |
| 162 | + |
| 163 | + # unzip data to interim directory |
| 164 | + IASDT.R::CatTime("unzip data to interim directory", Level = 1) |
| 165 | + stringr::str_glue("unzip -o -qq -j {Zip_file} -d {Path_Interim}") %>% |
| 166 | + IASDT.R::System() %>% |
| 167 | + invisible() |
| 168 | + |
| 169 | + # # ..................................................................... ### |
| 170 | + |
| 171 | + # Processing biogeographical regions data ------ |
| 172 | + IASDT.R::CatTime("Processing biogeographical regions data") |
| 173 | + |
| 174 | + # Reading data from original shapefile |
| 175 | + IASDT.R::CatTime("Read data from original shapefile", Level = 1) |
| 176 | + BioReg_DT <- fs::dir_ls(path = Path_Interim, type = "file", glob = "*.shp$") |
| 177 | + if (length(BioReg_DT) != 1) { |
| 178 | + stop("Expected one .shp file, found: ", length(BioReg_DT), call. = FALSE) |
| 179 | + } |
| 180 | + BioReg_DT <- sf::st_read(BioReg_DT, quiet = TRUE) %>% |
| 181 | + # project to EPSG:3035 |
| 182 | + sf::st_transform(3035) |
| 183 | + |
| 184 | + # # |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ## |
| 185 | + |
| 186 | + # Extract metadata |
| 187 | + IASDT.R::CatTime("Extract metadata", Level = 1) |
| 188 | + BioReg_Metadata <- sf::st_drop_geometry(BioReg_DT) %>% |
| 189 | + tibble::as_tibble() %>% |
| 190 | + dplyr::rename(ID = "PK_UID") %>% |
| 191 | + list() |
| 192 | + |
| 193 | + # # |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ## |
| 194 | + |
| 195 | + # Rasterize/masking |
| 196 | + IASDT.R::CatTime("Rasterize/masking", Level = 1) |
| 197 | + |
| 198 | + GridR <- IASDT.R::Path(Path_Grid, "Grid_10_Land_Crop.RData") |
| 199 | + if (!file.exists(GridR)) { |
| 200 | + stop( |
| 201 | + paste0("Path for the Europe boundaries does not exist: ", GridR), |
| 202 | + call. = FALSE) |
| 203 | + } |
| 204 | + |
| 205 | + BioReg_R <- BioReg_DT %>% |
| 206 | + dplyr::mutate( |
| 207 | + Rast = purrr::map( |
| 208 | + .x = geometry, |
| 209 | + .f = ~{ |
| 210 | + .x %>% |
| 211 | + sf::st_geometry() %>% |
| 212 | + sf::st_as_sf() %>% |
| 213 | + terra::rasterize( |
| 214 | + y = terra::unwrap(IASDT.R::LoadAs(GridR)), cover = TRUE) %>% |
| 215 | + terra::classify(cbind(NA, 0)) |
| 216 | + })) %>% |
| 217 | + dplyr::pull(Rast) %>% |
| 218 | + terra::rast() %>% |
| 219 | + terra::which.max() %>% |
| 220 | + stats::setNames("ID") %>% |
| 221 | + terra::mask(terra::unwrap(IASDT.R::LoadAs(GridR))) |
| 222 | + |
| 223 | + rm(BioReg_DT, GridR, envir = environment()) |
| 224 | + |
| 225 | + # # |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ## |
| 226 | + |
| 227 | + # Remove unused levels and adjust ID column |
| 228 | + IASDT.R::CatTime("Remove unused levels", Level = 1) |
| 229 | + levels(BioReg_R) <- BioReg_Metadata |
| 230 | + BioReg_R <- terra::droplevels(BioReg_R) |
| 231 | + |
| 232 | + MapLevels <- terra::levels(BioReg_R)[[1]] |
| 233 | + MapLevelsNew <- dplyr::mutate(MapLevels, ID = seq_len(dplyr::n())) |
| 234 | + MapLevelsM <- MapLevels %>% |
| 235 | + dplyr::left_join(MapLevelsNew, by = "short_name") %>% |
| 236 | + dplyr::select("short_name", tidyselect::everything()) |
| 237 | + BioReg_R <- terra::classify(BioReg_R, MapLevelsM[, -1]) |
| 238 | + levels(BioReg_R) <- list(MapLevelsNew) |
| 239 | + terra::crs(BioReg_R) <- "epsg:3035" |
| 240 | + |
| 241 | + IASDT.R::CatTime("Convert to sf object", Level = 1) |
| 242 | + Grid_sf <- IASDT.R::Path(Path_Grid_Ref, "Grid_10_sf.RData") %>% |
| 243 | + IASDT.R::LoadAs() %>% |
| 244 | + magrittr::extract2("Grid_10_sf_s") |
| 245 | + BioReg_sf <- terra::as.polygons( |
| 246 | + x = BioReg_R, aggregate = FALSE, na.rm = TRUE) %>% |
| 247 | + sf::st_as_sf() %>% |
| 248 | + tibble::tibble() %>% |
| 249 | + sf::st_as_sf() %>% |
| 250 | + dplyr::left_join(BioReg_Metadata[[1]], by = "short_name") %>% |
| 251 | + sf::st_join(Grid_sf) %>% |
| 252 | + dplyr::relocate(geometry, .after = tidyselect::everything()) |
| 253 | + |
| 254 | + # # ..................................................................... ### |
| 255 | + |
| 256 | + IASDT.R::CatTime("Saving processed biogeographical regions data") |
| 257 | + |
| 258 | + IASDT.R::CatTime("Save as tiff", Level = 1) |
| 259 | + terra::writeRaster( |
| 260 | + x = BioReg_R, overwrite = TRUE, |
| 261 | + filename = file.path(Path_BioReg, "BioReg_R.tif")) |
| 262 | + # Write attributes to file |
| 263 | + terra::levels(BioReg_R)[[1]] %>% |
| 264 | + dplyr::rename(VALUE = ID) %>% |
| 265 | + foreign::write.dbf( |
| 266 | + file = file.path(Path_BioReg, "BioReg_R.tif.vat.dbf"), |
| 267 | + factor2char = TRUE, max_nchar = 254) |
| 268 | + |
| 269 | + IASDT.R::CatTime("Save as RData", Level = 1) |
| 270 | + IASDT.R::SaveAs( |
| 271 | + InObj = terra::wrap(BioReg_R), OutObj = "BioReg_R", |
| 272 | + OutPath = file.path(Path_BioReg, "BioReg_R.RData")) |
| 273 | + |
| 274 | + IASDT.R::CatTime("Save sf object", Level = 1) |
| 275 | + save(BioReg_sf, file = file.path(Path_BioReg, "BioReg_sf.RData")) |
| 276 | + |
| 277 | + # # ..................................................................... ### |
| 278 | + |
| 279 | + IASDT.R::CatDiff( |
| 280 | + InitTime = .StartTime, |
| 281 | + Prefix = "Processing biogeographical regions data took ") |
| 282 | + |
| 283 | + return(invisible(NULL)) |
| 284 | +} |
0 commit comments