Skip to content

Commit d574b1d

Browse files
committed
Add BioReg_Process and small fixes
1 parent ff78234 commit d574b1d

15 files changed

+402
-73
lines changed

DESCRIPTION

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ Package: IASDT.R
33
Title: Modelling the distribution of invasive alien plant species in
44
Europe
55
Version: 0.1.02
6-
Date: 2025-02-24
6+
Date: 2025-02-25
77
Authors@R:
88
person("Ahmed", "El-Gabbas", , "ahmed.el-gabbas@ufz.de", role = c("aut", "cre"),
99
comment = c(ORCID = "0000-0003-2225-088X"))
@@ -67,24 +67,26 @@ Suggests:
6767
grid (>= 4.4.1),
6868
gridExtra (>= 2.3),
6969
gtools (>= 3.9.5),
70+
httr(>= 1.4.7),
7071
janitor (>= 2.2.0),
7172
jsonify (>= 1.2.2),
7273
jsonlite (>= 1.8.8),
7374
kableExtra (>= 1.4.0),
74-
knitr,
75+
knitr (>= 1.49),
7576
lme4 (>= 1.1.35.5),
7677
lobstr (>= 1.1.2),
7778
Matrix (>= 1.7.0),
7879
methods,
7980
ncdf4 (>= 1.22),
8081
pak (>= 0.7.2),
8182
paletteer (>= 1.6.0),
83+
parallelly (>= 1.42),
8284
patchwork (>= 1.2.0),
8385
pbapply (>= 1.7.2),
8486
png (>= 0.1.8),
8587
pROC (>= 1.18.5),
8688
qs2 (>= 0.1.1),
87-
quarto,
89+
quarto (>= 1.4.4),
8890
R.utils (>= 2.12.3),
8991
ragg (>= 1.3.3),
9092
raster (>= 3.6.26),

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ export(AddMissingCols)
77
export(AllObjSizes)
88
export(AssignEnvVars)
99
export(AssignIfNotExist)
10+
export(BioReg_Process)
1011
export(CHELSA_Prepare)
1112
export(CHELSA_Process)
1213
export(CHELSA_Project)

R/DWF_Bioreg.R

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
## |------------------------------------------------------------------------| #
2+
# BioReg_Process ----
3+
## |------------------------------------------------------------------------| #
4+
5+
#' Process biogeographical regions
6+
#'
7+
#' Downloads and processes the Biogeographical Regions dataset (Europe 2016,
8+
#' version 1) from the [European Environment
9+
#' Agency](https://www.eea.europa.eu/en/datahub/datahubitem-view/11db8d14-f167-4cd5-9205-95638dfd9618).
10+
#' The dataset is processed to extract region names per reference grid cell in
11+
#' EPSG:3035 projection. Outputs include raster (.tif), spatial vector (.shp),
12+
#' and RData files. The outputs of this function are used to count presence grid
13+
#' cells for each species per biogeographical region.
14+
#' @return Invisible `NULL`. Processed data is saved to disk as raster, vector,
15+
#' and RData files.
16+
#' @details
17+
#' - Temporal coverage: 2011-2015
18+
#' - Spatial coverage: 28°E-81°E, 31.27°W-62°E
19+
#' - CRS: EPSG:3035; read as: ETRS_1989_LAEA_L52_M10
20+
#' - file format: shapefile (compressed in zip file)
21+
#' - Requires external tools: `curl` (for downloading) and `unzip`
22+
#' (for extraction)
23+
#' @param EnvFile Character. Path to the environment file containing paths to
24+
#' data sources. Defaults to `.env`.
25+
#' @param FromHPC Logical. Whether the processing is being done on an
26+
#' High-Performance Computing (HPC) environment, to adjust file paths
27+
#' accordingly. Default: `TRUE`.
28+
#' @export
29+
#' @author Ahmed El-Gabbas
30+
#' @name BioReg_Process
31+
32+
BioReg_Process <- function(FromHPC = TRUE, EnvFile = ".env") {
33+
34+
# # ..................................................................... ###
35+
36+
.StartTime <- lubridate::now(tzone = "CET")
37+
38+
# Avoid "no visible binding for global variable" message
39+
# https://www.r-bloggers.com/2019/08/no-visible-binding-for-global-variable/
40+
Path_BioReg <- Path_Raw <- Path_Interim <- BioReg_URL <- Path_Grid <-
41+
Rast <- Path_Grid_Ref <- geometry <- ID <- NULL
42+
43+
# # ..................................................................... ###
44+
45+
# Check input arguments ------
46+
IASDT.R::CatTime("Check input arguments")
47+
48+
AllArgs <- ls(envir = environment())
49+
AllArgs <- purrr::map(
50+
AllArgs,
51+
function(x) get(x, envir = parent.env(env = environment()))) %>%
52+
stats::setNames(AllArgs)
53+
IASDT.R::CheckArgs(AllArgs = AllArgs, Type = "character", Args = "EnvFile")
54+
IASDT.R::CheckArgs(AllArgs = AllArgs, Type = "logical", Args = "FromHPC")
55+
rm(AllArgs, envir = environment())
56+
57+
if (isFALSE(IASDT.R::CheckCommands("curl"))) {
58+
stop(
59+
"`curl` is required for downloading data but was not found.",
60+
call. = FALSE)
61+
}
62+
if (isFALSE(IASDT.R::CheckCommands("unzip"))) {
63+
stop(
64+
"`unzip` is required for extracting data but was not found.",
65+
call. = FALSE)
66+
}
67+
68+
# # ..................................................................... ###
69+
70+
# Environment variables
71+
if (FromHPC) {
72+
EnvVars2Read <- tibble::tribble(
73+
~VarName, ~Value, ~CheckDir, ~CheckFile,
74+
"Path_Grid", "DP_R_Grid", TRUE, FALSE,
75+
"Path_Grid_Ref", "DP_R_Grid_Ref", TRUE, FALSE,
76+
"Path_Raw", "DP_R_BioReg_Raw", FALSE, FALSE,
77+
"Path_Interim", "DP_R_BioReg_Interim", FALSE, FALSE,
78+
"Path_BioReg", "DP_R_BioReg", FALSE, FALSE,
79+
"BioReg_URL", "DP_R_BioReg_URL", FALSE, FALSE)
80+
} else {
81+
EnvVars2Read <- tibble::tribble(
82+
~VarName, ~Value, ~CheckDir, ~CheckFile,
83+
"Path_Grid", "DP_R_Grid_Local", TRUE, FALSE,
84+
"Path_Grid_Ref", "DP_R_Grid_Ref_Local", TRUE, FALSE,
85+
"Path_Raw", "DP_R_BioReg_Raw_Local", FALSE, FALSE,
86+
"Path_Interim", "DP_R_BioReg_Interim_Local", FALSE, FALSE,
87+
"Path_BioReg", "DP_R_BioReg_Local", FALSE, FALSE,
88+
"BioReg_URL", "DP_R_BioReg_URL", FALSE, FALSE)
89+
}
90+
91+
# Assign environment variables and check file and paths
92+
IASDT.R::AssignEnvVars(EnvFile = EnvFile, EnvVarDT = EnvVars2Read)
93+
94+
# # ..................................................................... ###
95+
96+
# Ensure necessary directories exist
97+
fs::dir_create(c(Path_BioReg, Path_Raw, Path_Interim))
98+
99+
# # ..................................................................... ###
100+
101+
# Download biogeographical regions dataset ------
102+
103+
IASDT.R::CatTime("Downloading biogeographical regions dataset")
104+
105+
# name of downloaded zip file
106+
ZipFileName <- "Biogeog_regions_original.zip"
107+
108+
# Extract download link
109+
IASDT.R::CatTime("Extract download link", Level = 1)
110+
BioReg_URL2 <- tryCatch({
111+
BioReg_URL %>%
112+
# extract download link
113+
httr::GET(config = httr::timeout(100)) %>%
114+
rvest::read_html() %>%
115+
rvest::html_elements(css = "#334349 .list:nth-child(2) .content") %>%
116+
rvest::html_nodes("a") %>%
117+
rvest::html_attr("href") %>%
118+
# extract direct download link
119+
httr::GET(config = httr::timeout(100)) %>%
120+
rvest::read_html() %>%
121+
rvest::html_elements(css = "#header-primary-action .button") %>%
122+
rvest::html_attr("href")
123+
}, error = function(e) {
124+
stop("Failed to extract download URL: ", e$message, call. = FALSE)
125+
})
126+
127+
if (length(BioReg_URL2) != 1) {
128+
stop(
129+
"Download link extraction failed. Found: ", length(BioReg_URL2),
130+
call. = FALSE)
131+
}
132+
IASDT.R::CatTime(BioReg_URL2, Level = 2, Time = FALSE)
133+
134+
# # |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ##
135+
136+
# Downloading using `curl`
137+
IASDT.R::CatTime("Download using `curl`", Level = 1)
138+
Zip_file <- IASDT.R::Path(Path_Raw, ZipFileName)
139+
DownCommand <- stringr::str_glue(
140+
'curl -J --create-dirs --output-dir {Path_Raw} -o\\
141+
"{ZipFileName}" -L {BioReg_URL2} --silent --max-time 300')
142+
143+
attempt <- 1
144+
repeat {
145+
IASDT.R::CatTime(paste0("Attempt ", attempt), Level = 2, Time = FALSE)
146+
147+
invisible(IASDT.R::System(DownCommand))
148+
149+
if (IASDT.R::CheckZip(Zip_file)) {
150+
break
151+
}
152+
153+
if (attempt >= 5) {
154+
stop(
155+
"Error: Maximum download attempts reached. Zip file check failed.",
156+
call. = FALSE)
157+
}
158+
attempt <- attempt + 1
159+
}
160+
161+
# # |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ##
162+
163+
# unzip data to interim directory
164+
IASDT.R::CatTime("unzip data to interim directory", Level = 1)
165+
stringr::str_glue("unzip -o -qq -j {Zip_file} -d {Path_Interim}") %>%
166+
IASDT.R::System() %>%
167+
invisible()
168+
169+
# # ..................................................................... ###
170+
171+
# Processing biogeographical regions data ------
172+
IASDT.R::CatTime("Processing biogeographical regions data")
173+
174+
# Reading data from original shapefile
175+
IASDT.R::CatTime("Read data from original shapefile", Level = 1)
176+
BioReg_DT <- fs::dir_ls(path = Path_Interim, type = "file", glob = "*.shp$")
177+
if (length(BioReg_DT) != 1) {
178+
stop("Expected one .shp file, found: ", length(BioReg_DT), call. = FALSE)
179+
}
180+
BioReg_DT <- sf::st_read(BioReg_DT, quiet = TRUE) %>%
181+
# project to EPSG:3035
182+
sf::st_transform(3035)
183+
184+
# # |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ##
185+
186+
# Extract metadata
187+
IASDT.R::CatTime("Extract metadata", Level = 1)
188+
BioReg_Metadata <- sf::st_drop_geometry(BioReg_DT) %>%
189+
tibble::as_tibble() %>%
190+
dplyr::rename(ID = "PK_UID") %>%
191+
list()
192+
193+
# # |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ##
194+
195+
# Rasterize/masking
196+
IASDT.R::CatTime("Rasterize/masking", Level = 1)
197+
198+
GridR <- IASDT.R::Path(Path_Grid, "Grid_10_Land_Crop.RData")
199+
if (!file.exists(GridR)) {
200+
stop(
201+
paste0("Path for the Europe boundaries does not exist: ", GridR),
202+
call. = FALSE)
203+
}
204+
205+
BioReg_R <- BioReg_DT %>%
206+
dplyr::mutate(
207+
Rast = purrr::map(
208+
.x = geometry,
209+
.f = ~{
210+
.x %>%
211+
sf::st_geometry() %>%
212+
sf::st_as_sf() %>%
213+
terra::rasterize(
214+
y = terra::unwrap(IASDT.R::LoadAs(GridR)), cover = TRUE) %>%
215+
terra::classify(cbind(NA, 0))
216+
})) %>%
217+
dplyr::pull(Rast) %>%
218+
terra::rast() %>%
219+
terra::which.max() %>%
220+
stats::setNames("ID") %>%
221+
terra::mask(terra::unwrap(IASDT.R::LoadAs(GridR)))
222+
223+
rm(BioReg_DT, GridR, envir = environment())
224+
225+
# # |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ##
226+
227+
# Remove unused levels and adjust ID column
228+
IASDT.R::CatTime("Remove unused levels", Level = 1)
229+
levels(BioReg_R) <- BioReg_Metadata
230+
BioReg_R <- terra::droplevels(BioReg_R)
231+
232+
MapLevels <- terra::levels(BioReg_R)[[1]]
233+
MapLevelsNew <- dplyr::mutate(MapLevels, ID = seq_len(dplyr::n()))
234+
MapLevelsM <- MapLevels %>%
235+
dplyr::left_join(MapLevelsNew, by = "short_name") %>%
236+
dplyr::select("short_name", tidyselect::everything())
237+
BioReg_R <- terra::classify(BioReg_R, MapLevelsM[, -1])
238+
levels(BioReg_R) <- list(MapLevelsNew)
239+
terra::crs(BioReg_R) <- "epsg:3035"
240+
241+
IASDT.R::CatTime("Convert to sf object", Level = 1)
242+
Grid_sf <- IASDT.R::Path(Path_Grid_Ref, "Grid_10_sf.RData") %>%
243+
IASDT.R::LoadAs() %>%
244+
magrittr::extract2("Grid_10_sf_s")
245+
BioReg_sf <- terra::as.polygons(
246+
x = BioReg_R, aggregate = FALSE, na.rm = TRUE) %>%
247+
sf::st_as_sf() %>%
248+
tibble::tibble() %>%
249+
sf::st_as_sf() %>%
250+
dplyr::left_join(BioReg_Metadata[[1]], by = "short_name") %>%
251+
sf::st_join(Grid_sf) %>%
252+
dplyr::relocate(geometry, .after = tidyselect::everything())
253+
254+
# # ..................................................................... ###
255+
256+
IASDT.R::CatTime("Saving processed biogeographical regions data")
257+
258+
IASDT.R::CatTime("Save as tiff", Level = 1)
259+
terra::writeRaster(
260+
x = BioReg_R, overwrite = TRUE,
261+
filename = file.path(Path_BioReg, "BioReg_R.tif"))
262+
# Write attributes to file
263+
terra::levels(BioReg_R)[[1]] %>%
264+
dplyr::rename(VALUE = ID) %>%
265+
foreign::write.dbf(
266+
file = file.path(Path_BioReg, "BioReg_R.tif.vat.dbf"),
267+
factor2char = TRUE, max_nchar = 254)
268+
269+
IASDT.R::CatTime("Save as RData", Level = 1)
270+
IASDT.R::SaveAs(
271+
InObj = terra::wrap(BioReg_R), OutObj = "BioReg_R",
272+
OutPath = file.path(Path_BioReg, "BioReg_R.RData"))
273+
274+
IASDT.R::CatTime("Save sf object", Level = 1)
275+
save(BioReg_sf, file = file.path(Path_BioReg, "BioReg_sf.RData"))
276+
277+
# # ..................................................................... ###
278+
279+
IASDT.R::CatDiff(
280+
InitTime = .StartTime,
281+
Prefix = "Processing biogeographical regions data took ")
282+
283+
return(invisible(NULL))
284+
}

R/DWF_IAS_Distribution.R

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@
1010
#' biogeographical region, and outputs results as tif, RData, and tibble
1111
#' formats.
1212
#' @param Species Character. Name of the species to analyze.
13-
#' @param FromHPC Logical. Whether the processing is being done on an
14-
#' High-Performance Computing (HPC) environment, to adjust file paths
13+
#' @param FromHPC Logical. Whether the processing is being done on an
14+
#' High-Performance Computing (HPC) environment, to adjust file paths
1515
#' accordingly. Default: `TRUE`.
16-
#' @param EnvFile Character. Path to the environment file containing paths to
16+
#' @param EnvFile Character. Path to the environment file containing paths to
1717
#' data sources. Defaults to `.env`.
18-
#' @param Verbose Logical. Whether to print progress messages.
18+
#' @param Verbose Logical. Whether to print progress messages.
1919
#' Default is `FALSE`.
2020
#' @param Overwrite Logical. If `TRUE`, the function will overwrite existing
2121
#' files (default: `FALSE`).
@@ -92,7 +92,7 @@ IAS_Distribution <- function(
9292
"Path_TaxaCNT", "DP_R_Taxa_Country", FALSE, TRUE,
9393
"Path_TaxaInfo_RData", "DP_R_TaxaInfo_RData", FALSE, TRUE,
9494
"Path_TaxaInfo", "DP_R_TaxaInfo", FALSE, TRUE,
95-
"Path_BioReg", "DP_R_BioReg", FALSE, TRUE,
95+
"Path_BioReg", "DP_R_BioReg", TRUE, FALSE,
9696
"EU_Bound", "DP_R_EUBound_sf", FALSE, TRUE)
9797
} else {
9898
EnvVars2Read <- tibble::tribble(
@@ -106,7 +106,7 @@ IAS_Distribution <- function(
106106
"Path_TaxaCNT", "DP_R_Taxa_Country_Local", FALSE, TRUE,
107107
"Path_TaxaInfo_RData", "DP_R_TaxaInfo_RData_Local", FALSE, TRUE,
108108
"Path_TaxaInfo", "DP_R_TaxaInfo_Local", FALSE, TRUE,
109-
"Path_BioReg", "DP_R_BioReg_Local", FALSE, TRUE,
109+
"Path_BioReg", "DP_R_BioReg_Local", TRUE, FALSE,
110110
"EU_Bound", "DP_R_EUBound_sf_Local", FALSE, TRUE)
111111
}
112112

@@ -497,7 +497,15 @@ IAS_Distribution <- function(
497497
# number of biogeographical regions per species and minimum / maximum / mean
498498
# number of grid cells per biogeographical regions
499499

500-
BioReg_R <- terra::unwrap(IASDT.R::LoadAs(Path_BioReg))
500+
BioReg_R <- IASDT.R::Path(Path_BioReg, "BioReg_R.RData")
501+
if (isFALSE(IASDT.R::CheckRData(BioReg_R))) {
502+
stop(
503+
paste0(
504+
"Required file for biogeographical regions does not exist: ", BioReg_R),
505+
call. = FALSE)
506+
}
507+
508+
BioReg_R <- terra::unwrap(IASDT.R::LoadAs(BioReg_R))
501509

502510
# name of Biogeographical regions
503511
BioReg_Names <- c(

0 commit comments

Comments
 (0)