diff --git a/.Rbuildignore b/.Rbuildignore index 145160b8..2a4f7414 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -25,3 +25,5 @@ run_ramsar_metrics.R test-indicator_formulas.R ^doc$ ^Meta$ +^.gemini$ +^conductor$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 14159b77..9a1735c6 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,7 +2,7 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main, master, outsource_uncertainty] pull_request: branches: [main, master] @@ -39,10 +39,25 @@ jobs: http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true + # Clear cache to ensure fresh package install + - name: Clear R package cache + run: | + Rscript -e 'try(remove.packages("b3gbi"), silent=TRUE)' || true + rm -rf ~/.cache/R/renv/library/*/b3gbi || true + shell: bash + - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::rcmdcheck + extra-packages: any::rcmdcheck, b-cubed-eu/dubicube needs: check + cache-version: 4 # Bust the cache + + # Force install local package to ensure latest code is used + - name: Install local b3gbi + run: | + R CMD build . --no-manual + R CMD INSTALL b3gbi_*.tar.gz + shell: bash - uses: r-lib/actions/check-r-package@v2 with: diff --git a/.gitignore b/.gitignore index 18ed7de9..98a12586 100644 --- a/.gitignore +++ b/.gitignore @@ -71,3 +71,5 @@ app.R .RDataTmp /doc/ /Meta/ +.gemini/ +conductor/ diff --git a/CITATION.cff b/CITATION.cff index 659b5d6d..b457f49c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -8,7 +8,7 @@ message: 'To cite package "b3gbi" in publications use:' type: software license: MIT title: 'b3gbi: General Biodiversity Indicators for Biodiversity Data Cubes' -version: 0.8.13 +version: 0.9.0 abstract: Calculate general biodiversity indicators from GBIF data cubes. Includes many common indicators such as species richness and evenness, which can be calculated over time (trends) or space (maps). @@ -41,7 +41,7 @@ references: version: '>= 3.5.0' - type: software title: boot - abstract: 'boot: Bootstrap Functions' + abstract: 'boot: Bootstrap Functions (Originally by Angelo Canty for S)' notes: Imports repository: https://CRAN.R-project.org/package=boot authors: @@ -78,6 +78,24 @@ references: orcid: https://orcid.org/0000-0003-4777-038X year: '2026' doi: 10.32614/CRAN.package.dplyr +- type: software + title: dubicube + abstract: 'dubicube: Calculation and Interpretation of Data Cube Indicator Uncertainty' + notes: Imports + url: https://b-cubed-eu.github.io/dubicube/ + repository: https://b-cubed-eu.r-universe.dev + authors: + - family-names: Langeraert + given-names: Ward + email: ward.langeraert@inbo.be + orcid: https://orcid.org/0000-0002-5900-8109 + affiliation: Research Institute for Nature and Forest (INBO) + - family-names: Van Daele + given-names: Toon + email: toon.vandaele@inbo.be + orcid: https://orcid.org/0000-0002-1362-853X + affiliation: Research Institute for Nature and Forest (INBO) + year: '2026' - type: software title: ggplot2 abstract: 'ggplot2: Create Elegant Data Visualisations Using the Grammar of Graphics' @@ -122,12 +140,11 @@ references: title: iNEXT abstract: 'iNEXT: Interpolation and Extrapolation for Species Diversity' notes: Imports - url: https://sites.google.com/view/chao-lab-website/software/inext + url: http://chao.stat.nthu.edu.tw/wordpress/software_download/ repository: https://CRAN.R-project.org/package=iNEXT authors: - family-names: Hsieh given-names: T. C. - email: euler96@gmail.com - family-names: Ma given-names: K. H. - family-names: Chao @@ -208,7 +225,7 @@ references: authors: - family-names: Wickham given-names: Hadley - email: hadley@posit.co + email: hadley@rstudio.com orcid: https://orcid.org/0000-0003-4757-117X - family-names: Henry given-names: Lionel @@ -362,6 +379,21 @@ references: orcid: https://orcid.org/0000-0001-6403-5550 year: '2026' doi: 10.32614/CRAN.package.units +- type: software + title: bold + abstract: 'bold: Interface to Bold Systems API' + notes: Suggests + url: https://docs.ropensci.org/bold/ + authors: + - family-names: Dubois + given-names: Salix + email: salixdubois+bold@gmail.com + - family-names: Chamberlain + given-names: Scott + email: myrmecocystus@gmail.com + orcid: https://orcid.org/0000-0003-1444-9135 + year: '2026' + version: '>= 1.3.0' - type: software title: knitr abstract: 'knitr: A General-Purpose Package for Dynamic Report Generation in R' @@ -485,7 +517,7 @@ references: title: rnaturalearthdata abstract: 'rnaturalearthdata: World Vector Map Data from Natural Earth Used in ''rnaturalearth''' notes: Suggests - url: https://docs.ropensci.org/rnaturalearthdata/ + url: https://github.com/ropenscilabs/rnaturalearthdata repository: https://CRAN.R-project.org/package=rnaturalearthdata authors: - family-names: South @@ -530,6 +562,7 @@ references: - family-names: Chamberlain given-names: Scott email: myrmecocystus@gmail.com + orcid: https://orcid.org/0000-0003-1444-9135 - family-names: Szoecs given-names: Eduard - family-names: Foster diff --git a/DESCRIPTION b/DESCRIPTION index dfc1862c..f1f9b5bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: b3gbi Title: General Biodiversity Indicators for Biodiversity Data Cubes -Version: 0.8.14 +Version: 0.9.0 Authors@R: c( person("Shawn", "Dove", email = "Shawn.Dove@allzool.bio.uni-giessen.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9465-5638")), @@ -20,6 +20,7 @@ Depends: Imports: boot, dplyr, + dubicube, ggplot2, iNEXT, labeling, @@ -57,11 +58,14 @@ RoxygenNote: 7.3.3 VignetteBuilder: knitr, rmarkdown Additional_repositories: https://b-cubed-eu.r-universe.dev, https://ropensci.r-universe.dev +Config/Needs/check: Ironholds/WikidataR, ropensci/wikitaxa Remotes: - github::hrbrmstr/mgrs, - github::ropensci/bold, - github::ropensci/taxize, - github::ropensci/rnaturalearthhires + hrbrmstr/mgrs, + ropensci/bold, + ropensci/wikitaxa, + ropensci/taxize, + ropensci/rnaturalearthhires, + b-cubed-eu/dubicube BugReports: https://github.com/b-cubed-eu/b3gbi/issues Funding: This project receives funding from the European Union’s Horizon Europe Research and Innovation Programme (ID No 101059592). diff --git a/NAMESPACE b/NAMESPACE index 64cce03a..0a6627ac 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -67,6 +67,7 @@ S3method(print,sim_cube) export("%>%") export(ab_rarity_map) export(ab_rarity_ts) +export(add_ci) export(area_rarity_map) export(area_rarity_ts) export(calc_ci) @@ -99,6 +100,7 @@ export(plot_mv) export(plot_species_map) export(plot_species_ts) export(plot_ts) +export(prepare_indicator_bootstrap) export(process_cube) export(spec_occ_map) export(spec_occ_ts) diff --git a/NEWS.md b/NEWS.md index 7fa0490a..e7e689b5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,20 @@ +# b3gbi 0.9.0 - Major update: + +* Confidence intervals are no longer calculated in the core indicator workflow, except for Hill diversity (Hill diversity is handled by the iNEXT package, which calculates confidence intervals internally). They are now calculated using a separated function, add_ci(), which can be applied to any indicator_ts object for which reasonable confidence intervals can be calculated. This gives the user more freedom. After adding CIs, you can recalculate with different parameters by setting 'replace = TRUE' when you call add_ci(). +* **Indicator-specific bootstrap defaults**: add_ci() now applies different default behaviors based on indicator type to ensure statistically appropriate confidence intervals: + - Species-level indicators (`spec_occ`, `spec_range`, `total_occ`) use group-specific bootstrapping (resampling within each species/year combination) + - Aggregate indicators (evenness, rarity, density) use whole-cube bootstrapping + - Evenness indicators (`pielou_evenness`, `williams_evenness`) automatically apply logit transformation to keep confidence intervals within valid [0,1] bounds + - Total occurrences (`total_occ`) has bias correction disabled by default +* Bootstrapping for confidence intervals is now done across the entire cube by default. This uses the dubicube package, which is now added as a dependency. The option to calculate at indicator level is still available by setting 'level = "indicator"' when calling add_ci(). Indicator level bootstrapping is a faster but less robust method. +* The default number of bootstrap replicates when calculating confidence intervals is now 1000. This improves robustness at the sake of speed. This can still be changed using the 'num_bootstrap' parameter when calling add_ci(). +* Added `boot_args` and `ci_args` parameters to `add_ci()` to allow fine-tuning of the underlying `dubicube` calls. These can be used to override the indicator-specific defaults if needed. +* Implemented `group_specific` vs `whole_cube` resampling logic in `add_ci()` to handle different indicator calculation requirements. Species-level indicators are automatically detected and handled appropriately. +* Added new internal helper function `prepare_indicator_bootstrap()` to centralize bootstrap parameter configuration. This function implements a "rule book" pattern that defines appropriate defaults for each indicator type. +* Included detailed bootstrap summary statistics (`est_boot`, `se_boot`, `bias_boot`) in the output of `add_ci()`. +* Added a new conceptual vignette: "Uncertainty in Biodiversity Indicators". +* Added comprehensive unit and integration tests for the decoupled uncertainty workflow. + # b3gbi 0.8.14 - Minor update: * Fixed a bug in `pielou_evenness_map` where use of a `cell_size` larger than the native cube resolution caused a crash due to non-unique row identifiers during pivoting (#102). diff --git a/R/add_ci.R b/R/add_ci.R new file mode 100644 index 00000000..d337f4b2 --- /dev/null +++ b/R/add_ci.R @@ -0,0 +1,260 @@ +#' Add Confidence Intervals to an Indicator Object +#' +#' @description +#' This function calculates bootstrap confidence intervals for an existing +#' `indicator_ts` object. It supports both cube-level bootstrapping (resampling +#' occurrence records) and indicator-level bootstrapping (resampling +#' calculated values), allowing for advanced transformations during the +#' CI calculation process. +#' +#' @param indicator An object of class `indicator_ts` to which confidence +#' intervals should be added. +#' @param num_bootstrap (Optional) Number of bootstrap replicates to perform. +#' (Default: 1000) +#' @param bootstrap_level (Optional) Level at which to perform bootstrapping: +#' * `cube` (default): Bootstrapping is done by resampling the +#' occurrence records in the cube. This is mathematically more robust as it +#' captures the underlying sampling uncertainty. +#' * `indicator`: Bootstrapping is done by resampling indicator +#' values. This is faster for large cubes but less robust. +#' @param ci_type (Optional) Type of bootstrap confidence intervals to +#' calculate. (Default: `"norm"`). Supported options are: +#' * `perc`: Percentile intervals. +#' * `bca`: Bias-corrected and accelerated intervals. +#' * `norm`: Normal approximation intervals. +#' * `basic`: Basic bootstrap intervals. +#' * `none`: No confidence intervals calculated. +#' @param trans (Optional) A function for transforming the indicator values +#' before calculating confidence intervals (e.g., `log`). +#' (Default: identity function) +#' @param inv_trans (Optional) The inverse of the transformation function +#' `trans` (e.g., `exp`). Used to back-transform the intervals +#' to the original scale. (Default: identity function) +#' @param confidence_level (Optional) The confidence level for the calculated +#' intervals (e.g., 0.95 for 95% CIs). (Default: 0.95) +#' @param overwrite (Optional) Logical. If the indicator already contains +#' confidence intervals (`ll` and `ul` columns), should they +#' be replaced? (Default: TRUE) +#' @param seed (Optional) Integer. Random seed for bootstrapping. (Default: 123) +#' @param boot_args (Optional) Named list of additional arguments passed to +#' `dubicube::bootstrap_cube()`. (Default: `list()`) +#' @param ci_args (Optional) Named list of additional arguments passed to +#' `dubicube::calculate_bootstrap_ci()`. (Default: `list()`) +#' @param ... (Optional) Additional arguments passed to calc_ci(). +#' +#' @details +#' The function acts as a bridge to the \pkg{dubicube} package to calculate +#' bootstrap confidence intervals. +#' +#' ## Indicator-specific defaults +#' +#' Depending on the indicator, default settings are internally applied when +#' calculating bootstrap confidence intervals. These defaults control whether +#' bootstrapping is performed per group, which transformation is used, and +#' whether bias correction is disabled. +#' +#' The following defaults are used unless explicitly overridden via +#' `trans`, `inv_trans`, `boot_args`, or `ci_args`: +#' +#' - **`total_occ`** +#' - Group-specific bootstrapping: **yes** +#' - Transformation: **none (identity)** +#' - Bias correction: **disabled** (`no_bias = TRUE`) +#' +#' - **`spec_occ`, `spec_range`** +#' - Group-specific bootstrapping: **yes** +#' - Transformation: **none (identity)** +#' - Bias correction: enabled +#' +#' - **`pielou_evenness`, `williams_evenness`** +#' - Group-specific bootstrapping: **no** +#' - Transformation: **logit** +#' - Inverse transformation: **inverse logit** +#' - Bias correction: enabled +#' +#' - **`occ_density`, `ab_rarity`, `area_rarity`, `newness`** +#' - Group-specific bootstrapping: **no** +#' - Transformation: **none (identity)** +#' - Bias correction: enabled +#' +#' Group-specific bootstrapping means that resampling is performed within each +#' group (e.g., species or year), which is required for indicators that are +#' inherently group-based. This in contrast to whole-cube bootstrapping +#' where resampling is performed across the whole dataset; applicable for +#' indicators that combine information across groups +#' +#' Transformations are applied prior to confidence interval calculation and +#' inverted afterwards to return intervals on the original scale. +#' +#' ## Indicators outside scope of this function +#' +#' For certain indicators, confidence intervals cannot be +#' added post-hoc as they are calculated internally via the `iNext` package, or +#' if they are not relevant. In such cases, a warning is issued and the +#' original object is returned. The following indicators cannot have confidence +#' intervals added via `add_ci()`: +#' * `hill0`, `hill1`, `hill2` (calculated internally) +#' * `obs_richness` +#' * `cum_richness` +#' * `occ_turnover` +#' * `tax_distinct` +#' +#' @return An updated object of class `indicator_ts` containing the +#' original data with the following additional columns: +#' * `ll`: Lower limit of the confidence interval. +#' * `ul`: Upper limit of the confidence interval. +#' * `est_boot`: The bootstrap estimate of the indicator value. +#' * `se_boot`: The bootstrap standard error. +#' * `bias_boot`: The bootstrap estimate of bias. +#' * `int_type`: The type of interval calculated (e.g., 'perc'). +#' * `conf`: The confidence level used. +#' +#' @seealso [dubicube::bootstrap_cube()], [dubicube::calculate_bootstrap_ci()] +#' +#' @export +add_ci <- function(indicator, + num_bootstrap = 1000, + bootstrap_level = c("cube", + "indicator"), + ci_type = c("perc", + "bca", + "norm", + "basic", + "none"), + trans = function(t) t, + inv_trans = function(t) t, + confidence_level = 0.95, + overwrite = TRUE, + boot_args = list(), + ci_args = list(), + seed = 123, + ...) { + + # Check for correct object class + if (!inherits(indicator, "indicator_ts")) { + stop("indicator must be an indicator_ts object.") + } + + ll <- ul <- year <- est_original <- NULL + + # List of indicators for which bootstrapped confidence intervals should not + # be calculated + noci_list <- c("obs_richness", + "cum_richness", + "occ_turnover", + "tax_distinct", + "hill0", + "hill1", + "hill2") + + # Match ci_type argument + ci_type <- match.arg(ci_type) + bootstrap_level <- match.arg(bootstrap_level) + + # If indicator is in noci_list, return indicator without calculating CIs + if (indicator$div_type %in% noci_list) { + if (indicator$div_type %in% c("hill0", "hill1", "hill2")) { + warning( + paste0( + "Confidence intervals cannot calculated for ", + indicator$div_type, + " as they are handled by the iNext package when calculating + your indicator. Returning indicator without adding CIs." + ) + ) + } else + warning( + paste0( + "Cannot calculate sensible confidence intervals for ", + indicator$div_type, ". Returning indicator without CIs." + ) + ) + return(indicator) + } + + # Extract data from indicator object + x <- indicator$data + raw_data <- indicator$raw_data + + if (any(c("ll", "ul") %in% names(x)) & !overwrite) { + warning( + paste0( + "Indicator already contains confidence intervals. Returning indicator + without adding CIs. Use 'replace = TRUE' argument to recalculate CIs." + ) + ) + return(indicator) + } + + # Remove existing confidence intervals if overwrite = TRUE + if (overwrite & + all(c("ll", "ul") %in% names(x))) { + x <- x %>% + dplyr::select(-ll, -ul) + } + + # Calculate confidence intervals + if (bootstrap_level == "indicator") { + + # Send data to calc_ci for indicator level bootstrapping + indicator$data <- calc_ci(raw_data, + indicator = x, + num_bootstrap = num_bootstrap, + ci_type = ci_type, + ...) + return(indicator) + + } else if (bootstrap_level == "cube") { + # Get dubicube function parameters + params_total <- prepare_indicator_bootstrap( + indicator = indicator, + num_bootstrap = num_bootstrap, + ci_type = ci_type, + trans = trans, + inv_trans = inv_trans, + confidence_level = confidence_level, + boot_args = boot_args, + ci_args = ci_args, + seed = seed + ) + + # Bootstrap cube data + bootstrap_results <- do.call(dubicube::bootstrap_cube, + params_total$bootstrap_params) + + # Calculate confidence intervals from bootstrap results + params_total$ci_params$bootstrap_results <- bootstrap_results + params_total$ci_params$bootstrap_samples_df <- bootstrap_results + group_cols <- params_total$ci_params$grouping_var + ci_df <- do.call( + dubicube::calculate_bootstrap_ci, + params_total$ci_params + ) %>% + dplyr::select(-est_original) + + # Join confidence intervals to indicator object + if (nrow(ci_df) > 0) { + # This handles cases where dubicube returns year as character + if ("year" %in% names(ci_df) && is.numeric(x$year)) { + ci_df$year <- as.numeric(ci_df$year) + } + # Convert negative values to zero as rarity cannot be less than zero + ci_df$ll <- ifelse(ci_df$ll > 0, ci_df$ll, 0) + # Join confidence intervals to indicator values by year + x <- x %>% + dplyr::full_join(ci_df, + by = group_cols) + indicator$data <- x + return(indicator) + } else { + warning( + paste0( + "Unable to calculate confidence intervals. There may be ", + "insufficient data." + ) + ) + } + } else { + stop("Invalid bootstrap_level. Choose 'cube' or 'indicator'.") + } +} diff --git a/R/calc_ci_methods.R b/R/calc_ci_methods.R index 0545bf03..48b2417c 100644 --- a/R/calc_ci_methods.R +++ b/R/calc_ci_methods.R @@ -44,7 +44,7 @@ calc_ci.default <- function(x, calc_ci.total_occ <- function(x, indicator, num_bootstrap = 1000, - ci_type = ci_type, + ci_type = "norm", ...) { stopifnot_error("Wrong data class. This is an internal function and is not @@ -85,7 +85,7 @@ calc_ci.total_occ <- function(x, calc_ci.occ_density <- function(x, indicator, num_bootstrap = 1000, - ci_type = ci_type, + ci_type = "norm", ...) { stopifnot_error("Wrong data class. This is an internal function and is not @@ -143,7 +143,7 @@ calc_ci.occ_density <- function(x, calc_ci.newness <- function(x, indicator, num_bootstrap = 1000, - ci_type = ci_type, + ci_type = "norm", ...) { stopifnot_error("Wrong data class. This is an internal function and is not @@ -229,7 +229,7 @@ calc_ci.pielou_evenness <- function(x, calc_ci.ab_rarity <- function(x, indicator, num_bootstrap = 1000, - ci_type = ci_type, + ci_type = "norm", ...) { stopifnot_error("Wrong data class. This is an internal function and is not @@ -270,7 +270,7 @@ calc_ci.ab_rarity <- function(x, calc_ci.area_rarity <- function(x, indicator, num_bootstrap = 1000, - ci_type = ci_type, + ci_type = "norm", ...) { stopifnot_error("Wrong data class. This is an internal function and is not @@ -316,7 +316,7 @@ calc_ci.area_rarity <- function(x, calc_ci.spec_occ <- function(x, indicator, num_bootstrap = 1000, - ci_type = ci_type, + ci_type = "norm", ...) { stopifnot_error("Wrong data class. This is an internal function and is not @@ -391,7 +391,7 @@ calc_ci.spec_occ <- function(x, calc_ci.spec_range <- function(x, indicator, num_bootstrap = 1000, - ci_type = ci_type, + ci_type = "norm", ...) { stopifnot_error("Wrong data class. This is an internal function and is not diff --git a/R/calc_map_evenness_core.R b/R/calc_map_evenness_core.R index 8a132af2..70c02546 100644 --- a/R/calc_map_evenness_core.R +++ b/R/calc_map_evenness_core.R @@ -31,7 +31,7 @@ calc_map_evenness_core <- function(x, # We must explicitly drop cellCode before pivoting so the data is clean # for the calculation matrix, and rely only on the separate cell_map later. - dplyr::select(-cellCode) %>% # <--- CRITICAL CHANGE 2 + dplyr::select(-all_of(cellCode)) %>% # <--- CRITICAL CHANGE 2 tidyr::pivot_wider(names_from = cellid, values_from = num_occ) %>% diff --git a/R/compute_indicator_workflow.R b/R/compute_indicator_workflow.R index beb521e6..23f1ebf4 100644 --- a/R/compute_indicator_workflow.R +++ b/R/compute_indicator_workflow.R @@ -26,9 +26,6 @@ #' rarefaction). #' @param dim_type (Optional) Dimension to calculate indicator over time: 'ts', #' or space: 'map'. (Default: 'map') -#' @param ci_type (Optional) Type of bootstrap confidence intervals to -#' calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -#' CIs. #' @param cell_size (Optional) Length of grid cell sides, in km or degrees. #' If set to "grid" (default), this will use the existing grid size of your #' cube. If set to "auto", this will be automatically determined according to @@ -64,8 +61,6 @@ #' @param make_valid (Optional) Calls st_make_valid() from the sf package #' after creating the grid. Increases processing time but may help if you are #' getting polygon errors. (Default is FALSE). -#' @param num_bootstrap (Optional) Set the number of bootstraps to calculate for -#' generating confidence intervals. (Default: 100) #' @param shapefile_path (optional) Path of an external shapefile to merge into #' the workflow. For example, if you want to calculate your indicator #' particular features such as protected areas or wetlands. @@ -116,11 +111,6 @@ compute_indicator_workflow <- function(data, type, dim_type = c("map", "ts"), - ci_type = c("norm", - "basic", - "perc", - "bca", - "none"), cell_size = "grid", level = c("cube", "continent", @@ -141,7 +131,6 @@ compute_indicator_workflow <- function(data, last_year = NULL, spherical_geometry = TRUE, make_valid = FALSE, - num_bootstrap = 100, shapefile_path = NULL, shapefile_crs = NULL, invert = FALSE, @@ -178,19 +167,8 @@ compute_indicator_workflow <- function(data, "hill1", "hill2") - # List of indicators for which bootstrapped confidence intervals should not - # be calculated - noci_list <- c("obs_richness", - "cum_richness", - "occ_turnover", - "tax_distinct", - "hill0", - "hill1", - "hill2") - type <- match.arg(type, names(available_indicators)) dim_type <- match.arg(dim_type) - ci_type <- match.arg(ci_type) ne_type <- match.arg(ne_type) ne_scale <- match.arg(ne_scale) level <- match.arg(level) @@ -226,18 +204,6 @@ compute_indicator_workflow <- function(data, } } - if (type %in% c("hill0", "hill1", "hill2") && - ci_type %in% c("norm", "basic", "bca")) { - message( - paste0( - "Note: Hill diversity measures are calculated by the iNEXT package, ", - "therefore bootstrap confidence intervals will be calculated using ", - "the standard iNEXT method, similar to the 'percentile' method of ", - "the 'boot' package." - ) - ) - } - # Ensure user has entered reasonable first and last years, then filter the # data accordingly. If user-chosen first and/or last years are outside the # range of the data, the actual first and last years of the data will be used. @@ -644,6 +610,9 @@ compute_indicator_workflow <- function(data, } + # Save raw data before adding classes (for bootstrapping later) + raw_data_for_bootstrap <- data_final_nogeom + # Assign classes to send data to correct calculator function subtype <- paste0(type, "_", dim_type) class(data_final_nogeom) <- append(type, class(data_final_nogeom)) @@ -678,25 +647,6 @@ compute_indicator_workflow <- function(data, # Calculate indicator indicator <- calc_ts(data_final_nogeom, ...) - # Calculate confidence intervals - if (ci_type != "none") { - if (!type %in% noci_list) { - indicator <- calc_ci(data_final_nogeom, - indicator = indicator, - num_bootstrap = num_bootstrap, - ci_type = ci_type, - ...) - } else { - if (!type %in% c("hill0", "hill1", "hill2")) { - warning( - paste0( - "Bootstrapped confidence intervals cannot be calculated for the ", - "chosen indicator." - ) - ) - } - } - } } if (spherical_geometry == FALSE) { @@ -741,7 +691,8 @@ compute_indicator_workflow <- function(data, num_species = num_species, num_years = num_years, species_names = species_names, - coord_range = map_lims) + coord_range = map_lims, + raw_cube_occurrences = raw_data_for_bootstrap) } return(diversity_obj) diff --git a/R/get_bootstrap_ci.R b/R/get_bootstrap_ci.R index de2192d7..e61d8fca 100644 --- a/R/get_bootstrap_ci.R +++ b/R/get_bootstrap_ci.R @@ -10,11 +10,12 @@ #' @param ... Additional argument to be passed to the `boot::boot.ci()` #' function. #' -#' @returns The returned value is a dataframe containing the time point, +#' @return The returned value is a dataframe containing the time point, #' the type of interval (`int_type`), the lower limit of the confidence -#' interval (`ll`), the upper limit of the confidence interval (`ul`), and the -#' confidence level of the intervals (`conf_level`). - +#' interval (`ll`), the upper limit of the confidence interval (`ul`), the +#' bootstrap estimate (`est_boot`), the bootstrap standard error (`se_boot`), +#' the bootstrap bias (`bias_boot`), and the confidence level of the +#' intervals (`conf_level`). get_bootstrap_ci <- function(bootstrap_list, temporal_list_name = "year", ...) { @@ -51,10 +52,24 @@ get_bootstrap_ci <- function(bootstrap_list, vec[length(vec)] }) + # Extract bootstrap summaries from original bootstrap list + est_boot <- sapply(names(conf_ints), function(name) { + mean(bootstrap_list[[name]]$t, na.rm = TRUE) + }) + se_boot <- sapply(names(conf_ints), function(name) { + sd(bootstrap_list[[name]]$t, na.rm = TRUE) + }) + bias_boot <- est_boot - sapply(names(conf_ints), function(name) { + bootstrap_list[[name]]$t0 + }) + out_list[[i]] <- data.frame(time_point = as.numeric(names(conf_ints)), int_type = type, ll = ll, - ul = ul) + ul = ul, + est_boot = est_boot, + se_boot = se_boot, + bias_boot = bias_boot) } # Create combined dataframe diff --git a/R/indicator_wrappers.R b/R/indicator_wrappers.R index 6f582800..510a0c7c 100644 --- a/R/indicator_wrappers.R +++ b/R/indicator_wrappers.R @@ -41,7 +41,7 @@ #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_map' or 'indicator_ts' and #' 'obs_richness' containing the calculated indicator values and metadata. @@ -100,7 +100,7 @@ obs_richness_ts <- function(data, ...) { #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_map' or 'indicator_ts' and #' 'total_occ' containing the calculated indicator values and metadata. @@ -215,7 +215,7 @@ total_occ_ts <- function(data, ...) { #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_map' or 'indicator_ts' and #' 'pielou_evenness' or 'williams_evenness' containing the calculated indicator @@ -351,7 +351,7 @@ williams_evenness_ts <- function(data, ...) { #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_map' or 'indicator_ts' and #' 'area_rarity' or 'ab_rarity' containing the calculated indicator values and @@ -579,7 +579,7 @@ hill_diversity_details <- paste0( #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_map' or 'indicator_ts' and #' 'hill0' or 'hill1' or 'hill2' containing the calculated indicator values and @@ -635,7 +635,7 @@ hill0_ts <- function(data, # Check if num_bootstrap was provided, otherwise set a default num_bootstrap <- dot_params$num_bootstrap - if (is.null(num_bootstrap)) num_bootstrap <- 100 + if (is.null(num_bootstrap)) num_bootstrap <- 1000 # Check if ci_type is provided and set to "none". If so, set num_bootstrap = 0 ci_type <- dot_params$ci_type @@ -703,7 +703,7 @@ hill1_ts <- function(data, # Check if num_bootstrap was provided, otherwise set a default num_bootstrap <- dot_params$num_bootstrap - if (is.null(num_bootstrap)) num_bootstrap <- 100 + if (is.null(num_bootstrap)) num_bootstrap <- 1000 # Check if ci_type is provided and set to "none". If so, set num_bootstrap = 0 ci_type <- dot_params$ci_type @@ -770,7 +770,7 @@ hill2_ts <- function(data, # Check if num_bootstrap was provided, otherwise set a default num_bootstrap <- dot_params$num_bootstrap - if (is.null(num_bootstrap)) num_bootstrap <- 100 + if (is.null(num_bootstrap)) num_bootstrap <- 1000 # Check if ci_type is provided and set to "none". If so, set num_bootstrap = 0 ci_type <- dot_params$ci_type @@ -823,7 +823,7 @@ hill2_ts <- function(data, #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_ts' and 'cum_richness' #' containing the calculated indicator values and metadata. @@ -863,7 +863,7 @@ cum_richness_ts <- function(data, ...) { #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_map' or 'indicator_ts' and #' 'newness' containing the calculated indicator values and metadata. @@ -918,7 +918,7 @@ newness_ts <- function(data, ...) { #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_map' or 'indicator_ts' and #' 'occ_density' containing the calculated indicator values and metadata. @@ -978,7 +978,7 @@ occ_density_ts <- function(data, ...) { #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_map' or 'indicator_ts' and #' 'spec_occ' containing the calculated indicator values and metadata. @@ -1027,7 +1027,7 @@ spec_occ_ts <- function(data, ...) { #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_map' or 'indicator_ts' and #' 'spec_range' containing the calculated indicator values and metadata. @@ -1110,7 +1110,7 @@ spec_range_ts <- function(data, ...) { #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_map' or 'indicator_ts' and #' 'tax_distinct' containing the calculated indicator values and metadata. @@ -1202,7 +1202,7 @@ tax_distinct_ts <- function(data, rows = 1, ...) { #' #' @inheritDotParams compute_indicator_workflow -type -dim_type -data #' -#' @seealso compute_indicator_workflow +#' @seealso [compute_indicator_workflow], [add_ci] #' #' @return An S3 object with the classes 'indicator_ts' and 'occ_turnover' #' containing the calculated indicator values and metadata. diff --git a/R/new_indicator_ts.R b/R/new_indicator_ts.R index c0b4d6a7..36d95251 100644 --- a/R/new_indicator_ts.R +++ b/R/new_indicator_ts.R @@ -42,7 +42,8 @@ new_indicator_ts <- function(x, num_species, num_years, species_names, - coord_range) { + coord_range, + raw_cube_occurrences) { # check that x is a tibble and all necessary columns are present stopifnot(tibble::is_tibble(x), all(c("year", @@ -63,7 +64,8 @@ new_indicator_ts <- function(x, num_families = num_families, coord_range = coord_range, species_names = species_names, - data = x + data = x, + raw_data = raw_cube_occurrences ), class = c("indicator_ts", div_type), indicator_id = id, diff --git a/R/prepare_indicator_bootstrap.R b/R/prepare_indicator_bootstrap.R new file mode 100644 index 00000000..70a35b07 --- /dev/null +++ b/R/prepare_indicator_bootstrap.R @@ -0,0 +1,257 @@ +#' Prepare bootstrap and confidence interval parameters for an indicator +#' +#' This function prepares the argument lists for [dubicube::bootstrap_cube()] +#' and [dubicube::calculate_bootstrap_ci()] based on the indicator definition. +#' Behaviour (grouping, bootstrap method, transformations, bias correction) +#' is fully controlled by a rule book keyed on `indicator$div_type`. +#' +#' No computation is performed; the function only returns parameter lists. +#' +#' @param indicator A list describing the indicator. Must contain at least +#' \code{div_type} and \code{raw_data}. +#' @param num_bootstrap Integer. Number of bootstrap samples. +#' @param ci_type Character. Type of confidence interval. +#' @param seed Optional integer. Random seed for bootstrapping. (Default: 123) +#' @param trans Optional transformation function applied to the statistic. +#' Will be overridden if specified by the indicator rule book. +#' @param inv_trans Optional inverse transformation function. +#' Will be overridden if specified by the indicator rule book. +#' @param confidence_level Numeric between 0 and 1. Confidence level. +#' @param boot_args Named list of additional arguments passed to +#' \code{bootstrap_cube()}, overriding defaults. +#' @param ci_args Named list of additional arguments passed to +#' \code{calculate_bootstrap_ci()}, overriding defaults. +#' +#' @return A named list with two elements: +#' \describe{ +#' \item{bootstrap_params}{List of parameters for \code{bootstrap_cube()}} +#' \item{ci_params}{List of parameters for \code{calculate_bootstrap_ci()}} +#' } +#' +#' @importFrom utils modifyList +#' +#' @examples +#' \dontrun{ +#' params <- prepare_indicator_bootstrap( +#' indicator = indicator, +#' num_bootstrap = 1000, +#' ci_type = "bca" +#' ) +#' } +#' +#' @export +prepare_indicator_bootstrap <- function( + indicator, + num_bootstrap, + ci_type, + seed = 123, + trans = function(t) t, + inv_trans = function(t) t, + confidence_level = 0.95, + boot_args = list(), + ci_args = list()) { + + # First, adjust logit to avoid failure when evenness is zero. + logit = function(p) { + p <- pmax(pmin(p, 1 - 1e-6), 1e-6) # Clamp values between ~0 and ~1 + log(p / (1 - p)) + } + + ## ------------------------------------------------------------------ + ## Indicator rule book + ## ------------------------------------------------------------------ + ## Each entry controls: + ## - whether group-specific bootstrapping is used + ## - which transformation (if any) is applied + ## - whether bias correction is disabled (no_bias) + + indicator_rules <- list( + total_occ = list( + group_specific = TRUE, + trans = trans, + inv_trans = inv_trans, + no_bias = TRUE + ), + pielou_evenness = list( + group_specific = FALSE, + trans = logit, + inv_trans = inv_logit, + no_bias = FALSE + ), + williams_evenness = list( + group_specific = FALSE, + trans = logit, + inv_trans = inv_logit, + no_bias = FALSE + ), + occ_density = list( + group_specific = FALSE, + trans = trans, + inv_trans = inv_trans, + no_bias = FALSE + ), + ab_rarity = list( + group_specific = FALSE, + trans = trans, + inv_trans = inv_trans, + no_bias = FALSE + ), + area_rarity = list( + group_specific = FALSE, + trans = trans, + inv_trans = inv_trans, + no_bias = FALSE + ), + newness = list( + group_specific = FALSE, + trans = trans, + inv_trans = inv_trans, + no_bias = FALSE + ), + spec_occ = list( + group_specific = TRUE, + trans = trans, + inv_trans = inv_trans, + no_bias = FALSE + ), + spec_range = list( + group_specific = TRUE, + trans = trans, + inv_trans = inv_trans, + no_bias = FALSE + ) + ) + + rule <- indicator_rules[[indicator$div_type]] + if (is.null(rule)) { + stop("Unknown indicator$div_type: ", indicator$div_type) + } + ## Allow user-supplied arguments to override defaults + rule <- utils::modifyList(rule, boot_args) + rule <- utils::modifyList(rule, ci_args) + + ## ------------------------------------------------------------------ + ## Determine grouping variables + ## ------------------------------------------------------------------ + ## Species-level indicators are grouped by year and taxon; + ## all others only by year. + group_cols <- if (indicator$div_type %in% c("spec_occ", "spec_range")) { + c("year", "taxonKey") + } else { + "year" + } + + ## ------------------------------------------------------------------ + ## Handle multiple grouping variables for dubicube compatibility + ## ------------------------------------------------------------------ + if (length(group_cols) > 1) { + # Check if all grouping columns exist in the data + missing_cols <- setdiff(group_cols, names(indicator$raw_data)) + if (length(missing_cols) > 0) { + stop("Missing required grouping columns: ", paste(missing_cols, collapse = ", ")) + } + + # Use tidyr::unite for a safe, fast composite key + # 'remove = FALSE' ensures we keep the original year/taxonKey columns + # for the internal calc_ts call + indicator$raw_data <- indicator$raw_data %>% + tidyr::unite("group_key", dplyr::all_of(group_cols), + sep = "_", remove = FALSE) + + group_cols <- "group_key" + } + + ## ------------------------------------------------------------------ + ## Determine bootstrap method + ## ------------------------------------------------------------------ + boot_method <- if (rule$group_specific) { + "group_specific" + } else { + "whole_cube" + } + + ## Whole-cube bootstrapping requires the "boot_" prefix + if (length(group_cols) == 1) { + boot_method <- paste0("boot_", boot_method) + } + + ## ------------------------------------------------------------------ + ## Prepare bootstrap_cube parameters + ## ------------------------------------------------------------------ + + # Apply indicator class to data for S3 method dispatch in calc_ts + data_for_bootstrap <- indicator$raw_data + + # Debug: Check what we're working with + if (getOption("b3gbi.debug", FALSE)) { + message("DEBUG: indicator$div_type = ", indicator$div_type) + message("DEBUG: class(data_for_bootstrap) before = ", paste(class(data_for_bootstrap), collapse = ", ")) + } + + class(data_for_bootstrap) <- c(indicator$div_type, class(data_for_bootstrap)) + + # Ensure the indicator class is first for proper S3 dispatch + current_classes <- class(data_for_bootstrap) + if (current_classes[1] != indicator$div_type) { + # Move indicator class to front + class(data_for_bootstrap) <- c( + indicator$div_type, + setdiff(current_classes, indicator$div_type) + ) + } + + if (getOption("b3gbi.debug", FALSE)) { + message("DEBUG: class(data_for_bootstrap) after = ", paste(class(data_for_bootstrap), collapse = ", ")) + message("DEBUG: method exists for ", indicator$div_type, ": ", + exists(paste0("calc_ts.", indicator$div_type), mode = "function")) + } + + bootstrap_params <- list( + data_cube = data_for_bootstrap, + fun = calc_ts, + grouping_var = group_cols, + samples = num_bootstrap, + processed_cube = FALSE, + method = boot_method, + seed = seed + ) + + ## Allow user-supplied arguments to override defaults + bootstrap_params <- utils::modifyList(bootstrap_params, boot_args) + + ## ------------------------------------------------------------------ + ## Prepare calculate_bootstrap_ci parameters + ## ------------------------------------------------------------------ + if ("bca" %in% ci_type & !grepl("^boot", boot_method)) { + ci_params <- list( + grouping_var = group_cols, + type = ci_type, + h = rule$trans, + hinv = rule$inv_trans, + conf = confidence_level, + data_cube = indicator$raw_data, + fun = calc_ts, + no_bias = rule$no_bias + ) + } else { + ci_params <- list( + grouping_var = group_cols, + type = ci_type, + h = rule$trans, + hinv = rule$inv_trans, + conf = confidence_level, + no_bias = rule$no_bias + ) + } + + ## Allow user-supplied arguments to override defaults + ci_params <- utils::modifyList(ci_params, ci_args) + + ## ------------------------------------------------------------------ + ## Return prepared parameters + ## ------------------------------------------------------------------ + list( + bootstrap_params = bootstrap_params, + ci_params = ci_params + ) +} diff --git a/R/sanitize_geometries.R b/R/sanitize_geometries.R index af2909ae..21a7188f 100644 --- a/R/sanitize_geometries.R +++ b/R/sanitize_geometries.R @@ -28,7 +28,9 @@ sanitize_geometries <- function(sf_object, buffer_dist_meters = 10) { # 2. Dynamically choose a suitable projected CRS (e.g., local UTM zone) # We use the centroid to find a suitable local projection. - center_point <- sf::st_centroid(sf::st_geometry(sf_object), of_largest_polygon = TRUE) + center_point <- suppressWarnings( + sf::st_centroid(sf::st_geometry(sf_object), of_largest_polygon = TRUE) + ) target_crs <- sf::st_crs(center_point) # Fallback check (st_crs(point) can sometimes return 4326 if unprojected) diff --git a/R/utils.R b/R/utils.R index 5239eb5b..9151b094 100644 --- a/R/utils.R +++ b/R/utils.R @@ -702,8 +702,18 @@ my_estimateD <- function(...) { iNEXT::estimateD(...) } +# Transformation functions +# Logit transformation +logit <- function(p) { + log(p / (1 - p)) +} + +# Inverse logit transformation +inv_logit <- function(l) { + exp(l) / (1 + exp(l)) +} # Wrapper of function inherits. This is for mocking in testthat tests. #' @noRd my_inherits <- function(x, what) { inherits(x, what) -} \ No newline at end of file +} diff --git a/codemeta.json b/codemeta.json index 4b2d6d3b..eba191a3 100644 --- a/codemeta.json +++ b/codemeta.json @@ -8,13 +8,13 @@ "codeRepository": "https://github.com/b-cubed-eu/b3gbi", "issueTracker": "https://github.com/b-cubed-eu/b3gbi/issues", "license": "https://spdx.org/licenses/MIT", - "version": "0.8.13", + "version": "0.9.0", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, - "runtimePlatform": "R version 4.5.1 (2025-06-13 ucrt)", + "runtimePlatform": "R version 4.3.1 (2023-06-16 ucrt)", "author": [ { "@type": "Person", @@ -148,12 +148,6 @@ "identifier": "taxize", "name": "taxize", "version": ">= 0.9.99", - "provider": { - "@id": "https://cran.r-project.org", - "@type": "Organization", - "name": "Comprehensive R Archive Network (CRAN)", - "url": "https://cran.r-project.org" - }, "sameAs": "https://github.com/ropensci/taxize" }, { @@ -202,6 +196,12 @@ "sameAs": "https://CRAN.R-project.org/package=dplyr" }, "4": { + "@type": "SoftwareApplication", + "identifier": "dubicube", + "name": "dubicube", + "sameAs": "https://github.com/b-cubed-eu/dubicube" + }, + "5": { "@type": "SoftwareApplication", "identifier": "ggplot2", "name": "ggplot2", @@ -213,7 +213,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=ggplot2" }, - "5": { + "6": { "@type": "SoftwareApplication", "identifier": "iNEXT", "name": "iNEXT", @@ -225,7 +225,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=iNEXT" }, - "6": { + "7": { "@type": "SoftwareApplication", "identifier": "labeling", "name": "labeling", @@ -237,7 +237,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=labeling" }, - "7": { + "8": { "@type": "SoftwareApplication", "identifier": "magrittr", "name": "magrittr", @@ -249,14 +249,14 @@ }, "sameAs": "https://CRAN.R-project.org/package=magrittr" }, - "8": { + "9": { "@type": "SoftwareApplication", "identifier": "mgrs", "name": "mgrs", "version": ">= 0.2.4", "sameAs": "https://github.com/hrbrmstr/mgrs" }, - "9": { + "10": { "@type": "SoftwareApplication", "identifier": "patchwork", "name": "patchwork", @@ -268,7 +268,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=patchwork" }, - "10": { + "11": { "@type": "SoftwareApplication", "identifier": "permute", "name": "permute", @@ -280,7 +280,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=permute" }, - "11": { + "12": { "@type": "SoftwareApplication", "identifier": "purrr", "name": "purrr", @@ -292,7 +292,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=purrr" }, - "12": { + "13": { "@type": "SoftwareApplication", "identifier": "readr", "name": "readr", @@ -304,7 +304,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=readr" }, - "13": { + "14": { "@type": "SoftwareApplication", "identifier": "rlang", "name": "rlang", @@ -316,7 +316,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=rlang" }, - "14": { + "15": { "@type": "SoftwareApplication", "identifier": "rnaturalearth", "name": "rnaturalearth", @@ -328,7 +328,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=rnaturalearth" }, - "15": { + "16": { "@type": "SoftwareApplication", "identifier": "scales", "name": "scales", @@ -340,7 +340,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=scales" }, - "16": { + "17": { "@type": "SoftwareApplication", "identifier": "sf", "name": "sf", @@ -352,7 +352,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=sf" }, - "17": { + "18": { "@type": "SoftwareApplication", "identifier": "stringr", "name": "stringr", @@ -364,7 +364,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=stringr" }, - "18": { + "19": { "@type": "SoftwareApplication", "identifier": "tibble", "name": "tibble", @@ -376,7 +376,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=tibble" }, - "19": { + "20": { "@type": "SoftwareApplication", "identifier": "tidyr", "name": "tidyr", @@ -388,7 +388,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=tidyr" }, - "20": { + "21": { "@type": "SoftwareApplication", "identifier": "units", "name": "units", @@ -406,6 +406,5 @@ "releaseNotes": "https://github.com/b-cubed-eu/b3gbi/blob/main/NEWS.md", "readme": "https://github.com/b-cubed-eu/b3gbi/blob/main/README.md", "contIntegration": ["https://github.com/b-cubed-eu/b3gbi/actions/workflows/R-CMD-check.yaml", "https://app.codecov.io/gh/b-cubed-eu/b3gbi/"], - "developmentStatus": "https://www.repostatus.org/#wip", - "keywords": ["biodiversity-indicators", "data-cubes"] + "developmentStatus": "https://www.repostatus.org/#wip" } diff --git a/conductor/tracks/fix_confidence_interval_logic_20260108/plan.md b/conductor/tracks/fix_confidence_interval_logic_20260108/plan.md new file mode 100644 index 00000000..0d24e3df Binary files /dev/null and b/conductor/tracks/fix_confidence_interval_logic_20260108/plan.md differ diff --git a/man/add_ci.Rd b/man/add_ci.Rd new file mode 100644 index 00000000..800ef4c7 --- /dev/null +++ b/man/add_ci.Rd @@ -0,0 +1,158 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_ci.R +\name{add_ci} +\alias{add_ci} +\title{Add Confidence Intervals to an Indicator Object} +\usage{ +add_ci( + indicator, + num_bootstrap = 1000, + bootstrap_level = c("cube", "indicator"), + ci_type = c("perc", "bca", "norm", "basic", "none"), + trans = function(t) t, + inv_trans = function(t) t, + confidence_level = 0.95, + overwrite = TRUE, + boot_args = list(), + ci_args = list(), + ... +) +} +\arguments{ +\item{indicator}{An object of class \code{indicator_ts} to which confidence +intervals should be added.} + +\item{num_bootstrap}{(Optional) Number of bootstrap replicates to perform. +(Default: 1000)} + +\item{bootstrap_level}{(Optional) Level at which to perform bootstrapping: +\itemize{ +\item \code{cube} (default): Bootstrapping is done by resampling the +occurrence records in the cube. This is mathematically more robust as it +captures the underlying sampling uncertainty. +\item \code{indicator}: Bootstrapping is done by resampling indicator +values. This is faster for large cubes but less robust. +}} + +\item{ci_type}{(Optional) Type of bootstrap confidence intervals to +calculate. (Default: \code{"norm"}). Supported options are: +\itemize{ +\item \code{perc}: Percentile intervals. +\item \code{bca}: Bias-corrected and accelerated intervals. +\item \code{norm}: Normal approximation intervals. +\item \code{basic}: Basic bootstrap intervals. +\item \code{none}: No confidence intervals calculated. +}} + +\item{trans}{(Optional) A function for transforming the indicator values +before calculating confidence intervals (e.g., \code{log}). +(Default: identity function)} + +\item{inv_trans}{(Optional) The inverse of the transformation function +\code{trans} (e.g., \code{exp}). Used to back-transform the intervals +to the original scale. (Default: identity function)} + +\item{confidence_level}{(Optional) The confidence level for the calculated +intervals (e.g., 0.95 for 95\% CIs). (Default: 0.95)} + +\item{overwrite}{(Optional) Logical. If the indicator already contains +confidence intervals (\code{ll} and \code{ul} columns), should they +be replaced? (Default: TRUE)} + +\item{boot_args}{(Optional) Named list of additional arguments passed to +\code{dubicube::bootstrap_cube()}. (Default: \code{list()})} + +\item{ci_args}{(Optional) Named list of additional arguments passed to +\code{dubicube::calculate_bootstrap_ci()}. (Default: \code{list()})} + +\item{...}{(Optional) Additional arguments passed to calc_ci().} +} +\value{ +An updated object of class \code{indicator_ts} containing the +original data with the following additional columns: +\itemize{ +\item \code{ll}: Lower limit of the confidence interval. +\item \code{ul}: Upper limit of the confidence interval. +\item \code{est_boot}: The bootstrap estimate of the indicator value. +\item \code{se_boot}: The bootstrap standard error. +\item \code{bias_boot}: The bootstrap estimate of bias. +\item \code{int_type}: The type of interval calculated (e.g., 'perc'). +\item \code{conf}: The confidence level used. +} +} +\description{ +This function calculates bootstrap confidence intervals for an existing +\code{indicator_ts} object. It supports both cube-level bootstrapping (resampling +occurrence records) and indicator-level bootstrapping (resampling +calculated values), allowing for advanced transformations during the +CI calculation process. +} +\details{ +The function acts as a bridge to the \pkg{dubicube} package to calculate +bootstrap confidence intervals. +\subsection{Indicator-specific defaults}{ + +Depending on the indicator, default settings are internally applied when +calculating bootstrap confidence intervals. These defaults control whether +bootstrapping is performed per group, which transformation is used, and +whether bias correction is disabled. + +The following defaults are used unless explicitly overridden via +\code{trans}, \code{inv_trans}, \code{boot_args}, or \code{ci_args}: +\itemize{ +\item \strong{\code{total_occ}} +\itemize{ +\item Group-specific bootstrapping: \strong{yes} +\item Transformation: \strong{none (identity)} +\item Bias correction: \strong{disabled} (\code{no_bias = TRUE}) +} +\item \strong{\code{spec_occ}, \code{spec_range}} +\itemize{ +\item Group-specific bootstrapping: \strong{yes} +\item Transformation: \strong{none (identity)} +\item Bias correction: enabled +} +\item \strong{\code{pielou_evenness}, \code{williams_evenness}} +\itemize{ +\item Group-specific bootstrapping: \strong{no} +\item Transformation: \strong{logit} +\item Inverse transformation: \strong{inverse logit} +\item Bias correction: enabled +} +\item \strong{\code{occ_density}, \code{ab_rarity}, \code{area_rarity}, \code{newness}} +\itemize{ +\item Group-specific bootstrapping: \strong{no} +\item Transformation: \strong{none (identity)} +\item Bias correction: enabled +} +} + +Group-specific bootstrapping means that resampling is performed within each +group (e.g., species or year), which is required for indicators that are +inherently group-based. This in contrast to whole-cube bootstrapping +where resampling is performed across the whole dataset; applicable for +indicators that combine information across groups + +Transformations are applied prior to confidence interval calculation and +inverted afterwards to return intervals on the original scale. +} + +\subsection{Indicators outside scope of this function}{ + +For certain indicators, confidence intervals cannot be +added post-hoc as they are calculated internally via the \code{iNext} package, or +if they are not relevant. In such cases, a warning is issued and the +original object is returned. The following indicators cannot have confidence +intervals added via \code{add_ci()}: +\itemize{ +\item \code{hill0}, \code{hill1}, \code{hill2} (calculated internally) +\item \code{obs_richness} +\item \code{cum_richness} +\item \code{occ_turnover} +\item \code{tax_distinct} +} +} +} +\seealso{ +\code{\link[dubicube:bootstrap_cube]{dubicube::bootstrap_cube()}}, \code{\link[dubicube:calculate_bootstrap_ci]{dubicube::calculate_bootstrap_ci()}} +} diff --git a/man/area_rarity_map.Rd b/man/area_rarity_map.Rd index cfdb798b..a64f763d 100644 --- a/man/area_rarity_map.Rd +++ b/man/area_rarity_map.Rd @@ -21,9 +21,6 @@ ab_rarity_ts(data, ...) \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -59,8 +56,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -190,5 +185,5 @@ Rabinowitz, D. (1981). Seven forms of rarity. *Biological aspects of rare * \emph{plant conservation}. } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/man/b3gbi-package.Rd b/man/b3gbi-package.Rd index e5a944ec..9408e38c 100644 --- a/man/b3gbi-package.Rd +++ b/man/b3gbi-package.Rd @@ -8,7 +8,7 @@ \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} -Calculate general biodiversity indicators from GBIF data cubes. Includes many common indicators such as species richness and evenness, which can be calculated over time (trends) or space (maps). +Calculate general biodiversity indicators from GBIF data cubes. Includes many common indicators such as species richness, evenness, and completeness, which can be calculated over time (trends) or space (maps). Now also supports retaining original cell IDs. } \seealso{ Useful links: diff --git a/man/calc_ci.Rd b/man/calc_ci.Rd index 00273c5f..d1c284f6 100644 --- a/man/calc_ci.Rd +++ b/man/calc_ci.Rd @@ -18,23 +18,23 @@ calc_ci(x, indicator, ...) \method{calc_ci}{default}(x, indicator, ...) -\method{calc_ci}{total_occ}(x, indicator, num_bootstrap = 1000, ci_type = ci_type, ...) +\method{calc_ci}{total_occ}(x, indicator, num_bootstrap = 1000, ci_type = "norm", ...) -\method{calc_ci}{occ_density}(x, indicator, num_bootstrap = 1000, ci_type = ci_type, ...) +\method{calc_ci}{occ_density}(x, indicator, num_bootstrap = 1000, ci_type = "norm", ...) -\method{calc_ci}{newness}(x, indicator, num_bootstrap = 1000, ci_type = ci_type, ...) +\method{calc_ci}{newness}(x, indicator, num_bootstrap = 1000, ci_type = "norm", ...) \method{calc_ci}{williams_evenness}(x, ...) \method{calc_ci}{pielou_evenness}(x, ...) -\method{calc_ci}{ab_rarity}(x, indicator, num_bootstrap = 1000, ci_type = ci_type, ...) +\method{calc_ci}{ab_rarity}(x, indicator, num_bootstrap = 1000, ci_type = "norm", ...) -\method{calc_ci}{area_rarity}(x, indicator, num_bootstrap = 1000, ci_type = ci_type, ...) +\method{calc_ci}{area_rarity}(x, indicator, num_bootstrap = 1000, ci_type = "norm", ...) -\method{calc_ci}{spec_occ}(x, indicator, num_bootstrap = 1000, ci_type = ci_type, ...) +\method{calc_ci}{spec_occ}(x, indicator, num_bootstrap = 1000, ci_type = "norm", ...) -\method{calc_ci}{spec_range}(x, indicator, num_bootstrap = 1000, ci_type = ci_type, ...) +\method{calc_ci}{spec_range}(x, indicator, num_bootstrap = 1000, ci_type = "norm", ...) } \arguments{ \item{x}{A data cube object} diff --git a/man/compute_indicator_workflow.Rd b/man/compute_indicator_workflow.Rd index f0542393..a669f050 100644 --- a/man/compute_indicator_workflow.Rd +++ b/man/compute_indicator_workflow.Rd @@ -8,7 +8,6 @@ compute_indicator_workflow( data, type, dim_type = c("map", "ts"), - ci_type = c("norm", "basic", "perc", "bca", "none"), cell_size = "grid", level = c("cube", "continent", "country", "world", "sovereignty", "geounit"), region = "Europe", @@ -19,7 +18,6 @@ compute_indicator_workflow( last_year = NULL, spherical_geometry = TRUE, make_valid = FALSE, - num_bootstrap = 100, shapefile_path = NULL, shapefile_crs = NULL, invert = FALSE, @@ -57,10 +55,6 @@ rarefaction). \item{dim_type}{(Optional) Dimension to calculate indicator over time: 'ts', or space: 'map'. (Default: 'map')} -\item{ci_type}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} - \item{cell_size}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -106,9 +100,6 @@ solve specific issues. (Default is TRUE).} after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} -\item{num_bootstrap}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} - \item{shapefile_path}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} diff --git a/man/cum_richness_ts.Rd b/man/cum_richness_ts.Rd index f6eb3620..94990424 100644 --- a/man/cum_richness_ts.Rd +++ b/man/cum_richness_ts.Rd @@ -12,9 +12,6 @@ cum_richness_ts(data, ...) \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -50,8 +47,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -129,5 +124,5 @@ plot(cr_ts) } } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/man/get_bootstrap_ci.Rd b/man/get_bootstrap_ci.Rd index ec203ebb..dc82119f 100644 --- a/man/get_bootstrap_ci.Rd +++ b/man/get_bootstrap_ci.Rd @@ -19,8 +19,10 @@ function.} \value{ The returned value is a dataframe containing the time point, the type of interval (\code{int_type}), the lower limit of the confidence -interval (\code{ll}), the upper limit of the confidence interval (\code{ul}), and the -confidence level of the intervals (\code{conf_level}). +interval (\code{ll}), the upper limit of the confidence interval (\code{ul}), the +bootstrap estimate (\code{est_boot}), the bootstrap standard error (\code{se_boot}), +the bootstrap bias (\code{bias_boot}), and the confidence level of the +intervals (\code{conf_level}). } \description{ This function calculates confidence intervals for a list of objects of class diff --git a/man/hill0_map.Rd b/man/hill0_map.Rd index 4782c884..ab50e4cf 100644 --- a/man/hill0_map.Rd +++ b/man/hill0_map.Rd @@ -70,9 +70,6 @@ certain of how your data is structured. Default is FALSE.} \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -108,8 +105,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -251,5 +246,5 @@ rarefaction and extrapolation of species diversity (Hill numbers). \emph{Methods in Ecology and Evolution}, \emph{7}(12), 1451-1456. } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/man/newness_map.Rd b/man/newness_map.Rd index 3e276897..d0a0529c 100644 --- a/man/newness_map.Rd +++ b/man/newness_map.Rd @@ -15,9 +15,6 @@ newness_ts(data, ...) \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -53,8 +50,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -126,5 +121,5 @@ plot(n_ts) } } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/man/obs_richness_map.Rd b/man/obs_richness_map.Rd index 0416b3a4..7409d79e 100644 --- a/man/obs_richness_map.Rd +++ b/man/obs_richness_map.Rd @@ -15,9 +15,6 @@ obs_richness_ts(data, ...) \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -53,8 +50,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -154,5 +149,5 @@ Consequences for conservation and monitoring. \emph{Journal of Applied Ecology}, \emph{55}(1), 169-184. } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/man/occ_density_map.Rd b/man/occ_density_map.Rd index 97a928e6..620bd0ab 100644 --- a/man/occ_density_map.Rd +++ b/man/occ_density_map.Rd @@ -15,9 +15,6 @@ occ_density_ts(data, ...) \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -53,8 +50,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -121,5 +116,5 @@ plot(od_ts) } } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/man/occ_turnover_ts.Rd b/man/occ_turnover_ts.Rd index 76534930..24b5434c 100644 --- a/man/occ_turnover_ts.Rd +++ b/man/occ_turnover_ts.Rd @@ -12,9 +12,6 @@ occ_turnover_ts(data, ...) \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -50,8 +47,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -131,5 +126,5 @@ des Alpes et du Jura. \emph{Bulletin de la Société Vaudoise des} \emph{Science Naturelles}, \emph{37}(142), 547-579. } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/man/pielou_evenness_map.Rd b/man/pielou_evenness_map.Rd index 0bf4cfc4..75eea6a9 100644 --- a/man/pielou_evenness_map.Rd +++ b/man/pielou_evenness_map.Rd @@ -21,9 +21,6 @@ williams_evenness_ts(data, ...) \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -59,8 +56,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -203,5 +198,5 @@ Kvålseth, T. O. (2015). Evenness indices once again: critical analysis of properties. \emph{SpringerPlus}, \emph{4}, 1-12. } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/man/prepare_indicator_bootstrap.Rd b/man/prepare_indicator_bootstrap.Rd new file mode 100644 index 00000000..b2d01fbb --- /dev/null +++ b/man/prepare_indicator_bootstrap.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prepare_indicator_bootstrap.R +\name{prepare_indicator_bootstrap} +\alias{prepare_indicator_bootstrap} +\title{Prepare bootstrap and confidence interval parameters for an indicator} +\usage{ +prepare_indicator_bootstrap( + indicator, + num_bootstrap, + ci_type, + trans = function(t) t, + inv_trans = function(t) t, + confidence_level = 0.95, + seed = NULL, + boot_args = list(), + ci_args = list() +) +} +\arguments{ +\item{indicator}{A list describing the indicator. Must contain at least +\code{div_type} and \code{raw_data}.} + +\item{num_bootstrap}{Integer. Number of bootstrap samples.} + +\item{ci_type}{Character. Type of confidence interval.} + +\item{trans}{Optional transformation function applied to the statistic. +Will be overridden if specified by the indicator rule book.} + +\item{inv_trans}{Optional inverse transformation function. +Will be overridden if specified by the indicator rule book.} + +\item{confidence_level}{Numeric between 0 and 1. Confidence level.} + +\item{seed}{Integer. Random seed for bootstrapping.} + +\item{boot_args}{Named list of additional arguments passed to +\code{bootstrap_cube()}, overriding defaults.} + +\item{ci_args}{Named list of additional arguments passed to +\code{calculate_bootstrap_ci()}, overriding defaults.} +} +\value{ +A named list with two elements: +\describe{ +\item{bootstrap_params}{List of parameters for \code{bootstrap_cube()}} +\item{ci_params}{List of parameters for \code{calculate_bootstrap_ci()}} +} +} +\description{ +This function prepares the argument lists for \code{\link[dubicube:bootstrap_cube]{dubicube::bootstrap_cube()}} +and \code{\link[dubicube:calculate_bootstrap_ci]{dubicube::calculate_bootstrap_ci()}} based on the indicator definition. +Behaviour (grouping, bootstrap method, transformations, bias correction) +is fully controlled by a rule book keyed on \code{indicator$div_type}. +} +\details{ +No computation is performed; the function only returns parameter lists. +} +\examples{ +\dontrun{ +params <- prepare_indicator_bootstrap( + indicator = indicator, + num_bootstrap = 1000, + ci_type = "bca" +) +} + +} diff --git a/man/spec_occ_map.Rd b/man/spec_occ_map.Rd index 8713820d..dec4b345 100644 --- a/man/spec_occ_map.Rd +++ b/man/spec_occ_map.Rd @@ -15,9 +15,6 @@ spec_occ_ts(data, ...) \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -53,8 +50,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -125,5 +120,5 @@ plot(so_ts, c(2435767, 2434793)) } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/man/spec_range_map.Rd b/man/spec_range_map.Rd index f40de86b..b3a359cd 100644 --- a/man/spec_range_map.Rd +++ b/man/spec_range_map.Rd @@ -15,9 +15,6 @@ spec_range_ts(data, ...) \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -53,8 +50,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -116,5 +111,5 @@ plot(sr_ts, c(2440728, 4265185)) } } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/man/tax_distinct_map.Rd b/man/tax_distinct_map.Rd index 5aaf308e..ef9e315e 100644 --- a/man/tax_distinct_map.Rd +++ b/man/tax_distinct_map.Rd @@ -19,9 +19,6 @@ Use NA for interactive mode.)} \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -57,8 +54,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -155,5 +150,5 @@ of biodiversity: weighting of step lengths between hierarchical levels. Marine Ecology Progress Series, 184, 21-29. } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/man/total_occ_map.Rd b/man/total_occ_map.Rd index 8fe5e6fb..40def976 100644 --- a/man/total_occ_map.Rd +++ b/man/total_occ_map.Rd @@ -15,9 +15,6 @@ total_occ_ts(data, ...) \item{...}{ Arguments passed on to \code{\link[=compute_indicator_workflow]{compute_indicator_workflow}} \describe{ - \item{\code{ci_type}}{(Optional) Type of bootstrap confidence intervals to -calculate. (Default: "norm"). Select "none" to avoid calculating bootstrap -CIs.} \item{\code{cell_size}}{(Optional) Length of grid cell sides, in km or degrees. If set to "grid" (default), this will use the existing grid size of your cube. If set to "auto", this will be automatically determined according to @@ -53,8 +50,6 @@ solve specific issues. (Default is TRUE).} \item{\code{make_valid}}{(Optional) Calls st_make_valid() from the sf package after creating the grid. Increases processing time but may help if you are getting polygon errors. (Default is FALSE).} - \item{\code{num_bootstrap}}{(Optional) Set the number of bootstraps to calculate for -generating confidence intervals. (Default: 100)} \item{\code{shapefile_path}}{(optional) Path of an external shapefile to merge into the workflow. For example, if you want to calculate your indicator particular features such as protected areas or wetlands.} @@ -124,5 +119,5 @@ plot(to_ts) } } \seealso{ -compute_indicator_workflow +\link{compute_indicator_workflow}, \link{add_ci} } diff --git a/tests/testthat/test-add_ci.R b/tests/testthat/test-add_ci.R new file mode 100644 index 00000000..b3dab3b8 --- /dev/null +++ b/tests/testthat/test-add_ci.R @@ -0,0 +1,209 @@ +library(testthat) +library(b3gbi) + +test_that("add_ci handles input validation", { + # Invalid data class + expect_error( + add_ci(indicator = "invalid"), + "indicator must be an indicator_ts object." + ) +}) + +test_that("add_ci returns original object with warning for excluded indicators", { + # Mock an indicator_ts object for obs_richness + mock_ts <- list( + data = data.frame(year = 2000, diversity_val = 10), + div_type = "obs_richness" + ) + class(mock_ts) <- c("indicator_ts", "obs_richness") + + expect_warning( + result <- add_ci(mock_ts), + "Cannot calculate sensible confidence intervals for obs_richness" + ) + expect_equal(result, mock_ts) + + # Mock for Hill numbers + mock_hill <- list( + data = data.frame(year = 2000, diversity_val = 10), + div_type = "hill0" + ) + class(mock_hill) <- c("indicator_ts", "hill0") + + expect_warning( + result_hill <- add_ci(mock_hill), + "Confidence intervals cannot calculated for hill0 as they are handled by the iNext package" + ) + expect_equal(result_hill, mock_hill) +}) + +test_that("add_ci calls dubicube for cube-level bootstrapping", { + skip_if_not_installed("dubicube") + + # Mock raw data + mock_raw_data <- data.frame( + year = c(2000, 2000), + scientificName = c("Sp A", "Sp B"), + obs = c(1, 1), + cellCode = c("C1", "C2") + ) + + # Mock indicator_ts object + mock_ts <- list( + data = data.frame(year = 2000, diversity_val = 2), + raw_data = mock_raw_data, + div_type = "total_occ" + ) + class(mock_ts) <- c("indicator_ts", "total_occ") + + # Mock dubicube functions + mock_bootstrap_cube <- function(...) { + data.frame(year = 2000, sample_id = 1, diversity_val = 2) + } + + mock_calculate_bootstrap_ci <- function(...) { + data.frame(year = 2000, + ll = 1, + ul = 3, + est_boot = 2, + se_boot = 0.1, + bias_boot = 0, + int_type = "norm", + conf = 0.95, + est_original = 2) + } + + testthat::with_mocked_bindings( + bootstrap_cube = mock_bootstrap_cube, + calculate_bootstrap_ci = mock_calculate_bootstrap_ci, + .package = "dubicube", + { + result <- add_ci(mock_ts, bootstrap_level = "cube") + + expect_true("ll" %in% names(result$data)) + expect_true("ul" %in% names(result$data)) + expect_equal(result$data$ll, 1) + expect_equal(result$data$ul, 3) + } + ) +}) + +test_that("add_ci determines boot_method correctly for pielou_evenness", { + skip_if_not_installed("dubicube") + + # 1. Setup Mock Data + mock_raw_data <- data.frame( + year = c(2000, 2000), + scientificName = c("Sp A", "Sp B"), + obs = c(1, 1), + cellCode = c("C1", "C2") + ) + + # Mock indicator_ts object for pielou_evenness + # Note: Pielou evenness is a 'whole_cube' indicator in the rule book + mock_ts <- list( + data = data.frame(year = 2000, diversity_val = 0.8), + raw_data = mock_raw_data, + div_type = "pielou_evenness" + ) + class(mock_ts) <- c("indicator_ts", "pielou_evenness") + + # 2. Define Mock Functions + # Capture and inspect arguments passed from b3gbi to dubicube + mock_bootstrap_cube <- function(...) { + args <- list(...) + + # VERIFY: 'method' should now be present and correctly set to 'boot_whole_cube' + # based on the logic in prepare_indicator_bootstrap + expect_true("method" %in% names(args), + info = "The 'method' argument should be passed to bootstrap_cube") + + expect_equal(args$method, "boot_whole_cube", + info = "Pielou evenness should use 'boot_whole_cube' method") + + # VERIFY: 'seed' is passed correctly (if provided in add_ci) + expect_true("seed" %in% names(args), + info = "The 'seed' argument should be passed to bootstrap_cube") + + # Return a dummy bootstrap result + data.frame(year = 2000, sample_id = 1, diversity_val = 0.8) + } + + mock_calculate_bootstrap_ci <- function(...) { + # Return a dummy CI result + data.frame(year = 2000, + ll = 0.7, + ul = 0.9, + est_boot = 0.8, + se_boot = 0.05, + bias_boot = 0, + int_type = "norm", + conf = 0.95, + est_original = 0.8) + } + + # 3. Execute Test with Mocked Bindings + testthat::with_mocked_bindings( + bootstrap_cube = mock_bootstrap_cube, + calculate_bootstrap_ci = mock_calculate_bootstrap_ci, + .package = "dubicube", + { + # We provide a seed here to test the new functionality + add_ci(mock_ts, bootstrap_level = "cube", seed = 123) + } + ) +}) + +test_that("add_ci respects boot_args and ci_args", { + skip_if_not_installed("dubicube") + + # Mock raw data + mock_raw_data <- data.frame( + year = c(2000, 2000), + scientificName = c("Sp A", "Sp B"), + obs = c(1, 1), + cellCode = c("C1", "C2") + ) + + # Mock indicator_ts object + mock_ts <- list( + data = data.frame(year = 2000, diversity_val = 2), + raw_data = mock_raw_data, + div_type = "total_occ" + ) + class(mock_ts) <- c("indicator_ts", "total_occ") + + captured_seed <- NULL + mock_bootstrap_cube <- function(..., seed) { + captured_seed <<- seed + data.frame(year = 2000, sample_id = 1, diversity_val = 2) + } + + captured_type <- NULL + mock_calculate_bootstrap_ci <- function(..., type) { + captured_type <<- type + data.frame(year = 2000, + ll = 1, + ul = 3, + est_boot = 2, + se_boot = 0.1, + bias_boot = 0, + int_type = "norm", + conf = 0.95, + est_original = 2) + } + + testthat::with_mocked_bindings( + bootstrap_cube = mock_bootstrap_cube, + calculate_bootstrap_ci = mock_calculate_bootstrap_ci, + .package = "dubicube", + { + add_ci(mock_ts, + boot_args = list(seed = 456), + ci_args = list(type = "perc")) + + expect_equal(captured_seed, 456) + expect_equal(captured_type, "perc") + } + ) +}) diff --git a/tests/testthat/test-calc_ts_dispatch.R b/tests/testthat/test-calc_ts_dispatch.R new file mode 100644 index 00000000..17b27ca5 --- /dev/null +++ b/tests/testthat/test-calc_ts_dispatch.R @@ -0,0 +1,50 @@ +# Test S3 dispatch for calc_ts during bootstrapping + +library(testthat) +library(b3gbi) + +# Skip if dubicube not available +skip_if_not_installed("dubicube") + +# Load test data +data(example_cube_1) + +# Test that calc_ts dispatch works correctly +test_that("calc_ts S3 dispatch works for bootstrapping", { + # Create indicator + res <- total_occ_ts(example_cube_1) + + # Get prepared params + params <- prepare_indicator_bootstrap( + indicator = res, + num_bootstrap = 2, + ci_type = "norm" + ) + + # Check that data has correct class + expect_true( + "total_occ" %in% class(params$bootstrap_params$data_cube), + info = "data_cube should have total_occ class" + ) + + # Check that total_occ is first class + expect_equal( + class(params$bootstrap_params$data_cube)[1], + "total_occ", + info = "total_occ should be first class for S3 dispatch" + ) + + # Test that calc_ts dispatches correctly + test_result <- calc_ts(params$bootstrap_params$data_cube) + + # Should return a data frame with diversity_val column + expect_s3_class(test_result, "data.frame") + expect_true( + "diversity_val" %in% names(test_result), + info = "calc_ts should return diversity_val column" + ) + expect_true( + "year" %in% names(test_result), + info = "calc_ts should return year column" + ) +}) diff --git a/tests/testthat/test-compute_indicator_workflow.R b/tests/testthat/test-compute_indicator_workflow.R index 5d8b47c6..0885f569 100644 --- a/tests/testthat/test-compute_indicator_workflow.R +++ b/tests/testthat/test-compute_indicator_workflow.R @@ -48,17 +48,6 @@ test_that("compute_indicator_workflow handles input validation", { "'arg' should be one of" ) - # Invalid ci_type - expect_error( - compute_indicator_workflow( - data = mock_cube, - type = "obs_richness", - dim_type = "map", - ci_type = "invalid" - ), - "'arg' should be one of" - ) - # Invalid level expect_error( compute_indicator_workflow( @@ -405,11 +394,7 @@ test_that( all( c( "year", - "diversity_val", - "int_type", - "ll", - "ul", - "conf_level" + "diversity_val" ) %in% names(result_ci$data) ) ) @@ -516,8 +501,7 @@ test_that("compute_indicator_workflow handles sim_cube objects", { result_ts <- compute_indicator_workflow( data = mock_sim_cube, type = "total_occ", - dim_type = "ts", - ci_type = "none" + dim_type = "ts" ) expect_s3_class(result_ts, "indicator_ts") @@ -525,8 +509,8 @@ test_that("compute_indicator_workflow handles sim_cube objects", { result_ci <- compute_indicator_workflow( data = mock_sim_cube, type = "total_occ", - dim_type = "ts", - ci_type = "norm") + dim_type = "ts") %>% + add_ci(ci_type = "norm", num_bootstrap = 100) expect_true( all( c( @@ -535,7 +519,7 @@ test_that("compute_indicator_workflow handles sim_cube objects", { "int_type", "ll", "ul", - "conf_level" + "conf" ) %in% names(result_ci$data) ) ) diff --git a/tests/testthat/test-constructor_functions.R b/tests/testthat/test-constructor_functions.R index fdc303ec..ed7c636c 100644 --- a/tests/testthat/test-constructor_functions.R +++ b/tests/testthat/test-constructor_functions.R @@ -128,7 +128,8 @@ test_that("new_indicator_ts creates indicator_ts object correctly", { num_species = 3, num_years = 11, species_names = NULL, - coord_range = list(xmin = 1, xmax = 2, ymin = 3, ymax = 4) + coord_range = list(xmin = 1, xmax = 2, ymin = 3, ymax = 4), + raw_cube_occurrences = mock_tibble ) expect_s3_class(its, c("indicator_ts", "obs_richness")) expect_equal(its$div_type, "obs_richness") diff --git a/tests/testthat/test-integration_add_ci.R b/tests/testthat/test-integration_add_ci.R new file mode 100644 index 00000000..b66e6abe --- /dev/null +++ b/tests/testthat/test-integration_add_ci.R @@ -0,0 +1,44 @@ +library(testthat) +library(b3gbi) + +test_that("Integration: total_occ_ts followed by add_ci works", { + # Load example data + data(example_cube_1) + + # Calculate time series of total occurrences + # This should return an indicator_ts object without CIs + res <- total_occ_ts(example_cube_1) + + expect_s3_class(res, "indicator_ts") + expect_false("ll" %in% names(res$data)) + expect_false("ul" %in% names(res$data)) + + # Add confidence intervals + # Using num_bootstrap = 10 for speed in tests + res_ci <- suppressWarnings(add_ci(res, num_bootstrap = 10, bootstrap_level = "indicator")) + + expect_s3_class(res_ci, "indicator_ts") + expect_true("ll" %in% names(res_ci$data)) + expect_true("ul" %in% names(res_ci$data)) + expect_true("est_boot" %in% names(res_ci$data)) +}) + +test_that("Integration: pielou_evenness_ts followed by add_ci works (cube level)", { + skip_if_not_installed("dubicube") + + # Load example data + data(example_cube_1) + + # Calculate time series of Pielou evenness + res <- pielou_evenness_ts(example_cube_1) + + expect_s3_class(res, "indicator_ts") + + # Add confidence intervals (cube level) + # Using num_bootstrap = 2 for speed + res_ci <- suppressWarnings(add_ci(res, num_bootstrap = 2, bootstrap_level = "cube")) + + expect_s3_class(res_ci, "indicator_ts") + expect_true("ll" %in% names(res_ci$data)) + expect_true("ul" %in% names(res_ci$data)) +}) diff --git a/tests/testthat/test-plot_methods.R b/tests/testthat/test-plot_methods.R index df5f4db0..4d124d05 100644 --- a/tests/testthat/test-plot_methods.R +++ b/tests/testthat/test-plot_methods.R @@ -416,7 +416,8 @@ test_that("plot_ts correctly applies smoothing and confidence intervals", { total_occ_example <- total_occ_ts(example_cube_1, level = "country", region = "Denmark") - + total_occ_example <- suppressWarnings(add_ci(total_occ_example, + num_bootstrap = 10)) # With confidence intervals p2 <- plot_ts( total_occ_example, @@ -514,8 +515,7 @@ test_that("plot_ts handles all parameters without error", { spec_occ_mammals_denmark_ts <- spec_occ_ts(example_cube_1, level = "country", - region = "Denmark", - ci_type = "none") + region = "Denmark") test_that( "plot_species_ts returns a ggplot or patchwork object with defaults", { @@ -577,11 +577,10 @@ test_that("plot_species_ts applies custom aesthetics correctly", { expect_equal(point_geom$aes_params$size, 4) }) -spec_occ_mammals_denmark_ts_ci <- spec_occ_ts(example_cube_1, +spec_occ_mammals_denmark_ts_ci <- suppressWarnings(spec_occ_ts(example_cube_1, level = "country", - region = "Denmark", - ci_type = "norm", - num_bootstrap = 20) + region = "Denmark") %>% + add_ci(num_bootstrap = 2)) test_that("plot_species_ts manages trends and confidence intervals", { # Verify presence of smoothed trend diff --git a/tests/testthat/test-prepare_indicator_bootstrap.R b/tests/testthat/test-prepare_indicator_bootstrap.R new file mode 100644 index 00000000..6af11797 --- /dev/null +++ b/tests/testthat/test-prepare_indicator_bootstrap.R @@ -0,0 +1,202 @@ +## ------------------------------------------------------------------ +## Helper transformations +## ------------------------------------------------------------------ +# logit <- function(p) { +# log(p / (1 - p)) +# } + +logit = function(p) { + p <- pmax(pmin(p, 1 - 1e-6), 1e-6) # Clamp values between ~0 and ~1 + log(p / (1 - p)) +} + +inv_logit <- function(l) { + exp(l) / (1 + exp(l)) +} + +## ------------------------------------------------------------------ +## Indicator objects for unit tests +## ------------------------------------------------------------------ +# Create mock data with required columns +mock_data_basic <- iris +mock_data_basic$year <- 2020 +mock_data_basic$taxonKey <- 1 + +mock_data_species <- iris +mock_data_species$year <- 2020 +mock_data_species$taxonKey <- seq_len(nrow(iris)) + +indicator_total_occ <- list(div_type = "total_occ", raw_data = mock_data_basic) +indicator_pielou_evenness <- list(div_type = "pielou_evenness", raw_data = mock_data_basic) +indicator_williams_evenness <- list(div_type = "williams_evenness", raw_data = mock_data_basic) +indicator_occ_density <- list(div_type = "occ_density", raw_data = mock_data_basic) +indicator_ab_rarity <- list(div_type = "ab_rarity", raw_data = mock_data_basic) +indicator_area_rarity <- list(div_type = "area_rarity", raw_data = mock_data_basic) +indicator_newness <- list(div_type = "newness", raw_data = mock_data_basic) +indicator_spec_occ <- list(div_type = "spec_occ", raw_data = mock_data_species) +indicator_spec_range <- list(div_type = "spec_range", raw_data = mock_data_species) +indicator_occ_ts <- list(div_type = "occ_ts", raw_data = mock_data_basic) + +## ------------------------------------------------------------------ +## Expected behaviour per div_type +## ------------------------------------------------------------------ +indicator_expectations <- list( + total_occ = list( + indicator = indicator_total_occ, + method = "boot_group_specific", + grouping = "year", + trans = function(t) t, + inv_trans = function(t) t, + no_bias = TRUE + ), + pielou_evenness = list( + indicator = indicator_pielou_evenness, + method = "boot_whole_cube", + grouping = "year", + trans = logit, + inv_trans = inv_logit, + no_bias = FALSE + ), + williams_evenness = list( + indicator = indicator_williams_evenness, + method = "boot_whole_cube", + grouping = "year", + trans = logit, + inv_trans = inv_logit, + no_bias = FALSE + ), + occ_density = list( + indicator = indicator_occ_density, + method = "boot_whole_cube", + grouping = "year", + trans = function(t) t, + inv_trans = function(t) t, + no_bias = FALSE + ), + ab_rarity = list( + indicator = indicator_ab_rarity, + method = "boot_whole_cube", + grouping = "year", + trans = function(t) t, + inv_trans = function(t) t, + no_bias = FALSE + ), + area_rarity = list( + indicator = indicator_area_rarity, + method = "boot_whole_cube", + grouping = "year", + trans = function(t) t, + inv_trans = function(t) t, + no_bias = FALSE + ), + newness = list( + indicator = indicator_newness, + method = "boot_whole_cube", + grouping = "year", + trans = function(t) t, + inv_trans = function(t) t, + no_bias = FALSE + ), + spec_occ = list( + indicator = indicator_spec_occ, + method = "boot_group_specific", + grouping = "group_key", + trans = function(t) t, + inv_trans = function(t) t, + no_bias = FALSE + ), + spec_range = list( + indicator = indicator_spec_range, + method = "boot_group_specific", + grouping = "group_key", + trans = function(t) t, + inv_trans = function(t) t, + no_bias = FALSE + ) +) + +test_that("prepare_indicator_bootstrap returns correct params for all div_types", { + for (div_type in names(indicator_expectations)) { + + exp <- indicator_expectations[[div_type]] + + params <- prepare_indicator_bootstrap( + indicator = exp$indicator, + num_bootstrap = 1000, + ci_type = c("perc", "bca"), + confidence_level = 0.9 + ) + + # ---- bootstrap params ---- + expect_equal( + params$bootstrap_params$samples, + 1000, + info = paste("bootstrap samples for", div_type) + ) + + expect_identical( + params$bootstrap_params$method, + exp$method, + info = paste("bootstrap method for", div_type) + ) + + expect_identical( + params$bootstrap_params$grouping_var, + exp$grouping, + info = paste("grouping_var for", div_type) + ) + + ## ---- CI params ---- + expect_equal( + sort(params$ci_params$type), + sort(c("perc", "bca")), + info = paste("confidence intervals for", div_type) + ) + + expect_equal( + params$ci_params$conf, + 0.9, + info = paste("confidence level for", div_type) + ) + + expect_identical( + params$ci_params$no_bias, + exp$no_bias, + info = paste("no_bias for", div_type) + ) + + x <- 0.3 + expect_equal( + params$ci_params$h(x), + exp$trans(x), + info = paste("transformation behaviour for", div_type) + ) + + y <- 0.2 + expect_equal( + params$ci_params$hinv(y), + exp$inv_trans(y), + info = paste("inverse transformation behaviour for", div_type) + ) + } +}) + +test_that("Test parsing of no_bias argument", { + params <- prepare_indicator_bootstrap( + indicator = indicator_expectations[["total_occ"]]$indicator, + num_bootstrap = 1000, + ci_type = c("perc", "bca"), + confidence_level = 0.9, + ci_args = list(no_bias = FALSE) + ) + + expect_identical( + params$bootstrap_params$method, + "boot_group_specific" + ) + + expect_identical( + params$ci_params$no_bias, + FALSE + ) +}) diff --git a/vignettes/b3gbi.Rmd b/vignettes/b3gbi.Rmd index 685de1bb..5e6942eb 100644 --- a/vignettes/b3gbi.Rmd +++ b/vignettes/b3gbi.Rmd @@ -104,13 +104,6 @@ All indicator wrapper functions (e.g., `obs_richness_map`, `occ_turnover_ts`) sh | `data` | The `processed_cube` object. Required. | | | `level` | The geographical scale ('country', 'continent', 'world'). | Automatically retrieves boundaries. | | `region` | The specific region name (e.g., 'Germany', 'Europe'). | Required if level is set. | -| `ci_type` | Type of bootstrap confidence interval to calculate. Only relevant for time series. | 'norm', 'basic', 'perc', 'bca', or 'none'. Defaults to 'norm' for time series. | -| `num_bootstrap` | Number of bootstrap runs for CI calculation. Only relevant for time series. | Defaults to 100. | - -⚠️ **Important Note on Confidence Intervals (CIs)**: - -- The `ci_type` argument is only used for calculating uncertainty in general time series indicators (e.g., `obs_richness_ts`) and is ignored for map indicators. -- For indicators based on Hill diversity (e.g., `hill_ts()`), the `ci_type` is ignored because CIs are calculated internally using the iNEXT package. However, the `num_bootstrap` argument is still required to define the number of runs for iNEXT's internal uncertainty estimation. ### Example: Observed Species Richness Map @@ -118,7 +111,7 @@ Let's calculate the observed richness spatially, covering the period from 1980 t ```{r richness-map} # Calculate a gridded map of observed species richness for Denmark -# Note that ci_type is ignored for map indicators +# Note that confidence intervals are not calculated for map indicators Denmark_observed_richness_map <- obs_richness_map(denmark_cube, level = "country", region = "Denmark") @@ -133,15 +126,14 @@ class(Denmark_observed_richness_map$data) ### Example: Total Occurrences Time Series -Now, let's calculate the same indicator temporally for a trend analysis. We will use the default `ci_type = "norm"` and `num_bootstrap = 100`. +Now, let's calculate the same indicator temporally for a trend analysis. -```{r richness-ts} +```{r total-occ-ts} # Calculate a time series of total occurrences for Denmark +# By default, confidence intervals are NOT calculated during this step Denmark_total_occ_ts <- total_occ_ts(denmark_cube, - level = "country", - region = "Denmark", - ci_type = "norm", # Include confidence intervals - num_bootstrap = 100) # Using the default number of runs + level = "country", + region = "Denmark") ``` The result is an `indicator_ts` object. @@ -150,6 +142,15 @@ The result is an `indicator_ts` object. class(Denmark_total_occ_ts) ``` +Now let's add confidence intervals using the `add_ci()` function. To speed things up we will reduce the number of bootstrap samples from the default of 1000 to 100. + +```{r add-ci} +# Add confidence intervals to the time series +Denmark_total_occ_ts_with_ci <- add_ci(Denmark_total_occ_ts, + num_bootstrap = 100) +``` + + ## Step 3: Visualization with `plot()` The generic `plot()` function automatically calls the appropriate helper function (`plot_map()` or `plot_ts()`) and applies smart defaults for titles, colors, and layout. @@ -172,18 +173,11 @@ plot(Denmark_observed_richness_map, ### Plotting the Time Series -| Argument | Description | Common Use | -|----------|-------------|------------| -| `smoothed_trend` | If TRUE, displays a smoothed trend line (LOESS). | Defaults to TRUE. | -| `linecolour` | Sets the color of the indicator line/points. | e.g., "blue" | -| `ribboncolour` | Sets the color of the indicator confidence interval. | e.g., "skyblue" | -| `trendlinecolour` | Sets the color of the trend line. | e.g., "darkorange" | -| `envelopecolour` | Sets the color of the trend line confidence intervals. | e.g., "orange" | -| `x_label` / `y_label` | Custom labels for the axes. | | +If your `indicator_ts` object contains confidence intervals, the `plot()` function will automatically display them as ribbons or error bars. ```{r plot-ts} -# Plotting the time series object -plot(Denmark_total_occ_ts, +# Plotting the time series object with confidence intervals +plot(Denmark_total_occ_ts_with_ci, title = "Temporal Trend of Total Mammal Occurrences in Denmark", linecolour = "blue", ribboncolour = "skyblue", diff --git a/vignettes/uncertainty.Rmd b/vignettes/uncertainty.Rmd new file mode 100644 index 00000000..e779cfd1 --- /dev/null +++ b/vignettes/uncertainty.Rmd @@ -0,0 +1,169 @@ +--- +title: "Uncertainty in Biodiversity Indicators" +author: "Shawn Dove" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Uncertainty in Biodiversity Indicators} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.width = 8, + fig.height = 6, + out.width = "95%" +) +``` + +## Introduction 🛡️ + +Quantifying uncertainty is crucial for robust biodiversity assessments. The **b3gbi** package provides a flexible and powerful way to calculate confidence intervals (CIs) for biodiversity indicators using bootstrapping methods, leveraging the **dubicube** package for robust cube-level resampling. + +To maintain a clean and efficient workflow, uncertainty calculation is decoupled from the initial indicator calculation. This "two-step" process allows users to first explore their data and indicators quickly, and then perform computationally intensive bootstrapping only when needed. + +## The Two-Step Workflow + +The standard workflow for adding uncertainty to an indicator is as follows: + +1. **Calculate the Indicator**: Use a time-series wrapper function (e.g., `total_occ_ts()`, `pielou_evenness_ts()`). +2. **Add Confidence Intervals**: Pass the resulting indicator object to the `add_ci()` function. + +### Example: Calculating Richness with Uncertainty + +In this example, we calculate total occurrences for mammals in Denmark and then add 95% confidence intervals using cube-level bootstrapping. + +```{r example, eval = FALSE} +library(b3gbi) + +# 1. Process the cube +denmark_cube <- process_cube(system.file("extdata", + "denmark_mammals_cube_eqdgc.csv", + package = "b3gbi")) + +# 2. Calculate a time series of total occurrences +# By default, CIs are NOT calculated during this step +occ_ts <- total_occ_ts(denmark_cube) + +# 3. Add confidence intervals using add_ci() +# This step uses the dubicube package for robust bootstrapping +occ_ts_with_ci <- add_ci(occ_ts, num_bootstrap = 100) # Using 100 for speed in this example + +# 4. Plot the result +plot(occ_ts_with_ci, title = "Total Occurrences with 95% CI") +``` + +## Bootstrapping Levels + +The `add_ci()` function supports two levels of bootstrapping, selectable via the `bootstrap_level` argument: + +### 1. Cube-Level Bootstrapping (`bootstrap_level = "cube"`) + +This is the **default and recommended method**. It resamples the raw occurrence records within the data cube. The function automatically determines whether to use group-specific resampling (for species-level indicators) or whole-cube resampling (for aggregate indicators) based on the indicator type. + +* **Pros**: Mathematically more robust; captures the underlying sampling uncertainty of the original data. +* **Cons**: Computationally more intensive, especially for large cubes. + +It leverages the `dubicube` package to ensure that indicators are recalculated correctly for each bootstrap replicate. + +### 2. Indicator-Level Bootstrapping (`bootstrap_level = "indicator"`) + +This method resamples the calculated indicator values themselves. + +* **Pros**: Very fast, even for massive datasets. +* **Cons**: Less robust; assumes that the calculated indicator values are independent and identically distributed, which is often not strictly true for biodiversity metrics. + +## Automatic Indicator Configuration + +The `add_ci()` function automatically applies appropriate bootstrapping strategies based on the indicator type. This ensures statistically correct confidence intervals without requiring manual configuration: + +* **Group-Specific Bootstrapping**: Species-level indicators (`total_occ`, `spec_occ`, `spec_range`) automatically resample within each species-year combination. This is essential when indicators are calculated per species. + +* **Whole-Cube Bootstrapping**: Aggregate indicators (evenness, rarity, density metrics) resample across the entire cube, treating the data as a single population. + +* **Bounded Indicators**: Evenness metrics (`pielou_evenness`, `williams_evenness`) automatically use logit transformation to ensure confidence intervals remain within valid [0, 1] bounds. + +These defaults are applied internally, but can be overridden using `boot_args` if needed. + +## Advanced Configuration + +The `add_ci()` function provides several arguments for fine-tuning the CI calculation: + +| Argument | Description | Default | +|----------|-------------|---------| +| `num_bootstrap` | Number of bootstrap replicates. | `1000` | +| `ci_type` | Type of bootstrap interval (`"norm"`, `"basic"`, `"perc"`, `"bca"`). | `"norm"` | +| `confidence_level` | The confidence level (e.g., 0.95). | `0.95` | +| `boot_args` | A list of additional arguments for `dubicube::bootstrap_cube()`. | `list()` | +| `ci_args` | A list of additional arguments for `dubicube::calculate_bootstrap_ci()`. | `list()` | + +### Customizing the Bootstrap Process + +If you need to pass specific parameters to the underlying `dubicube` functions (e.g., setting a specific seed), you can use the `boot_args` and `ci_args` parameters: + +```{r advanced, eval = FALSE} +occ_ts_custom <- add_ci(occ_ts, + num_bootstrap = 500, + boot_args = list(seed = 42), + ci_args = list(type = "perc")) +``` + +### Overriding Indicator Defaults + +While `add_ci()` automatically selects appropriate settings, you can override these defaults using `boot_args` and `ci_args`. This is useful when you need specialized behavior: + +```{r override, eval = FALSE} +# Example: Calculate CIs for evenness on raw scale (no logit transformation) +evenness_raw <- add_ci(my_evenness_indicator, + num_bootstrap = 1000, + boot_args = list(trans = identity, + inv_trans = identity)) + +# Example: Force bias correction for total_occ +occ_with_bias <- add_ci(my_occ_indicator, + num_bootstrap = 1000, + ci_args = list(no_bias = FALSE)) +``` + +Common override parameters include: +- `trans` / `inv_trans`: Transformation functions +- `no_bias`: Logical, disable bias correction (default varies by indicator) +- `group_specific`: Logical, force group-specific vs whole-cube bootstrapping + +## Supported Indicators + +Confidence intervals can be added post-hoc for the following indicators: + +* Total Occurrences (`total_occ`) +* Occurrence Density (`occ_density`) +* Newness (`newness`) +* Williams Evenness (`williams_evenness`) +* Pielou Evenness (`pielou_evenness`) +* Abundance-based Rarity (`ab_rarity`) +* Area-based Rarity (`area_rarity`) +* Species Occurrences (`spec_occ`) +* Species Range (`spec_range`) + +### Exclusions + +Certain indicators cannot have confidence intervals added via `add_ci()`: + +* **Hill Numbers** (`hill0`, `hill1`, `hill2`): These calculate their own uncertainty internally using the `iNext` package during the initial calculation. +* **Observed Richness** (`obs_richness`): Bootstrapping observed richness is often not statistically sensible; consider using Hill numbers for estimated richness instead. +* **Cumulative Richness** (`cum_richness`): The cumulative nature of this metric makes standard bootstrapping inappropriate. +* **Occurrence Turnover** (`occ_turnover`): Similar to cumulative richness, the temporal dependency requires specialized methods. +* **Taxonomic Distinctness** (`tax_distinct`): Requires specialized randomization tests rather than standard bootstrapping. + +## Data Exploration with dubicube 🔍 + +While **b3gbi** focuses on indicator calculation and visualization, the underlying **dubicube** package offers powerful functions for initial data exploration and quality assessment of your biodiversity data cubes. + +To learn more about the full capabilities of **dubicube**, including in-depth tutorials and advanced data processing techniques, please visit the [dubicube documentation](https://docs.b-cubed.eu/software/dubicube/readme/). + +## Summary + +The `add_ci()` function provides a unified and robust interface for adding uncertainty to your biodiversity assessments. By leveraging the power of `dubicube`, **b3gbi** ensures that your results are not just numbers, but scientifically grounded estimates with clearly defined confidence boundaries. + +With automatic indicator configuration, most users can rely on sensible defaults while advanced users retain full control over the bootstrapping process through the `boot_args` and `ci_args` parameters.