b-cubed-eu
diff --git a/inst/en_gb.dic b/inst/en_gb.dic
Lines changed: 11 additions & 0 deletions
diff --git a/source/Prepare_data_10km.Rmd b/source/Prepare_data_10km.Rmd
Lines changed: 14 additions & 10 deletions
diff --git a/source/R/taxon_mapping.R b/source/R/taxon_mapping.R
Lines changed: 17 additions & 12 deletions
diff --git a/source/pipelines/biodiversity_indicators/R/read_data.R b/source/pipelines/biodiversity_indicators/R/read_data.R
Lines changed: 34 additions & 18 deletions
diff --git a/source/pipelines/exploratory_analysis/R/expl_data.R b/source/pipelines/exploratory_analysis/R/expl_data.R
Lines changed: 48 additions & 36 deletions
@@ -1,10 +1,13 @@
 Algemene
+Anthus
 Bosonderzoek
 Broedvogelmonitoring
 Broedvogels
 Cartuyvels
 Cetti's
 Cettia
+Chloris
+Cyanistes
 Daele
 Databricks
 Dendrocopos
@@ -20,6 +23,7 @@ Laridae
 Larus
 Luscinia
 MGRS
+Motacilla
 Natuur
 OOSTENDE
 Parus
@@ -35,23 +39,30 @@ Watervogels
 abv
 argentatus
 birdcube
+caeruleus
 cetti
+chloris
 color
+communis
 datacube
 datacubes
+domesticus
 eBird
+flava
 fuscus
 gbi
 ies
 labeled
 megarhynchos
+modularis
 montanus
 org
 rubicola
 sublicensable
 synched
 tabset
 torquatus
+trivialis
 utm
 voor
 waarnemingen
 
@@ -199,9 +199,9 @@ utm_grid %>%
               summarise(n_species = n_distinct(species), .groups = "drop"),
             by = join_by(mgrscode)) %>%
   ggplot() +
-    geom_sf(aes(fill = n_species), col = alpha("white", 0)) +
-    scale_fill_viridis_c(option = "inferno") +
-    ggtitle("ABV data")
+  geom_sf(aes(fill = n_species), col = alpha("white", 0)) +
+  scale_fill_viridis_c(option = "inferno") +
+  ggtitle("ABV data")
 ```
 
 We select cube data from Flanders and add the geometry to the data layers by taking an inner join.
@@ -221,9 +221,9 @@ utm_grid %>%
               summarise(n_species = n_distinct(species), .groups = "drop"),
             by = join_by(mgrscode)) %>%
   ggplot() +
-    geom_sf(aes(fill = n_species), col = alpha("white", 0)) +
-    scale_fill_viridis_c(option = "inferno") +
-    ggtitle("Bird cube data from Flanders")
+  geom_sf(aes(fill = n_species), col = alpha("white", 0)) +
+  scale_fill_viridis_c(option = "inferno") +
+  ggtitle("Bird cube data from Flanders")
 ```
 
 # Correction of species names
@@ -236,11 +236,13 @@ abv_data_total_sf <- abv_data_total_sf %>%
     species = case_when(
       species == "Dendrocopus major" ~ "Dendrocopos major",
       species == "Saxicola torquatus" ~ "Saxicola rubicola",
-      TRUE ~ species),
+      TRUE ~ species
+    ),
     specieskey = case_when(
       species == "Dendrocopos major" ~ 2477968,
       species == "Saxicola rubicola" ~ 4408759,
-      TRUE ~ specieskey)
+      TRUE ~ specieskey
+    )
   )
 ```
 
@@ -249,10 +251,12 @@ birdcube_data_total_sf <- birdcube_data_total_sf %>%
   mutate(
     species = case_when(
       species == "Poecile montanus" ~ "Parus montanus",
-      TRUE ~ species),
+      TRUE ~ species
+    ),
     specieskey = case_when(
       species == "Parus montanus" ~ 4409010,
-      TRUE ~ specieskey)
+      TRUE ~ specieskey
+    )
   )
 ```
 
 
@@ -85,7 +85,8 @@ match_vernacular_name <- function(
       c(list(vernacular_name),
         list(datasetKey = "d7dddbf4-2cf0-4f39-9b2a-bb099caae36c"),
         list(limit = limit),
-        dots))
+        dots)
+    )
 
     # Return taxon data frame if match found
     if (nrow(gbif_lookup$data) > 0) {
@@ -135,7 +136,7 @@ match_vernacular_name <- function(
     # Return NA if no good match found
     if (is.na(taxon_key)) {
       return(NA_character_)
-    # Return match with taxon key
+      # Return match with taxon key
     } else {
       out_data <- taxon_data[taxon_data$key == taxon_key, ]
       out_data <- out_data[, colSums(is.na(out_data)) < nrow(out_data)]
@@ -171,13 +172,16 @@ map_taxa_from_vernacular <- function(
     nest(match_df = all_of(c(vernacular_name_col, group_cols))) %>%
 
     # find scientific name for each (distinct) vernacular name
-    mutate(taxon_df = purrr::map(
-      .data$match_df,
-      match_vernacular_name,
-      filter_cols = filter_cols,
-      lang = lang,
-      increment = increment,
-      ...)) %>%
+    mutate(
+      taxon_df = purrr::map(
+        .data$match_df,
+        match_vernacular_name,
+        filter_cols = filter_cols,
+        lang = lang,
+        increment = increment,
+        ...
+      )
+    ) %>%
     unnest("match_df") %>%
 
     # Remove unneeded columns
@@ -200,9 +204,10 @@ map_taxa_from_vernacular <- function(
     ungroup() %>%
 
     # Add other columns from input df
-    right_join(vernacular_name_df,
-               by = c(vernacular_name_col, group_cols)
-               ) %>%
+    right_join(
+      vernacular_name_df,
+      by = c(vernacular_name_col, group_cols)
+    ) %>%
 
     # Set desired column(s) at the right side
     select(all_of(names(vernacular_name_df)), all_of(out_cols)) %>%
 
@@ -1,9 +1,11 @@
 path_to_interim <- function(path_to_data, dataset, spat_res) {
-  file = paste0(dataset, "_cube_", spat_res, ".csv")
+  file <- paste0(dataset, "_cube_", spat_res, ".csv")
   file.path(path_to_data, "interim", file)
 }
 
-read_andid <- function(data_file, dataset, spat_res){
+read_andid <- function(data_file, dataset, spat_res) {
+  require("dplyr")
+
   data <- read.csv(data_file)
 
   output <- data |>
@@ -13,7 +15,9 @@ read_andid <- function(data_file, dataset, spat_res){
   return(output)
 }
 
-add_cyclus <- function(data){
+add_cyclus <- function(data) {
+  require("dplyr")
+
   output <- data |>
     mutate(cyclus = case_when(
       year >= 2007 & year <= 2009 ~ 1,
@@ -22,52 +26,64 @@ add_cyclus <- function(data){
       year >= 2016 & year <= 2018 ~ 4,
       year >= 2019 & year <= 2021 ~ 5,
       year >= 2022 & year <= 2024 ~ 6
-  ))
+    ))
 
   return(output)
 }
 
 
-filter_1 <- function(data){
+filter_1 <- function(data) {
+  require("dplyr")
+
   abv_birds <- read.csv("./data/interim/abv_birds.csv")
 
   output <- data |>
-    filter(species %in% abv_birds$species)
+    filter(.data$species %in% abv_birds$species)
+
+  return(output)
 }
 
 #' Rules (loosely based on ABV):
-#' 1) A square is only relevant is the species was observed in more than one time period
+#' 1) A square is only relevant if the species was observed in
+#' more than one time period
 #' 2) A minimum of three relevant squares to include the species
 #' 3) A minimum of a hundred observations to include the species
 
-filter_2 <- function(data, time_period = "year"){
+filter_2 <- function(data, time_period = "year") {
+  require("dplyr")
+
   output <- data |>
-    group_by(mgrscode, species) |>
+    group_by(.data$mgrscode, .data$species) |>
     mutate(periods = n_distinct(!!sym(time_period))) |>
     ungroup() |>
-    filter(periods > 1) |>
-    group_by(species) |>
-    mutate(squares = n_distinct(mgrscode)) |>
+    filter(.data$periods > 1) |>
+    group_by(.data$species) |>
+    mutate(squares = n_distinct(.data$mgrscode)) |>
     ungroup() |>
-    filter(squares > 2) |>
-    group_by(species) |>
+    filter(.data$squares > 2) |>
+    group_by(.data$species) |>
     mutate(obs = n()) |>
     ungroup() |>
-    filter(obs > 100) |>
+    filter(.data$obs > 100) |>
     mutate(id_filter_per = time_period)
 
   return(output)
 }
 
-filter_3 <- function(data, time_period = "year"){
+filter_3 <- function(data, time_period = "year") {
+  require("dplyr")
+
   output <- data |>
-    group_by(id_dataset, id_spat_res, species, !!sym(time_period)) |>
+    group_by(.data$id_dataset,
+             .data$id_spat_res,
+             .data$species,
+             !!sym(time_period)) |>
     summarise(n = sum(n)) |>
     ungroup() |>
     group_by(!!sym(time_period)) |>
     mutate(total_obs = sum(n)) |>
     ungroup() |>
-    mutate(n = n/total_obs)|>
+    mutate(n = .data$n / .data$total_obs) |>
     mutate(id_filter_per = time_period)
 
   return(output)
 
@@ -1,75 +1,87 @@
 my_group_by <- function(data, cols) {
+  require("dplyr")
+
   group_by(data, pick({{ cols }}))
 }
 
 range_comp <- function(data) {
+  require("dplyr")
+  require("tidyr")
 
   dataset_least_species <- data |>
-    group_by(id_dataset) |>
-    summarize(n_species = n_distinct(species)) |>
-    filter(n_species == min(n_species)) |>
-    pull(id_dataset)
+    group_by(.data$id_dataset) |>
+    summarize(n_species = n_distinct(.data$species)) |>
+    filter(.data$n_species == min(.data$n_species)) |>
+    pull(.data$id_dataset)
 
   species_list <- data |>
-    filter(id_dataset == dataset_least_species) |>
-    select(species) |>
+    filter(.data$id_dataset == dataset_least_species) |>
+    select(.data$species) |>
     distinct() |>
     pull()
 
   comp_range_data <- data |>
-    filter(species %in% species_list) |>
+    filter(.data$species %in% species_list) |>
     group_by(pick(matches("^id_"))) |>
-    mutate(tot_n_dist_gridcells = n_distinct(mgrscode)) |>
+    mutate(tot_n_dist_gridcells = n_distinct(.data$mgrscode)) |>
     ungroup() |>
-    my_group_by(c(c(species, tot_n_dist_gridcells), matches("^id_"))) |>
-    summarise(n_dist_gridcells = n_distinct(mgrscode)) |>
+    my_group_by(c(c(.data$species,
+                    .data$tot_n_dist_gridcells),
+                  matches("^id_"))) |>
+    summarise(n_dist_gridcells = n_distinct(.data$mgrscode)) |>
     ungroup() |>
-    mutate(percentage = n_dist_gridcells/tot_n_dist_gridcells) |>
-    pivot_wider(id_cols = c(id_spat_res, species, matches("^id_filter")),
-                names_from = id_dataset,
-                values_from = c(n_dist_gridcells, percentage))|>
+    mutate(percentage = .data$n_dist_gridcells / .data$tot_n_dist_gridcells) |>
+    pivot_wider(id_cols = c(.data$id_spat_res,
+                            .data$species,
+                            matches("^id_filter")),
+                names_from = .data$id_dataset,
+                values_from = c(.data$n_dist_gridcells, .data$percentage)) |>
     left_join(data |>
-                filter(id_dataset == "abv_data") |>
-                distinct(species, category),
-              by = join_by(species))
+                filter(.data$id_dataset == "abv_data") |>
+                distinct(.data$species, .data$category),
+              by = join_by(.data$species))
 
   return(comp_range_data)
 }
 
-trend_comp <- function(data, time_period){
+trend_comp <- function(data, time_period) {
+  require("dplyr")
+  require("tidyr")
+
   dataset_least_species <- data |>
-    group_by(id_dataset) |>
-    summarize(n_species = n_distinct(species)) |>
-    filter(n_species == min(n_species)) |>
-    pull(id_dataset)
+    group_by(.data$id_dataset) |>
+    summarize(n_species = n_distinct(.data$species)) |>
+    filter(.data$n_species == min(.data$n_species)) |>
+    pull(.data$id_dataset)
 
   species_list <- data |>
-    filter(id_dataset == dataset_least_species) |>
-    select(species) |>
+    filter(.data$id_dataset == dataset_least_species) |>
+    select(.data$species) |>
     distinct() |>
     pull()
 
   trend_range_data <- data |>
-    filter(species %in% species_list) |>
-    my_group_by(c(c(species, !!sym(time_period)), matches("^id_"))) |>
+    filter(.data$species %in% species_list) |>
+    my_group_by(c(c(.data$species, !!sym(time_period)), matches("^id_"))) |>
     summarize(occurrence = sum(n)) |>
     ungroup() |>
-    pivot_wider(id_cols = c(id_spat_res,
-                            species,
+    pivot_wider(id_cols = c(.data$id_spat_res,
+                            .data$species,
                             !!sym(time_period),
                             matches("^id_filter")),
-                names_from = id_dataset,
-                values_from = occurrence) |>
+                names_from = .data$id_dataset,
+                values_from = .data$occurrence) |>
     drop_na() |>
-    my_group_by(c(c(species, id_spat_res), matches("^id_filter"))) |>
-    summarise(correlation = cor(abv_data,
-                                birdflanders,
+    my_group_by(c(c(.data$species, .data$id_spat_res),
+                  matches("^id_filter"))) |>
+    summarise(correlation = cor(.data$abv_data,
+                                .data$birdflanders,
                                 method = "pearson")) |>
     ungroup() |>
     left_join(data |>
-                filter(id_dataset == "abv_data") |>
-                distinct(species, category),
-              by = join_by(species)) |>
+                filter(.data$id_dataset == "abv_data") |>
+                distinct(.data$species, .data$category),
+              by = join_by(.data$species)) |>
     mutate(time_period = time_period)
 
   return(trend_range_data)