Skip to content

Commit 9969df9

Browse files
author
EmmaCartuyvels1
committed
Fix checklist stuff
1 parent e9d2859 commit 9969df9

File tree

11 files changed

+341
-237
lines changed

11 files changed

+341
-237
lines changed

inst/en_gb.dic

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
Algemene
2+
Anthus
23
Bosonderzoek
34
Broedvogelmonitoring
45
Broedvogels
56
Cartuyvels
67
Cetti's
78
Cettia
9+
Chloris
10+
Cyanistes
811
Daele
912
Databricks
1013
Dendrocopos
@@ -20,6 +23,7 @@ Laridae
2023
Larus
2124
Luscinia
2225
MGRS
26+
Motacilla
2327
Natuur
2428
OOSTENDE
2529
Parus
@@ -35,23 +39,30 @@ Watervogels
3539
abv
3640
argentatus
3741
birdcube
42+
caeruleus
3843
cetti
44+
chloris
3945
color
46+
communis
4047
datacube
4148
datacubes
49+
domesticus
4250
eBird
51+
flava
4352
fuscus
4453
gbi
4554
ies
4655
labeled
4756
megarhynchos
57+
modularis
4858
montanus
4959
org
5060
rubicola
5161
sublicensable
5262
synched
5363
tabset
5464
torquatus
65+
trivialis
5566
utm
5667
voor
5768
waarnemingen

source/Prepare_data_10km.Rmd

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,9 @@ utm_grid %>%
199199
summarise(n_species = n_distinct(species), .groups = "drop"),
200200
by = join_by(mgrscode)) %>%
201201
ggplot() +
202-
geom_sf(aes(fill = n_species), col = alpha("white", 0)) +
203-
scale_fill_viridis_c(option = "inferno") +
204-
ggtitle("ABV data")
202+
geom_sf(aes(fill = n_species), col = alpha("white", 0)) +
203+
scale_fill_viridis_c(option = "inferno") +
204+
ggtitle("ABV data")
205205
```
206206

207207
We select cube data from Flanders and add the geometry to the data layers by taking an inner join.
@@ -221,9 +221,9 @@ utm_grid %>%
221221
summarise(n_species = n_distinct(species), .groups = "drop"),
222222
by = join_by(mgrscode)) %>%
223223
ggplot() +
224-
geom_sf(aes(fill = n_species), col = alpha("white", 0)) +
225-
scale_fill_viridis_c(option = "inferno") +
226-
ggtitle("Bird cube data from Flanders")
224+
geom_sf(aes(fill = n_species), col = alpha("white", 0)) +
225+
scale_fill_viridis_c(option = "inferno") +
226+
ggtitle("Bird cube data from Flanders")
227227
```
228228

229229
# Correction of species names
@@ -236,11 +236,13 @@ abv_data_total_sf <- abv_data_total_sf %>%
236236
species = case_when(
237237
species == "Dendrocopus major" ~ "Dendrocopos major",
238238
species == "Saxicola torquatus" ~ "Saxicola rubicola",
239-
TRUE ~ species),
239+
TRUE ~ species
240+
),
240241
specieskey = case_when(
241242
species == "Dendrocopos major" ~ 2477968,
242243
species == "Saxicola rubicola" ~ 4408759,
243-
TRUE ~ specieskey)
244+
TRUE ~ specieskey
245+
)
244246
)
245247
```
246248

@@ -249,10 +251,12 @@ birdcube_data_total_sf <- birdcube_data_total_sf %>%
249251
mutate(
250252
species = case_when(
251253
species == "Poecile montanus" ~ "Parus montanus",
252-
TRUE ~ species),
254+
TRUE ~ species
255+
),
253256
specieskey = case_when(
254257
species == "Parus montanus" ~ 4409010,
255-
TRUE ~ specieskey)
258+
TRUE ~ specieskey
259+
)
256260
)
257261
```
258262

source/R/taxon_mapping.R

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ match_vernacular_name <- function(
8585
c(list(vernacular_name),
8686
list(datasetKey = "d7dddbf4-2cf0-4f39-9b2a-bb099caae36c"),
8787
list(limit = limit),
88-
dots))
88+
dots)
89+
)
8990

9091
# Return taxon data frame if match found
9192
if (nrow(gbif_lookup$data) > 0) {
@@ -135,7 +136,7 @@ match_vernacular_name <- function(
135136
# Return NA if no good match found
136137
if (is.na(taxon_key)) {
137138
return(NA_character_)
138-
# Return match with taxon key
139+
# Return match with taxon key
139140
} else {
140141
out_data <- taxon_data[taxon_data$key == taxon_key, ]
141142
out_data <- out_data[, colSums(is.na(out_data)) < nrow(out_data)]
@@ -171,13 +172,16 @@ map_taxa_from_vernacular <- function(
171172
nest(match_df = all_of(c(vernacular_name_col, group_cols))) %>%
172173

173174
# find scientific name for each (distinct) vernacular name
174-
mutate(taxon_df = purrr::map(
175-
.data$match_df,
176-
match_vernacular_name,
177-
filter_cols = filter_cols,
178-
lang = lang,
179-
increment = increment,
180-
...)) %>%
175+
mutate(
176+
taxon_df = purrr::map(
177+
.data$match_df,
178+
match_vernacular_name,
179+
filter_cols = filter_cols,
180+
lang = lang,
181+
increment = increment,
182+
...
183+
)
184+
) %>%
181185
unnest("match_df") %>%
182186

183187
# Remove unneeded columns
@@ -200,9 +204,10 @@ map_taxa_from_vernacular <- function(
200204
ungroup() %>%
201205

202206
# Add other columns from input df
203-
right_join(vernacular_name_df,
204-
by = c(vernacular_name_col, group_cols)
205-
) %>%
207+
right_join(
208+
vernacular_name_df,
209+
by = c(vernacular_name_col, group_cols)
210+
) %>%
206211

207212
# Set desired column(s) at the right side
208213
select(all_of(names(vernacular_name_df)), all_of(out_cols)) %>%

source/pipelines/biodiversity_indicators/R/read_data.R

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#' Build the path of an interim cube CSV file
#'
#' The file name is `<dataset>_cube_<spat_res>.csv` and it is located in the
#' `interim` sub-directory of `path_to_data`.
#'
#' @param path_to_data Path of the data directory.
#' @param dataset Dataset identifier used as file name prefix.
#' @param spat_res Spatial resolution label used as file name suffix.
#' @return Character path(s) built with `file.path()`.
path_to_interim <- function(path_to_data, dataset, spat_res) {
  file.path(
    path_to_data,
    "interim",
    paste0(dataset, "_cube_", spat_res, ".csv")
  )
}
55

6-
read_andid <- function(data_file, dataset, spat_res){
6+
read_andid <- function(data_file, dataset, spat_res) {
7+
require("dplyr")
8+
79
data <- read.csv(data_file)
810

911
output <- data |>
@@ -13,7 +15,9 @@ read_andid <- function(data_file, dataset, spat_res){
1315
return(output)
1416
}
1517

16-
add_cyclus <- function(data){
18+
add_cyclus <- function(data) {
19+
require("dplyr")
20+
1721
output <- data |>
1822
mutate(cyclus = case_when(
1923
year >= 2007 & year <= 2009 ~ 1,
@@ -22,52 +26,64 @@ add_cyclus <- function(data){
2226
year >= 2016 & year <= 2018 ~ 4,
2327
year >= 2019 & year <= 2021 ~ 5,
2428
year >= 2022 & year <= 2024 ~ 6
25-
))
29+
))
2630

2731
return(output)
2832
}
2933

3034

31-
#' Keep only the records of species listed in the ABV bird list
#'
#' Reads a CSV file with a `species` column and retains the rows of `data`
#' whose `species` value occurs in that file.
#'
#' @param data Data frame with a `species` column.
#' @param abv_birds_file Path of the CSV file holding the reference species
#'   list. Defaults to the location that was previously hard-coded, which is
#'   relative to the working directory.
#' @return `data` restricted to the species present in the reference list.
filter_1 <- function(data,
                     abv_birds_file = "./data/interim/abv_birds.csv") {
  # Fail with a clear message instead of a low-level connection error.
  if (!file.exists(abv_birds_file)) {
    stop("ABV species list not found: ", abv_birds_file, call. = FALSE)
  }
  abv_birds <- read.csv(abv_birds_file)

  # Namespaced call: errors if dplyr is missing, unlike `require()`, which
  # only returns FALSE and lets the function continue.
  dplyr::filter(data, .data$species %in% abv_birds$species)
}
3745

3846
#' Rules (loosely based on ABV):
39-
#' 1) A square is only relevant is the species was observed in more than one time period
47+
#' 1) A square is only relevant if the species was observed in
48+
#' more than one time period
4049
#' 2) A minimum of three relevant squares to include the species
4150
#' 3) A minimum of a hundred observations to include the species
4251

43-
#' Filter species on minimum coverage in time, space and observations
#'
#' Applies three consecutive filters:
#' 1. keep square/species combinations observed in more than one distinct
#'    value of `time_period`;
#' 2. keep species that remain in more than two distinct squares;
#' 3. keep species that remain with more than 100 rows.
#'
#' The helper columns `periods`, `squares` and `obs` are kept in the output,
#' and `id_filter_per` records the time period column that was used.
#'
#' @param data Data frame with columns `mgrscode`, `species` and the column
#'   named by `time_period`.
#' @param time_period Name (single string) of the column holding the time
#'   period. Defaults to `"year"`.
#' @return The filtered, ungrouped data with the additional columns
#'   `periods`, `squares`, `obs` and `id_filter_per`.
filter_2 <- function(data, time_period = "year") {
  stopifnot(
    is.character(time_period),
    length(time_period) == 1L,
    time_period %in% names(data)
  )

  output <- data |>
    # Rule 1: number of distinct periods per square and species
    dplyr::group_by(.data$mgrscode, .data$species) |>
    dplyr::mutate(periods = dplyr::n_distinct(.data[[time_period]])) |>
    dplyr::ungroup() |>
    dplyr::filter(.data$periods > 1) |>
    # Rule 2: number of remaining squares per species
    dplyr::group_by(.data$species) |>
    dplyr::mutate(squares = dplyr::n_distinct(.data$mgrscode)) |>
    dplyr::ungroup() |>
    dplyr::filter(.data$squares > 2) |>
    # Rule 3: number of remaining rows per species
    dplyr::group_by(.data$species) |>
    dplyr::mutate(obs = dplyr::n()) |>
    dplyr::ungroup() |>
    dplyr::filter(.data$obs > 100) |>
    dplyr::mutate(id_filter_per = time_period)

  output
}
6172

62-
filter_3 <- function(data, time_period = "year"){
73+
filter_3 <- function(data, time_period = "year") {
74+
require("dplyr")
75+
6376
output <- data |>
64-
group_by(id_dataset, id_spat_res, species, !!sym(time_period)) |>
77+
group_by(.data$id_dataset,
78+
.data$id_spat_res,
79+
.data$species,
80+
!!sym(time_period)) |>
6581
summarise(n = sum(n)) |>
6682
ungroup() |>
6783
group_by(!!sym(time_period)) |>
6884
mutate(total_obs = sum(n)) |>
6985
ungroup() |>
70-
mutate(n = n/total_obs)|>
86+
mutate(n = .data$n / .data$total_obs) |>
7187
mutate(id_filter_per = time_period)
7288

7389
return(output)

source/pipelines/exploratory_analysis/R/expl_data.R

Lines changed: 48 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,87 @@
11
#' Group a data frame by a tidyselect specification
#'
#' Thin wrapper around `dplyr::group_by()` that accepts a tidyselect
#' expression (e.g. `c(a, matches("^id_"))`) instead of data-masked
#' expressions.
#'
#' @param data Data frame to group.
#' @param cols Tidyselect expression selecting the grouping columns.
#' @return `data` grouped by the selected columns.
my_group_by <- function(data, cols) {
  # Namespaced calls fail loudly when dplyr is missing; `require()` would
  # only return FALSE.
  dplyr::group_by(data, dplyr::pick({{ cols }}))
}
46

57
#' Compare the range (occupied grid cells) of species between datasets
#'
#' The comparison is restricted to the species present in the dataset with
#' the fewest distinct species. For every combination of `id_*` columns the
#' number of distinct grid cells per species is divided by the total number
#' of distinct grid cells, and the result is spread into one column per
#' dataset. The `category` of each species is taken from the rows with
#' `id_dataset == "abv_data"`.
#'
#' @param data Data frame with columns `id_dataset`, `id_spat_res`,
#'   `species`, `mgrscode` and `category`; further `id_*` columns (such as
#'   `id_filter*`) are used as additional identifiers when present.
#' @return A data frame with one row per `id_spat_res`, `species` and
#'   `id_filter*` combination, with columns
#'   `n_dist_gridcells_<dataset>`, `percentage_<dataset>` and `category`.
range_comp <- function(data) {
  dataset_least_species <- data |>
    dplyr::group_by(.data$id_dataset) |>
    dplyr::summarise(
      n_species = dplyr::n_distinct(.data$species),
      .groups = "drop"
    ) |>
    dplyr::filter(.data$n_species == min(.data$n_species)) |>
    dplyr::pull("id_dataset")

  # `%in%` rather than `==`: several datasets can tie for the minimum, and
  # `==` would then silently recycle the vector of dataset names.
  species_list <- data |>
    dplyr::filter(.data$id_dataset %in% dataset_least_species) |>
    dplyr::distinct(.data$species) |>
    dplyr::pull("species")

  # Species categories as recorded in the ABV dataset
  species_category <- data |>
    dplyr::filter(.data$id_dataset == "abv_data") |>
    dplyr::distinct(.data$species, .data$category)

  comp_range_data <- data |>
    dplyr::filter(.data$species %in% species_list) |>
    # Total number of distinct grid cells per combination of id columns
    dplyr::group_by(dplyr::pick(dplyr::matches("^id_"))) |>
    dplyr::mutate(
      tot_n_dist_gridcells = dplyr::n_distinct(.data$mgrscode)
    ) |>
    dplyr::ungroup() |>
    # Number of distinct grid cells per species within those combinations.
    # Column names are given as strings: the `.data` pronoun is deprecated
    # in tidyselect contexts.
    dplyr::group_by(
      dplyr::pick(
        c("species", "tot_n_dist_gridcells", dplyr::matches("^id_"))
      )
    ) |>
    dplyr::summarise(
      n_dist_gridcells = dplyr::n_distinct(.data$mgrscode),
      .groups = "drop"
    ) |>
    dplyr::mutate(
      percentage = .data$n_dist_gridcells / .data$tot_n_dist_gridcells
    ) |>
    tidyr::pivot_wider(
      id_cols = c("id_spat_res", "species", dplyr::matches("^id_filter")),
      names_from = "id_dataset",
      values_from = c("n_dist_gridcells", "percentage")
    ) |>
    # `join_by(.data$species)` is not a valid join specification; join on
    # the column name instead.
    dplyr::left_join(species_category, by = "species")

  comp_range_data
}
3846

39-
trend_comp <- function(data, time_period){
47+
trend_comp <- function(data, time_period) {
48+
require("dplyr")
49+
require("tidyr")
50+
4051
dataset_least_species <- data |>
41-
group_by(id_dataset) |>
42-
summarize(n_species = n_distinct(species)) |>
43-
filter(n_species == min(n_species)) |>
44-
pull(id_dataset)
52+
group_by(.data$id_dataset) |>
53+
summarize(n_species = n_distinct(.data$species)) |>
54+
filter(.data$n_species == min(.data$n_species)) |>
55+
pull(.data$id_dataset)
4556

4657
species_list <- data |>
47-
filter(id_dataset == dataset_least_species) |>
48-
select(species) |>
58+
filter(.data$id_dataset == dataset_least_species) |>
59+
select(.data$species) |>
4960
distinct() |>
5061
pull()
5162

5263
trend_range_data <- data |>
53-
filter(species %in% species_list) |>
54-
my_group_by(c(c(species, !!sym(time_period)), matches("^id_"))) |>
64+
filter(.data$species %in% species_list) |>
65+
my_group_by(c(c(.data$species, !!sym(time_period)), matches("^id_"))) |>
5566
summarize(occurrence = sum(n)) |>
5667
ungroup() |>
57-
pivot_wider(id_cols = c(id_spat_res,
58-
species,
68+
pivot_wider(id_cols = c(.data$id_spat_res,
69+
.data$species,
5970
!!sym(time_period),
6071
matches("^id_filter")),
61-
names_from = id_dataset,
62-
values_from = occurrence) |>
72+
names_from = .data$id_dataset,
73+
values_from = .data$occurrence) |>
6374
drop_na() |>
64-
my_group_by(c(c(species, id_spat_res), matches("^id_filter"))) |>
65-
summarise(correlation = cor(abv_data,
66-
birdflanders,
75+
my_group_by(c(c(.data$species, .data$id_spat_res),
76+
matches("^id_filter"))) |>
77+
summarise(correlation = cor(.data$abv_data,
78+
.data$birdflanders,
6779
method = "pearson")) |>
6880
ungroup() |>
6981
left_join(data |>
70-
filter(id_dataset == "abv_data") |>
71-
distinct(species, category),
72-
by = join_by(species)) |>
82+
filter(.data$id_dataset == "abv_data") |>
83+
distinct(.data$species, .data$category),
84+
by = join_by(.data$species)) |>
7385
mutate(time_period = time_period)
7486

7587
return(trend_range_data)

0 commit comments

Comments
 (0)