Skip to content

Commit e3cc8a8

Browse files
committed
coding style
1 parent 0055666 commit e3cc8a8

File tree

3 files changed

+95
-84
lines changed

3 files changed

+95
-84
lines changed

source/R/download_occ_cube.R

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@ download_occ_cube <- function(sql_query, file, path, overwrite = FALSE) {
66
# Read the existing file if it already exists and overwrite = FALSE
77
file_path <- file.path(path, file)
88
if (file.exists(file_path) && !overwrite) {
9-
message(paste("File already exists. Reading existing file.",
10-
"Set `overwrite = TRUE` to overwrite file.", sep = "\n"))
9+
message(
10+
paste("File already exists. Reading existing file.",
11+
"Set `overwrite = TRUE` to overwrite file.",
12+
sep = "\n")
13+
)
1114

1215
occ_cube <- readr::read_csv(file = file_path, show_col_types = FALSE)
1316

@@ -34,7 +37,8 @@ download_occ_cube <- function(sql_query, file, path, overwrite = FALSE) {
3437
readr::write_csv(
3538
x = occ_cube,
3639
file = file_path,
37-
append = FALSE)
40+
append = FALSE
41+
)
3842

3943
# Return tibble
4044
return(occ_cube)

source/R/plot_cross_validation.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ plot_cross_validation <- function(
2222
paste0(.data$species, "\n(value: ",
2323
round(.data[[measure]], 3), ")"),
2424
NA)
25-
),
25+
),
2626
size = 2.5, max.overlaps = max.overlaps
2727
) +
2828
labs(x = "Proportion of occupied grid cells\nin ABV dataset",

source/dataset_bias_cv.Rmd

Lines changed: 87 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ We read in the data cube and add dataset names.
9797
```{r}
9898
birdcubeflanders_dataset_raw <- read_csv(
9999
here::here("data", "raw", "birdcubeflanders_dataset.csv"),
100-
show_col_types = FALSE)
100+
show_col_types = FALSE
101+
)
101102
102103
# Add dataset names
103104
birdcubeflanders_dataset <- get_dataset_names(birdcubeflanders_dataset_raw) %>%
@@ -136,14 +137,14 @@ birdcubeflanders_dataset %>%
136137
.by = "datasetname") %>%
137138
mutate(datasetname = reorder(datasetname, n_obs)) %>%
138139
ggplot(aes(x = datasetname, y = n_obs)) +
139-
geom_bar(stat = "identity",
140-
fill = "cornflowerblue") +
141-
geom_text(aes(label = n_obs), vjust = 0.3, hjust = -0.3, size = 3) +
142-
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
143-
scale_y_continuous(expand = expansion(mult = c(0.05, 0.2))) +
144-
labs(x = "", y = "Number of observations (sum)") +
145-
theme_minimal() +
146-
coord_flip()
140+
geom_bar(stat = "identity",
141+
fill = "cornflowerblue") +
142+
geom_text(aes(label = n_obs), vjust = 0.3, hjust = -0.3, size = 3) +
143+
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
144+
scale_y_continuous(expand = expansion(mult = c(0.05, 0.2))) +
145+
labs(x = "", y = "Number of observations (sum)") +
146+
theme_minimal() +
147+
coord_flip()
147148
```
148149

149150
# Comparing species prevalence
@@ -162,7 +163,8 @@ We categorise the species according to rarity:
162163

163164
```{r, message=FALSE}
164165
abv_data_total_sf <- read_sf(
165-
here::here("data", "interim", "abv_data_total.gpkg"))
166+
here::here("data", "interim", "abv_data_total.gpkg")
167+
)
166168
167169
# Cut rarity
168170
abv_data <- abv_data_total_sf %>%
@@ -186,7 +188,7 @@ abv_data <- abv_data_total_sf %>%
186188
species == "Dendrocopus major" ~ "Dendrocopos major",
187189
species == "Saxicola torquatus" ~ "Saxicola rubicola",
188190
TRUE ~ species
189-
)
191+
)
190192
) %>%
191193
arrange(year, mgrscode, species)
192194
@@ -251,21 +253,21 @@ Rare species are more prevalent in the cube dataset while more common species ar
251253
```{r}
252254
prevalence_df %>%
253255
ggplot(aes(x = abv, y = birdcube)) +
254-
geom_abline(slope = 1, intercept = 0, colour = "firebrick",
255-
linewidth = 1) +
256-
annotate("label", x = 0.8, y = 0.6, size = 3,
257-
label = "Higher prevalence in ABV",
258-
color = "black") +
259-
annotate("label", x = 0.4, y = 0.6, size = 3,
260-
label = "Higher prevalence in cube",
261-
color = "black") +
262-
geom_smooth(method = "loess", formula = "y ~ x",
263-
colour = "darkgrey", linetype = "dashed") +
264-
geom_point(aes(shape = rarity), size = 2) +
265-
labs(x = "Proportion of occupied grid cells\nin ABV dataset",
266-
y = "Proportion of occupied grid cells\nin cube dataset",
267-
shape = "Rarity") +
268-
theme_minimal()
256+
geom_abline(slope = 1, intercept = 0, colour = "firebrick",
257+
linewidth = 1) +
258+
annotate("label", x = 0.8, y = 0.6, size = 3,
259+
label = "Higher prevalence in ABV",
260+
color = "black") +
261+
annotate("label", x = 0.4, y = 0.6, size = 3,
262+
label = "Higher prevalence in cube",
263+
color = "black") +
264+
geom_smooth(method = "loess", formula = "y ~ x",
265+
colour = "darkgrey", linetype = "dashed") +
266+
geom_point(aes(shape = rarity), size = 2) +
267+
labs(x = "Proportion of occupied grid cells\nin ABV dataset",
268+
y = "Proportion of occupied grid cells\nin cube dataset",
269+
shape = "Rarity") +
270+
theme_minimal()
269271
```
270272

271273
We calculate error measures for the indicator based on leave-one-dataset-out cross-validation (use `remotes::install_github("b-cubed-eu/dubicube#25")`).
@@ -298,7 +300,8 @@ prevalence_cv %>%
298300
plot_cross_validation(
299301
prevalence_df,
300302
measure = "max_error",
301-
quant = quantile)
303+
quant = quantile
304+
)
302305
```
303306

304307
We look at the `r (1 - quantile) * 100` % species with the highest maximum absolute error.
@@ -327,14 +330,14 @@ birdcube_dataset_filtered %>%
327330
mutate(species = reorder(species, max_error, decreasing = TRUE)) %>%
328331
mutate(datasetname = tidytext::reorder_within(datasetname, n, species)) %>%
329332
ggplot(aes(x = datasetname, y = n)) +
330-
geom_bar(stat = "identity") +
331-
geom_text(aes(label = n), vjust = 0.3, hjust = -0.3, size = 3) +
332-
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
333-
labs(x = "", y = "Number of observations (count)") +
334-
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
335-
theme_minimal() +
336-
coord_flip() +
337-
facet_wrap(~species, ncol = 1, scales = "free")
333+
geom_bar(stat = "identity") +
334+
geom_text(aes(label = n), vjust = 0.3, hjust = -0.3, size = 3) +
335+
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
336+
labs(x = "", y = "Number of observations (count)") +
337+
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
338+
theme_minimal() +
339+
coord_flip() +
340+
facet_wrap(~species, ncol = 1, scales = "free")
338341
```
339342

340343
### Maximum relative error
@@ -347,7 +350,8 @@ prevalence_cv %>%
347350
plot_cross_validation(
348351
prevalence_df,
349352
measure = "max_rel_error",
350-
quant = quantile)
353+
quant = quantile
354+
)
351355
```
352356

353357
We look at the `r (1 - quantile) * 100` % species with the highest maximum relative error.
@@ -376,14 +380,14 @@ birdcube_dataset_filtered %>%
376380
mutate(species = reorder(species, max_rel_error, decreasing = TRUE)) %>%
377381
mutate(datasetname = tidytext::reorder_within(datasetname, n, species)) %>%
378382
ggplot(aes(x = datasetname, y = n)) +
379-
geom_bar(stat = "identity") +
380-
geom_text(aes(label = n), vjust = 0.3, hjust = -0.3, size = 3) +
381-
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
382-
labs(x = "", y = "Number of observations (count)") +
383-
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
384-
theme_minimal() +
385-
coord_flip() +
386-
facet_wrap(~species, ncol = 1, scales = "free")
383+
geom_bar(stat = "identity") +
384+
geom_text(aes(label = n), vjust = 0.3, hjust = -0.3, size = 3) +
385+
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
386+
labs(x = "", y = "Number of observations (count)") +
387+
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
388+
theme_minimal() +
389+
coord_flip() +
390+
facet_wrap(~species, ncol = 1, scales = "free")
387391
```
388392

389393
### Mean relative error
@@ -394,14 +398,15 @@ plot_cross_validation(
394398
prevalence_cv,
395399
prevalence_df,
396400
measure = "mre",
397-
quant = quantile)
401+
quant = quantile
402+
)
398403
```
399404

400405
```{r}
401406
prevalence_cv %>%
402407
distinct(species, rarity, mre) %>%
403408
ggplot() +
404-
geom_histogram(aes(x = mre, fill = rarity))
409+
geom_histogram(aes(x = mre, fill = rarity))
405410
```
406411

407412
We look at the `r (1 - quantile) * 100` % species with the highest mean relative error.
@@ -428,14 +433,14 @@ birdcube_dataset_filtered %>%
428433
mutate(species = reorder(species, mre, decreasing = TRUE)) %>%
429434
mutate(datasetname = tidytext::reorder_within(datasetname, n, species)) %>%
430435
ggplot(aes(x = datasetname, y = n)) +
431-
geom_bar(stat = "identity") +
432-
geom_text(aes(label = n), vjust = 0.3, hjust = -0.3, size = 3) +
433-
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
434-
labs(x = "", y = "Number of observations (count)") +
435-
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
436-
theme_minimal() +
437-
coord_flip() +
438-
facet_wrap(~species, ncol = 1, scales = "free")
436+
geom_bar(stat = "identity") +
437+
geom_text(aes(label = n), vjust = 0.3, hjust = -0.3, size = 3) +
438+
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
439+
labs(x = "", y = "Number of observations (count)") +
440+
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
441+
theme_minimal() +
442+
coord_flip() +
443+
facet_wrap(~species, ncol = 1, scales = "free")
439444
```
440445

441446
### Root mean squared error
@@ -446,14 +451,15 @@ plot_cross_validation(
446451
prevalence_cv,
447452
prevalence_df,
448453
measure = "rmse",
449-
quant = quantile)
454+
quant = quantile
455+
)
450456
```
451457

452458
```{r}
453459
prevalence_cv %>%
454460
distinct(species, rarity, rmse) %>%
455461
ggplot() +
456-
geom_histogram(aes(x = rmse, fill = rarity))
462+
geom_histogram(aes(x = rmse, fill = rarity))
457463
```
458464

459465
We look at the `r (1 - quantile) * 100` % species with the highest RMSE.
@@ -480,14 +486,14 @@ birdcube_dataset_filtered %>%
480486
mutate(species = reorder(species, rmse, decreasing = TRUE)) %>%
481487
mutate(datasetname = tidytext::reorder_within(datasetname, n, species)) %>%
482488
ggplot(aes(x = datasetname, y = n)) +
483-
geom_bar(stat = "identity") +
484-
geom_text(aes(label = n), vjust = 0.3, hjust = -0.3, size = 3) +
485-
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
486-
labs(x = "", y = "Number of observations (count)") +
487-
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
488-
theme_minimal() +
489-
coord_flip() +
490-
facet_wrap(~species, ncol = 1, scales = "free")
489+
geom_bar(stat = "identity") +
490+
geom_text(aes(label = n), vjust = 0.3, hjust = -0.3, size = 3) +
491+
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
492+
labs(x = "", y = "Number of observations (count)") +
493+
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
494+
theme_minimal() +
495+
coord_flip() +
496+
facet_wrap(~species, ncol = 1, scales = "free")
491497
```
492498

493499
We look at the `r (1 - quantile) * 100` % species with the lowest RMSE.
@@ -514,14 +520,14 @@ birdcube_dataset_filtered %>%
514520
mutate(species = reorder(species, rmse, decreasing = FALSE)) %>%
515521
mutate(datasetname = tidytext::reorder_within(datasetname, n, species)) %>%
516522
ggplot(aes(x = datasetname, y = n)) +
517-
geom_bar(stat = "identity") +
518-
geom_text(aes(label = n), vjust = 0.3, hjust = -0.3, size = 3) +
519-
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
520-
labs(x = "", y = "Number of observations (count)") +
521-
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
522-
theme_minimal() +
523-
coord_flip() +
524-
facet_wrap(~species, ncol = 1, scales = "free")
523+
geom_bar(stat = "identity") +
524+
geom_text(aes(label = n), vjust = 0.3, hjust = -0.3, size = 3) +
525+
scale_x_discrete(label = function(x) stringr::str_trunc(x, 40)) +
526+
labs(x = "", y = "Number of observations (count)") +
527+
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
528+
theme_minimal() +
529+
coord_flip() +
530+
facet_wrap(~species, ncol = 1, scales = "free")
525531
```
526532

527533
## Trends in error
@@ -538,7 +544,7 @@ We look at trends in CV error measures related to:
538544
```{r}
539545
prevalence_cv %>%
540546
ggplot(aes(x = rarity, y = rmse, colour = rarity)) +
541-
geom_boxplot()
547+
geom_boxplot()
542548
```
543549

544550
### Number of datasets
@@ -557,16 +563,17 @@ trend_dataset <- birdcube_dataset_filtered %>%
557563
shannon = -sum(p * log(p)),
558564
neff_datasets = exp(shannon),
559565
evenness = shannon / log(n_datasets),
560-
.groups = "drop")
566+
.groups = "drop"
567+
)
561568
```
562569

563570
Does RMSE change with number of datasets?
564571

565572
```{r}
566573
trend_dataset %>%
567574
ggplot(aes(x = n_datasets, y = rmse, colour = rarity)) +
568-
geom_point() +
569-
geom_smooth(method = "lm", formula = "y ~ x")
575+
geom_point() +
576+
geom_smooth(method = "lm", formula = "y ~ x")
570577
```
571578

572579
### Effective number of datasets
@@ -585,8 +592,8 @@ where $D_j$ the total number of datasets where species $j$ is present, and $p_i$
585592
```{r}
586593
trend_dataset %>%
587594
ggplot(aes(x = neff_datasets, y = rmse, colour = rarity)) +
588-
geom_point() +
589-
geom_smooth(method = "lm", formula = "y ~ x")
595+
geom_point() +
596+
geom_smooth(method = "lm", formula = "y ~ x")
590597
```
591598

592599
### Dataset evenness
@@ -603,8 +610,8 @@ $$
603610
```{r}
604611
trend_dataset %>%
605612
ggplot(aes(x = evenness, y = rmse, colour = rarity)) +
606-
geom_point() +
607-
geom_smooth(method = "lm", formula = "y ~ x")
613+
geom_point() +
614+
geom_smooth(method = "lm", formula = "y ~ x")
608615
```
609616

610617
# Species specific indicators

0 commit comments

Comments
 (0)