@@ -19,6 +19,7 @@ knitr::opts_chunk$set(echo = TRUE)
1919``` {r, warning=FALSE, message=FALSE}
2020# Load packages
2121library(tidyverse) # Data wrangling and visualisation
22+ library(cowplot) # Nice figures
2223library(dubicube) # Cross-validation
2324library(sf) # Spatial objects
2425library(targets)
@@ -369,15 +370,16 @@ p_prevalence <- prevalence_df %>%
369370 ggplot(aes(x = abv, y = birdcube)) +
370371 geom_abline(slope = 1, intercept = 0, colour = "firebrick",
371372 linewidth = 1) +
372- annotate("label", x = 0.8, y = 0.6 , size = 3 ,
373+ annotate("label", x = 0.8, y = 0.1 , size = 4 ,
373374 label = "Higher prevalence in ABV",
374375 color = "black") +
375- annotate("label", x = 0.4 , y = 0.6 , size = 3 ,
376+ annotate("label", x = 0.2 , y = 0.95 , size = 4 ,
376377 label = "Higher prevalence in cube",
377378 color = "black") +
378379 geom_smooth(method = "loess", formula = "y ~ x",
379380 colour = "darkgrey", linetype = "dashed") +
380381 geom_point(aes(shape = rarity), size = 2) +
382+ coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
381383 labs(x = "Proportion of occupied grid cells\nin ABV dataset",
382384 y = "Proportion of occupied grid cells\nin cube dataset",
383385 shape = "Rarity") +
@@ -423,7 +425,8 @@ prevalence_cv %>%
423425 prevalence_df,
424426 measure = "max_error",
425427 quant = quantile
426- )
428+ ) +
429+ theme_bw(base_size = 12)
427430```
428431
429432We look at the ` r (1 - quantile) * 100 ` % species with the highest maximum absolute error.
@@ -473,7 +476,8 @@ prevalence_cv %>%
473476 prevalence_df,
474477 measure = "max_rel_error",
475478 quant = quantile
476- )
479+ ) +
480+ theme_bw(base_size = 12)
477481```
478482
479483We look at the ` r (1 - quantile) * 100 ` % species with the highest maximum relative error.
@@ -516,12 +520,14 @@ birdcube_dataset_filtered %>%
516520
517521``` {r, warning=FALSE, out.width="90%", message=FALSE}
518522# Mean relative error
519- plot_cross_validation(
523+ p_mre <- plot_cross_validation(
520524 prevalence_cv,
521525 prevalence_df,
522526 measure = "mre",
523527 quant = quantile
524- )
528+ ) +
529+ theme_bw(base_size = 12)
530+ p_mre
525531```
526532
527533``` {r}
@@ -604,12 +610,14 @@ birdcube_dataset_filtered %>%
604610
605611``` {r, warning=FALSE, out.width="90%", message=FALSE}
606612# Root mean squared error
607- plot_cross_validation(
613+ p_rmse <- plot_cross_validation(
608614 prevalence_cv,
609615 prevalence_df,
610616 measure = "rmse",
611617 quant = quantile
612- )
618+ ) +
619+ theme_bw(base_size = 12)
620+ p_rmse
613621```
614622
615623``` {r}
@@ -688,6 +696,40 @@ birdcube_dataset_filtered %>%
688696 facet_wrap(~species, ncol = 1, scales = "free")
689697```
690698
699+ ``` {r, echo=FALSE}
700+ # ----- Combine plots side by side -----
701+ bottom_row <- plot_grid(
702+ p_rmse +
703+ theme(plot.margin = margin(t = 12, r = -5, b = 5, l = 10)) +
704+ guides(shape = "none"),
705+ p_mre +
706+ theme(axis.title.y = element_blank(),
707+ plot.margin = margin(t = 12, r = 5, b = 5, l = 40)) +
708+ guides(shape = "none"),
709+ labels = c("B.", "C."),
710+ label_size = 20,
711+ ncol = 2,
712+ rel_widths = c(1, 1)
713+ )
714+
715+ # ----- Combine top + bottom -----
716+ final_plot <- plot_grid(
717+ p_prevalence +
718+ theme(axis.title.x = element_blank(),
719+ plot.margin = margin(t = 10, r = 60, b = 10, l = 80)),
720+ bottom_row,
721+ labels = "A.",
722+ label_size = 20,
723+ ncol = 1,
724+ rel_heights = c(1.2, 1) # top vs bottom height ratio
725+ )
726+
727+ # ----- Save to file -----
728+ ggsave(file.path(out_path, "prevalence_panels.png"),
729+ final_plot,
730+ width = 12, height = 10, dpi = 300)
731+ ```
732+
691733## Trends in error: RMSE
692734
693735We look at trends in CV error measures related to:
@@ -700,16 +742,26 @@ We look at trends in CV error measures related to:
700742### Differences in rarity
701743
702744``` {r}
703- prevalence_cv %>%
704- ggplot(aes(x = rarity, y = rmse, colour = rarity)) +
745+ p_rmse_rarity <- prevalence_cv %>%
746+ mutate(
747+ rarity = fct_recode(
748+ rarity,
749+ "Extremely\ncommon" = "Extremely common"
750+ )
751+ ) %>%
752+ ggplot(aes(x = rarity, y = rmse)) +
705753 geom_boxplot() +
754+ labs(x = "Rarity", y = "RMSE") +
706755 theme_bw(base_size = 12)
756+ p_rmse_rarity
707757```
708758
709759### Number of datasets
710760
761+ We cannot compute the evenness for species only found in a single dataset.
762+
711763``` {r}
712- trend_dataset <- birdcube_dataset_filtered %>%
764+ trend_dataset_rmse <- birdcube_dataset_filtered %>%
713765 left_join(prevalence_cv %>% distinct(species, rarity, rmse),
714766 by = join_by(species, rarity)) %>%
715767 select(mgrscode, year, species, datasetname, rarity, rmse) %>%
@@ -729,7 +781,7 @@ trend_dataset <- birdcube_dataset_filtered %>%
729781Does RMSE change with number of datasets?
730782
731783``` {r}
732- trend_dataset %>%
784+ trend_dataset_rmse %>%
733785 ggplot(aes(x = n_datasets, y = rmse, colour = rarity)) +
734786 geom_point() +
735787 geom_smooth(method = "lm", formula = "y ~ x") +
@@ -738,7 +790,7 @@ trend_dataset %>%
738790
739791``` {r}
740792grouped_lm(
741- data = trend_dataset ,
793+ data = trend_dataset_rmse ,
742794 group_var = "rarity",
743795 x_var = "n_datasets",
744796 y_var = "rmse"
759811where $D_j$ the total number of datasets where species $j$ is present, and $p_i$ the proportion of entries (rows) of species $j$ in dataset $i$.
760812
761813``` {r}
762- trend_dataset %>%
814+ trend_dataset_rmse %>%
763815 ggplot(aes(x = neff_datasets, y = rmse, colour = rarity)) +
764816 geom_point() +
765817 geom_smooth(method = "lm", formula = "y ~ x") +
@@ -768,7 +820,7 @@ trend_dataset %>%
768820
769821``` {r}
770822grouped_lm(
771- data = trend_dataset ,
823+ data = trend_dataset_rmse ,
772824 group_var = "rarity",
773825 x_var = "neff_datasets",
774826 y_var = "rmse"
@@ -786,17 +838,36 @@ J_{j} = \frac{- \sum_{i=1}^{D_j}p_{ij}\ln(p_{ij}))}{\ln(D_j)}
786838$$
787839<!-- spell-check: ignore:end -->
788840
841+
789842``` {r}
790- trend_dataset %>%
843+ trend_dataset_rmse %>%
791844 ggplot(aes(x = evenness, y = rmse, colour = rarity)) +
792845 geom_point() +
793846 geom_smooth(method = "lm", formula = "y ~ x") +
847+ labs(x = "Dataset evenness", y = "RMSE", colour = "Rarity") +
794848 theme_bw(base_size = 12)
795849```
796850
851+ ``` {r, echo=FALSE}
852+ p_rmse_evenness <- trend_dataset_rmse %>%
853+ ggplot(aes(x = evenness, y = rmse, colour = rarity)) +
854+ geom_point() +
855+ geom_smooth(method = "lm", formula = "y ~ x") +
856+ labs(x = "Dataset evenness", y = "RMSE", colour = "Rarity") +
857+ theme_bw(base_size = 12) +
858+ theme(
859+ legend.position = c(0.05, 0.05), # bottom-left inside
860+ legend.justification = c(0, 0), # anchor legend at bottom-left
861+ legend.background = element_rect(
862+ fill = "white", colour = "black", linewidth = 0.3
863+ ),
864+ legend.key = element_blank()
865+ )
866+ ```
867+
797868``` {r}
798869grouped_lm(
799- data = trend_dataset ,
870+ data = trend_dataset_rmse ,
800871 group_var = "rarity",
801872 x_var = "evenness",
802873 y_var = "rmse"
@@ -815,16 +886,24 @@ We look at trends in CV error measures related to:
815886### Differences in rarity
816887
817888``` {r}
818- prevalence_cv %>%
819- ggplot(aes(x = rarity, y = mre, colour = rarity)) +
889+ p_mre_rarity <- prevalence_cv %>%
890+ mutate(
891+ rarity = fct_recode(
892+ rarity,
893+ "Extremely\ncommon" = "Extremely common"
894+ )
895+ ) %>%
896+ ggplot(aes(x = rarity, y = mre)) +
820897 geom_boxplot() +
898+ labs(x = "Rarity", y = "MRE") +
821899 theme_bw(base_size = 12)
900+ p_mre_rarity
822901```
823902
824903### Number of datasets
825904
826905``` {r}
827- trend_dataset <- birdcube_dataset_filtered %>%
906+ trend_dataset_mre <- birdcube_dataset_filtered %>%
828907 left_join(prevalence_cv %>% distinct(species, rarity, mre),
829908 by = join_by(species, rarity)) %>%
830909 select(mgrscode, year, species, datasetname, rarity, mre) %>%
@@ -844,7 +923,7 @@ trend_dataset <- birdcube_dataset_filtered %>%
844923Does MRE change with number of datasets?
845924
846925``` {r}
847- trend_dataset %>%
926+ trend_dataset_mre %>%
848927 ggplot(aes(x = n_datasets, y = mre, colour = rarity)) +
849928 geom_point() +
850929 geom_smooth(method = "lm", formula = "y ~ x") +
@@ -853,7 +932,7 @@ trend_dataset %>%
853932
854933``` {r}
855934grouped_lm(
856- data = trend_dataset ,
935+ data = trend_dataset_mre ,
857936 group_var = "rarity",
858937 x_var = "n_datasets",
859938 y_var = "mre"
874953where $D_j$ the total number of datasets where species $j$ is present, and $p_i$ the proportion of entries (rows) of species $j$ in dataset $i$.
875954
876955``` {r}
877- trend_dataset %>%
956+ trend_dataset_mre %>%
878957 ggplot(aes(x = neff_datasets, y = mre, colour = rarity)) +
879958 geom_point() +
880959 geom_smooth(method = "lm", formula = "y ~ x") +
@@ -883,7 +962,7 @@ trend_dataset %>%
883962
884963``` {r}
885964grouped_lm(
886- data = trend_dataset ,
965+ data = trend_dataset_mre ,
887966 group_var = "rarity",
888967 x_var = "neff_datasets",
889968 y_var = "mre"
902981<!-- spell-check: ignore:end -->
903982
904983``` {r}
905- trend_dataset %>%
984+ p_mre_evenness <- trend_dataset_mre %>%
906985 ggplot(aes(x = evenness, y = mre, colour = rarity)) +
907986 geom_point() +
908987 geom_smooth(method = "lm", formula = "y ~ x") +
909- theme_bw(base_size = 12)
988+ labs(x = "Dataset evenness", y = "MRE", colour = "Rarity") +
989+ theme_bw(base_size = 12) +
990+ theme(
991+ legend.position = c(0.05, 0.05), # bottom-left inside
992+ legend.justification = c(0, 0), # anchor legend at bottom-left
993+ legend.background = element_rect(
994+ fill = "white", colour = "black", linewidth = 0.3
995+ ),
996+ legend.key = element_blank()
997+ )
998+ p_mre_evenness
910999```
9111000
9121001``` {r}
9131002grouped_lm(
914- data = trend_dataset ,
1003+ data = trend_dataset_mre ,
9151004 group_var = "rarity",
9161005 x_var = "evenness",
9171006 y_var = "mre"
9181007)
9191008```
9201009
1010+ ``` {r, echo=FALSE}
1011+ col_rmse <- plot_grid(
1012+ p_rmse_rarity +
1013+ coord_cartesian(ylim = c(0, 0.12)) +
1014+ theme(
1015+ plot.margin = margin(t = 10, r = 5, b = 5, l = 20),
1016+ axis.title.x = element_blank(),
1017+ plot.title = element_text(hjust = 0.5)
1018+ ),
1019+ p_rmse_evenness +
1020+ coord_cartesian(ylim = c(0, 0.12)) +
1021+ theme(plot.margin = margin(t = 5, r = 5, b = 5, l = 20)) +
1022+ guides(colour = "none"),
1023+ labels = c("A.", "C."),
1024+ label_size = 20,
1025+ ncol = 1
1026+ )
1027+
1028+ col_mre <- plot_grid(
1029+ p_mre_rarity +
1030+ coord_cartesian(ylim = c(0, 0.027)) +
1031+ theme(
1032+ plot.margin = margin(t = 10, r = 5, b = 5, l = 20),
1033+ axis.title.x = element_blank(),
1034+ plot.title = element_text(hjust = 0.5)
1035+ ),
1036+ p_mre_evenness +
1037+ coord_cartesian(ylim = c(0, 0.027)) +
1038+ theme(plot.margin = margin(t = 5, r = 5, b = 5, l = 5)),
1039+ labels = c("B.", "D."),
1040+ label_size = 20,
1041+ ncol = 1
1042+ )
1043+
1044+ p_error_trends <- plot_grid(
1045+ ggdraw() + draw_label("RMSE", fontface = "bold", size = 16),
1046+ ggdraw() + draw_label("MRE", fontface = "bold", size = 16),
1047+ col_rmse,
1048+ col_mre,
1049+ ncol = 2,
1050+ rel_heights = c(0.08, 1)
1051+ )
1052+
1053+ ggsave(file.path(out_path, "error_trends.png"),
1054+ p_error_trends,
1055+ width = 10, height = 10, dpi = 300)
1056+ ```
1057+
1058+
9211059## Error of prevalence estimates relative to ABV reference values
9221060Calculate the CV error compared to ABV prevalence (= "true" prevalence).
9231061
0 commit comments