@@ -470,7 +470,7 @@ prevalence_cv %>%
470470 theme_bw(base_size = 12)
471471```
472472
473- We look at the ` r (1 - quantile) * 100 ` % species with the highest maximum relative error.
473+ We look at the `r (1 - quantile) * 100`% of species with the highest mean relative error (MRE).
474474
475475``` {r}
476476top_spec_mre_df <- prevalence_cv %>%
@@ -504,6 +504,40 @@ birdcube_dataset_filtered %>%
504504 facet_wrap(~species, ncol = 1, scales = "free")
505505```
506506
507+ We look at the `r (1 - quantile) * 100`% of species with the lowest MRE.
508+
509+ ``` {r}
# Keep one row per species/rarity/mre combination, select the (1 - quantile)
# proportion of rows with the smallest mre, and sort them by ascending mre.
# NOTE(review): this reassigns `top_spec_mre_df`, a name an earlier chunk also
# assigns; confirm no later chunk still expects the earlier table.
510+ top_spec_mre_df <- prevalence_cv %>%
511+ distinct(species, rarity, mre) %>%
512+ slice_min(mre, prop = 1 - quantile) %>%
513+ arrange(mre)
514+
# Render the selection as a table with values shown to 5 digits.
515+ top_spec_mre_df %>%
516+ knitr::kable(digits = 5)
517+ ```
518+
519+ ``` {r}
# Species names of the five lowest-mre rows of top_spec_mre_df.
520+ top_mre_specs <- top_spec_mre_df %>%
521+ slice_min(mre, n = 5) %>%
522+ pull(species)
523+
# Horizontal bar chart of observation counts per dataset, one facet per
# selected species.
524+ birdcube_dataset_filtered %>%
525+ dplyr::filter(species %in% top_mre_specs) %>%
526+ count(species, datasetname) %>%
527+ left_join(top_spec_mre_df, by = join_by(species)) %>%
# Order facets by ascending mre, and bars within each facet by count.
528+ mutate(species = reorder(species, mre, decreasing = FALSE)) %>%
529+ mutate(datasetname = tidytext::reorder_within(datasetname, n, species)) %>%
530+ ggplot(aes(x = datasetname, y = n)) +
531+ geom_bar(stat = "identity") +
532+ geom_text(aes(label = n), vjust = 0.3, hjust = -0.3, size = 3) +
# Spell out `labels`: the previous `label =` relied on partial argument
# matching. Long dataset names are truncated to 40 characters.
# NOTE(review): labels are only truncated here; confirm whether the suffix
# added by reorder_within should be stripped (e.g. scale_x_reordered()).
533+ scale_x_discrete(labels = function(x) stringr::str_trunc(x, 40)) +
534+ labs(x = "", y = "Number of observations (count)") +
# Extra room at the upper end of the count axis so the count labels fit.
535+ scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
536+ theme_bw(base_size = 12) +
537+ coord_flip() +
538+ facet_wrap(~species, ncol = 1, scales = "free")
539+ ```
540+
507541### Root mean squared error
508542
509543``` {r, warning=FALSE, out.width="90%", message=FALSE}
0 commit comments