diff --git a/.gitignore b/.gitignore index 2cca830..a6f64cf 100644 --- a/.gitignore +++ b/.gitignore @@ -16,10 +16,10 @@ *.shx *.xls* *_files -*.csv *.gz -*.zip +*.geojson docs libs output renv/library +data/raw diff --git a/README.Rmd b/README.Rmd index 26a1750..d411896 100644 --- a/README.Rmd +++ b/README.Rmd @@ -46,6 +46,22 @@ This repository contains scripts to create the b3data [frictionless](https://doc This code is developed in context of **T5.5** of the [B-Cubed project](https://b-cubed.eu/). +## Order of execution + +Follow the steps below to run the scripts in a logical order. + +**1.** `create_b3data_package.Rmd` + +- creates the data package +- adds tabular resources +- writes data package + +**2.** `add_spatial_resources.Rmd` + +- loads data package +- adds spatial resources +- writes data package + ## 📦 The `b3data` data package - **Name**: `b3data` diff --git a/README.md b/README.md index e94df17..d1f406b 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,22 @@ on [Zenodo](https://zenodo.org/). This code is developed in context of **T5.5** of the [B-Cubed project](https://b-cubed.eu/). +## Order of execution + +Follow the steps below to run the scripts in a logical order. + +**1.** `create_b3data_package.Rmd` + +- creates the data package +- adds tabular resources +- writes data package + +**2.** `add_spatial_resources.Rmd` + +- loads data package +- adds spatial resources +- writes data package + ## 📦 The `b3data` data package - **Name**: `b3data` diff --git a/data/b3data_package/datapackage.json b/data/b3data_package/datapackage.json index c8464fb..1cfb066 100644 --- a/data/b3data_package/datapackage.json +++ b/data/b3data_package/datapackage.json @@ -10,9 +10,16 @@ "mediatype": "text/csv", "encoding": "utf-8", "title": "Occurrence cube for birds in Belgium (MGRS 10 km)", - "description": "Occurrence cube for birds in Belgium between 2000 en 2024. 
The taxonomical resolution is 'species' and the temporal resolution is 'year' Spatial aggregation is done using the MGRS grid at 10 km scale.", - "sources": ["GBIF Occurrence Download", "https://doi.org/10.15468/dl.y3wpwk"], - "licenses": [" CC BY-NC 4.0", "https://creativecommons.org/licenses/by-nc/4.0/", "Creative Commons Attribution-NonCommercial 4.0 International"], + "description": "Occurrence cube for birds in Belgium between 2000 en 2024. The taxonomical resolution is 'species' and the temporal resolution is 'year' Spatial aggregation is done using the MGRS grid at 10 km scale. Only grid cells that fall within the 10 km MGRS reference grid for mainland Belgium (see b3data: `mgrs10_refgrid_belgium.geojson`) are included.", + "sources": { + "title": "GBIF Occurrence Download", + "path": "https://doi.org/10.15468/dl.y3wpwk" + }, + "licenses": { + "name": "CC BY-NC 4.0", + "path": "https://creativecommons.org/licenses/by-nc/4.0/", + "title": "Creative Commons Attribution-NonCommercial 4.0 International" + }, "schema": { "fields": [ { @@ -49,27 +56,47 @@ } ] } + }, + { + "name": "mgrs10_refgrid_belgium", + "path": "mgrs10_refgrid_belgium.geojson", + "profile": "spatial-data-resource", + "format": "geojson", + "title": "MGRS 10 Km reference grid Belgium", + "description": "MGRS 10 Km reference grid for the mainland of Belgium.", + "licenses": { + "name": "CC0 1.0", + "path": "https://creativecommons.org/publicdomain/zero/1.0/", + "title": "Creative Commons Zero v1.0 Universal" + } } ], "title": "b3data: Data resources for the b3verse", "description": "This data package contains data resources to be used across the b3verse (https://docs.b-cubed.eu/guides/b3verse/). 
This includes example datasets (occurrence cubes) as well as spatial resources like reference grids or raster data.", - "keywords1": "data cubes", - "licenses.name": " CC BY-NC 4.0", + "keywords": ["data cubes", "b3verse", "frictionless", "biodiversity"], + "licenses": { + "name": "CC BY-NC 4.0", + "path": "https://creativecommons.org/licenses/by-nc/4.0/", + "title": "Creative Commons Attribution-NonCommercial 4.0 International" + }, "version": "0.1.0", - "sources.title": "b3data-scripts", - "contributors.title": "Ward Langeraert", - "contributors.path": "https://orcid.org/0000-0002-5900-8109", - "contributors.email": "ward.langeraert@inbo.be", - "contributors.role": "author", - "contributors.organization": "Research Institute for Nature and Forest (INBO)", - "contributors.title.1": "Toon Van Daele", - "contributors.path.1": "https://orcid.org/0000-0002-1362-853X", - "contributors.role.1": "contributor", - "contributors.organization.1": "Research Institute for Nature and Forest (INBO)", - "sources.path": "https://github.com/b-cubed-eu/b3data-scripts", - "licenses.path": "https://creativecommons.org/licenses/by-nc/4.0/", - "licenses.title": "Creative Commons Attribution-NonCommercial 4.0 International", - "keywords2": "b3verse", - "keywords3": "frictionless", - "keywords4": "biodiversity" + "sources": { + "title": "b3data-scripts", + "path": "https://github.com/b-cubed-eu/b3data-scripts" + }, + "contributors": [ + { + "title": "Ward Langeraert", + "path": "https://orcid.org/0000-0002-5900-8109", + "email": "ward.langeraert@inbo.be", + "role": "author", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Toon Van Daele", + "path": "https://orcid.org/0000-0002-1362-853X", + "role": "contributor", + "organization": "Research Institute for Nature and Forest (INBO)" + } + ] } diff --git a/source/R/add_manual_resource.R b/source/R/add_manual_resource.R new file mode 100644 index 0000000..78400f5 --- /dev/null +++ 
#' Add (or replace) a manually described resource in a data package
#'
#' Appends `new_resource` to `package$resources`. An existing resource
#' conflicts with `new_resource` when it has the same `name` or the same
#' `path`. Matching on the name alone is already a conflict because resource
#' names must be unique within a data package.
#'
#' @param package A data package: a list with a `resources` element that is a
#'   list of resource descriptors (each itself a list). `resources` may be
#'   empty or missing.
#' @param new_resource A named list describing the resource to add. It is
#'   expected to carry at least `name` and `path`.
#' @param replace If `TRUE` (default), conflicting resources are removed before
#'   `new_resource` is appended. If `FALSE` and a conflict exists, a warning is
#'   raised and `package` is returned unchanged.
#'
#' @return `package` with `new_resource` appended as the last resource, or the
#'   unchanged `package` when a conflict exists and `replace = FALSE`.
add_manual_resource <- function(package, new_resource, replace = TRUE) {
  # `identical()` always yields a single TRUE/FALSE. Comparing with `==` gives
  # a zero-length result when a property is missing (e.g. inline `data`
  # resources without `path`) and a longer result for multi-file paths, both
  # of which break `&&` and `if`.
  same_property <- function(res, property) {
    !is.null(new_resource[[property]]) &&
      identical(res[[property]], new_resource[[property]])
  }

  conflicts <- vapply(
    package$resources,
    function(res) same_property(res, "name") || same_property(res, "path"),
    logical(1)
  )

  if (any(conflicts)) {
    if (!replace) {
      warning(
        "Resource with same name or path already exists and ",
        "`replace = FALSE`. Skipping.",
        call. = FALSE
      )
      return(package)
    }
    # Drop every conflicting resource so the new one takes its place
    package$resources <- package$resources[!conflicts]
  }

  # Append new resource
  package$resources <- append(package$resources, list(new_resource))
  package
}
+ +# Methods + +This report focuses on adding spatial resources (e.g. GeoJSON, raster) to the **b3data** frictionless data package. + +## Source Data + +Spatial data need to be saved locally in the data package directory and then added manually to the descriptor file using `add_manual_resource()`, which appends the resource to the descriptor's list of resources. +We provide the reference grids for the occurrence cubes in the data package following this naming convention: + +``` +resource_name = "<grid><resolution>_refgrid_<region>" +``` + +Example: +`mgrs10_refgrid_belgium` → MGRS 10 km reference grid Belgium + +## Resource-level Metadata + +Each dataset includes the following metadata (see: [Frictionless resource spec](https://docs.ropensci.org/frictionless/articles/data-resource.html#properties-implementation)): + +- **profile**: `spatial-data-resource` +- **format**: e.g. `geojson` +- **title**: e.g. `"MGRS 10 km reference grid for Belgium"` +- **description**: concise explanation of content +- **sources**: source if applicable +- **licenses**: [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/) + +# Datasets +## MGRS 10 km reference grid Belgium + +Load the data. + +```{r} +# Read reference grid +utm10_bel <- st_read(file.path(data_path, "utm10_bel.shp")) + +# Visualise +ggplot() + geom_sf(data = utm10_bel) +``` + +Clean data and transform cell codes to MGRS. + +```{r} +mgrs10_refgrid_belgium <- utm10_bel %>% + select(utm_tag = TAG, geometry) %>% + mutate(mgrscode = ifelse(grepl("^[A-G]", utm_tag), paste0("31U", utm_tag), + paste0("32U", utm_tag))) %>% + select(mgrscode, geometry) +``` + +Write to data package and add metadata. 
# Write the cleaned reference grid as GeoJSON into the data package directory.
# `delete_dsn = TRUE` asks sf to remove an existing file first, so the report
# can be re-run without failing on an already written grid.
st_write(
  mgrs10_refgrid_belgium,
  file.path(package_path, "mgrs10_refgrid_belgium.geojson"),
  delete_dsn = TRUE
)

# Read package
b3data_package <- read_package(file.path(package_path, "datapackage.json"))

# Describe the spatial resource. `path` is relative to the package directory,
# where the GeoJSON file was written above.
mgrs10_resource <- list(
  name = "mgrs10_refgrid_belgium",
  path = "mgrs10_refgrid_belgium.geojson",
  profile = "spatial-data-resource",
  format = "geojson",
  title = "MGRS 10 Km reference grid Belgium",
  description = "MGRS 10 Km reference grid for the mainland of Belgium.",
  # The Data Package specification requires `licenses` to be an array of
  # license objects, hence a list containing one named list (serialised as
  # `[{...}]`) rather than a bare named list (serialised as `{...}`).
  licenses = list(
    list(
      name = "CC0 1.0",
      path = "https://creativecommons.org/publicdomain/zero/1.0/",
      title = "Creative Commons Zero v1.0 Universal"
    )
  )
)

# Add resource to data package, replacing a previously registered version so
# that re-running the report does not duplicate the resource
b3data_package <- add_manual_resource(
  b3data_package,
  mgrs10_resource,
  replace = TRUE
)

# Write package to directory
write_package(
  package = b3data_package,
  directory = package_path,
  compress = TRUE
)
**b3data** Frictionless data package for **tabular datasets**. -A separate report adds **spatial resources** (e.g. GeoJSON, raster) to the same data package. +This report focuses on creating the **b3data** frictionless data package for tabular datasets. +A separate report adds spatial resources (e.g. GeoJSON, raster) to the same data package. ## Data Package Overview @@ -133,7 +131,7 @@ bird_cube_query <- "SELECT # nolint end # Download and load cube -bird_cube_belgium_mgrs10 <- download_occ_cube( +bird_cube_belgium_mgrs10_full <- download_occ_cube( sql_query = bird_cube_query, file = "bird_cube_belgium_mgrs10.csv", path = data_path, @@ -141,6 +139,22 @@ bird_cube_belgium_mgrs10 <- download_occ_cube( ) ``` +We only select the grid cells that belong to the MGRS 10 km reference grid for the mainland of Belgium (see b3data `mgrs10_refgrid_belgium.geojson`). + +```{r} +# Read reference grid +utm10_bel <- st_read(file.path(data_path, "utm10_bel.shp")) + +# Visualise +ggplot() + geom_sf(data = utm10_bel) +``` + +```{r} +# Only select grid cells from reference grid +bird_cube_belgium_mgrs10 <- bird_cube_belgium_mgrs10_full %>% + filter(substring(mgrscode, 4) %in% utm10_bel$TAG) +``` + We create the package and add the dataset. 
```{r} @@ -153,13 +167,16 @@ b3data_package <- create_package() %>% "Occurrence cube for birds in Belgium between 2000 en 2024.", "The taxonomical resolution is 'species' and", "the temporal resolution is 'year'", - "Spatial aggregation is done using the MGRS grid at 10 km scale."), - sources = c( + "Spatial aggregation is done using the MGRS grid at 10 km scale.", + "Only grid cells that fall within the 10 km MGRS reference grid for", + "mainland Belgium (see b3data: `mgrs10_refgrid_belgium.geojson`) are", + "included."), + sources = list( title = "GBIF Occurrence Download", path = "https://doi.org/10.15468/dl.y3wpwk" ), - licenses = c( - name = " CC BY-NC 4.0", + licenses = list( + name = "CC BY-NC 4.0", path = "https://creativecommons.org/licenses/by-nc/4.0/", title = "Creative Commons Attribution-NonCommercial 4.0 International" ) @@ -193,45 +210,47 @@ b3data_package <- append( "resources like reference grids or raster data." )), after = 4) -b3data_package <- append(b3data_package, - c(keywords = c("data cubes", "b3verse", "frictionless", - "biodiversity")), - after = 5) b3data_package <- append( b3data_package, - c(licenses = c( - name = " CC BY-NC 4.0", + c(keywords = list(list( + "data cubes", "b3verse", "frictionless", "biodiversity" + ))), + after = 5) +b3data_package <- append( + b3data_package, + c(licenses = list(list( + name = "CC BY-NC 4.0", path = "https://creativecommons.org/licenses/by-nc/4.0/", title = "Creative Commons Attribution-NonCommercial 4.0 International" - )), + ))), after = 6) b3data_package <- append(b3data_package, c(version = "0.1.0"), after = 7) b3data_package <- append( b3data_package, - c(sources = c( + c(sources = list(list( title = "b3data-scripts", path = "https://github.com/b-cubed-eu/b3data-scripts" - )), + ))), after = 8) b3data_package <- append( b3data_package, - c(contributors = c( - c( + c(contributors = list(list( + list( title = "Ward Langeraert", path = "https://orcid.org/0000-0002-5900-8109", email = 
"ward.langeraert@inbo.be", role = "author", organization = "Research Institute for Nature and Forest (INBO)" ), - c( + list( title = "Toon Van Daele", path = "https://orcid.org/0000-0002-1362-853X", role = "contributor", organization = "Research Institute for Nature and Forest (INBO)" ) - )), + ))), after = 9) # Warning: append() drops the custom datapackage class.