Skip to content

Commit 729f99a

Browse files
Merge pull request #195 from NCEAS/develop
add dataset categorization helper
2 parents 5acac22 + 2aba872 commit 729f99a

File tree

9 files changed

+182
-31
lines changed

9 files changed

+182
-31
lines changed

.github/workflows/R-CMD-check.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ jobs:
2424

2525
- uses: r-lib/actions/setup-pandoc@master
2626

27+
- uses: actions/checkout@master
28+
- uses: r-lib/actions/setup-tinytex@v1
29+
2730
- name: Cache R packages
2831
uses: actions/cache@v1
2932
if: runner.os != 'Windows'

.travis.yml

Lines changed: 0 additions & 26 deletions
This file was deleted.

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@ export(create_dummy_package)
99
export(create_dummy_package_full)
1010
export(create_dummy_parent_package)
1111
export(create_resource_map)
12+
export(eml_adcad_annotation)
1213
export(eml_add_entity_system)
1314
export(eml_add_publisher)
1415
export(eml_associated_party)
16+
export(eml_categorize_dataset)
1517
export(eml_contact)
1618
export(eml_creator)
1719
export(eml_ecso_annotation)

R/eml.R

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,3 +644,42 @@ eml_add_entity_system <- function(doc){
644644

645645

646646
}
647+
648+
#' Categorize a dataset with an annotation
#'
#' Creates one annotation per discipline from the ADC Academic Disciplines
#' (ADCAD) ontology
#' [here](https://bioportal.bioontology.org/ontologies/ADCAD/?p=classes&conceptid=root)
#' and appends them to the dataset-level annotations of the EML document `doc`
#' while retaining any existing annotations such as the sensitivity annotations.
#' For a list of available disciplines, see the link above.
#'
#' If the dataset has no `id`, one is derived from `doc$packageId` by replacing
#' every ":" with "-".
#'
#' @param doc (emld) An EML document
#' @param discipline (character) One or more disciplines in title case from the ADCAD ontology.
#'
#' @return doc (emld) An EML document with annotation added
#' @export
#' @examples
#' library(EML)
#' # read in any EML document
#' doc <- read_eml(system.file("extdata/strix-pacific-northwest.xml", package="dataone"))
#' # add the dataset categories
#' doc <- eml_categorize_dataset(doc, c("Soil Science", "Ecology"))
#'
eml_categorize_dataset <- function(doc, discipline) {

  stopifnot(inherits(doc, "emld"))

  if (is.null(doc$dataset$id)) {
    doc$dataset$id <- gsub(":", "-", doc$packageId)
  }

  existing_anns <- doc$dataset$annotation

  # A lone existing annotation is a single named list (propertyURI, valueURI),
  # whereas several annotations are an unnamed list of such lists. Only the
  # lone case needs wrapping; wrapping unconditionally would nest a multi-
  # annotation list one level too deep and would turn "no annotations" (NULL)
  # into a spurious NULL element.
  if (!is.null(names(existing_anns))) {
    existing_anns <- list(existing_anns)
  }

  # unname() so names on `discipline` do not leak into the annotation list
  new_anns <- lapply(unname(discipline), eml_adcad_annotation)

  doc$dataset$annotation <- c(existing_anns, new_anns)

  doc
}

R/ontology.R

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,20 @@
1212
read_ontology <- function(ontology_name) {
  # Fail early on anything that is not a single, non-missing name so the
  # comparisons below never see a vector or NA.
  stopifnot(
    is.character(ontology_name),
    length(ontology_name) == 1L,
    !is.na(ontology_name)
  )

  # get the owl file from github (mosaic, ecso) or BioPortal (ADCAD)
  if (ontology_name == "mosaic") {
    ann_url <-
      "https://raw.githubusercontent.com/DataONEorg/sem-prov-ontologies/main/MOSAiC/MOSAiC.owl"
    ont <- rdflib::rdf_parse(pins::pin(ann_url),
                             format = "rdfxml")
  } else if (ontology_name == "ecso") {
    ann_url <-
      "https://raw.githubusercontent.com/DataONEorg/sem-prov-ontologies/ECSO8-add_non-carbon_measurements/observation/ECSO8.owl"
    ont <- rdflib::rdf_parse(pins::pin(ann_url),
                             format = "rdfxml")
  } else if (ontology_name == "ADCAD") {
    # NOTE(review): this URL embeds a BioPortal API key in source control.
    # Consider reading it from an environment variable instead.
    # Unlike the two branches above, the URL is handed to rdf_parse() directly
    # rather than going through pins::pin().
    ann_url <- "https://data.bioontology.org/ontologies/ADCAD/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb&download_format=rdf"
    ont <- rdflib::rdf_parse(ann_url,
                             format = "rdfxml")
  } else {
    # Previously an unknown name fell through and returned NULL silently,
    # which only surfaced later as an obscure failure in the caller.
    stop("Unknown ontology '", ontology_name,
         "'. Expected one of: 'mosaic', 'ecso', 'ADCAD'.",
         call. = FALSE)
  }

  # Return the parsed ontology explicitly instead of relying on the invisible
  # value of the last assignment.
  ont
}
@@ -91,3 +96,43 @@ eml_ecso_annotation <- function(valueLabel){
9196
valueURI = annotations$iri)
9297
)
9398
}
99+
100+
#' Given a term from the ADC Academic Disciplines (ADCAD) ontology, produce the corresponding annotation
#'
#' Reduces the amount of copy pasting needed. The ontology is read with
#' `read_ontology("ADCAD")` on every call, and the label is looked up among
#' the classes that carry an `rdfs:label`.
#'
#' @param valueLabel (character) One of the disciplines found in
#' [ADCAD](https://bioportal.bioontology.org/ontologies/ADCAD/?p=classes&conceptid=root)
#'
#' @return list - a formatted EML annotation
#' @export
#'
#' @examples eml_adcad_annotation("Soil Science")
eml_adcad_annotation <- function(valueLabel) {

  # A vector of labels would be silently recycled in the comparison below and
  # yield a malformed annotation, so insist on exactly one label.
  stopifnot(is.character(valueLabel), length(valueLabel) == 1L)

  adcad <- read_ontology("ADCAD")

  # every owl:Class in the ontology together with its human-readable label
  query <-
    "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT ?iri ?label
    WHERE {
      ?iri rdf:type <http://www.w3.org/2002/07/owl#Class> .
      ?iri rdfs:label ?label .
    }"

  df <- suppressMessages(rdflib::rdf_query(adcad, query))

  if (!(valueLabel %in% df$label)) {
    stop("'", valueLabel, "' is not a label in the ADCAD ontology.",
         call. = FALSE)
  }

  # which() drops NA comparisons, matching what dplyr::filter() did, and
  # avoids the bare `label` column name that R CMD check flags.
  annotations <- df[which(df$label == valueLabel), , drop = FALSE]

  list(
    propertyURI = list(label = "theme",
                       propertyURI = "http://www.w3.org/ns/dcat#theme"),
    valueURI = list(label = annotations$label,
                    valueURI = annotations$iri)
  )
}
138+

man/eml_adcad_annotation.Rd

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/eml_categorize_dataset.Rd

Lines changed: 31 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_ontology_concepts.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test_eml.R

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,3 +368,39 @@ test_that('Identifier systems can be added', {
368368
expect_equal(doc$dataset$otherEntity[[1]]$system, "https://tools.ietf.org/html/rfc4122")
369369
expect_equal(doc$dataset$otherEntity[[2]]$system, "https://search.dataone.org")
370370
})
371+
372+
373+
test_that('Datasets can be categorized', {

  # Both documents are fetched from the Arctic Data Center, so this test needs
  # network access.
  # NOTE(review): presumably this document already carries dataset annotations
  # (the cases further down are the "no previous annotations" ones) - confirm.
  annotated_url <- "https://arcticdata.io/metacat/d1/mn/v2/object/doi%3A10.18739%2FA27D2Q84F"
  # document without previous dataset annotations
  bare_url <- "https://arcticdata.io/metacat/d1/mn/v2/object/doi%3A10.18739%2FA2PV6B79W"

  annotation_labels <- function(doc) {
    eml_get_simple(doc$dataset$annotation, "label")
  }

  # two disciplines
  doc <- read_eml(annotated_url)

  doc <- eml_categorize_dataset(doc, c("Atmospheric Science", "Chemistry"))

  expect_true(eml_validate(doc))
  expect_true("Atmospheric Science" %in% annotation_labels(doc))
  expect_true("Chemistry" %in% annotation_labels(doc))

  # one discipline
  doc <- read_eml(annotated_url)

  doc <- eml_categorize_dataset(doc, c("Atmospheric Science"))

  expect_true(eml_validate(doc))
  expect_true("Atmospheric Science" %in% annotation_labels(doc))

  # no previous annotations with one

  doc <- read_eml(bare_url)
  doc <- eml_categorize_dataset(doc, c("Oceanography"))

  expect_true(eml_validate(doc))
  # validation alone does not prove the annotation was actually added
  expect_true("Oceanography" %in% annotation_labels(doc))

  # no previous annotations with two

  doc <- read_eml(bare_url)
  doc <- eml_categorize_dataset(doc, c("Oceanography", "Mathematics"))

  expect_true(eml_validate(doc))
  expect_true("Oceanography" %in% annotation_labels(doc))
  expect_true("Mathematics" %in% annotation_labels(doc))

})

0 commit comments

Comments
 (0)