Skip to content

Commit 3f71e9b

Browse files
committed
Remove automatic adding of EML otherEntity elements
I took this out because the data team no longer needs otherEntity elements automatically added to the metadata because they are hand-editing the metadata for other reasons which makes the metadata non-editable in the registry which is the main reason we added this in the first place. I also factored out the creation of the physical sub-tree
1 parent 5941b91 commit 3f71e9b

File tree

10 files changed

+75
-99
lines changed

10 files changed

+75
-99
lines changed

NAMESPACE

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ export(new_uuid)
6767
export(object_exists)
6868
export(parse_resource_map)
6969
export(path_join)
70-
export(pid_to_entity)
70+
export(pid_to_other_entity)
7171
export(publish_object)
7272
export(publish_update)
7373
export(remove_public_read)
@@ -81,7 +81,8 @@ export(set_rights_and_access)
8181
export(set_rights_holder)
8282
export(show_random_dataset)
8383
export(substitute_eml_party)
84-
export(sysmeta_to_entity)
84+
export(sysmeta_to_eml_physical)
85+
export(sysmeta_to_other_entity)
8586
export(theme_packages)
8687
export(update_object)
8788
export(update_package)

R/editing.R

Lines changed: 5 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -182,16 +182,16 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL)
182182
#' @param identifier (character) Manually specify the identifier for the new metadata object.
183183
#' @param use_doi (logical) Generate and use a DOI as the identifier for the updated metadata object.
184184
#' @param parent_resmap_pid (character) Optional. PID of a parent package to be updated.
185-
#' @param parent_metadata_pid (character) Optional. identifier for the metadata document of the parent package.
185+
#' @param parent_metadata_pid (character) Optional. Identifier for the metadata document of the parent package.
186+
#' @param parent_data_pids (character) Optional. Identifier for the data objects of the parent package.
186187
#' @param parent_child_pids (character) Optional. Resource map identifier(s) of child packages in the parent package.
187188
#' @param child_pids (character) Optional. Child packages resource map PIDs.
188189
#' @param metadata_path (character) Optional. Path to a metadata file to update with. If this is not set, the existing metadata document will be used.
189190
#' @param public (logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects).
190191
#' This applies to the new metadata PID and its resource map and data object.
191192
#' access policies are not affected.
192-
#' @param check_first (logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. This speeds up the function, especially when `data_pids` has many elements.
193+
#' @param check_first (logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements.
193194
#' @param parent_data_pids
194-
#' @param skip_other_entities (logical) Default FALSE Whether to create all EML otherEntity elements.
195195
#'
196196
#' @import dataone
197197
#' @import datapack
@@ -211,8 +211,7 @@ publish_update <- function(mn,
211211
parent_data_pids=NULL,
212212
parent_child_pids=NULL,
213213
public=TRUE,
214-
check_first=TRUE,
215-
skip_other_entities=FALSE) {
214+
check_first=TRUE) {
216215

217216
# Don't allow setting a dataset to private when it uses a DOI
218217
if (use_doi && !public) {
@@ -279,17 +278,6 @@ publish_update <- function(mn,
279278

280279
# get the metadata sysmeta from the node
281280
metadata_sysmeta <- dataone::getSystemMetadata(mn, metadata_pid)
282-
#eml_acl <- sysmeta_orig@accessPolicy
283-
# TODO: error check: md and sm existence
284-
285-
# get the resource_map (not used for now, could be used to get the list of data pids)
286-
# resmap <- rawToChar(dataone::getObject(mn, resource_map_pid))
287-
# resmap_sysmeta <- dataone::getSystemMetadata(mn, resource_map_pid)
288-
# TODO: error check: resmap existence, and ensure we don't fail hard
289-
290-
# Get the list of data files from the resource map
291-
# TODO: parse these from the resource map, rather than taking them as input
292-
# TODO: data_pids <- as.vector(c("pid1", "pid2", "pid3"))
293281

294282
log_message("Downloaded EML and sysmeta...")
295283

@@ -311,7 +299,7 @@ publish_update <- function(mn,
311299
# Generate a resource map PID from the new metadata PID
312300
resmap_updated_pid <- paste0("resource_map_", metadata_updated_pid)
313301

314-
# Update the metadata object
302+
# Update the metadata
315303

316304
# Replace packageId
317305
eml@packageId <- new("xml_attribute", metadata_updated_pid)
@@ -326,15 +314,6 @@ publish_update <- function(mn,
326314
eml_path <- tempfile()
327315
EML::write_eml(eml, eml_path)
328316

329-
# Add other entity fields (if appropriate)
330-
if (!is.null(data_pids) && !skip_other_entities) {
331-
eml <- set_other_entities(mn, eml_path, data_pids)
332-
}
333-
334-
if (skip_other_entities) {
335-
log_message("Skipping modifying EML otherEntity elements. This may result a data package that is not editable using the registry!")
336-
}
337-
338317
# Create System Metadata for the updated EML file
339318
metadata_updated_sysmeta <- new("SystemMetadata",
340319
identifier = metadata_updated_pid,

R/eml.R

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
#' Create an EML otherEntity sub-tree for the given PID.
77
#'
8-
#' Note this is a wrapper around sysmeta_to_entity which handles the task of
8+
#' Note this is a wrapper around sysmeta_to_other_entity which handles the task of
99
#' creating the EML otherEntity sub-ree.
1010
#'
1111
#' @param mn (MNode) Member Node where the PID is associated with an object.
@@ -16,7 +16,7 @@
1616
#' @export
1717
#'
1818
#' @examples
19-
pid_to_entity <- function(mn, pid, sysmeta=NULL) {
19+
pid_to_other_entity <- function(mn, pid, sysmeta=NULL) {
2020
stopifnot(class(mn) == "MNode")
2121
stopifnot(is.character(pid),
2222
nchar(pid) > 0)
@@ -35,18 +35,19 @@ pid_to_entity <- function(mn, pid, sysmeta=NULL) {
3535
stop(paste0("System Metadata for object with PID '", pid, "' did not have its fileName property set. This will result in 'NA' being set for the EML entityName and objectName (which we don't want). You need to give each data object a fileName property in its System Metadata. You can use the arcticdatautils::set_file_name() function to do this or you can use dataone::getSystemMetadata(), change the fileName property, and update it with dataone::updateSystemMetadata()"))
3636
}
3737

38-
sysmeta_to_entity(sysmeta)
38+
sysmeta_to_other_entity(mn, sysmeta)
3939
}
4040

4141
#' Create an EML otherEntity sub-tree for the given object.
4242
#'
43-
#' @param sysmeta (SystemMetadata) A SystemMedata instance for the object.
43+
#' @param mn (MemberNode) The Member Node the object lives on.
44+
#' @param sysmeta (SystemMetadata) The System Metadata of the object.
4445
#'
4546
#' @return (otherEntity) The XML sub-tree.
4647
#' @export
4748
#'
4849
#' @examples
49-
sysmeta_to_entity <- function(sysmeta) {
50+
sysmeta_to_other_entity <- function(mn, sysmeta) {
5051
stopifnot(class(sysmeta) == "SystemMetadata")
5152

5253
# otherEntity
@@ -64,6 +65,28 @@ sysmeta_to_entity <- function(sysmeta) {
6465
other_entity@entityType <- "Other"
6566

6667
# otherEntity/physical
68+
phys <- sysmeta_to_eml_physical(mn, sysmeta)
69+
other_entity@physical <- new("ListOfphysical", list(phys))
70+
71+
other_entity
72+
}
73+
74+
#' Create an EML physical subtree from a System Metadata instance
75+
#'
76+
#' This function creates a pre-canned EML physical subtree from what's in the
77+
#' System Metadata of an Object. Note that it sets an Online Distrubtion URL
78+
#' of the DataONE v2 resolve service for the PID.
79+
#'
80+
#' @param mn (MemberNode) The Member Node the object lives on.
81+
#' @param sysmeta (SystemMetadata) The System Metadata of the object.
82+
#'
83+
#' @return
84+
#' @export
85+
#'
86+
#' @examples
87+
sysmeta_to_eml_physical <- function(sysmeta) {
88+
stopifnot(class(sysmeta) == "SystemMetadata")
89+
6790
phys <- new("physical")
6891
phys@scope <- new("xml_attribute", "document")
6992

@@ -84,16 +107,13 @@ sysmeta_to_entity <- function(sysmeta) {
84107
phys@distribution <- new("ListOfdistribution", list(new("distribution")))
85108
phys@distribution[[1]]@scope <- new("xml_attribute", "document")
86109
phys@distribution[[1]]@online <- new("online")
87-
phys@distribution[[1]]@online@url <- new("url", paste0("ecogrid://knb/", get_doc_id(sysmeta)))
110+
phys@distribution[[1]]@online@url <- new("url", paste0("https://cn.dataone.org/cn/v2/resolve/", sysmeta@identifier))
88111

89112
slot(phys@distribution[[1]]@online@url, "function") <- new("xml_attribute", "download")
90113

91-
other_entity@physical <- new("ListOfphysical", list(phys))
92-
93-
other_entity
114+
phys
94115
}
95116

96-
97117
#' Creates and sets EML otherEntity elements to an existing EML document.
98118
#'
99119
#' This function isn't that smart. It will remove existing otherEntity elements
@@ -137,7 +157,7 @@ set_other_entities <- function(mn, path, pids) {
137157

138158
# Generate otherEntity elements for any new otherEntity elements that weren't
139159
# already in the EML
140-
new_entities <- lapply(pids[!(pids %in% current_entity_pids)], function(pid) pid_to_entity(mn, pid))
160+
new_entities <- lapply(pids[!(pids %in% current_entity_pids)], function(pid) pid_to_other_entity(mn, pid))
141161

142162
# Concatenate the existing and new otherEntity elements and put back in the
143163
# EML

R/helpers.R

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,6 @@ create_dummy_metadata <- function(mn, data_pids=NULL) {
1919
metadata_file <- tempfile()
2020
file.copy(original_file, metadata_file)
2121

22-
# Add otherEntity elements if needed
23-
if (!is.null(data_pids)) {
24-
metadata_file <- set_other_entities(mn, metadata_file, data_pids)
25-
}
26-
2722
sysmeta <- new("SystemMetadata",
2823
id = pid,
2924
formatId = "eml://ecoinformatics.org/eml-2.1.1",
Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/publish_update.Rd

Lines changed: 6 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/sysmeta_to_eml_physical.Rd

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test_editing.R

Lines changed: 4 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -77,48 +77,6 @@ test_that("we can update a resource map", {
7777
expect_equal(updated, get_package(mn, response$metadata)$resource_map)
7878
})
7979

80-
test_that("otherEntity elements are set when publishing an update", {
81-
if (!is_token_set(mn)) {
82-
skip("No token set. Skipping test.")
83-
}
84-
85-
# Setup
86-
library(dataone)
87-
library(EML)
88-
library(stringr)
89-
90-
# Create an initial package
91-
response <- create_dummy_package(mn)
92-
93-
# Create a new dummy object
94-
tmp <- tempfile()
95-
dummy_df <- data.frame(x = 1:100, y = 1:100)
96-
write.csv(dummy_df, file = tmp)
97-
object <- publish_object(mn, tmp, "text/csv")
98-
file.remove(tmp) # Remove dummy data file
99-
100-
# Note I use the wrong data pid argument here. I send the new data pid instead
101-
# which lets me update the data in a package when doing a publish_update()
102-
# call
103-
update <- publish_update(mn,
104-
response$metadata,
105-
response$resource_map,
106-
object)
107-
108-
109-
tmp <- tempfile()
110-
writeLines(rawToChar(getObject(mn, update$metadata)), con = tmp)
111-
doc <- read_eml(tmp)
112-
113-
expect_true(length(doc@dataset@otherEntity) == 1)
114-
115-
sysmeta <- getSystemMetadata(mn, object)
116-
doc_id <- get_doc_id(sysmeta)
117-
expect_true(str_detect(doc@dataset@otherEntity[[1]]@physical[[1]]@distribution[[1]]@online@url@.Data, doc_id))
118-
119-
file.remove(tmp)
120-
})
121-
12280
test_that("an object can be published with a SID", {
12381
if (!is_token_set(mn)) {
12482
skip("No token set. Skipping test.")
@@ -274,6 +232,10 @@ test_that("replication policies are honored when updating packages", {
274232

275233

276234
test_that("extra statements are maintained between updates", {
235+
if (!is_token_set(mn)) {
236+
skip("No token set. Skipping test.")
237+
}
238+
277239
pkg <- create_dummy_package(mn, 3)
278240

279241
# Add some PROV triples to the Resource Map

tests/testthat/test_eml.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ test_that("an EML otherEntity subtree can be created when the sysmeta has a file
88
sysmeta <- new("SystemMetadata")
99
sysmeta <- datapack::parseSystemMetadata(sysmeta, XML::xmlRoot(doc))
1010

11-
other_entity <- sysmeta_to_entity(sysmeta)
11+
other_entity <- sysmeta_to_other_entity(sysmeta)
1212

1313
# Check some rough properties of the subtree
1414
expect_is(other_entity, "otherEntity")
@@ -24,7 +24,7 @@ test_that("an EML otherEntity subtree can be created when the sysmeta doesn't ha
2424
sysmeta <- new("SystemMetadata")
2525
sysmeta <- datapack::parseSystemMetadata(sysmeta, XML::xmlRoot(doc))
2626

27-
other_entity <- sysmeta_to_entity(sysmeta)
27+
other_entity <- sysmeta_to_other_entity(sysmeta)
2828

2929
# Check some rough properties of the subtree
3030
expect_is(other_entity, "otherEntity")

0 commit comments

Comments
 (0)