Skip to content

Commit 79f8b74

Browse files
committed
added support for file arrays + https://schema.org
1 parent 7a509b6 commit 79f8b74

File tree

1 file changed

+27
-35
lines changed

1 file changed

+27
-35
lines changed

cwltool/provenance_profile.py

Lines changed: 27 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ def copy_job_order(
7676
return customised_job
7777

7878

79+
7980
class ProvenanceProfile:
8081
"""
8182
Provenance profile.
@@ -296,24 +297,23 @@ def record_process_end(
296297
self.generate_output_prov(outputs, process_run_id, process_name)
297298
self.document.wasEndedBy(process_run_id, None, self.workflow_run_uri, when)
298299

299-
300-
301-
# def _add_nested_annotations(dataset, e: ProvEntity) -> ProvEntity:
302-
# for annotation in dataset:
303-
# if isinstance(dataset[annotation], (str, bool, int, float)): # check if these are all allowed types
304-
# e.add_attributes({annotation: dataset[annotation]})
305-
# else:
306-
# nested_id = uuid.uuid4().urn
307-
# # e.add_attributes({annotation: nested_id})
308-
# nested_entity = self.document.entity(nested_id)
309-
# e.add_attributes({annotation: nested_entity.identifier})
310-
# nested_entity = _add_nested_annotations(dataset[annotation], nested_entity)
311-
# return e
312-
313-
# def _propagate_input_annotations(entity):
314-
# entity.add_attributes( {PROV_TYPE: SCHEMA["Dataset"]})
315-
# entity = _add_nested_annotations(value[SCHEMA["Dataset"].uri], entity)
316-
# return entity
300+
def _add_nested_annotations(self, annotation_key: str, annotation_value, e: ProvEntity) -> ProvEntity:
    """
    Propagate one input data annotation to provenance.

    Scalar values are attached to ``e`` as a stringified attribute.
    A sequence is flattened: every item is recorded on ``e`` under the
    same ``annotation_key``. A mapping becomes a new entity, identified
    by a fresh UUID URN, that ``e`` references; the mapping's items are
    then added to that nested entity recursively.

    :param annotation_key: attribute name; keys containing
        ``https://schema.org/`` are rewritten through the ``SCHEMA`` namespace.
    :param annotation_value: scalar, sequence or mapping to record.
    :param e: entity that receives the annotation.
    :return: the same entity ``e``.
    """
    # Normalise https://schema.org/ keys through the SCHEMA namespace
    # (presumably its http:// form -- confirm against the SCHEMA definition).
    schema2_uri = "https://schema.org/"
    if schema2_uri in annotation_key:
        annotation_key = SCHEMA[annotation_key.replace(schema2_uri, "")].uri

    if isinstance(annotation_value, MutableSequence):
        # A list has no keys of its own: indexing it with its own elements
        # (as the mapping branch below does) raises TypeError or picks the
        # wrong item. Record each element under the same key instead.
        for item in annotation_value:
            self._add_nested_annotations(annotation_key, item, e)
    elif isinstance(annotation_value, MutableMapping):
        # Nested structure: create a fresh entity and link it from ``e``.
        nested_id = uuid.uuid4().urn
        nested_entity = self.document.entity(nested_id)
        e.add_attributes({annotation_key: nested_entity.identifier})
        for nested_key, nested_value in annotation_value.items():
            self._add_nested_annotations(nested_key, nested_value, nested_entity)
    else:
        e.add_attributes({annotation_key: str(annotation_value)})
    return e
317317

318318
def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, str]:
319319
if value["class"] != "File":
@@ -369,24 +369,16 @@ def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, st
369369
file_entity.add_attributes({CWLPROV["nameext"]: value["nameext"]})
370370
self.document.specializationOf(file_entity, entity)
371371

372-
372+
# Identify all schema annotations
373+
schema_annotations = dict([(v, value[v]) for v in value.keys() if 'schema.org' in v])
373374

374-
def _add_nested_annotations(dataset, e: ProvEntity) -> ProvEntity:
375-
for annotation in dataset:
376-
if isinstance(dataset[annotation], (str, bool, int, float)): # check if these are all allowed types
377-
e.add_attributes({annotation: dataset[annotation]})
378-
else:
379-
nested_id = uuid.uuid4().urn
380-
# e.add_attributes({annotation: nested_id})
381-
nested_entity = self.document.entity(nested_id)
382-
e.add_attributes({annotation: nested_entity.identifier})
383-
nested_entity = _add_nested_annotations(dataset[annotation], nested_entity)
384-
return e
385-
386-
# Transfer input data annotations to provenance:
387-
if SCHEMA["Dataset"].uri in value: # TODO: modify so both http:/ and https:/ are recognized
388-
entity.add_attributes( {PROV_TYPE: SCHEMA["Dataset"]})
389-
entity = _add_nested_annotations(value[SCHEMA["Dataset"].uri], entity)
375+
# Transfer SCHEMA annotations to provenance
376+
for s in schema_annotations:
377+
if "additionalType" in s:
378+
additional_type = schema_annotations[s].split(sep='/')[-1] # find better method?
379+
entity.add_attributes( {PROV_TYPE: SCHEMA[additional_type]})
380+
else:
381+
entity = self._add_nested_annotations(s, schema_annotations[s], entity)
390382

391383
# Transfer format annotations to provenance:
392384
if "format" in value:

0 commit comments

Comments
 (0)