Skip to content

Commit c6e7385

Browse files
committed
Added format annotations to provenance + started Directory annotations
1 parent 99fc196 commit c6e7385

File tree

1 file changed

+43
-4
lines changed

1 file changed

+43
-4
lines changed

cwltool/provenance_profile.py

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,25 @@ def record_process_end(
296296
self.generate_output_prov(outputs, process_run_id, process_name)
297297
self.document.wasEndedBy(process_run_id, None, self.workflow_run_uri, when)
298298

299+
300+
301+
# def _add_nested_annotations(dataset, e: ProvEntity) -> ProvEntity:
302+
# for annotation in dataset:
303+
# if isinstance(dataset[annotation], (str, bool, int, float)): # check if these are all allowed types
304+
# e.add_attributes({annotation: dataset[annotation]})
305+
# else:
306+
# nested_id = uuid.uuid4().urn
307+
# # e.add_attributes({annotation: nested_id})
308+
# nested_entity = self.document.entity(nested_id)
309+
# e.add_attributes({annotation: nested_entity.identifier})
310+
# nested_entity = _add_nested_annotations(dataset[annotation], nested_entity)
311+
# return e
312+
313+
# def _propagate_input_annotations(entity):
314+
# entity.add_attributes( {PROV_TYPE: SCHEMA["Dataset"]})
315+
# entity = _add_nested_annotations(value[SCHEMA["Dataset"].uri], entity)
316+
# return entity
317+
299318
def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, str]:
300319
if value["class"] != "File":
301320
raise ValueError("Must have class:File: %s" % value)
@@ -350,7 +369,9 @@ def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, st
350369
file_entity.add_attributes({CWLPROV["nameext"]: value["nameext"]})
351370
self.document.specializationOf(file_entity, entity)
352371

353-
def recursive_function(dataset, e: ProvEntity) -> ProvEntity:
372+
373+
374+
def _add_nested_annotations(dataset, e: ProvEntity) -> ProvEntity:
354375
for annotation in dataset:
355376
if isinstance(dataset[annotation], (str, bool, int, float)): # TODO: confirm that these are all of the allowed literal types
356377
e.add_attributes({annotation: dataset[annotation]})
@@ -359,14 +380,17 @@ def recursive_function(dataset, e: ProvEntity) -> ProvEntity:
359380
# e.add_attributes({annotation: nested_id})
360381
nested_entity = self.document.entity(nested_id)
361382
e.add_attributes({annotation: nested_entity.identifier})
362-
nested_entity = recursive_function(dataset[annotation], nested_entity)
383+
nested_entity = _add_nested_annotations(dataset[annotation], nested_entity)
363384
return e
364385

365386
# Transfer input data annotations to provenance:
366-
if SCHEMA["Dataset"].uri in value:
387+
if SCHEMA["Dataset"].uri in value: # TODO: modify so that both the http:// and https:// forms of the URI are recognized
367388
entity.add_attributes( {PROV_TYPE: SCHEMA["Dataset"]})
368-
entity = recursive_function(value[SCHEMA["Dataset"].uri], entity)
389+
entity = _add_nested_annotations(value[SCHEMA["Dataset"].uri], entity)
369390

391+
# Transfer format annotations to provenance:
392+
if "format" in value:
393+
entity.add_attributes({SCHEMA["encodingFormat"]: value["format"]})
370394

371395
# Check for secondaries
372396
for sec in cast(
@@ -413,6 +437,7 @@ def declare_directory(self, value: CWLObjectType) -> ProvEntity:
413437
(PROV_TYPE, RO["Folder"]),
414438
],
415439
)
440+
416441
# ORE description of ro:Folder, saved separately
417442
coll_b = dir_bundle.entity(
418443
dir_id,
@@ -473,6 +498,20 @@ def declare_directory(self, value: CWLObjectType) -> ProvEntity:
473498
coll.add_attributes(coll_attribs)
474499
coll_b.add_attributes(coll_b_attribs)
475500

501+
# Propagate input data annotations
502+
if SCHEMA["Dataset"].uri in value:
503+
# coll_annotations = [ (PROV_TYPE, SCHEMA["Dataset"]) ]
504+
coll.add_attributes([ (PROV_TYPE, SCHEMA["Dataset"]) ])
505+
506+
dataset = value[SCHEMA["Dataset"].uri]
507+
508+
for annotation in dataset:
509+
if isinstance(dataset[annotation], (str, bool, int, float)): # TODO: confirm that these are all of the allowed literal types
510+
coll.add_attributes({annotation: dataset[annotation]})
511+
512+
if "format" in value:
513+
coll.add_attributes({SCHEMA["encodingFormat"]: value["format"]})
514+
476515
# Also Save ORE Folder as annotation metadata
477516
ore_doc = ProvDocument()
478517
ore_doc.add_namespace(ORE)

0 commit comments

Comments
 (0)