@@ -296,6 +296,25 @@ def record_process_end(
296
296
self .generate_output_prov (outputs , process_run_id , process_name )
297
297
self .document .wasEndedBy (process_run_id , None , self .workflow_run_uri , when )
298
298
299
+
300
+
301
+ # def _add_nested_annotations(dataset, e: ProvEntity) -> ProvEntity:
302
+ # for annotation in dataset:
303
+ # if isinstance(dataset[annotation], (str, bool, int, float)): # check if these are all allowed types
304
+ # e.add_attributes({annotation: dataset[annotation]})
305
+ # else:
306
+ # nested_id = uuid.uuid4().urn
307
+ # # e.add_attributes({annotation: nested_id})
308
+ # nested_entity = self.document.entity(nested_id)
309
+ # e.add_attributes({annotation: nested_entity.identifier})
310
+ # nested_entity = _add_nested_annotations(dataset[annotation], nested_entity)
311
+ # return e
312
+
313
+ # def _propagate_input_annotations(entity):
314
+ # entity.add_attributes( {PROV_TYPE: SCHEMA["Dataset"]})
315
+ # entity = _add_nested_annotations(value[SCHEMA["Dataset"].uri], entity)
316
+ # return entity
317
+
299
318
def declare_file (self , value : CWLObjectType ) -> Tuple [ProvEntity , ProvEntity , str ]:
300
319
if value ["class" ] != "File" :
301
320
raise ValueError ("Must have class:File: %s" % value )
@@ -350,7 +369,9 @@ def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, st
350
369
file_entity .add_attributes ({CWLPROV ["nameext" ]: value ["nameext" ]})
351
370
self .document .specializationOf (file_entity , entity )
352
371
353
- def recursive_function (dataset , e : ProvEntity ) -> ProvEntity :
372
+
373
+
374
+ def _add_nested_annotations (dataset , e : ProvEntity ) -> ProvEntity :
354
375
for annotation in dataset :
355
376
if isinstance (dataset [annotation ], (str , bool , int , float )): # check if these are all allowed types
356
377
e .add_attributes ({annotation : dataset [annotation ]})
@@ -359,14 +380,17 @@ def recursive_function(dataset, e: ProvEntity) -> ProvEntity:
359
380
# e.add_attributes({annotation: nested_id})
360
381
nested_entity = self .document .entity (nested_id )
361
382
e .add_attributes ({annotation : nested_entity .identifier })
362
- nested_entity = recursive_function (dataset [annotation ], nested_entity )
383
+ nested_entity = _add_nested_annotations (dataset [annotation ], nested_entity )
363
384
return e
364
385
365
386
# Transfer input data annotations to provenance:
366
- if SCHEMA ["Dataset" ].uri in value :
387
+ if SCHEMA ["Dataset" ].uri in value : # TODO: modify so that both the http:// and https:// forms of the URI are recognized
367
388
entity .add_attributes ( {PROV_TYPE : SCHEMA ["Dataset" ]})
368
- entity = recursive_function (value [SCHEMA ["Dataset" ].uri ], entity )
389
+ entity = _add_nested_annotations (value [SCHEMA ["Dataset" ].uri ], entity )
369
390
391
+ # Transfer format annotations to provenance:
392
+ if "format" in value :
393
+ entity .add_attributes ({SCHEMA ["encodingFormat" ]: value ["format" ]})
370
394
371
395
# Check for secondaries
372
396
for sec in cast (
@@ -413,6 +437,7 @@ def declare_directory(self, value: CWLObjectType) -> ProvEntity:
413
437
(PROV_TYPE , RO ["Folder" ]),
414
438
],
415
439
)
440
+
416
441
# ORE description of ro:Folder, saved separately
417
442
coll_b = dir_bundle .entity (
418
443
dir_id ,
@@ -473,6 +498,20 @@ def declare_directory(self, value: CWLObjectType) -> ProvEntity:
473
498
coll .add_attributes (coll_attribs )
474
499
coll_b .add_attributes (coll_b_attribs )
475
500
501
+ # Propagate input data annotations
502
+ if SCHEMA ["Dataset" ].uri in value :
503
+ # coll_annotations = [ (PROV_TYPE, SCHEMA["Dataset"]) ]
504
+ coll .add_attributes ([ (PROV_TYPE , SCHEMA ["Dataset" ]) ])
505
+
506
+ dataset = value [SCHEMA ["Dataset" ].uri ]
507
+
508
+ for annotation in dataset :
509
+ if isinstance (dataset [annotation ], (str , bool , int , float )): # check if these are all allowed types
510
+ coll .add_attributes ({annotation : dataset [annotation ]})
511
+
512
+ if "format" in value :
513
+ coll .add_attributes ({SCHEMA ["encodingFormat" ]: value ["format" ]})
514
+
476
515
# Also Save ORE Folder as annotation metadata
477
516
ore_doc = ProvDocument ()
478
517
ore_doc .add_namespace (ORE )
0 commit comments