@@ -93,6 +93,8 @@ class PermissionError(OSError): # pylint: disable=redefined-builtin
93
93
PROVENANCE = os .path .join (METADATA , "provenance" )
94
94
WFDESC = Namespace ("wfdesc" , 'http://purl.org/wf4ever/wfdesc#' )
95
95
WFPROV = Namespace ("wfprov" , 'http://purl.org/wf4ever/wfprov#' )
96
+ RO = Namespace ("ro" , 'http://purl.org/wf4ever/ro#' )
97
+ ORE = Namespace ("ore" , 'http://www.openarchives.org/ore/terms/' )
96
98
FOAF = Namespace ("foaf" , 'http://xmlns.com/foaf/0.1/' )
97
99
SCHEMA = Namespace ("schema" , 'http://schema.org/' )
98
100
CWLPROV = Namespace ('cwlprov' , 'https://w3id.org/cwl/prov#' )
@@ -373,6 +375,9 @@ def host_provenance(document):
373
375
374
376
# info only, won't really be used by prov as sub-resources use /
375
377
self .document .add_namespace ('researchobject' , self .research_object .base_uri )
378
+ # annotations
379
+ self .metadata_ns = self .document .add_namespace ('metadata' ,
380
+ self .research_object .base_uri + _posix_path (METADATA ) + "/" )
376
381
# Pre-register provenance directory so we can refer to its files
377
382
self .provenance_ns = self .document .add_namespace ('provenance' ,
378
383
self .research_object .base_uri + _posix_path (PROVENANCE ) + "/" )
@@ -539,8 +544,10 @@ def declare_artefact(self, value):
539
544
# FIXME: Make consistent hash URIs for these
540
545
# that somehow include the type
541
546
# (so "1" != 1 != "1.0" != true)
542
- return self .document .entity (uuid .uuid4 ().urn ,
547
+ e = self .document .entity (uuid .uuid4 ().urn ,
543
548
{ provM .PROV_VALUE : value })
549
+ self .research_object .add_uri (e .identifier .uri )
550
+ return e
544
551
545
552
elif isinstance (value , (Text , str )):
546
553
# Save as string in UTF-8
@@ -598,37 +605,92 @@ def declare_artefact(self, value):
598
605
# attempt to keep it inside the value dictionary
599
606
dir_id = value .setdefault ("id" ,
600
607
uuid .uuid4 ().urn )
608
+
609
+ # New annotation file to keep the ORE Folder listing
610
+ ore_doc_fn = dir_id .replace ("urn:uuid:" , "directory-" ) + ".ttl"
611
+ dir_bundle = self .document .bundle (self .metadata_ns [ore_doc_fn ])
612
+
601
613
coll = self .document .entity (dir_id ,
602
614
[ (provM .PROV_TYPE , WFPROV ["Artifact" ]),
603
615
(provM .PROV_TYPE , PROV ["Collection" ]),
604
616
(provM .PROV_TYPE , PROV ["Dictionary" ]),
605
- (provM .PROV_TYPE , CWLPROV [ "Directory " ]),
617
+ (provM .PROV_TYPE , RO [ "Folder " ]),
606
618
])
607
- coll_attribs = [] # type ( tuple(Identifier, ProvEntity) )
619
+ # ORE description of ro:Folder, saved separately
620
+ coll_b = dir_bundle .entity (dir_id ,
621
+ [
622
+ (provM .PROV_TYPE , RO ["Folder" ]),
623
+ (provM .PROV_TYPE , ORE ["Aggregation" ]),
624
+ ])
625
+ self .document .mentionOf (dir_id + "#ore" , dir_id , dir_bundle .identifier )
626
+
627
+ dir_manifest = dir_bundle .entity (dir_bundle .identifier ,
628
+ {PROV ["type" ]: ORE ["ResourceMap" ],
629
+ ORE ["describes" ]: coll_b .identifier }
630
+ )
631
+
632
+ coll_attribs = [ # type ( tuple(Identifier, ProvEntity) )
633
+ (ORE ["isDescribedBy" ], dir_bundle .identifier )
634
+ ]
635
+ coll_b_attribs = [] # type ( tuple(Identifier, ProvEntity) )
636
+
608
637
# FIXME: .listing might not be populated yet - hopefully
609
638
# a later call to this method will sort that
610
639
for f in value .get ("listing" , []):
611
640
# Declare child-artifacts
612
641
entity = self .declare_artefact (f )
613
- # TODO: Add filename to PROV-dictionary
614
642
self .document .membership (coll , entity )
615
- # Membership
616
- m = self .document .entity (uuid .uuid4 ().urn )
617
- # Note: only support PROV-O style dictionary
643
+ # Membership relation aka our ORE Proxy
644
+ m_id = uuid .uuid4 ().urn
645
+ m = self .document .entity (m_id )
646
+ m_b = dir_bundle .entity (m_id )
647
+
648
+ # PROV-O style Dictionary
618
649
# https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
619
- # as prov.py do not easily allow PROV-N extensions
650
+ # ..as prov.py does not currently allow PROV-N extensions
651
+ # like hadDictionaryMember(..)
620
652
m .add_asserted_type (PROV ["KeyEntityPair" ])
653
+
621
654
m .add_attributes ({
622
655
PROV ["pairKey" ]: f ["basename" ],
623
- PROV ["pairEntity" ]: entity
656
+ PROV ["pairEntity" ]: entity ,
657
+ })
658
+
659
+ # As well as being a
660
+ # http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
661
+ m_b .add_asserted_type (RO ["FolderEntry" ])
662
+ m_b .add_asserted_type (ORE ["Proxy" ])
663
+ m_b .add_attributes ({
664
+ RO ["entryName" ]: f ["basename" ],
665
+ ORE ["proxyIn" ]: coll ,
666
+ ORE ["proxyFor" ]: entity ,
667
+
624
668
})
625
669
coll_attribs .append (
626
670
(PROV ["hadDictionaryMember" ], m ))
671
+ coll_b_attribs .append (
672
+ (ORE ["aggregates" ], m_b ))
673
+
627
674
coll .add_attributes (coll_attribs )
675
+ coll_b .add_attributes (coll_b_attribs )
676
+
677
+ # Also Save ORE Folder as annotation metadata
678
+ ore_doc = ProvDocument ()
679
+ ore_doc .add_namespace (ORE )
680
+ ore_doc .add_namespace (RO )
681
+ ore_doc .add_namespace (UUID )
682
+ ore_doc .add_bundle (dir_bundle )
683
+ ore_doc = ore_doc .flattened ()
684
+ ore_doc_path = posixpath .join (_posix_path (METADATA ), ore_doc_fn )
685
+ with self .research_object .write_bag_file (ore_doc_path ) as provenance_file :
686
+ ore_doc .serialize (provenance_file , format = "rdf" , rdf_format = "turtle" )
687
+ self .research_object .add_annotation (dir_id , [ore_doc_fn ], ORE ["isDescribedBy" ].uri )
688
+
628
689
if not coll_attribs :
629
690
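# Empty directory. FIXME: coll_attribs is pre-seeded with the ORE isDescribedBy entry, so this branch looks unreachable; coll_b_attribs may be the intended check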
630
691
coll .add_asserted_type (PROV ["EmptyCollection" ])
631
692
coll .add_asserted_type (PROV ["EmptyDictionary" ])
693
+ self .research_object .add_uri (coll .identifier .uri )
632
694
return coll
633
695
else :
634
696
# some other kind of dictionary?
@@ -661,6 +723,7 @@ def declare_artefact(self, value):
661
723
coll_attribs .append (
662
724
(PROV ["hadDictionaryMember" ], m ))
663
725
coll .add_attributes (coll_attribs )
726
+ self .research_object .add_uri (coll .identifier .uri )
664
727
return coll
665
728
666
729
# some other kind of Collection?
@@ -686,13 +749,16 @@ def declare_artefact(self, value):
686
749
# we would need to use PROV.Dictionary
687
750
# with numeric keys
688
751
self .document .membership (coll , e )
752
+ self .research_object .add_uri (coll .identifier .uri )
689
753
return coll
690
754
except TypeError :
691
755
_logger .warning ("Unrecognized type %s of %r" %
692
756
(type (value ), value ))
693
757
# Let's just fall back to Python repr()
694
- return self .document .entity (uuid .uuid4 ().urn ,
758
+ e = self .document .entity (uuid .uuid4 ().urn ,
695
759
{ provM .PROV_LABEL : repr (value ) })
760
+ self .research_object .add_uri (e .identifier .uri )
761
+ return e
696
762
697
763
def used_artefacts (self ,
698
764
job_order , # type: Dict
@@ -909,6 +975,7 @@ def __init__(self, temp_prefix_ro="tmp", orcid=None, full_name=None):
909
975
self .bagged_size = {} # type: Dict
910
976
self .tagfiles = set () # type: Set
911
977
self ._file_provenance = {} # type: Dict
978
+ self ._external_aggregates = [] # type: List[Dict]
912
979
self .annotations = [] # type: List[Dict]
913
980
self ._content_types = {} # type: Dict[Text,str]
914
981
@@ -1093,7 +1160,7 @@ def guess_mediatype(rel_path):
1093
1160
local_aggregate ["conformsTo" ] = prov_conforms_to [extension ]
1094
1161
return local_aggregate
1095
1162
1096
- aggregates = []
1163
+ aggregates = [] # type: List[Dict]
1097
1164
for path in self .bagged_size .keys ():
1098
1165
aggregate_dict = {} # type: Dict[str,Any]
1099
1166
@@ -1133,10 +1200,9 @@ def guess_mediatype(rel_path):
1133
1200
if path == posixpath .join (METADATA , "manifest.json" ):
1134
1201
# Should not really be there yet! But anyway, we won't
1135
1202
# aggregate it.
1136
-
1137
1203
continue
1138
1204
1139
- rel_aggregates = {}
1205
+ rel_aggregates = {} # type: Dict[str,Any]
1140
1206
# These are local paths like metadata/provenance - but
1141
1207
# we need to relativize them for our current directory,
1142
1208
# as we are saved in metadata/manifest.json
@@ -1152,8 +1218,16 @@ def guess_mediatype(rel_path):
1152
1218
# make new timestamp?
1153
1219
rel_aggregates .update (self ._self_made ())
1154
1220
aggregates .append (rel_aggregates )
1221
+ aggregates .extend (self ._external_aggregates )
1155
1222
return aggregates
1156
1223
1224
def add_uri(self, uri, when=None):
    # type: (str, Optional[datetime.datetime]) -> Dict
    """Record ``uri`` as an additional aggregate and return its entry.

    The entry is the dictionary produced by ``self._self_made(when=when)``
    with ``uri`` stored under the ``"uri"`` key. It is appended to
    ``self._external_aggregates`` so that it is included when the list of
    aggregates is built.

    :param uri: identifier to aggregate (e.g. a ``urn:uuid:`` entity URI).
    :param when: forwarded unchanged to ``_self_made``; presumably the
        creation timestamp -- confirm against ``_self_made``.
    :return: the very dictionary that was appended (not a copy), so later
        mutations by the caller are visible in the stored entry.
    """
    aggr = self._self_made(when=when)
    aggr["uri"] = uri
    # NOTE(review): no de-duplication is done here; adding the same URI
    # twice yields two entries -- confirm whether callers rely on that.
    self._external_aggregates.append(aggr)
    return aggr
1230
+
1157
1231
def add_annotation (self , about , content , motivatedBy = "oa:describing" ):
1158
1232
# type: (str, List[str], str) -> str
1159
1233
0 commit comments