@@ -581,6 +581,113 @@ def declare_file(self, value):
581
581
assert entity
582
582
return file_entity , entity , checksum
583
583
584
def declare_directory(self, value):
    # type: (Dict) -> ProvEntity
    """Register a ``Directory`` value and its listing as provenance entities.

    The directory is declared twice under the same identifier:

    * in ``self.document`` as a ``wfprov:Artifact`` / ``prov:Collection`` /
      ``prov:Dictionary`` / ``ro:Folder`` entity, and
    * in a dedicated bundle as an ``ro:Folder`` / ``ore:Aggregation``,
      which is additionally serialized as a Turtle annotation file.

    Every item of ``value["listing"]`` is declared through
    ``self.declare_artefact`` and linked to the directory both as a
    PROV dictionary member and as an ORE proxy / RO folder entry.

    Side effects:
        * ``value["@id"]`` is set (to a freshly minted UUID URN) if absent.
        * ``value["listing"]`` may be populated via ``get_listing`` if absent.
        * A ``directory-*.ttl`` file is written through
          ``self.research_object.write_bag_file`` and registered with
          ``self.research_object.add_annotation``.
        * The directory URI is registered with
          ``self.research_object.add_uri``.

    :param value: dictionary describing the directory; its entries in
        ``"listing"`` must each carry a ``"basename"`` key.
    :return: the directory entity declared in ``self.document``.
    """
    # FIXME: Calculate a hash-like identifier for directory
    # so we get same value if it's the same filenames/hashes
    # in a different location.
    # For now, mint a new UUID to identify this directory, but
    # attempt to keep it inside the value dictionary
    dir_id = value.setdefault("@id", uuid.uuid4().urn)

    # New annotation file to keep the ORE Folder listing
    ore_doc_fn = dir_id.replace("urn:uuid:", "directory-") + ".ttl"
    dir_bundle = self.document.bundle(self.metadata_ns[ore_doc_fn])

    coll = self.document.entity(dir_id, [
        (provM.PROV_TYPE, WFPROV["Artifact"]),
        (provM.PROV_TYPE, PROV["Collection"]),
        (provM.PROV_TYPE, PROV["Dictionary"]),
        (provM.PROV_TYPE, RO["Folder"]),
    ])
    # ORE description of ro:Folder, saved separately
    coll_b = dir_bundle.entity(dir_id, [
        (provM.PROV_TYPE, RO["Folder"]),
        (provM.PROV_TYPE, ORE["Aggregation"]),
    ])
    self.document.mentionOf(dir_id + "#ore", dir_id, dir_bundle.identifier)

    # Declare the resource map inside the bundle. The returned entity is
    # not needed afterwards; the call is kept for its side effect of
    # adding the record to dir_bundle.
    dir_bundle.entity(dir_bundle.identifier, {
        PROV["type"]: ORE["ResourceMap"],
        ORE["describes"]: coll_b.identifier,
    })

    # Attributes are collected as (attribute identifier, value) tuples
    # and attached in one go once the listing has been walked.
    coll_attribs = [
        (ORE["isDescribedBy"], dir_bundle.identifier),
    ]
    coll_b_attribs = []

    # FIXME: .listing might not be populated yet - hopefully
    # a later call to this method will sort that
    is_empty = True

    if "listing" not in value:
        # The assert also guards against make_fs_access being unset
        # before it is called.
        assert self.research_object.make_fs_access
        fsaccess = self.research_object.make_fs_access("")
        get_listing(fsaccess, value)

    for child in value.get("listing", []):
        is_empty = False
        # Declare child-artifacts
        entity = self.declare_artefact(child)
        self.document.membership(coll, entity)

        # Membership relation aka our ORE Proxy; the same identifier is
        # used in the main document and in the directory bundle.
        m_id = uuid.uuid4().urn
        m = self.document.entity(m_id)
        m_b = dir_bundle.entity(m_id)

        # PROV-O style Dictionary
        # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
        # ..as prov.py does not currently allow PROV-N extensions
        # like hadDictionaryMember(..)
        m.add_asserted_type(PROV["KeyEntityPair"])
        m.add_attributes({
            PROV["pairKey"]: child["basename"],
            PROV["pairEntity"]: entity,
        })

        # As well as being a
        # http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
        m_b.add_asserted_type(RO["FolderEntry"])
        m_b.add_asserted_type(ORE["Proxy"])
        m_b.add_attributes({
            RO["entryName"]: child["basename"],
            ORE["proxyIn"]: coll,
            ORE["proxyFor"]: entity,
        })

        coll_attribs.append((PROV["hadDictionaryMember"], m))
        coll_b_attribs.append((ORE["aggregates"], m_b))

    coll.add_attributes(coll_attribs)
    coll_b.add_attributes(coll_b_attribs)

    # Also save ORE Folder as annotation metadata
    ore_doc = ProvDocument()
    ore_doc.add_namespace(ORE)
    ore_doc.add_namespace(RO)
    ore_doc.add_namespace(UUID)
    ore_doc.add_bundle(dir_bundle)
    ore_doc = ore_doc.flattened()
    ore_doc_path = posixpath.join(_posix_path(METADATA), ore_doc_fn)
    with self.research_object.write_bag_file(ore_doc_path) as provenance_file:
        ore_doc.serialize(provenance_file, format="rdf", rdf_format="turtle")
    self.research_object.add_annotation(
        dir_id, [ore_doc_fn], ORE["isDescribedBy"].uri)

    if is_empty:
        # Empty directory
        coll.add_asserted_type(PROV["EmptyCollection"])
        coll.add_asserted_type(PROV["EmptyDictionary"])
    self.research_object.add_uri(coll.identifier.uri)
    return coll
690
+
584
691
def declare_artefact (self , value ):
585
692
# type: (Any) -> ProvEntity
586
693
'''
@@ -638,109 +745,9 @@ def declare_artefact(self, value):
638
745
return entity
639
746
640
747
elif value .get ("class" ) == "Directory" :
641
- # Register any nested files/directories
642
-
643
- # FIXME: Calculate a hash-like identifier for directory
644
- # so we get same value if it's the same filenames/hashes
645
- # in a different location.
646
- # For now, mint a new UUID to identify this directory, but
647
- # attempt to keep it inside the value dictionary
648
- dir_id = value .setdefault ("@id" ,
649
- uuid .uuid4 ().urn )
650
-
651
- # New annotation file to keep the ORE Folder listing
652
- ore_doc_fn = dir_id .replace ("urn:uuid:" , "directory-" ) + ".ttl"
653
- dir_bundle = self .document .bundle (self .metadata_ns [ore_doc_fn ])
654
-
655
- coll = self .document .entity (dir_id ,
656
- [ (provM .PROV_TYPE , WFPROV ["Artifact" ]),
657
- (provM .PROV_TYPE , PROV ["Collection" ]),
658
- (provM .PROV_TYPE , PROV ["Dictionary" ]),
659
- (provM .PROV_TYPE , RO ["Folder" ]),
660
- ])
661
- # ORE description of ro:Folder, saved separately
662
- coll_b = dir_bundle .entity (dir_id ,
663
- [
664
- (provM .PROV_TYPE , RO ["Folder" ]),
665
- (provM .PROV_TYPE , ORE ["Aggregation" ]),
666
- ])
667
- self .document .mentionOf (dir_id + "#ore" , dir_id , dir_bundle .identifier )
668
-
669
- dir_manifest = dir_bundle .entity (dir_bundle .identifier ,
670
- {PROV ["type" ]: ORE ["ResourceMap" ],
671
- ORE ["describes" ]: coll_b .identifier }
672
- )
673
-
674
- coll_attribs = [ # type ( tuple(Identifier, ProvEntity) )
675
- (ORE ["isDescribedBy" ], dir_bundle .identifier )
676
- ]
677
- coll_b_attribs = [] # type ( tuple(Identifier, ProvEntity) )
678
-
679
- # FIXME: .listing might not be populated yet - hopefully
680
- # a later call to this method will sort that
681
- is_empty = True
682
-
683
- if not "listing" in value :
684
- assert self .research_object .make_fs_access
685
- fsaccess = self .research_object .make_fs_access ("" )
686
- get_listing (fsaccess , value )
687
- for f in value .get ("listing" , []):
688
- is_empty = False
689
- # Declare child-artifacts
690
- entity = self .declare_artefact (f )
691
- self .document .membership (coll , entity )
692
- # Membership relation aka our ORE Proxy
693
- m_id = uuid .uuid4 ().urn
694
- m = self .document .entity (m_id )
695
- m_b = dir_bundle .entity (m_id )
696
-
697
- # PROV-O style Dictionary
698
- # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
699
- # ..as prov.py do not currently allow PROV-N extensions
700
- # like hadDictionaryMember(..)
701
- m .add_asserted_type (PROV ["KeyEntityPair" ])
702
-
703
- m .add_attributes ({
704
- PROV ["pairKey" ]: f ["basename" ],
705
- PROV ["pairEntity" ]: entity ,
706
- })
707
-
708
- # As well as a being a
709
- # http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
710
- m_b .add_asserted_type (RO ["FolderEntry" ])
711
- m_b .add_asserted_type (ORE ["Proxy" ])
712
- m_b .add_attributes ({
713
- RO ["entryName" ]: f ["basename" ],
714
- ORE ["proxyIn" ]: coll ,
715
- ORE ["proxyFor" ]: entity ,
716
-
717
- })
718
- coll_attribs .append (
719
- (PROV ["hadDictionaryMember" ], m ))
720
- coll_b_attribs .append (
721
- (ORE ["aggregates" ], m_b ))
722
-
723
- coll .add_attributes (coll_attribs )
724
- coll_b .add_attributes (coll_b_attribs )
725
-
726
- # Also Save ORE Folder as annotation metadata
727
- ore_doc = ProvDocument ()
728
- ore_doc .add_namespace (ORE )
729
- ore_doc .add_namespace (RO )
730
- ore_doc .add_namespace (UUID )
731
- ore_doc .add_bundle (dir_bundle )
732
- ore_doc = ore_doc .flattened ()
733
- ore_doc_path = posixpath .join (_posix_path (METADATA ), ore_doc_fn )
734
- with self .research_object .write_bag_file (ore_doc_path ) as provenance_file :
735
- ore_doc .serialize (provenance_file , format = "rdf" , rdf_format = "turtle" )
736
- self .research_object .add_annotation (dir_id , [ore_doc_fn ], ORE ["isDescribedBy" ].uri )
737
-
738
- if is_empty :
739
- # Empty directory
740
- coll .add_asserted_type (PROV ["EmptyCollection" ])
741
- coll .add_asserted_type (PROV ["EmptyDictionary" ])
742
- self .research_object .add_uri (coll .identifier .uri )
743
- return coll
748
+ entity = self .declare_directory (value )
749
+ value ["@id" ] = entity .identifier .uri
750
+ return entity
744
751
else :
745
752
coll_id = value .setdefault ("@id" ,
746
753
uuid .uuid4 ().urn )
@@ -1526,7 +1533,7 @@ def create_job(self,
1526
1533
'''
1527
1534
copied = copy .deepcopy (builderJob )
1528
1535
relativised_input_objecttemp = {} # type: Dict[Any,Any]
1529
- self .relativise_files (copied )
1536
+ self ._relativise_files (copied )
1530
1537
rel_path = posixpath .join (_posix_path (WORKFLOW ), "primary-job.json" )
1531
1538
j = json .dumps (copied , indent = 4 , ensure_ascii = False )
1532
1539
with self .write_bag_file (rel_path ) as file_path :
@@ -1548,7 +1555,7 @@ def create_job(self,
1548
1555
{k : v for k , v in relativised_input_objecttemp .items () if v })
1549
1556
return relativised_input_object
1550
1557
1551
- def relativise_files (self , structure ):
1558
+ def _relativise_files (self , structure ):
1552
1559
# type: (Any, Dict) -> None
1553
1560
'''
1554
1561
save any file objects into Research Object and update the local paths
@@ -1589,7 +1596,7 @@ def relativise_files(self, structure):
1589
1596
del structure ["location" ]
1590
1597
1591
1598
for val in structure .values ():
1592
- self .relativise_files (val )
1599
+ self ._relativise_files (val )
1593
1600
return
1594
1601
1595
1602
if isinstance (structure , (str , Text )):
@@ -1598,7 +1605,7 @@ def relativise_files(self, structure):
1598
1605
try :
1599
1606
for obj in iter (structure ):
1600
1607
# Recurse and rewrite any nested File objects
1601
- self .relativise_files (obj )
1608
+ self ._relativise_files (obj )
1602
1609
except TypeError :
1603
1610
pass
1604
1611
0 commit comments