Skip to content

Commit 3d32d0d

Browse files
committed
split out declare_directory
1 parent 7285b97 commit 3d32d0d

File tree

1 file changed

+114
-107
lines changed

1 file changed

+114
-107
lines changed

cwltool/provenance.py

Lines changed: 114 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,113 @@ def declare_file(self, value):
581581
assert entity
582582
return file_entity, entity, checksum
583583

584+
def declare_directory(self, value):
    # type: (Dict) -> ProvEntity
    '''
    Register a Directory value, and the entries of its listing, as provenance.

    The directory is recorded twice under the same identifier:

    * in ``self.document`` as a wfprov:Artifact / prov:Collection /
      prov:Dictionary / ro:Folder entity, and
    * in a separate bundle (named after the directory id) as an
      ro:Folder / ore:Aggregation, which is also serialized as Turtle
      into the metadata folder of the research object and registered
      as an annotation on the directory.

    Every item of ``value["listing"]`` is declared through
    ``self.declare_artefact`` and linked to the directory both as a
    PROV dictionary member and as an ORE proxy / RO folder entry.

    :param value: dictionary describing the directory. It is modified
        in place: an ``"@id"`` key is added if missing, and when there
        is no ``"listing"`` key ``get_listing`` is called on it
        (presumably to populate the listing -- confirm against
        ``get_listing``).
    :return: the directory entity created in ``self.document``.
    '''
    # FIXME: Calculate a hash-like identifier for directory
    # so we get same value if it's the same filenames/hashes
    # in a different location.
    # For now, mint a new UUID to identify this directory, but
    # attempt to keep it inside the value dictionary
    dir_id = value.setdefault("@id", uuid.uuid4().urn)

    # New annotation file to keep the ORE Folder listing
    ore_doc_fn = dir_id.replace("urn:uuid:", "directory-") + ".ttl"
    dir_bundle = self.document.bundle(self.metadata_ns[ore_doc_fn])

    coll = self.document.entity(
        dir_id,
        [
            (provM.PROV_TYPE, WFPROV["Artifact"]),
            (provM.PROV_TYPE, PROV["Collection"]),
            (provM.PROV_TYPE, PROV["Dictionary"]),
            (provM.PROV_TYPE, RO["Folder"]),
        ])
    # ORE description of ro:Folder, saved separately
    coll_b = dir_bundle.entity(
        dir_id,
        [
            (provM.PROV_TYPE, RO["Folder"]),
            (provM.PROV_TYPE, ORE["Aggregation"]),
        ])
    self.document.mentionOf(dir_id + "#ore", dir_id, dir_bundle.identifier)

    # Resource map describing the aggregation. The returned entity is
    # not needed afterwards; the call is kept because it declares the
    # record on the bundle.
    dir_bundle.entity(
        dir_bundle.identifier,
        {
            PROV["type"]: ORE["ResourceMap"],
            ORE["describes"]: coll_b.identifier,
        })

    # (attribute, value) pairs added to the directory entity in the
    # main document once the listing has been walked
    coll_attribs = [
        (ORE["isDescribedBy"], dir_bundle.identifier),
    ]
    # (attribute, value) pairs added to the ORE aggregation in the bundle
    coll_b_attribs = []

    # FIXME: .listing might not be populated yet - hopefully
    # a later call to this method will sort that
    is_empty = True

    if "listing" not in value:
        assert self.research_object.make_fs_access
        fsaccess = self.research_object.make_fs_access("")
        get_listing(fsaccess, value)

    for entry in value.get("listing", []):
        is_empty = False
        # Declare child-artifacts
        entity = self.declare_artefact(entry)
        self.document.membership(coll, entity)
        # Membership relation aka our ORE Proxy; the same identifier is
        # used for the record in the document and the one in the bundle
        m_id = uuid.uuid4().urn
        m_entity = self.document.entity(m_id)
        m_b = dir_bundle.entity(m_id)

        # PROV-O style Dictionary
        # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
        # ..as prov.py does not currently allow PROV-N extensions
        # like hadDictionaryMember(..)
        m_entity.add_asserted_type(PROV["KeyEntityPair"])
        m_entity.add_attributes({
            PROV["pairKey"]: entry["basename"],
            PROV["pairEntity"]: entity,
        })

        # As well as being a
        # http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
        m_b.add_asserted_type(RO["FolderEntry"])
        m_b.add_asserted_type(ORE["Proxy"])
        m_b.add_attributes({
            RO["entryName"]: entry["basename"],
            ORE["proxyIn"]: coll,
            ORE["proxyFor"]: entity,
        })

        coll_attribs.append((PROV["hadDictionaryMember"], m_entity))
        coll_b_attribs.append((ORE["aggregates"], m_b))

    coll.add_attributes(coll_attribs)
    coll_b.add_attributes(coll_b_attribs)

    # Also Save ORE Folder as annotation metadata
    ore_doc = ProvDocument()
    ore_doc.add_namespace(ORE)
    ore_doc.add_namespace(RO)
    ore_doc.add_namespace(UUID)
    ore_doc.add_bundle(dir_bundle)
    ore_doc = ore_doc.flattened()
    ore_doc_path = posixpath.join(_posix_path(METADATA), ore_doc_fn)
    with self.research_object.write_bag_file(ore_doc_path) as provenance_file:
        ore_doc.serialize(provenance_file, format="rdf", rdf_format="turtle")
    self.research_object.add_annotation(
        dir_id, [ore_doc_fn], ORE["isDescribedBy"].uri)

    if is_empty:
        # Empty directory
        coll.add_asserted_type(PROV["EmptyCollection"])
        coll.add_asserted_type(PROV["EmptyDictionary"])
    self.research_object.add_uri(coll.identifier.uri)
    return coll
690+
584691
def declare_artefact(self, value):
585692
# type: (Any) -> ProvEntity
586693
'''
@@ -638,109 +745,9 @@ def declare_artefact(self, value):
638745
return entity
639746

640747
elif value.get("class") == "Directory":
641-
# Register any nested files/directories
642-
643-
# FIXME: Calculate a hash-like identifier for directory
644-
# so we get same value if it's the same filenames/hashes
645-
# in a different location.
646-
# For now, mint a new UUID to identify this directory, but
647-
# attempt to keep it inside the value dictionary
648-
dir_id = value.setdefault("@id",
649-
uuid.uuid4().urn)
650-
651-
# New annotation file to keep the ORE Folder listing
652-
ore_doc_fn = dir_id.replace("urn:uuid:", "directory-") + ".ttl"
653-
dir_bundle = self.document.bundle(self.metadata_ns[ore_doc_fn])
654-
655-
coll = self.document.entity(dir_id,
656-
[ (provM.PROV_TYPE, WFPROV["Artifact"]),
657-
(provM.PROV_TYPE, PROV["Collection"]),
658-
(provM.PROV_TYPE, PROV["Dictionary"]),
659-
(provM.PROV_TYPE, RO["Folder"]),
660-
])
661-
# ORE description of ro:Folder, saved separately
662-
coll_b = dir_bundle.entity(dir_id,
663-
[
664-
(provM.PROV_TYPE, RO["Folder"]),
665-
(provM.PROV_TYPE, ORE["Aggregation"]),
666-
])
667-
self.document.mentionOf(dir_id + "#ore", dir_id, dir_bundle.identifier)
668-
669-
dir_manifest = dir_bundle.entity(dir_bundle.identifier,
670-
{PROV["type"]: ORE["ResourceMap"],
671-
ORE["describes"]: coll_b.identifier}
672-
)
673-
674-
coll_attribs = [ # type ( tuple(Identifier, ProvEntity) )
675-
(ORE["isDescribedBy"], dir_bundle.identifier )
676-
]
677-
coll_b_attribs = [] # type ( tuple(Identifier, ProvEntity) )
678-
679-
# FIXME: .listing might not be populated yet - hopefully
680-
# a later call to this method will sort that
681-
is_empty = True
682-
683-
if not "listing" in value:
684-
assert self.research_object.make_fs_access
685-
fsaccess = self.research_object.make_fs_access("")
686-
get_listing(fsaccess, value)
687-
for f in value.get("listing", []):
688-
is_empty = False
689-
# Declare child-artifacts
690-
entity = self.declare_artefact(f)
691-
self.document.membership(coll, entity)
692-
# Membership relation aka our ORE Proxy
693-
m_id = uuid.uuid4().urn
694-
m = self.document.entity(m_id)
695-
m_b = dir_bundle.entity(m_id)
696-
697-
# PROV-O style Dictionary
698-
# https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
699-
# ..as prov.py do not currently allow PROV-N extensions
700-
# like hadDictionaryMember(..)
701-
m.add_asserted_type(PROV["KeyEntityPair"])
702-
703-
m.add_attributes({
704-
PROV["pairKey"]: f["basename"],
705-
PROV["pairEntity"]: entity,
706-
})
707-
708-
# As well as a being a
709-
# http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
710-
m_b.add_asserted_type(RO["FolderEntry"])
711-
m_b.add_asserted_type(ORE["Proxy"])
712-
m_b.add_attributes({
713-
RO["entryName"]: f["basename"],
714-
ORE["proxyIn"]: coll,
715-
ORE["proxyFor"]: entity,
716-
717-
})
718-
coll_attribs.append(
719-
(PROV["hadDictionaryMember"], m))
720-
coll_b_attribs.append(
721-
(ORE["aggregates"], m_b))
722-
723-
coll.add_attributes(coll_attribs)
724-
coll_b.add_attributes(coll_b_attribs)
725-
726-
# Also Save ORE Folder as annotation metadata
727-
ore_doc = ProvDocument()
728-
ore_doc.add_namespace(ORE)
729-
ore_doc.add_namespace(RO)
730-
ore_doc.add_namespace(UUID)
731-
ore_doc.add_bundle(dir_bundle)
732-
ore_doc = ore_doc.flattened()
733-
ore_doc_path = posixpath.join(_posix_path(METADATA), ore_doc_fn)
734-
with self.research_object.write_bag_file(ore_doc_path) as provenance_file:
735-
ore_doc.serialize(provenance_file, format="rdf", rdf_format="turtle")
736-
self.research_object.add_annotation(dir_id, [ore_doc_fn], ORE["isDescribedBy"].uri)
737-
738-
if is_empty:
739-
# Empty directory
740-
coll.add_asserted_type(PROV["EmptyCollection"])
741-
coll.add_asserted_type(PROV["EmptyDictionary"])
742-
self.research_object.add_uri(coll.identifier.uri)
743-
return coll
748+
entity = self.declare_directory(value)
749+
value["@id"] = entity.identifier.uri
750+
return entity
744751
else:
745752
coll_id = value.setdefault("@id",
746753
uuid.uuid4().urn)
@@ -1526,7 +1533,7 @@ def create_job(self,
15261533
'''
15271534
copied=copy.deepcopy(builderJob)
15281535
relativised_input_objecttemp = {} # type: Dict[Any,Any]
1529-
self.relativise_files(copied)
1536+
self._relativise_files(copied)
15301537
rel_path = posixpath.join(_posix_path(WORKFLOW), "primary-job.json")
15311538
j = json.dumps(copied, indent=4, ensure_ascii=False)
15321539
with self.write_bag_file(rel_path) as file_path:
@@ -1548,7 +1555,7 @@ def create_job(self,
15481555
{k: v for k, v in relativised_input_objecttemp.items() if v})
15491556
return relativised_input_object
15501557

1551-
def relativise_files(self, structure):
1558+
def _relativise_files(self, structure):
15521559
# type: (Any, Dict) -> None
15531560
'''
15541561
save any file objects into Research Object and update the local paths
@@ -1589,7 +1596,7 @@ def relativise_files(self, structure):
15891596
del structure["location"]
15901597

15911598
for val in structure.values():
1592-
self.relativise_files(val)
1599+
self._relativise_files(val)
15931600
return
15941601

15951602
if isinstance(structure, (str, Text)):
@@ -1598,7 +1605,7 @@ def relativise_files(self, structure):
15981605
try:
15991606
for obj in iter(structure):
16001607
# Recurse and rewrite any nested File objects
1601-
self.relativise_files(obj)
1608+
self._relativise_files(obj)
16021609
except TypeError:
16031610
pass
16041611

0 commit comments

Comments
 (0)