Skip to content

Commit 7df8203

Browse files
committed
split out declare_string
also some type declarations sorted
1 parent 3d32d0d commit 7df8203

File tree

2 files changed

+23
-15
lines changed

2 files changed

+23
-15
lines changed

cwltool/provenance.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from socket import getfqdn
2828
from getpass import getuser
2929
from typing import (Any, Callable, Dict, IO, List, Optional, MutableMapping,
30-
Set, Tuple, cast)
30+
Set, Tuple, Union, cast)
3131
from typing_extensions import Text, TYPE_CHECKING # pylint: disable=unused-import
3232
# move to a regular typing import when Python 3.3-3.6 is no longer supported
3333
import six
@@ -519,6 +519,7 @@ def declare_file(self, value):
519519
if 'checksum' in value:
520520
csum = value['checksum']
521521
(method, checksum) = csum.split("$", 1)
522+
assert checksum
522523
if method == SHA1 and \
523524
self.research_object.has_data_file(checksum):
524525
entity = self.document.entity("data:" + checksum)
@@ -540,8 +541,7 @@ def declare_file(self, value):
540541

541542
if not entity and 'content' in value:
542543
# Anonymous file, add content as string
543-
entity = self.declare_artefact(value["content"])
544-
checksum = None # TODO
544+
entity,checksum = self.declare_string(value["content"])
545545

546546
# By here one of them should have worked!
547547
if not entity:
@@ -579,6 +579,7 @@ def declare_file(self, value):
579579
other_attributes={PROV["type"]: CWLPROV["SecondaryFile"]})
580580

581581
assert entity
582+
assert checksum
582583
return file_entity, entity, checksum
583584

584585
def declare_directory(self, value):
@@ -688,6 +689,20 @@ def declare_directory(self, value):
688689
self.research_object.add_uri(coll.identifier.uri)
689690
return coll
690691

692+
def declare_string(self, value):
693+
# type: (Union[Text, str]) -> Tuple[ProvEntity,str]
694+
695+
# Save as string in UTF-8
696+
byte_s = io.BytesIO(str(value).encode(ENCODING))
697+
data_file = self.research_object.add_data_file(byte_s, content_type=TEXT_PLAIN)
698+
checksum = posixpath.basename(data_file)
699+
# FIXME: Don't naively assume add_data_file uses hash in filename!
700+
data_id = "data:%s" % posixpath.split(data_file)[1]
701+
entity = self.document.entity(data_id,
702+
{provM.PROV_TYPE: WFPROV["Artifact"],
703+
provM.PROV_VALUE: str(value)})
704+
return entity, checksum
705+
691706
def declare_artefact(self, value):
692707
# type: (Any) -> ProvEntity
693708
'''
@@ -711,14 +726,8 @@ def declare_artefact(self, value):
711726
return e
712727

713728
elif isinstance(value, (Text, str)):
714-
# Save as string in UTF-8
715-
byte_s = io.BytesIO(str(value).encode(ENCODING))
716-
data_file = self.research_object.add_data_file(byte_s, content_type=TEXT_PLAIN)
717-
# FIXME: Don't naively assume add_data_file uses hash in filename!
718-
data_id = "data:%s" % posixpath.split(data_file)[1]
719-
return self.document.entity(data_id,
720-
{provM.PROV_TYPE: WFPROV["Artifact"],
721-
provM.PROV_VALUE: str(value)})
729+
(entity,_) = self.declare_string(value)
730+
return entity
722731

723732
elif isinstance(value, bytes):
724733
# If we got here then we must be in Python 3
@@ -840,7 +849,7 @@ def used_artefacts(self,
840849
datetime.datetime.now(), None, {"prov:role": prov_role})
841850

842851
def generate_output_prov(self,
843-
final_output, # type: Optional[Dict[Text, Any]]
852+
final_output, # type: Dict[Text, Any]
844853
process_run_id, # type: Optional[str]
845854
name # type: Optional[Text]
846855
): # type: (...) -> None

cwltool/workflow.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -239,20 +239,19 @@ def do_output_callback(self, final_output_callback):
239239

240240
supportsMultipleInput = bool(self.workflow.get_requirement("MultipleInputFeatureRequirement")[0])
241241

242-
wo = {} # type: Optional[Dict[Text, Text]]
242+
wo = None # type: Optional[Dict[Text, Text]]
243243
try:
244244
wo = object_from_state(
245245
self.state, self.tool["outputs"], True, supportsMultipleInput,
246246
"outputSource", incomplete=True)
247247
except WorkflowException as err:
248248
_logger.error(
249249
u"[%s] Cannot collect workflow output: %s", self.name, err)
250-
wo = {}
251250
self.processStatus = "permanentFail"
252251
if self.prov_obj and self.parent_wf \
253252
and self.prov_obj.workflow_run_uri != self.parent_wf.workflow_run_uri:
254253
process_run_id = None
255-
self.prov_obj.generate_output_prov(wo, process_run_id, self.name)
254+
self.prov_obj.generate_output_prov(wo or {}, process_run_id, self.name)
256255
self.prov_obj.document.wasEndedBy(
257256
self.prov_obj.workflow_run_uri, None, self.prov_obj.engine_uuid,
258257
datetime.datetime.now())

0 commit comments

Comments
 (0)