Skip to content

Commit 492d6ee

Browse files
committed
Provenance: Indicate strings as text/plain
1 parent d8ba856 commit 492d6ee

File tree

1 file changed

+14
-7
lines changed

1 file changed

+14
-7
lines changed

cwltool/provenance.py

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -101,15 +101,13 @@ class PermissionError(OSError): # pylint: disable=redefined-builtin
101101

102102
# BagIt and YAML always use UTF-8
103103
ENCODING = "UTF-8"
104-
104+
TEXT_PLAIN = 'text/plain; charset="%s"' % ENCODING
105105

106106
# sha1, compatible with the File type's "checksum" field
107107
# e.g. "checksum" = "sha1$47a013e660d408619d894b20806b1d5086aab03b"
108108
# See ./cwltool/schemas/v1.0/Process.yml
109109
Hasher = hashlib.sha1
110110

111-
112-
113111
# TODO: Better identifiers for user, at least
114112
# these should be preserved in ~/.config/cwl for every execution
115113
# on this host
@@ -547,7 +545,7 @@ def declare_artefact(self, value):
547545
elif isinstance(value, (Text, str)):
548546
# Save as string in UTF-8
549547
byte_s = io.BytesIO(str(value).encode(ENCODING))
550-
data_file = self.research_object.add_data_file(byte_s)
548+
data_file = self.research_object.add_data_file(byte_s, content_type=TEXT_PLAIN)
551549
# FIXME: Don't naively assume add_data_file uses hash in filename!
552550
data_id = "data:%s" % posixpath.split(data_file)[1]
553551
return self.document.entity(data_id,
@@ -634,6 +632,7 @@ def declare_artefact(self, value):
634632
return coll
635633
else:
636634
# some other kind of dictionary?
635+
# TODO: also save as JSON
637636
coll = self.document.entity(uuid.uuid4().urn,
638637
[ (provM.PROV_TYPE, WFPROV["Artifact"]),
639638
(provM.PROV_TYPE, PROV["Collection"]),
@@ -665,6 +664,7 @@ def declare_artefact(self, value):
665664
return coll
666665

667666
# some other kind of Collection?
667+
# TODO: also save as JSON
668668
try:
669669
members = []
670670
for each_input_obj in iter(value):
@@ -910,6 +910,7 @@ def __init__(self, temp_prefix_ro="tmp", orcid=None, full_name=None):
910910
self.tagfiles = set() # type: Set
911911
self._file_provenance = {} # type: Dict
912912
self.annotations = [] # type: List[Dict]
913+
self._content_types = {} # type: Dict[Text,str]
913914

914915
# These should be replaced by generate_prov_doc when workflow/run IDs are known:
915916
self.engine_uuid = "urn:uuid:%s" % uuid.uuid4()
@@ -1040,7 +1041,7 @@ def guess_mediatype(rel_path):
10401041
# Adapted from
10411042
# https://w3id.org/bundle/2014-11-05/#media-types
10421043

1043-
"txt": 'text/plain; charset="UTF-8"',
1044+
"txt": TEXT_PLAIN,
10441045
"ttl": 'text/turtle; charset="UTF-8"',
10451046
"rdf": 'application/rdf+xml',
10461047
"json": 'application/json',
@@ -1119,6 +1120,9 @@ def guess_mediatype(rel_path):
11191120
else:
11201121
# Probably made outside wf run, part of job object?
11211122
pass
1123+
if path in self._content_types:
1124+
aggregate_dict["mediatype"] = self._content_types[path]
1125+
11221126
aggregates.append(aggregate_dict)
11231127

11241128
for path in self.tagfiles:
@@ -1324,8 +1328,8 @@ def has_data_file(self, sha1hash):
13241328
folder = os.path.join(self.folder, DATA, sha1hash[0:2])
13251329
return os.path.isfile(os.path.join(folder, sha1hash))
13261330

1327-
def add_data_file(self, from_fp, when=None):
1328-
# type: (IO, Optional[datetime.datetime]) -> Text
1331+
def add_data_file(self, from_fp, when=None, content_type=None):
1332+
# type: (IO, Optional[datetime.datetime], Optional[str]) -> Text
13291333
'''
13301334
Copy the contents of from_fp into the research object's data folder and return the relative path.
13311335
'''
@@ -1359,6 +1363,9 @@ def add_data_file(self, from_fp, when=None):
13591363
if when:
13601364
self._file_provenance[rel_path] = self._self_made(when)
13611365
_logger.info(u"[provenance] Relative path for data file %s", rel_path)
1366+
1367+
if content_type:
1368+
self._content_types[rel_path] = content_type
13621369
return rel_path
13631370

13641371
def _self_made(self, when=None):

0 commit comments

Comments
 (0)