@@ -101,15 +101,13 @@ class PermissionError(OSError): # pylint: disable=redefined-builtin
101
101
102
102
# BagIt and YAML always use UTF-8
103
103
ENCODING = "UTF-8"
104
-
104
+ TEXT_PLAIN = 'text/plain; charset="%s"' % ENCODING
105
105
106
106
# sha1, compatible with the File type's "checksum" field
107
107
# e.g. "checksum" = "sha1$47a013e660d408619d894b20806b1d5086aab03b"
108
108
# See ./cwltool/schemas/v1.0/Process.yml
109
109
Hasher = hashlib .sha1
110
110
111
-
112
-
113
111
# TODO: Better identifiers for user, at least
114
112
# these should be preserved in ~/.config/cwl for every execution
115
113
# on this host
@@ -547,7 +545,7 @@ def declare_artefact(self, value):
547
545
elif isinstance (value , (Text , str )):
548
546
# Save as string in UTF-8
549
547
byte_s = io .BytesIO (str (value ).encode (ENCODING ))
550
- data_file = self .research_object .add_data_file (byte_s )
548
+ data_file = self .research_object .add_data_file (byte_s , content_type = TEXT_PLAIN )
551
549
# FIXME: Don't naively assume add_data_file uses hash in filename!
552
550
data_id = "data:%s" % posixpath .split (data_file )[1 ]
553
551
return self .document .entity (data_id ,
@@ -634,6 +632,7 @@ def declare_artefact(self, value):
634
632
return coll
635
633
else :
636
634
# some other kind of dictionary?
635
+ # TODO: also save as JSON
637
636
coll = self .document .entity (uuid .uuid4 ().urn ,
638
637
[ (provM .PROV_TYPE , WFPROV ["Artifact" ]),
639
638
(provM .PROV_TYPE , PROV ["Collection" ]),
@@ -665,6 +664,7 @@ def declare_artefact(self, value):
665
664
return coll
666
665
667
666
# some other kind of Collection?
667
+ # TODO: also save as JSON
668
668
try :
669
669
members = []
670
670
for each_input_obj in iter (value ):
@@ -910,6 +910,7 @@ def __init__(self, temp_prefix_ro="tmp", orcid=None, full_name=None):
910
910
self .tagfiles = set () # type: Set
911
911
self ._file_provenance = {} # type: Dict
912
912
self .annotations = [] # type: List[Dict]
913
+ self ._content_types = {} # type: Dict[Text,str]
913
914
914
915
# These should be replaced by generate_prov_doc when workflow/run IDs are known:
915
916
self .engine_uuid = "urn:uuid:%s" % uuid .uuid4 ()
@@ -1040,7 +1041,7 @@ def guess_mediatype(rel_path):
1040
1041
# Adapted from
1041
1042
# https://w3id.org/bundle/2014-11-05/#media-types
1042
1043
1043
- "txt" : 'text/plain; charset="UTF-8"' ,
1044
+ "txt" : TEXT_PLAIN ,
1044
1045
"ttl" : 'text/turtle; charset="UTF-8"' ,
1045
1046
"rdf" : 'application/rdf+xml' ,
1046
1047
"json" : 'application/json' ,
@@ -1119,6 +1120,9 @@ def guess_mediatype(rel_path):
1119
1120
else :
1120
1121
# Probably made outside wf run, part of job object?
1121
1122
pass
1123
+ if path in self ._content_types :
1124
+ aggregate_dict ["mediatype" ] = self ._content_types [path ]
1125
+
1122
1126
aggregates .append (aggregate_dict )
1123
1127
1124
1128
for path in self .tagfiles :
@@ -1324,8 +1328,8 @@ def has_data_file(self, sha1hash):
1324
1328
folder = os .path .join (self .folder , DATA , sha1hash [0 :2 ])
1325
1329
return os .path .isfile (os .path .join (folder , sha1hash ))
1326
1330
1327
- def add_data_file (self , from_fp , when = None ):
1328
- # type: (IO, Optional[datetime.datetime]) -> Text
1331
+ def add_data_file (self , from_fp , when = None , content_type = None ):
1332
+ # type: (IO, Optional[datetime.datetime], Optional[str] ) -> Text
1329
1333
'''
1330
1334
copies inputs to Data
1331
1335
'''
@@ -1359,6 +1363,9 @@ def add_data_file(self, from_fp, when=None):
1359
1363
if when :
1360
1364
self ._file_provenance [rel_path ] = self ._self_made (when )
1361
1365
_logger .info (u"[provenance] Relative path for data file %s" , rel_path )
1366
+
1367
+ if content_type :
1368
+ self ._content_types [rel_path ] = content_type
1362
1369
return rel_path
1363
1370
1364
1371
def _self_made (self , when = None ):
0 commit comments