Skip to content

Commit 1eb6bfe

Browse files
authored
when outputting JSON & RDF, Force UTF-8, always (#786)
* configure rdflib logging
* add test for UTF with --print-rdf
* PY2: json.dump{,s} with unicode everywhere
1 parent 48bd6c7 commit 1eb6bfe

15 files changed: +190 additions, -87 deletions

cwltool/builder.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import copy
33
import os
44
import logging
5-
import json
65
from typing import Any, Callable, Dict, List, Text, Type, Union, Set
76

87
import six
@@ -22,7 +21,8 @@
2221
from .pathmapper import (PathMapper, get_listing, normalizeFilesDirs,
2322
visit_class)
2423
from .stdfsaccess import StdFsAccess
25-
from .utils import aslist, get_feature, docker_windows_path_adjust, onWindows
24+
from .utils import (aslist, get_feature, docker_windows_path_adjust,
25+
json_dumps, onWindows)
2626

2727
_logger = logging.getLogger("cwltool")
2828

@@ -75,12 +75,13 @@ def checkFormat(actualFile, inputFormats, ontology):
7575
if not af:
7676
continue
7777
if "format" not in af:
78-
raise validate.ValidationException(u"File has no 'format' defined: %s" % json.dumps(af, indent=4))
78+
raise validate.ValidationException(
79+
u"File has no 'format' defined: %s" % json_dumps(af, indent=4))
7980
for inpf in aslist(inputFormats):
8081
if af["format"] == inpf or formatSubclassOf(af["format"], inpf, ontology, set()):
8182
return
8283
raise validate.ValidationException(
83-
u"File has an incompatible format: %s" % json.dumps(af, indent=4))
84+
u"File has an incompatible format: %s" % json_dumps(af, indent=4))
8485

8586
class Builder(object):
8687
def __init__(self): # type: () -> None
@@ -231,7 +232,7 @@ def bind_input(self, schema, datum, lead_pos=None, tail_pos=None, discover_secon
231232
"class": "File"})
232233
else:
233234
raise WorkflowException("Missing required secondary file '%s' from file object: %s" % (
234-
sfname, json.dumps(datum, indent=4)))
235+
sfname, json_dumps(datum, indent=4)))
235236

236237
normalizeFilesDirs(datum["secondaryFiles"])
237238

cwltool/checker.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import json
21
from collections import namedtuple
32
import logging
43

@@ -9,6 +8,7 @@
98
import schema_salad.validate as validate
109
from .process import shortname
1110
from .errors import WorkflowException
11+
from .utils import json_dumps
1212

1313
_logger = logging.getLogger("cwltool")
1414

@@ -177,10 +177,10 @@ def static_checker(workflow_inputs, workflow_outputs, step_inputs, step_outputs,
177177
else:
178178
msg = SourceLine(src, "type").makeError(
179179
"Source '%s' of type %s may be incompatible"
180-
% (shortname(src["id"]), json.dumps(src["type"]))) + "\n" + \
180+
% (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \
181181
SourceLine(sink, "type").makeError(
182-
" with sink '%s' of type %s"
183-
% (shortname(sink["id"]), json.dumps(sink["type"])))
182+
" with sink '%s' of type %s"
183+
% (shortname(sink["id"]), json_dumps(sink["type"])))
184184
if linkMerge:
185185
msg += "\n" + SourceLine(sink).makeError(" source has linkMerge method %s" % linkMerge)
186186

@@ -191,10 +191,10 @@ def static_checker(workflow_inputs, workflow_outputs, step_inputs, step_outputs,
191191
linkMerge = exception.linkMerge
192192
msg = SourceLine(src, "type").makeError(
193193
"Source '%s' of type %s is incompatible"
194-
% (shortname(src["id"]), json.dumps(src["type"]))) + "\n" + \
194+
% (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \
195195
SourceLine(sink, "type").makeError(
196-
" with sink '%s' of type %s"
197-
% (shortname(sink["id"]), json.dumps(sink["type"])))
196+
" with sink '%s' of type %s"
197+
% (shortname(sink["id"]), json_dumps(sink["type"])))
198198
if linkMerge:
199199
msg += "\n" + SourceLine(sink).makeError(" source has linkMerge method %s" % linkMerge)
200200
exception_msgs.append(msg)

cwltool/command_line_tool.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
import copy
33
import hashlib
44
import locale
5-
import json
65
import logging
6+
import json
77
import os
88
import re
99
import shutil
@@ -32,7 +32,9 @@
3232
normalizeFilesDirs, shortname, uniquename)
3333
from .singularity import SingularityCommandLineJob
3434
from .stdfsaccess import StdFsAccess
35-
from .utils import aslist, docker_windows_path_adjust, convert_pathsep_to_unix, windows_default_container_id, onWindows
35+
from .utils import (aslist, docker_windows_path_adjust,
36+
convert_pathsep_to_unix, json_dumps,
37+
windows_default_container_id, onWindows)
3638
from six.moves import map
3739

3840
ACCEPTLIST_EN_STRICT_RE = re.compile(r"^[a-zA-Z0-9._+-]+$")
@@ -309,7 +311,8 @@ def job(self,
309311
if r["class"] in interesting and r["class"] not in keydict:
310312
keydict[r["class"]] = r
311313

312-
keydictstr = json.dumps(keydict, separators=(',', ':'), sort_keys=True)
314+
keydictstr = json_dumps(keydict, separators=(',', ':'),
315+
sort_keys=True)
313316
cachekey = hashlib.md5(keydictstr.encode('utf-8')).hexdigest()
314317

315318
_logger.debug("[job %s] keydictstr is %s -> %s", jobname,
@@ -370,8 +373,10 @@ def rm_pending_output_callback(output_callbacks, jobcachepending,
370373
_logger.debug(u"[job %s] initializing from %s%s",
371374
j.name,
372375
self.tool.get("id", ""),
373-
u" as part of %s" % kwargs["part_of"] if "part_of" in kwargs else "")
374-
_logger.debug(u"[job %s] %s", j.name, json.dumps(job_order, indent=4))
376+
u" as part of %s" % kwargs["part_of"]
377+
if "part_of" in kwargs else "")
378+
_logger.debug(u"[job %s] %s", j.name, json_dumps(job_order,
379+
indent=4))
375380

376381
builder.pathmapper = None
377382
make_path_mapper_kwargs = kwargs
@@ -427,7 +432,9 @@ def rm_pending_output_callback(output_callbacks, jobcachepending,
427432

428433
if debug:
429434
_logger.debug(u"[job %s] path mappings is %s", j.name,
430-
json.dumps({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}, indent=4))
435+
json_dumps({p: builder.pathmapper.mapper(p)
436+
for p in builder.pathmapper.files()},
437+
indent=4))
431438

432439
if self.tool.get("stdin"):
433440
with SourceLine(self.tool, "stdin", validate.ValidationException, debug):
@@ -447,7 +454,8 @@ def rm_pending_output_callback(output_callbacks, jobcachepending,
447454
raise validate.ValidationException("stdout must be a relative path, got '%s'" % j.stdout)
448455

449456
if debug:
450-
_logger.debug(u"[job %s] command line bindings is %s", j.name, json.dumps(builder.bindings, indent=4))
457+
_logger.debug(u"[job %s] command line bindings is %s", j.name,
458+
json_dumps(builder.bindings, indent=4))
451459

452460
dockerReq = self.get_requirement("DockerRequirement")[0]
453461
if dockerReq and kwargs.get("use_container"):
@@ -549,7 +557,8 @@ def collect_output_ports(self, ports, builder, outdir, compute_checksum=True, jo
549557
with fs_access.open(custom_output, "r") as f:
550558
ret = json.load(f)
551559
if debug:
552-
_logger.debug(u"Raw output from %s: %s", custom_output, json.dumps(ret, indent=4))
560+
_logger.debug(u"Raw output from %s: %s", custom_output,
561+
json_dumps(ret, indent=4))
553562
else:
554563
for i, port in enumerate(ports):
555564
def makeWorkflowException(msg):
@@ -571,13 +580,16 @@ def makeWorkflowException(msg):
571580
if compute_checksum:
572581
adjustFileObjs(ret, partial(compute_checksums, fs_access))
573582

574-
validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret,
575-
strict=False, logger=_logger_validation_warnings)
583+
validate.validate_ex(
584+
self.names.get_name("outputs_record_schema", ""), ret,
585+
strict=False, logger=_logger_validation_warnings)
576586
if ret is not None and builder.mutation_manager is not None:
577587
adjustFileObjs(ret, builder.mutation_manager.set_generation)
578588
return ret if ret is not None else {}
579589
except validate.ValidationException as e:
580-
raise WorkflowException("Error validating output record. " + Text(e) + "\n in " + json.dumps(ret, indent=4))
590+
raise WorkflowException(
591+
"Error validating output record. " + Text(e) + "\n in " +
592+
json_dumps(ret, indent=4))
581593
finally:
582594
if builder.mutation_manager and readers:
583595
for r in readers.values():

cwltool/cwlrdf.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ def visitor(t):
2020
return g
2121

2222

23-
def printrdf(wf, ctx, sr):
24-
# type: (Process, ContextType, Text) -> Text
25-
return gather(wf, ctx).serialize(format=sr).decode('utf-8')
23+
def printrdf(wflow, ctx, style): # type: (Process, ContextType, Text) -> Text
24+
"""Serialize the CWL document into a string, ready for printing."""
25+
return gather(wflow, ctx).serialize(format=style,
26+
encoding='utf-8').decode('utf-8')
2627

2728

2829
def lastpart(uri): # type: (Any) -> Text

cwltool/expression.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import absolute_import
22
import copy
3-
import json
43
import logging
54
import re
65
from typing import Any, AnyStr, Dict, List, Text, Union
@@ -10,7 +9,7 @@
109

1110
from . import sandboxjs
1211
from .errors import WorkflowException
13-
from .utils import bytes2str_in_dicts
12+
from .utils import bytes2str_in_dicts, json_dumps
1413

1514
_logger = logging.getLogger("cwltool")
1615

@@ -25,7 +24,9 @@ def jshead(engineConfig, rootvars):
2524
if six.PY3:
2625
rootvars = bytes2str_in_dicts(rootvars) # type: ignore
2726

28-
return u"\n".join(engineConfig + [u"var %s = %s;" % (k, json.dumps(v, indent=4)) for k, v in rootvars.items()])
27+
return u"\n".join(engineConfig + [
28+
u"var %s = %s;" % (k, json_dumps(v, indent=4))
29+
for k, v in rootvars.items()])
2930

3031

3132
# decode all raw strings to unicode
@@ -209,7 +210,7 @@ def interpolate(scan, rootvars,
209210
debug=debug, js_console=js_console)
210211
if w[0] == 0 and w[1] == len(scan) and len(parts) <= 1:
211212
return e
212-
leaf = json.dumps(e, sort_keys=True)
213+
leaf = json_dumps(e, sort_keys=True)
213214
if leaf[0] == '"':
214215
leaf = leaf[1:-1]
215216
parts.append(leaf)

cwltool/job.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import codecs
44
import functools
55
import io
6-
import json
76
import logging
87
import os
98
import re
@@ -26,8 +25,8 @@
2625
from .process import (UnsupportedRequirement, get_feature,
2726
stageFiles)
2827
from .secrets import SecretStore
29-
from .utils import (bytes2str_in_dicts, copytree_with_merge, onWindows,
30-
subprocess)
28+
from .utils import (bytes2str_in_dicts, copytree_with_merge, json_dump,
29+
json_dumps, onWindows, subprocess)
3130

3231
_logger = logging.getLogger("cwltool")
3332

@@ -177,7 +176,9 @@ def _setup(self, kwargs): # type: (Dict) -> None
177176
self.generatemapper = self.make_pathmapper(cast(List[Any], self.generatefiles["listing"]),
178177
self.builder.outdir, basedir=self.outdir, separateDirs=False, **make_path_mapper_kwargs)
179178
_logger.debug(u"[job %s] initial work dir %s", self.name,
180-
json.dumps({p: self.generatemapper.mapper(p) for p in self.generatemapper.files()}, indent=4))
179+
json_dumps({p: self.generatemapper.mapper(p)
180+
for p in self.generatemapper.files()},
181+
indent=4))
181182

182183
def _execute(self,
183184
runtime, # type:List[Text]
@@ -289,7 +290,8 @@ def _execute(self,
289290
_logger.info(u"[job %s] completed %s", self.name, processStatus)
290291

291292
if _logger.isEnabledFor(logging.DEBUG):
292-
_logger.debug(u"[job %s] %s", self.name, json.dumps(outputs, indent=4))
293+
_logger.debug(u"[job %s] %s", self.name,
294+
json_dumps(outputs, indent=4))
293295

294296
if self.generatemapper and secret_store:
295297
# Delete any runtime-generated files containing secrets.
@@ -520,8 +522,9 @@ def terminate():
520522
stderr_path=stderr_path,
521523
stdin_path=stdin_path,
522524
)
523-
with open(os.path.join(job_dir, "job.json"), "wb") as f:
524-
json.dump(job_description, codecs.getwriter('utf-8')(f), ensure_ascii=False) # type: ignore
525+
with io.open(os.path.join(job_dir, "job.json"), encoding='utf-8',
526+
mode="wb") as job_file:
527+
json_dump(job_description, job_file, ensure_ascii=False)
525528
try:
526529
job_script = os.path.join(job_dir, "run_job.bash")
527530
with open(job_script, "wb") as f:

cwltool/load_tool.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import re
88
import uuid
99
import hashlib
10-
import json
1110
import copy
1211
from typing import (Any, Callable, Dict, Iterable, List, Mapping, Optional,
1312
Text, Tuple, Union, cast)
@@ -26,6 +25,7 @@
2625
from .errors import WorkflowException
2726
from .process import Process, shortname, get_schema
2827
from .update import ALLUPDATES
28+
from .utils import json_dumps
2929

3030
_logger = logging.getLogger("cwltool")
3131
jobloaderctx = {
@@ -143,7 +143,7 @@ def _convert_stdstreams_to_files(workflowobj):
143143
filename = workflowobj[streamtype]
144144
else:
145145
filename = Text(
146-
hashlib.sha1(json.dumps(workflowobj,
146+
hashlib.sha1(json_dumps(workflowobj,
147147
sort_keys=True
148148
).encode('utf-8')
149149
).hexdigest())

0 commit comments

Comments (0)