Skip to content

Commit 691dea2

Browse files
jmchilton authored and mr-c committed
Limit loadContents/File literal contents length. (#1048)
We decided to limit these and raise a hard error if these buffer sizes are exceeded. We decided this would apply retroactively to v1.0, since silently failing seems to be the worst of all behaviors.
1 parent 526f36f commit 691dea2

File tree

4 files changed

+25
-7
lines changed

4 files changed

+25
-7
lines changed

cwltool/builder.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from schema_salad.sourceline import SourceLine
1919
from schema_salad.ref_resolver import uri_file_path
2020
from six import iteritems, string_types
21+
from typing import IO
2122
from typing_extensions import (TYPE_CHECKING, # pylint: disable=unused-import
2223
Text, Type)
2324
# move to a regular typing import when Python 3.3-3.6 is no longer supported
@@ -27,15 +28,25 @@
2728
from .loghandler import _logger
2829
from .mutation import MutationManager # pylint: disable=unused-import
2930
from .pathmapper import PathMapper # pylint: disable=unused-import
30-
from .pathmapper import get_listing, normalizeFilesDirs, visit_class
31+
from .pathmapper import CONTENT_LIMIT, get_listing, normalizeFilesDirs, visit_class
3132
from .stdfsaccess import StdFsAccess # pylint: disable=unused-import
3233
from .utils import aslist, docker_windows_path_adjust, json_dumps, onWindows
3334

3435

3536

3637
if TYPE_CHECKING:
3738
from .provenance import ProvenanceProfile # pylint: disable=unused-import
38-
CONTENT_LIMIT = 64 * 1024
39+
40+
41+
def content_limit_respected_read_bytes(f):  # type: (IO) -> bytes
    """
    Read the contents of ``f``, failing hard if they exceed ``CONTENT_LIMIT``.

    At most ``CONTENT_LIMIT + 1`` bytes are requested from the stream. The
    one extra byte is what distinguishes a buffer that is exactly at the
    limit (accepted) from one that is larger (rejected), without reading an
    arbitrarily large stream into memory.

    :param f: an open, readable stream positioned where reading should start.
    :return: everything read from ``f`` (no more than ``CONTENT_LIMIT`` bytes).
    :raises WorkflowException: if more than ``CONTENT_LIMIT`` bytes were
        available, instead of silently truncating the contents.
    """
    contents = f.read(CONTENT_LIMIT + 1)
    if len(contents) > CONTENT_LIMIT:
        raise WorkflowException(
            "loadContents handling encountered buffer that exceeds maximum "
            "length of %d bytes" % CONTENT_LIMIT)
    return contents
46+
47+
48+
def content_limit_respected_read(f):  # type: (IO) -> Text
    """
    Return the size-limited contents of ``f`` decoded as UTF-8 text.

    The raw read (and the size-limit enforcement) is delegated to
    ``content_limit_respected_read_bytes``; any exception it raises
    propagates unchanged.

    :param f: an open, readable stream.
    :return: the stream contents decoded with the ``utf-8`` codec.
    """
    raw = content_limit_respected_read_bytes(f)
    return raw.decode("utf-8")
3950

4051

4152
def substitute(value, replace): # type: (Text, Text) -> Text
@@ -283,7 +294,7 @@ def _capture_files(f):
283294
self.files.append(datum)
284295
if (binding and binding.get("loadContents")) or schema.get("loadContents"):
285296
with self.fs_access.open(datum["location"], "rb") as f:
286-
datum["contents"] = f.read(CONTENT_LIMIT).decode("utf-8")
297+
datum["contents"] = content_limit_respected_read(f)
287298

288299
if "secondaryFiles" in schema:
289300
if "secondaryFiles" not in datum:

cwltool/command_line_tool.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
Text, Type)
3131
# move to a regular typing import when Python 3.3-3.6 is no longer supported
3232

33-
from .builder import (CONTENT_LIMIT, Builder, # pylint: disable=unused-import
33+
from .builder import (Builder, content_limit_respected_read_bytes, # pylint: disable=unused-import
3434
substitute)
3535
from .context import LoadingContext # pylint: disable=unused-import
3636
from .context import RuntimeContext, getdefault
@@ -709,7 +709,7 @@ def collect_output(self,
709709
with fs_access.open(rfile["location"], "rb") as f:
710710
contents = b""
711711
if binding.get("loadContents") or compute_checksum:
712-
contents = f.read(CONTENT_LIMIT)
712+
contents = content_limit_respected_read_bytes(f)
713713
if binding.get("loadContents"):
714714
files["contents"] = contents.decode("utf-8")
715715
if compute_checksum:

cwltool/pathmapper.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
from .utils import convert_pathsep_to_unix, visit_class
2727

2828

29+
# Maximum accepted length (64 * 1024 = 65536) of a File object's "contents"
# field; contents longer than this are rejected with an error.
CONTENT_LIMIT = 64 * 1024
30+
2931
MapperEnt = collections.namedtuple("MapperEnt", ["resolved", "target", "type", "staged"])
3032

3133

@@ -87,6 +89,11 @@ def addLocation(d):
8789
if d.get("nameext") != ne:
8890
d["nameext"] = Text(ne)
8991

92+
contents = d.get("contents")
93+
if contents and len(contents) > CONTENT_LIMIT:
94+
if len(contents) > CONTENT_LIMIT:
95+
raise validate.ValidationException("File object contains contents with number of bytes that exceeds CONTENT_LIMIT length (%d)" % CONTENT_LIMIT)
96+
9097
visit_class(job, ("File", "Directory"), addLocation)
9198

9299

cwltool/workflow.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
# move to a regular typing import when Python 3.3-3.6 is no longer supported
2222

2323
from . import command_line_tool, context, expression
24-
from .builder import CONTENT_LIMIT
24+
from .builder import content_limit_respected_read
2525
from .checker import can_assign_src_to_sink, static_checker
2626
from .context import LoadingContext # pylint: disable=unused-import
2727
from .context import RuntimeContext, getdefault
@@ -348,7 +348,7 @@ def postScatterEval(io):
348348
for k, v in io.items():
349349
if k in loadContents and v.get("contents") is None:
350350
with fs_access.open(v["location"], "rb") as f:
351-
v["contents"] = f.read(CONTENT_LIMIT).decode("utf-8")
351+
v["contents"] = content_limit_respected_read(f)
352352

353353
def valueFromFunc(k, v): # type: (Any, Any) -> Any
354354
if k in valueFrom:

0 commit comments

Comments
 (0)