Skip to content

Commit 213d93a

Browse files
author
radhika
committed
cwltool: do not read all file contents when compute_checksum is not set.
1 parent 96f088e commit 213d93a

File tree

1 file changed

+18 -10 lines changed

1 file changed

+18
-10
lines changed

cwltool/draft2tool.py

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -107,10 +107,13 @@ def __init__(self, job, output_callback, cachebuilder, jobcache):
107107

108108
def run(self, **kwargs):
109109
# type: (**Any) -> None
110+
compute_checksum = kwargs.get("compute_checksum")
111+
if compute_checksum == None:
112+
compute_checksum = True
110113
self.output_callback(self.job.collect_output_ports(self.job.tool["outputs"],
111114
self.cachebuilder,
112115
self.outdir,
113-
kwargs.get("compute_checksum")),
116+
compute_checksum),
114117
"success")
115118

116119
# map files to assigned path inside a container. We need to also explicitly
@@ -334,9 +337,12 @@ def rm_pending_output_callback(output_callback, jobcachepending,
334337
else:
335338
j.command_line = flatten(map(builder.generate_arg, builder.bindings))
336339

340+
compute_checksum = kwargs.get("compute_checksum")
341+
if compute_checksum == None:
342+
compute_checksum = True
337343
j.pathmapper = builder.pathmapper
338344
j.collect_outputs = partial(
339-
self.collect_output_ports, self.tool["outputs"], builder, compute_checksum=kwargs.get("compute_checksum"))
345+
self.collect_output_ports, self.tool["outputs"], builder, compute_checksum=compute_checksum)
340346
j.output_callback = output_callback
341347

342348
yield j
@@ -409,19 +415,21 @@ def collect_output(self, schema, builder, outdir, compute_checksum=True):
409415
if files["class"] == "Directory" and "listing" not in files:
410416
getListing(builder.fs_access, files)
411417
else:
412-
checksum = hashlib.sha1()
413418
with builder.fs_access.open(files["location"], "rb") as f:
419+
filesize = 0
414420
contents = f.read(CONTENT_LIMIT)
415421
if binding.get("loadContents"):
416422
files["contents"] = contents
417-
filesize = 0
418-
while contents != "":
419-
if compute_checksum:
423+
if compute_checksum:
424+
checksum = hashlib.sha1()
425+
while contents != "":
420426
checksum.update(contents)
421-
filesize += len(contents)
422-
contents = f.read(1024*1024)
423-
if compute_checksum:
424-
files["checksum"] = "sha1$%s" % checksum.hexdigest()
427+
filesize += len(contents)
428+
contents = f.read(1024*1024)
429+
files["checksum"] = "sha1$%s" % checksum.hexdigest()
430+
else:
431+
f.seek(0, 2)
432+
filesize = f.tell()
425433
files["size"] = filesize
426434
if "format" in schema:
427435
files["format"] = builder.do_eval(schema["format"], context=files)

0 commit comments

Comments
 (0)