Skip to content

Commit 7acd95b

Browse files
author
Peter Amstutz
committed
Refactor secondaryFile handling to avoid basing on primary "location".
Instead prefer to use basename or path. Necessary to support engines such as Toil with non-filename-preserving storage systems.
1 parent 8d5b3e8 commit 7acd95b

File tree

2 files changed

+49
-23
lines changed

2 files changed

+49
-23
lines changed

cwltool/builder.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import absolute_import
22
import copy
33
import os
4+
import logging
45
from typing import Any, Callable, Dict, List, Text, Type, Union
56

67
import six
@@ -18,6 +19,8 @@
1819
from .stdfsaccess import StdFsAccess
1920
from .utils import aslist, get_feature, docker_windows_path_adjust, onWindows
2021

22+
_logger = logging.getLogger("cwltool")
23+
2124
AvroSchemaFromJSONData = avro.schema.make_avsc_object
2225

2326
CONTENT_LIMIT = 64 * 1024
@@ -145,18 +148,25 @@ def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
145148
datum["secondaryFiles"] = []
146149
for sf in aslist(schema["secondaryFiles"]):
147150
if isinstance(sf, dict) or "$(" in sf or "${" in sf:
148-
secondary_eval = self.do_eval(sf, context=datum)
149-
if isinstance(secondary_eval, string_types):
150-
sfpath = {"location": secondary_eval,
151-
"class": "File"}
152-
else:
153-
sfpath = secondary_eval
154-
else:
155-
sfpath = {"location": substitute(datum["location"], sf), "class": "File"}
156-
if isinstance(sfpath, list):
157-
datum["secondaryFiles"].extend(sfpath)
151+
sfpath = self.do_eval(sf, context=datum)
158152
else:
159-
datum["secondaryFiles"].append(sfpath)
153+
sfpath = substitute(datum["basename"], sf)
154+
for sfname in aslist(sfpath):
155+
found = False
156+
for d in datum["secondaryFiles"]:
157+
if not d.get("basename"):
158+
d["basename"] = d["location"][d["location"].rindex("/")+1:]
159+
if d["basename"] == sfname:
160+
found = True
161+
if not found:
162+
if isinstance(sfname, dict):
163+
datum["secondaryFiles"].append(sfname)
164+
else:
165+
datum["secondaryFiles"].append({
166+
"location": datum["location"][0:datum["location"].rindex("/")+1]+sfname,
167+
"basename": sfname,
168+
"class": "File"})
169+
160170
normalizeFilesDirs(datum["secondaryFiles"])
161171

162172
def _capture_files(f):

cwltool/draft2tool.py

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*") # Accept anything
3636
ACCEPTLIST_RE = ACCEPTLIST_EN_STRICT_RE
3737
DEFAULT_CONTAINER_MSG="""We are on Microsoft Windows and not all components of this CWL description have a
38-
container specified. This means that these steps will be executed in the default container,
38+
container specified. This means that these steps will be executed in the default container,
3939
which is %s.
4040
4141
Note, this could affect portability if this CWL description relies on non-POSIX features
@@ -116,17 +116,26 @@ def revmap_file(builder, outdir, f):
116116
if not split.scheme:
117117
outdir = file_uri(str(outdir))
118118

119+
# builder.outdir is the inner (container/compute node) output directory
120+
# outdir is the outer (host/storage system) output directory
121+
119122
if "location" in f:
120123
if f["location"].startswith("file://"):
121124
path = convert_pathsep_to_unix(uri_file_path(f["location"]))
122125
revmap_f = builder.pathmapper.reversemap(path)
126+
123127
if revmap_f and not builder.pathmapper.mapper(revmap_f[0]).type.startswith("Writable"):
124128
f["basename"] = os.path.basename(path)
125-
f["location"] = revmap_f[0]
129+
f["location"] = revmap_f[1]
126130
elif path == builder.outdir:
127131
f["location"] = outdir
128132
elif path.startswith(builder.outdir):
129133
f["location"] = builder.fs_access.join(outdir, path[len(builder.outdir) + 1:])
134+
elif f["location"].startswith(outdir):
135+
revmap_f = builder.pathmapper.reversemap(builder.fs_access.join(builder.outdir, f["location"][len(outdir) + 1:]))
136+
if revmap_f and not builder.pathmapper.mapper(revmap_f[0]).type.startswith("Writable"):
137+
f["basename"] = os.path.basename(path)
138+
f["location"] = revmap_f[1]
130139
return f
131140

132141
if "path" in f:
@@ -350,6 +359,7 @@ def rm_pending_output_callback(output_callbacks, jobcachepending,
350359
if "stagedir" in make_path_mapper_kwargs:
351360
make_path_mapper_kwargs = make_path_mapper_kwargs.copy()
352361
del make_path_mapper_kwargs["stagedir"]
362+
353363
builder.pathmapper = self.makePathMapper(reffiles, builder.stagedir, **make_path_mapper_kwargs)
354364
builder.requirements = j.requirements
355365

@@ -566,7 +576,10 @@ def collect_output(self, schema, builder, outdir, fs_access, compute_checksum=Tr
566576
elif gb.startswith("/"):
567577
raise WorkflowException("glob patterns must not start with '/'")
568578
try:
579+
prefix = fs_access.glob(outdir)
569580
r.extend([{"location": g,
581+
"path": fs_access.join(builder.outdir, g[len(prefix[0])+1:]),
582+
"basename": os.path.basename(g),
570583
"class": "File" if fs_access.isfile(g) else "Directory"}
571584
for g in fs_access.glob(fs_access.join(outdir, gb))])
572585
except (OSError, IOError) as e:
@@ -576,12 +589,14 @@ def collect_output(self, schema, builder, outdir, fs_access, compute_checksum=Tr
576589
raise
577590

578591
for files in r:
592+
rfile = files.copy()
593+
revmap(rfile)
579594
if files["class"] == "Directory":
580595
ll = builder.loadListing or (binding and binding.get("loadListing"))
581596
if ll and ll != "no_listing":
582597
get_listing(fs_access, files, (ll == "deep_listing"))
583598
else:
584-
with fs_access.open(files["location"], "rb") as f:
599+
with fs_access.open(rfile["location"], "rb") as f:
585600
contents = b""
586601
if binding.get("loadContents") or compute_checksum:
587602
contents = f.read(CONTENT_LIMIT)
@@ -625,28 +640,29 @@ def collect_output(self, schema, builder, outdir, fs_access, compute_checksum=Tr
625640
else:
626641
r = r[0]
627642

628-
# Ensure files point to local references outside of the run environment
629-
adjustFileObjs(r, cast( # known bug in mypy
630-
# https://github.com/python/mypy/issues/797
631-
Callable[[Any], Any], revmap))
632-
633643
if "secondaryFiles" in schema:
634644
with SourceLine(schema, "secondaryFiles", WorkflowException):
635645
for primary in aslist(r):
636646
if isinstance(primary, dict):
637-
primary["secondaryFiles"] = []
647+
primary.setdefault("secondaryFiles", [])
638648
for sf in aslist(schema["secondaryFiles"]):
639649
if isinstance(sf, dict) or "$(" in sf or "${" in sf:
640650
sfpath = builder.do_eval(sf, context=primary)
641651
if isinstance(sfpath, string_types):
642-
sfpath = revmap({"location": sfpath, "class": "File"})
652+
sfpath = {"path": primary["path"][0:primary["path"].rindex("/")+1]+sfpath, "class": "File"}
643653
else:
644-
sfpath = {"location": substitute(primary["location"], sf), "class": "File"}
645-
654+
sfpath = {"path": substitute(primary["path"], sf), "class": "File"}
646655
for sfitem in aslist(sfpath):
656+
if "path" in sfitem and "location" not in sfitem:
657+
revmap(sfitem)
647658
if fs_access.exists(sfitem["location"]):
648659
primary["secondaryFiles"].append(sfitem)
649660

661+
# Ensure files point to local references outside of the run environment
662+
adjustFileObjs(r, cast( # known bug in mypy
663+
# https://github.com/python/mypy/issues/797
664+
Callable[[Any], Any], revmap))
665+
650666
if not r and optional:
651667
r = None
652668

0 commit comments

Comments
 (0)