Skip to content

Commit a523c2f

Browse files
rupertnashmr-c
authored andcommitted
Add proper tests for env vars and fix a few resulting issues
1 parent 725174e commit a523c2f

12 files changed

+503
-81
lines changed

cwltool/docker.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,6 @@ def _check_docker_machine_path(path: Optional[str]) -> None:
7878
class DockerCommandLineJob(ContainerCommandLineJob):
7979
"""Runs a CommandLineJob in a sofware container using the Docker engine."""
8080

81-
CONTAINER_TMPDIR: str = "/tmp" # nosec
82-
8381
def __init__(
8482
self,
8583
builder: Builder,

cwltool/env_to_stdout.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
r"""Python script that acts like (GNU coreutils) env -0.
2+
3+
When run as a script, it prints the the environment as
4+
`(VARNAME=value\0)*`.
5+
6+
Ideally we would just use `env -0`, because python (thanks to PEPs 538
7+
and 540) will set zero to two environment variables to better handle
8+
Unicode-locale interactions, however BSD familiy implementations of
9+
`env` do not all support the `-0` flag so we supply this script that
10+
produces equivalent output.
11+
"""
12+
13+
import os
14+
from typing import Dict
15+
16+
17+
def deserialize_env(data: str) -> Dict[str, str]:
18+
"""Deserialize the output of `env -0` to dictionary."""
19+
ans = {}
20+
for item in data.strip("\0").split("\0"):
21+
key, val = item.split("=", 1)
22+
ans[key] = val
23+
return ans
24+
25+
26+
def main() -> None:
27+
"""Print the null-separated enviroment to stdout."""
28+
for k, v in os.environ.items():
29+
print(f"{k}={v}", end="\0")
30+
31+
32+
if __name__ == "__main__":
33+
main()

cwltool/job.py

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from schema_salad.utils import json_dump, json_dumps
3939
from typing_extensions import TYPE_CHECKING
4040

41-
from . import run_job
41+
from . import env_to_stdout, run_job
4242
from .builder import Builder, HasReqsHints
4343
from .context import RuntimeContext
4444
from .errors import UnsupportedRequirement, WorkflowException
@@ -212,7 +212,18 @@ def _execute(
212212
runtimeContext: RuntimeContext,
213213
monitor_function=None, # type: Optional[Callable[[subprocess.Popen[str]], None]]
214214
) -> None:
215+
"""Execute the tool, either directly or via script.
215216
217+
Note: we are now at the point where self.environment is
218+
ignored. The caller is responsible for correctly splitting that
219+
into the runtime and env arguments.
220+
221+
`runtime` is the list of arguments to put at the start of the
222+
command (e.g. docker run).
223+
224+
`env` is the enviroment to be set for running the resulting
225+
command line.
226+
"""
216227
scr = self.get_requirement("ShellCommandRequirement")[0]
217228

218229
shouldquote = needs_shell_quoting_re.search
@@ -431,7 +442,7 @@ def _required_env(self) -> Dict[str, str]:
431442
pass
432443

433444
def _preserve_environment_on_containers_warning(
434-
self, varname: Optional[str] = None
445+
self, varname: Optional[Iterable[str]] = None
435446
) -> None:
436447
"""When running in a container, issue a warning."""
437448
# By default, don't do anything; ContainerCommandLineJob below
@@ -456,8 +467,10 @@ def prepare_environment(
456467
self._preserve_environment_on_containers_warning()
457468
env.update(os.environ)
458469
elif runtimeContext.preserve_environment:
470+
self._preserve_environment_on_containers_warning(
471+
runtimeContext.preserve_environment
472+
)
459473
for key in runtimeContext.preserve_environment:
460-
self._preserve_environment_on_containers_warning(key)
461474
try:
462475
env[key] = os.environ[key]
463476
except KeyError:
@@ -564,6 +577,8 @@ def _required_env(self) -> Dict[str, str]:
564577
class ContainerCommandLineJob(JobBase, metaclass=ABCMeta):
565578
"""Commandline job using containers."""
566579

580+
CONTAINER_TMPDIR: str = "/tmp" # nosec
581+
567582
@abstractmethod
568583
def get_from_requirements(
569584
self,
@@ -616,17 +631,17 @@ def add_writable_directory_volume(
616631
"""Append a writable directory mapping to the runtime option list."""
617632

618633
def _preserve_environment_on_containers_warning(
619-
self, varname: Optional[str] = None
634+
self, varnames: Optional[Iterable[str]] = None
620635
) -> None:
621636
"""When running in a container, issue a warning."""
622-
if varname is None:
623-
_logger.warning(
624-
"You have specified `--preserve-entire-environment` while running a container"
625-
)
637+
if varnames is None:
638+
flags = "--preserve-entire-environment"
626639
else:
627-
_logger.warning(
628-
f"You have specified `--preserve-environment={varname}` while running a container"
629-
)
640+
flags = "--preserve-environment={" + ", ".join(varnames) + "}"
641+
642+
_logger.warning(
643+
f"You have specified `{flags}` while running a container which will override variables set in the container. This may break the container, be non-portable, and/or affect reproducibility."
644+
)
630645

631646
def create_file_and_add_volume(
632647
self,
@@ -713,7 +728,6 @@ def run(
713728
(docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
714729
self.prov_obj = runtimeContext.prov_obj
715730
img_id = None
716-
env = cast(MutableMapping[str, str], os.environ)
717731
user_space_docker_cmd = runtimeContext.user_space_docker_cmd
718732
if docker_req is not None and user_space_docker_cmd:
719733
# For user-space docker implementations, a local image name or ID
@@ -807,7 +821,11 @@ def run(
807821
)
808822

809823
self._setup(runtimeContext)
824+
825+
# Copy as don't want to modify our env
826+
env = dict(os.environ)
810827
(runtime, cidfile) = self.create_runtime(env, runtimeContext)
828+
811829
runtime.append(str(img_id))
812830
monitor_function = None
813831
if cidfile:
@@ -997,8 +1015,13 @@ def terminate(): # type: () -> None
9971015
job_script = os.path.join(job_dir, "run_job.bash")
9981016
with open(job_script, "wb") as _:
9991017
_.write(job_script_contents.encode("utf-8"))
1018+
10001019
job_run = os.path.join(job_dir, "run_job.py")
10011020
shutil.copyfile(run_job.__file__, job_run)
1021+
1022+
env_getter = os.path.join(job_dir, "env_to_stdout.py")
1023+
shutil.copyfile(env_to_stdout.__file__, env_getter)
1024+
10021025
sproc = subprocess.Popen( # nosec
10031026
["bash", job_script.encode("utf-8")],
10041027
shell=False, # nosec

cwltool/run_job.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,15 @@ def handle_software_environment(cwl_env: Dict[str, str], script: str) -> Dict[st
1818
return cwl_env
1919

2020
env = cwl_env.copy()
21-
with open("output_environment.bash") as env_file:
22-
for line in env_file:
23-
key, val = line.split("=", 1)
24-
if key in ("_", "PWD", "SHLVL", "TMPDIR", "HOME", "_CWLTOOL"):
25-
# Skip some variables that are meaningful to the shell
26-
# or set specifically by the CWL runtime environment.
27-
continue
28-
env[key] = val[:-1] # remove trailing newline
21+
with open("output_environment.dat") as _:
22+
data = _.read().strip("\0")
23+
for line in data.split("\0"):
24+
key, val = line.split("=", 1)
25+
if key in ("_", "PWD", "SHLVL", "TMPDIR", "HOME", "_CWLTOOL"):
26+
# Skip some variables that are meaningful to the shell or set
27+
# specifically by the CWL runtime environment.
28+
continue
29+
env[key] = val
2930
return env
3031

3132

cwltool/singularity.py

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,6 @@ def _normalize_sif_id(string: str) -> str:
8383

8484

8585
class SingularityCommandLineJob(ContainerCommandLineJob):
86-
CONTAINER_TMPDIR: str = "/tmp" # nosec
87-
8886
def __init__(
8987
self,
9088
builder: Builder,
@@ -277,13 +275,12 @@ def append_volume(
277275
runtime: List[str], source: str, target: str, writable: bool = False
278276
) -> None:
279277
runtime.append("--bind")
280-
runtime.append(
281-
"{}:{}:{}".format(
282-
source,
283-
target,
284-
"rw" if writable else "ro",
285-
)
286-
)
278+
# Mounts are writable by default, so 'rw' is optional and not
279+
# supported (due to a bug) in some 3.6 series releases.
280+
vol = f"{source}:{target}"
281+
if not writable:
282+
vol += ":ro"
283+
runtime.append(vol)
287284

288285
def add_file_or_directory_volume(
289286
self, runtime: List[str], volume: MapperEnt, host_outdir_tgt: Optional[str]
@@ -395,28 +392,28 @@ def create_runtime(
395392
runtime.append("--userns")
396393
else:
397394
runtime.append("--pid")
395+
396+
container_HOME: Optional[str] = None
398397
if is_version_3_1_or_newer():
398+
# Remove HOME, as passed in a special way (restore it below)
399+
container_HOME = self.environment.pop("HOME")
399400
runtime.append("--home")
400401
runtime.append(
401402
"{}:{}".format(
402403
os.path.realpath(self.outdir),
403-
self.builder.outdir,
404+
container_HOME,
404405
)
405406
)
406407
else:
407-
runtime.append("--bind")
408-
runtime.append(
409-
"{}:{}:rw".format(
410-
os.path.realpath(self.outdir),
411-
self.builder.outdir,
412-
)
413-
)
414-
runtime.append("--bind")
415-
runtime.append(
416-
"{}:{}:rw".format(
417-
os.path.realpath(self.tmpdir),
418-
self.CONTAINER_TMPDIR,
408+
self.append_volume(
409+
runtime,
410+
os.path.realpath(self.outdir),
411+
self.environment["HOME"],
412+
writable=True,
419413
)
414+
415+
self.append_volume(
416+
runtime, os.path.realpath(self.tmpdir), self.CONTAINER_TMPDIR, writable=True
420417
)
421418

422419
self.add_volumes(
@@ -447,4 +444,8 @@ def create_runtime(
447444

448445
for name, value in self.environment.items():
449446
env[f"SINGULARITYENV_{name}"] = str(value)
447+
448+
if container_HOME:
449+
# Restore HOME if we removed it above.
450+
self.environment["HOME"] = container_HOME
450451
return (runtime, None)

cwltool/software_requirements.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,12 @@
3232
"""#!/bin/bash
3333
cat > modify_environment.bash <<'EOF'
3434
$handle_dependencies
35-
env > output_environment.bash
35+
# First try env -0
36+
if ! env -0 > "output_environment.dat" 2> /dev/null; then
37+
# If that fails, use the python script.
38+
# In some circumstances (see PEP 538) this will the add LC_CTYPE env var.
39+
python3 "env_to_stdout.py" > "output_environment.dat"
40+
fi
3641
EOF
3742
python3 "run_job.py" "job.json" "modify_environment.bash"
3843
"""

tests/env3.cwl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ cwlVersion: v1.0
33
class: CommandLineTool
44
inputs: []
55
baseCommand: env
6+
arguments: ["-0"]
67
outputs:
78
env:
89
type: stdout

tests/env4.cwl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/usr/bin/env cwl-runner
2+
cwlVersion: v1.0
3+
class: CommandLineTool
4+
5+
requirements:
6+
InitialWorkDirRequirement:
7+
listing:
8+
- entryname: env0.py
9+
entry: |
10+
import os
11+
for k, v in os.environ.items():
12+
print(f"{k}={v}", end="\0")
13+
14+
inputs: []
15+
baseCommand: python3
16+
arguments: ["env0.py"]
17+
outputs:
18+
env:
19+
type: stdout

tests/test_dependencies.py

Lines changed: 16 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
"""Tests of satisfying SoftwareRequirement via dependencies."""
2-
from io import StringIO
3-
import json
42
import os
53
from pathlib import Path
64
from shutil import which
75
from types import ModuleType
8-
from typing import Optional
6+
from typing import Dict, List, Optional
97

108
import pytest
119

1210
from cwltool.main import main
1311

14-
from .util import get_data, get_main_output, needs_docker, working_directory
12+
from .util import (
13+
get_data,
14+
get_main_output,
15+
get_tool_env,
16+
needs_docker,
17+
working_directory,
18+
)
1519

1620
deps = None # type: Optional[ModuleType]
1721
try:
@@ -74,29 +78,15 @@ def test_modules_environment(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) ->
7478
monkeypatch.setenv(
7579
"MODULEPATH", os.path.join(os.getcwd(), "tests/test_deps_env/modulefiles")
7680
)
81+
tool_env = get_tool_env(
82+
tmp_path,
83+
[
84+
"--beta-dependency-resolvers-configuration",
85+
get_data("tests/test_deps_env_modules_resolvers_conf.yml"),
86+
],
87+
get_data("tests/env_with_software_req.yml"),
88+
)
7789

78-
stdout = StringIO()
79-
stderr = StringIO()
80-
with working_directory(tmp_path):
81-
rc = main(
82-
argsl=[
83-
"--beta-dependency-resolvers-configuration",
84-
get_data("tests/test_deps_env_modules_resolvers_conf.yml"),
85-
get_data("tests/env3.cwl"),
86-
get_data("tests/env_with_software_req.yml"),
87-
],
88-
stdout=stdout,
89-
stderr=stderr,
90-
)
91-
assert rc == 0
92-
93-
output = json.loads(stdout.getvalue())
94-
env_path = output["env"]["path"]
95-
tool_env = {}
96-
with open(env_path) as _:
97-
for line in _:
98-
key, val = line.split("=", 1)
99-
tool_env[key] = val[:-1]
10090
assert tool_env["TEST_VAR_MODULE"] == "environment variable ends in space "
10191
tool_path = tool_env["PATH"].split(":")
10292
assert get_data("tests/test_deps_env/random-lines/1.0/scripts") in tool_path

0 commit comments

Comments
 (0)