Skip to content

Commit dba5477

Browse files
natthan-pigoux and mr-c authored
feat: handle singularity/apptainer sandbox images (#2166)
Co-authored-by: Michael R. Crusoe <[email protected]>
1 parent c1128a4 commit dba5477

15 files changed

+732
-220
lines changed

cwltool/argparser.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,16 @@ def arg_parser() -> argparse.ArgumentParser:
543543
dest="pull_image",
544544
)
545545

546+
container_group.add_argument(
547+
"--singularity-sandbox-path",
548+
default=None,
549+
type=str,
550+
help="Singularity/Apptainer sandbox image base path. "
551+
"Will use a pre-existing sandbox image. "
552+
"Will be prepended to the dockerPull path. "
553+
"Equivalent to use CWL_SINGULARITY_IMAGES variable. ",
554+
dest="image_base_path",
555+
)
546556
container_group.add_argument(
547557
"--force-docker-pull",
548558
action="store_true",

cwltool/context.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ def __init__(self, kwargs: dict[str, Any] | None = None) -> None:
151151
self.streaming_allowed: bool = False
152152

153153
self.singularity: bool = False
154+
self.image_base_path: str | None = None
154155
self.podman: bool = False
155156
self.debug: bool = False
156157
self.compute_checksum: bool = True

cwltool/docker.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ def get_from_requirements(
201201
pull_image: bool,
202202
force_pull: bool,
203203
tmp_outdir_prefix: str,
204+
image_base_path: str | None = None,
204205
) -> str | None:
205206
if not shutil.which(self.docker_exec):
206207
raise WorkflowException(f"{self.docker_exec} executable is not available")

cwltool/job.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,7 @@ def get_from_requirements(
626626
pull_image: bool,
627627
force_pull: bool,
628628
tmp_outdir_prefix: str,
629+
image_base_path: str | None = None,
629630
) -> str | None:
630631
pass
631632

@@ -787,6 +788,7 @@ def run(
787788
runtimeContext.pull_image,
788789
runtimeContext.force_docker_pull,
789790
runtimeContext.tmp_outdir_prefix,
791+
runtimeContext.image_base_path,
790792
)
791793
)
792794
if img_id is None:

cwltool/singularity.py

Lines changed: 95 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import copy
44
import hashlib
5+
import json
56
import logging
67
import os
78
import os.path
@@ -10,9 +11,10 @@
1011
import sys
1112
import threading
1213
from collections.abc import Callable, MutableMapping
13-
from subprocess import check_call, check_output # nosec
14+
from subprocess import check_call, check_output, run # nosec
1415
from typing import cast
1516

17+
from mypy_extensions import mypyc_attr
1618
from packaging.version import Version
1719
from schema_salad.sourceline import SourceLine
1820
from schema_salad.utils import json_dumps
@@ -164,6 +166,30 @@ def _normalize_sif_id(string: str) -> str:
164166
return string.replace("/", "_") + ".sif"
165167

166168

169+
@mypyc_attr(allow_interpreted_subclasses=True)
def _inspect_singularity_sandbox_image(path: str) -> bool:
    """
    Inspect a Singularity sandbox image to be sure it is not an empty directory.

    Runs ``singularity inspect --json <path>`` and accepts the directory only
    when the command exits with status 0 and its JSON output contains a
    non-empty ``data.attributes`` entry.

    :param path: filesystem path of the candidate sandbox image directory.
    :returns: ``True`` when the inspection succeeds and reports attributes;
        ``False`` in every other case (command could not be run, non-zero exit
        status, output that is not JSON, or JSON without the expected shape).
    """
    cmd = [
        "singularity",
        "inspect",
        "--json",
        path,
    ]
    try:
        result = run(cmd, capture_output=True, text=True)  # nosec
    except Exception:
        # Deliberate best-effort probe: any failure to launch the command
        # means the directory cannot be confirmed as a sandbox image.
        return False

    if result.returncode != 0:
        return False

    try:
        output = json.loads(result.stdout)
    except json.JSONDecodeError:
        return False

    # Valid JSON is not necessarily an object (it may be ``null``, a list, a
    # string, ...) and "data" may itself be ``null``; check each level so that
    # unexpected output yields ``False`` rather than an AttributeError.
    data = output.get("data") if isinstance(output, dict) else None
    attributes = data.get("attributes") if isinstance(data, dict) else None
    return bool(attributes)
191+
192+
167193
class SingularityCommandLineJob(ContainerCommandLineJob):
168194
def __init__(
169195
self,
@@ -183,6 +209,7 @@ def get_image(
183209
pull_image: bool,
184210
tmp_outdir_prefix: str,
185211
force_pull: bool = False,
212+
sandbox_base_path: str | None = None,
186213
) -> bool:
187214
"""
188215
Acquire the software container image in the specified dockerRequirement.
@@ -201,17 +228,34 @@ def get_image(
201228

202229
with _IMAGES_LOCK:
203230
if "dockerImageId" in dockerRequirement:
204-
if (d_image_id := dockerRequirement["dockerImageId"]) in _IMAGES:
231+
d_image_id = dockerRequirement["dockerImageId"]
232+
if d_image_id in _IMAGES:
205233
if (resolved_image_id := _IMAGES[d_image_id]) != d_image_id:
206234
dockerRequirement["dockerImage_id"] = resolved_image_id
207235
return True
236+
if d_image_id.startswith("/"):
237+
_logger.info(
238+
SourceLine(dockerRequirement, "dockerImageId").makeError(
239+
f"Non-portable: using an absolute file path in a 'dockerImageId': {d_image_id}"
240+
)
241+
)
208242

209243
docker_req = copy.deepcopy(dockerRequirement) # thread safety
210244
if "CWL_SINGULARITY_CACHE" in os.environ:
211245
cache_folder = os.environ["CWL_SINGULARITY_CACHE"]
212246
elif is_version_2_6() and "SINGULARITY_PULLFOLDER" in os.environ:
213247
cache_folder = os.environ["SINGULARITY_PULLFOLDER"]
214248

249+
if os.environ.get("CWL_SINGULARITY_IMAGES", None):
250+
image_base_path = os.environ["CWL_SINGULARITY_IMAGES"]
251+
else:
252+
image_base_path = cache_folder if cache_folder else ""
253+
254+
if not sandbox_base_path:
255+
sandbox_base_path = os.path.abspath(image_base_path)
256+
else:
257+
sandbox_base_path = os.path.abspath(sandbox_base_path)
258+
215259
if "dockerFile" in docker_req:
216260
if cache_folder is None: # if environment variables were not set
217261
cache_folder = create_tmp_dir(tmp_outdir_prefix)
@@ -261,21 +305,44 @@ def get_image(
261305
)
262306
found = True
263307
elif "dockerImageId" not in docker_req and "dockerPull" in docker_req:
264-
match = re.search(pattern=r"([a-z]*://)", string=docker_req["dockerPull"])
265-
img_name = _normalize_image_id(docker_req["dockerPull"])
266-
candidates.append(img_name)
267-
if is_version_3_or_newer():
268-
sif_name = _normalize_sif_id(docker_req["dockerPull"])
269-
candidates.append(sif_name)
270-
docker_req["dockerImageId"] = sif_name
308+
# looking for local singularity sandbox image and handle it as a local image
309+
sandbox_image_path = os.path.join(sandbox_base_path, dockerRequirement["dockerPull"])
310+
if os.path.isdir(sandbox_image_path) and _inspect_singularity_sandbox_image(
311+
sandbox_image_path
312+
):
313+
docker_req["dockerImageId"] = sandbox_image_path
314+
_logger.info(
315+
"Using local Singularity sandbox image found in %s",
316+
sandbox_image_path,
317+
)
318+
found = True
271319
else:
272-
docker_req["dockerImageId"] = img_name
273-
if not match:
274-
docker_req["dockerPull"] = "docker://" + docker_req["dockerPull"]
320+
match = re.search(pattern=r"([a-z]*://)", string=docker_req["dockerPull"])
321+
img_name = _normalize_image_id(docker_req["dockerPull"])
322+
candidates.append(img_name)
323+
if is_version_3_or_newer():
324+
sif_name = _normalize_sif_id(docker_req["dockerPull"])
325+
candidates.append(sif_name)
326+
docker_req["dockerImageId"] = sif_name
327+
else:
328+
docker_req["dockerImageId"] = img_name
329+
if not match:
330+
docker_req["dockerPull"] = "docker://" + docker_req["dockerPull"]
275331
elif "dockerImageId" in docker_req:
276-
if os.path.isfile(docker_req["dockerImageId"]):
332+
sandbox_image_path = os.path.join(sandbox_base_path, dockerRequirement["dockerImageId"])
333+
# handling local singularity sandbox image
334+
if os.path.isdir(sandbox_image_path) and _inspect_singularity_sandbox_image(
335+
sandbox_image_path
336+
):
337+
_logger.info(
338+
"Using local Singularity sandbox image found in %s",
339+
sandbox_image_path,
340+
)
341+
docker_req["dockerImageId"] = sandbox_image_path
277342
found = True
278343
else:
344+
if os.path.isfile(docker_req["dockerImageId"]):
345+
found = True
279346
candidates.append(docker_req["dockerImageId"])
280347
candidates.append(_normalize_image_id(docker_req["dockerImageId"]))
281348
if is_version_3_or_newer():
@@ -294,18 +361,19 @@ def get_image(
294361
path = os.path.join(dirpath, entry)
295362
if os.path.isfile(path):
296363
_logger.info(
297-
"Using local copy of Singularity image found in %s",
364+
"Using local copy of Singularity image %s found in %s",
365+
entry,
298366
dirpath,
299367
)
300368
docker_req["dockerImageId"] = path
301369
found = True
302370
if (force_pull or not found) and pull_image:
303371
cmd: list[str] = []
304372
if "dockerPull" in docker_req:
305-
if cache_folder:
373+
if image_base_path:
306374
env = os.environ.copy()
307375
if is_version_2_6():
308-
env["SINGULARITY_PULLFOLDER"] = cache_folder
376+
env["SINGULARITY_PULLFOLDER"] = image_base_path
309377
cmd = [
310378
"singularity",
311379
"pull",
@@ -320,14 +388,14 @@ def get_image(
320388
"pull",
321389
"--force",
322390
"--name",
323-
"{}/{}".format(cache_folder, docker_req["dockerImageId"]),
391+
"{}/{}".format(image_base_path, docker_req["dockerImageId"]),
324392
str(docker_req["dockerPull"]),
325393
]
326394

327395
_logger.info(str(cmd))
328396
check_call(cmd, env=env, stdout=sys.stderr) # nosec
329397
docker_req["dockerImageId"] = "{}/{}".format(
330-
cache_folder, docker_req["dockerImageId"]
398+
image_base_path, docker_req["dockerImageId"]
331399
)
332400
found = True
333401
else:
@@ -385,6 +453,7 @@ def get_from_requirements(
385453
pull_image: bool,
386454
force_pull: bool,
387455
tmp_outdir_prefix: str,
456+
image_base_path: str | None = None,
388457
) -> str | None:
389458
"""
390459
Return the filename of the Singularity image.
@@ -394,8 +463,14 @@ def get_from_requirements(
394463
if not bool(shutil.which("singularity")):
395464
raise WorkflowException("singularity executable is not available")
396465

397-
if not self.get_image(cast(dict[str, str], r), pull_image, tmp_outdir_prefix, force_pull):
398-
raise WorkflowException("Container image {} not found".format(r["dockerImageId"]))
466+
if not self.get_image(
467+
cast(dict[str, str], r),
468+
pull_image,
469+
tmp_outdir_prefix,
470+
force_pull,
471+
sandbox_base_path=image_base_path,
472+
):
473+
raise WorkflowException(f"Container image not found for {r}")
399474

400475
return os.path.abspath(cast(str, r["dockerImageId"]))
401476

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/usr/bin/env cwl-runner
2+
cwlVersion: v1.0
3+
class: CommandLineTool
4+
5+
requirements:
6+
DockerRequirement:
7+
dockerImageId: container_repo/alpine
8+
9+
inputs:
10+
message: string
11+
12+
outputs: []
13+
14+
baseCommand: echo

tests/sing_local_sandbox_test.cwl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/usr/bin/env cwl-runner
2+
cwlVersion: v1.0
3+
class: CommandLineTool
4+
5+
requirements:
6+
DockerRequirement:
7+
dockerPull: container_repo/alpine
8+
9+
inputs:
10+
message: string
11+
12+
outputs: []
13+
14+
baseCommand: echo

tests/test_docker.py

Lines changed: 31 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -185,21 +185,21 @@ def test_podman_required_secfile(tmp_path: Path) -> None:
185185
def test_singularity_required_secfile(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
186186
singularity_dir = tmp_path / "singularity"
187187
singularity_dir.mkdir()
188-
monkeypatch.setenv("CWL_SINGULARITY_CACHE", str(singularity_dir))
189-
190-
result_code, stdout, stderr = get_main_output(
191-
[
192-
"--singularity",
193-
"--outdir",
194-
str(tmp_path / "out"),
195-
get_data("tests/secondary-files-required-container.cwl"),
196-
]
197-
)
198-
assert result_code == 0, stderr
199-
assert (
200-
json.loads(stdout)["output"]["secondaryFiles"][0]["checksum"]
201-
== "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709"
202-
)
188+
with monkeypatch.context() as m:
189+
m.setenv("CWL_SINGULARITY_CACHE", str(singularity_dir))
190+
result_code, stdout, stderr = get_main_output(
191+
[
192+
"--singularity",
193+
"--outdir",
194+
str(tmp_path / "out"),
195+
get_data("tests/secondary-files-required-container.cwl"),
196+
]
197+
)
198+
assert result_code == 0, stderr
199+
assert (
200+
json.loads(stdout)["output"]["secondaryFiles"][0]["checksum"]
201+
== "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709"
202+
)
203203

204204

205205
@needs_docker
@@ -247,23 +247,22 @@ def test_singularity_required_missing_secfile(
247247
) -> None:
248248
singularity_dir = tmp_path / "singularity"
249249
singularity_dir.mkdir()
250-
monkeypatch.setenv("CWL_SINGULARITY_CACHE", str(singularity_dir))
251-
result_code, stdout, stderr = get_main_output(
252-
[
253-
"--singularity",
254-
"--outdir",
255-
str(tmp_path),
256-
get_data("tests/secondary-files-required-missing-container.cwl"),
257-
]
258-
)
259-
assert result_code == 1, stderr
260-
stderr = re.sub(r"\s\s+", " ", stderr)
261-
assert "Job error:" in stderr
262-
assert "Error collecting output for parameter 'output'" in stderr
263-
assert (
264-
"tests/secondary-files-required-missing-container.cwl:16:5: Missing required secondary file"
265-
)
266-
assert "file.ext3" in stderr
250+
with monkeypatch.context() as m:
251+
m.setenv("CWL_SINGULARITY_CACHE", str(singularity_dir))
252+
result_code, stdout, stderr = get_main_output(
253+
[
254+
"--singularity",
255+
"--outdir",
256+
str(tmp_path),
257+
get_data("tests/secondary-files-required-missing-container.cwl"),
258+
]
259+
)
260+
assert result_code == 1, stderr
261+
stderr = re.sub(r"\s\s+", " ", stderr)
262+
assert "Job error:" in stderr
263+
assert "Error collecting output for parameter 'output'" in stderr
264+
assert "tests/secondary-files-required-missing-container.cwl:16:5: Missing required secondary file"
265+
assert "file.ext3" in stderr
267266

268267

269268
@needs_docker

tests/test_ext.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,13 @@ def test_listing_deep() -> None:
3333
@needs_docker
3434
def test_cwltool_options(monkeypatch: pytest.MonkeyPatch) -> None:
3535
"""Check setting options via environment variable."""
36-
monkeypatch.setenv("CWLTOOL_OPTIONS", "--enable-ext")
37-
params = [
38-
get_data("tests/wf/listing_deep.cwl"),
39-
get_data("tests/listing-job.yml"),
40-
]
41-
assert main(params) == 0
36+
with monkeypatch.context() as m:
37+
m.setenv("CWLTOOL_OPTIONS", "--enable-ext")
38+
params = [
39+
get_data("tests/wf/listing_deep.cwl"),
40+
get_data("tests/listing-job.yml"),
41+
]
42+
assert main(params) == 0
4243

4344

4445
@needs_docker

0 commit comments

Comments
 (0)