Skip to content

Commit ac5497a

Browse files
authored
Merge branch 'dev' into documentation
2 parents a683b97 + be5f2e2 commit ac5497a

File tree

7 files changed

+192
-11
lines changed

7 files changed

+192
-11
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@
1414
- Move `includeConfig 'conf/modules.config'` next to `includeConfig 'conf/base.config'` so that test profile configurations are not overwritten ([#3301](https://github.com/nf-core/tools/pull/3301))
1515
- Use `params.monochrome_logs` in the template and update nf-core components ([#3310](https://github.com/nf-core/tools/pull/3310))
1616
- Fix some typos and improve writing in `usage.md` and `CONTRIBUTING.md` ([#3302](https://github.com/nf-core/tools/pull/3302))
17+
- Add `manifest.contributors` to `nextflow.config` ([#3311](https://github.com/nf-core/tools/pull/3311))
1718

1819
### Download
1920

2021
- First steps towards fixing [#3179](https://github.com/nf-core/tools/issues/3179): Modify `prioritize_direct_download()` to retain Seqera Singularity https:// Container URIs and hardcode Seqera Containers into `gather_registries()` ([#3244](https://github.com/nf-core/tools/pull/3244)).
22+
- Further steps towards fixing [#3179](https://github.com/nf-core/tools/issues/3179): Enable limited support for `oras://` container paths (_only absolute URIs, no flexible registries like with Docker_) and prevent unnecessary image downloads for Seqera Container modules with `reconcile_seqera_container_uris()` ([#3293](https://github.com/nf-core/tools/pull/3293)).
2123
- Update dawidd6/action-download-artifact action to v7 ([#3306](https://github.com/nf-core/tools/pull/3306))
2224

2325
### Linting

nf_core/pipeline-template/nextflow.config

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,20 @@ dag {
274274

275275
manifest {
276276
name = '{{ name }}'
277-
author = """{{ author }}"""
277+
author = """{{ author }}""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead
278+
contributors = [
279+
// TODO nf-core: Update the field with the details of the contributors to your pipeline. New with Nextflow version 24.10.0
280+
{%- for author_name in author.split(",") %}
281+
[
282+
name: '{{ author_name }}',
283+
affiliation: '',
284+
email: '',
285+
github: '',
286+
contribution: [], // List of contribution types ('author', 'maintainer' or 'contributor')
287+
orcid: ''
288+
],
289+
{%- endfor %}
290+
]
278291
homePage = 'https://github.com/{{ name }}'
279292
description = """{{ description }}"""
280293
mainScript = 'main.nf'

nf_core/pipelines/download.py

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -839,11 +839,12 @@ def rectify_raw_container_matches(self, raw_findings):
839839
url_regex = (
840840
r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)"
841841
)
842+
oras_regex = r"oras:\/\/[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)"
842843
# Thanks Stack Overflow for the regex: https://stackoverflow.com/a/39672069/713980
843844
docker_regex = r"^(?:(?=[^:\/]{1,253})(?!-)[a-zA-Z0-9-]{1,63}(?<!-)(?:\.(?!-)[a-zA-Z0-9-]{1,63}(?<!-))*(?::[0-9]{1,5})?/)?((?![._-])(?:[a-z0-9._-]*)(?<![._-])(?:/(?![._-])[a-z0-9._-]*(?<![._-]))*)(?::(?![.-])[a-zA-Z0-9_.-]{1,128})?$"
844845

845846
# at this point, we don't have to distinguish anymore, because we will later prioritize direct downloads over Docker URIs.
846-
either_url_or_docker = re.compile(f"{url_regex}|{docker_regex}", re.S)
847+
either_url_or_docker = re.compile(f"{url_regex}|{oras_regex}|{docker_regex}", re.S)
847848

848849
for _, container_value, search_space, file_path in raw_findings:
849850
"""
@@ -1000,14 +1001,18 @@ def prioritize_direct_download(self, container_list: List[str]) -> List[str]:
10001001
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data'
10011002
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data'
10021003
1004+
Lastly, we want to remove at least a few Docker URIs for those modules that have an oras:// download link.
10031005
"""
10041006
d: Dict[str, str] = {}
1005-
seqera_containers: List[str] = []
1007+
seqera_containers_http: List[str] = []
1008+
seqera_containers_oras: List[str] = []
10061009
all_others: List[str] = []
10071010

10081011
for c in container_list:
10091012
if bool(re.search(r"/data$", c)):
1010-
seqera_containers.append(c)
1013+
seqera_containers_http.append(c)
1014+
elif bool(re.search(r"^oras://", c)):
1015+
seqera_containers_oras.append(c)
10111016
else:
10121017
all_others.append(c)
10131018

@@ -1016,8 +1021,47 @@ def prioritize_direct_download(self, container_list: List[str]) -> List[str]:
10161021
log.debug(f"{c} matches and will be saved as {k}")
10171022
d[k] = c
10181023

1019-
# combine deduplicated others and Seqera containers
1020-
return sorted(list(d.values()) + seqera_containers)
1024+
combined_with_oras = self.reconcile_seqera_container_uris(seqera_containers_oras, list(d.values()))
1025+
1026+
# combine deduplicated others (Seqera containers oras, http others and Docker URI others) and Seqera containers http
1027+
return sorted(list(set(combined_with_oras + seqera_containers_http)))
1028+
1029+
@staticmethod
1030+
def reconcile_seqera_container_uris(prioritized_container_list: List[str], other_list: List[str]) -> List[str]:
1031+
"""
1032+
Helper function that takes a list of Seqera container URIs,
1033+
extracts the software string and builds a regex from them to filter out
1034+
similar containers from the second container list.
1035+
1036+
prioritzed_container_list = [
1037+
... "oras://community.wave.seqera.io/library/multiqc:1.25.1--f0e743d16869c0bf",
1038+
... "oras://community.wave.seqera.io/library/multiqc_pip_multiqc-plugins:e1f4877f1515d03c"
1039+
... ]
1040+
1041+
will be cleaned to
1042+
1043+
['library/multiqc:1.25.1', 'library/multiqc_pip_multiqc-plugins']
1044+
1045+
Subsequently, build a regex from those and filter out matching duplicates in other_list:
1046+
"""
1047+
if not prioritized_container_list:
1048+
return other_list
1049+
else:
1050+
# trim the URIs to the stem that contains the tool string, assign with Walrus operator to account for non-matching patterns
1051+
trimmed_priority_list = [
1052+
match.group()
1053+
for c in set(prioritized_container_list)
1054+
if (match := re.search(r"library/.*?:[\d.]+", c) if "--" in c else re.search(r"library/[^\s:]+", c))
1055+
]
1056+
1057+
# build regex
1058+
prioritized_containers = re.compile("|".join(f"{re.escape(c)}" for c in trimmed_priority_list))
1059+
1060+
# filter out matches in other list
1061+
filtered_containers = [c for c in other_list if not re.search(prioritized_containers, c)]
1062+
1063+
# combine prioritized and regular container lists
1064+
return sorted(list(set(prioritized_container_list + filtered_containers)))
10211065

10221066
def gather_registries(self, workflow_directory: str) -> None:
10231067
"""Fetch the registries from the pipeline config and CLI arguments and store them in a set.
@@ -1419,9 +1463,10 @@ def singularity_pull_image(
14191463
# Sometimes, containers still contain an explicit library specification, which
14201464
# resulted in attempted pulls e.g. from docker://quay.io/quay.io/qiime2/core:2022.11
14211465
# Thus, if an explicit registry is specified, the provided -l value is ignored.
1466+
# Additionally, check if the container to be pulled is native Singularity: oras:// protocol.
14221467
container_parts = container.split("/")
14231468
if len(container_parts) > 2:
1424-
address = f"docker://{container}"
1469+
address = container if container.startswith("oras://") else f"docker://{container}"
14251470
absolute_URI = True
14261471
else:
14271472
address = f"docker://{library}/{container.replace('docker://', '')}"
@@ -1843,6 +1888,9 @@ def __init__(
18431888
elif re.search(r"manifest\sunknown", line):
18441889
self.error_type = self.InvalidTagError(self)
18451890
break
1891+
elif re.search(r"ORAS\sSIF\simage\sshould\shave\sa\ssingle\slayer", line):
1892+
self.error_type = self.NoSingularityContainerError(self)
1893+
break
18461894
elif re.search(r"Image\sfile\salready\sexists", line):
18471895
self.error_type = self.ImageExistsError(self)
18481896
break
@@ -1907,6 +1955,17 @@ def __init__(self, error_log):
19071955
self.helpmessage = f'Saving image of "{self.error_log.container}" failed, because "{self.error_log.out_path}" exists.\nPlease troubleshoot the command \n"{" ".join(self.error_log.singularity_command)}" manually.\n'
19081956
super().__init__(self.message)
19091957

1958+
class NoSingularityContainerError(RuntimeError):
1959+
"""The container image is not in the native Singularity Image Format (SIF)."""
1960+
1961+
def __init__(self, error_log):
1962+
self.error_log = error_log
1963+
self.message = (
1964+
f'[bold red]"{self.error_log.container}" is no valid Singularity Image Format container.[/]\n'
1965+
)
1966+
self.helpmessage = f"Pulling \"{self.error_log.container}\" failed, because it appears invalid. To convert from Docker's OCI format, prefix the URI with 'docker://' instead of 'oras://'.\n"
1967+
super().__init__(self.message)
1968+
19101969
class OtherError(RuntimeError):
19111970
"""Undefined error with the container"""
19121971

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
process UMI_TRANSFER {
2+
label 'process_single'
3+
4+
conda "${moduleDir}/environment.yml"
5+
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
6+
'oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6' :
7+
'community.wave.seqera.io/library/umi-transfer:1.0.0--d30e8812ea280fa1' }"
8+
9+
// truncated
10+
11+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
process UMI_TRANSFER_MULLED {
2+
label 'process_single'
3+
4+
conda "${moduleDir}/environment.yml"
5+
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
6+
'oras://community.wave.seqera.io/library/umi-transfer_umicollapse:796a995ff53da9e3' :
7+
'community.wave.seqera.io/library/umi-transfer_umicollapse:3298d4f1b49e33bd' }"
8+
9+
// truncated
10+
11+
}

tests/pipelines/test_download.py

Lines changed: 89 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,20 @@ def test_find_container_images_modules(self, tmp_path, mock_fetch_wf_config):
257257
not in download_obj.containers
258258
)
259259

260-
# mock_seqera_container.nf
260+
# mock_seqera_container_oras.nf
261+
assert "oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6" in download_obj.containers
262+
assert "community.wave.seqera.io/library/umi-transfer:1.0.0--d30e8812ea280fa1" not in download_obj.containers
263+
264+
# mock_seqera_container_oras_mulled.nf
265+
assert (
266+
"oras://community.wave.seqera.io/library/umi-transfer_umicollapse:796a995ff53da9e3"
267+
in download_obj.containers
268+
)
269+
assert (
270+
"community.wave.seqera.io/library/umi-transfer_umicollapse:3298d4f1b49e33bd" not in download_obj.containers
271+
)
272+
273+
# mock_seqera_container_http.nf
261274
assert (
262275
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data"
263276
in download_obj.containers
@@ -294,6 +307,7 @@ def test_prioritize_direct_download(self, tmp_path):
294307
"https://depot.galaxyproject.org/singularity/sortmerna:4.2.0--h9ee0642_1",
295308
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data",
296309
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data",
310+
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data",
297311
]
298312

299313
result = download_obj.prioritize_direct_download(test_container)
@@ -316,7 +330,7 @@ def test_prioritize_direct_download(self, tmp_path):
316330
assert "https://depot.galaxyproject.org/singularity/sortmerna:4.3.7--hdbdd923_0" in result
317331
assert "https://depot.galaxyproject.org/singularity/sortmerna:4.2.0--h9ee0642_1" in result
318332

319-
# Verify that Seqera containers are not deduplicated
333+
# Verify that Seqera containers are not deduplicated...
320334
assert (
321335
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data"
322336
in result
@@ -325,6 +339,58 @@ def test_prioritize_direct_download(self, tmp_path):
325339
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data"
326340
in result
327341
)
342+
# ...but identical ones are.
343+
assert (
344+
result.count(
345+
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data"
346+
)
347+
== 1
348+
)
349+
350+
#
351+
# Test for 'reconcile_seqera_container_uris'
352+
#
353+
@with_temporary_folder
354+
def test_reconcile_seqera_container_uris(self, tmp_path):
355+
download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_path)
356+
357+
prioritized_container = [
358+
"oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6",
359+
"oras://community.wave.seqera.io/library/sylph:0.6.1--b97274cdc1caa649",
360+
]
361+
362+
test_container = [
363+
"https://depot.galaxyproject.org/singularity/ubuntu:22.04",
364+
"nf-core/ubuntu:22.04",
365+
"nf-core/ubuntu:22.04",
366+
"nf-core/ubuntu:22.04",
367+
"community.wave.seqera.io/library/umi-transfer:1.5.0--73c1a6b65e5b0b81",
368+
"community.wave.seqera.io/library/sylph:0.6.1--a21713a57a65a373",
369+
"biocontainers/sylph:0.6.1--b97274cdc1caa649",
370+
]
371+
372+
# test that the test_container list is returned as it is, if no prioritized_containers are specified
373+
result_empty = download_obj.reconcile_seqera_container_uris([], test_container)
374+
assert result_empty == test_container
375+
376+
result = download_obj.reconcile_seqera_container_uris(prioritized_container, test_container)
377+
378+
# Verify that unrelated images are retained
379+
assert "https://depot.galaxyproject.org/singularity/ubuntu:22.04" in result
380+
assert "nf-core/ubuntu:22.04" in result
381+
382+
# Verify that the priority works for regular Seqera container (Native Singularity over Docker, but only for Seqera registry)
383+
assert "oras://community.wave.seqera.io/library/sylph:0.6.1--b97274cdc1caa649" in result
384+
assert "community.wave.seqera.io/library/sylph:0.6.1--a21713a57a65a373" not in result
385+
assert "biocontainers/sylph:0.6.1--b97274cdc1caa649" in result
386+
387+
# Verify that version strings are respected: Version 1.0.0 does not replace version 1.5.0
388+
assert "oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6" in result
389+
assert "community.wave.seqera.io/library/umi-transfer:1.5.0--73c1a6b65e5b0b81" in result
390+
391+
# assert that the deduplication works
392+
assert test_container.count("nf-core/ubuntu:22.04") == 3
393+
assert result.count("nf-core/ubuntu:22.04") == 1
328394

329395
#
330396
# Tests for 'singularity_pull_image'
@@ -356,11 +422,30 @@ def test_singularity_pull_image_singularity_installed(self, tmp_dir, mock_rich_p
356422
"docker.io/bschiffthaler/sed", f"{tmp_dir}/sed.sif", None, "docker.io", mock_rich_progress
357423
)
358424

425+
# Test successful pull with absolute oras:// URI
426+
download_obj.singularity_pull_image(
427+
"oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6",
428+
f"{tmp_dir}/umi-transfer-oras.sif",
429+
None,
430+
"docker.io",
431+
mock_rich_progress,
432+
)
433+
434+
# try pulling Docker container image with oras://
435+
with pytest.raises(ContainerError.NoSingularityContainerError):
436+
download_obj.singularity_pull_image(
437+
"oras://ghcr.io/matthiaszepper/umi-transfer:dev",
438+
f"{tmp_dir}/umi-transfer-oras_impostor.sif",
439+
None,
440+
"docker.io",
441+
mock_rich_progress,
442+
)
443+
359444
# try to pull from non-existing registry (A distinct output file name is needed, otherwise ImageExistsError is raised before attempting to pull.)
360445
with pytest.raises(ContainerError.RegistryNotFoundError):
361446
download_obj.singularity_pull_image(
362447
"hello-world",
363-
f"{tmp_dir}/hello-world_new.sif",
448+
f"{tmp_dir}/break_the_registry_test.sif",
364449
None,
365450
"register-this-domain-to-break-the-test.io",
366451
mock_rich_progress,
@@ -396,7 +481,7 @@ def test_singularity_pull_image_singularity_installed(self, tmp_dir, mock_rich_p
396481
with pytest.raises(ContainerError.InvalidTagError):
397482
download_obj.singularity_pull_image(
398483
"ewels/multiqc:go-rewrite",
399-
f"{tmp_dir}/umi-transfer.sif",
484+
f"{tmp_dir}/multiqc-go.sif",
400485
None,
401486
"ghcr.io",
402487
mock_rich_progress,

0 commit comments

Comments
 (0)