Skip to content

Commit fec9929

Browse files
Ensure that prioritize_direct_download() retains Seqera Singularity Containers and write additional test.
1 parent b79525b commit fec9929

File tree

3 files changed

+77
-3
lines changed

3 files changed

+77
-3
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
### Download
1313

14+
- First steps towards fixing [#3179](https://github.com/nf-core/tools/issues/3179): Modify `prioritize_direct_download()` to retain Seqera Singularity https:// Container URIs and hardcode Seqera Containers into `gather_registries()` ([#3244](https://github.com/nf-core/tools/pull/3244)).
15+
1416
### Linting
1517

1618
### Modules

nf_core/pipelines/download.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,7 @@ def rectify_raw_container_matches(self, raw_findings):
970970
"""
971971
return self.prioritize_direct_download(cleaned_matches)
972972

973-
def prioritize_direct_download(self, container_list):
973+
def prioritize_direct_download(self, container_list: List[str]) -> List[str]:
974974
"""
975975
Helper function that takes a list of container images (URLs and Docker URIs),
976976
eliminates all Docker URIs for which a URL is also contained and returns the
@@ -993,13 +993,31 @@ def prioritize_direct_download(self, container_list):
993993
we want to keep it and not replace it with whatever we have now (which might be the Docker URI).
994994
995995
A regex that matches http, r"^$|^http" could thus be used to prioritize the Docker URIs over http Downloads
996+
997+
We also need to handle a special case: The https:// Singularity downloads from Seqera Containers all end in 'data', although
998+
they are not equivalent:
999+
1000+
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data'
1001+
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data'
1002+
9961003
"""
997-
d = {}
1004+
d: dict[str, str] = {}
1005+
seqera_containers: list[str] = []
1006+
all_others: list[str] = []
1007+
9981008
for c in container_list:
1009+
if bool(re.search(r"/data$", c)):
1010+
seqera_containers.append(c)
1011+
else:
1012+
all_others.append(c)
1013+
1014+
for c in all_others:
9991015
if re.match(r"^$|(?!^http)", d.get(k := re.sub(".*/(.*)", "\\1", c), "")):
10001016
log.debug(f"{c} matches and will be saved as {k}")
10011017
d[k] = c
1002-
return sorted(list(d.values()))
1018+
1019+
# combine deduplicated others and Seqera containers
1020+
return sorted(list(d.values()) + seqera_containers)
10031021

10041022
def gather_registries(self, workflow_directory: str) -> None:
10051023
"""Fetch the registries from the pipeline config and CLI arguments and store them in a set.

tests/pipelines/test_download.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,60 @@ def test_find_container_images_modules(self, tmp_path, mock_fetch_wf_config):
272272

273273
assert "community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264" in download_obj.containers
274274

275+
#
276+
# Test for 'prioritize_direct_download'
277+
#
278+
@with_temporary_folder
279+
def test_prioritize_direct_download(self, tmp_path):
280+
download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_path)
281+
282+
# tests deduplication and https priority as well as Seqera Container exception
283+
284+
test_container = [
285+
"https://depot.galaxyproject.org/singularity/ubuntu:22.04",
286+
"nf-core/ubuntu:22.04",
287+
"biocontainers/umi-transfer:1.5.0--h715e4b3_0",
288+
"https://depot.galaxyproject.org/singularity/umi-transfer:1.5.0--h715e4b3_0",
289+
"biocontainers/umi-transfer:1.5.0--h715e4b3_0",
290+
"quay.io/nf-core/sortmerna:4.3.7--6502243397c065ba",
291+
"nf-core/sortmerna:4.3.7--6502243397c065ba",
292+
"https://depot.galaxyproject.org/singularity/sortmerna:4.3.7--hdbdd923_1",
293+
"https://depot.galaxyproject.org/singularity/sortmerna:4.3.7--hdbdd923_0",
294+
"https://depot.galaxyproject.org/singularity/sortmerna:4.2.0--h9ee0642_1",
295+
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data",
296+
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data",
297+
]
298+
299+
result = download_obj.prioritize_direct_download(test_container)
300+
301+
# Verify that the priority works for regular https downloads (https encountered first)
302+
assert "https://depot.galaxyproject.org/singularity/ubuntu:22.04" in result
303+
assert "nf-core/ubuntu:22.04" not in result
304+
305+
# Verify that the priority works for regular https downloads (https encountered second)
306+
assert "biocontainers/umi-transfer:1.5.0--h715e4b3_0" not in result
307+
assert "https://depot.galaxyproject.org/singularity/umi-transfer:1.5.0--h715e4b3_0" in result
308+
309+
# Verify that the priority works for images with and without explicit registry
310+
# No priority here, though - the last one encountered is retained (a later Docker URI overwrites an earlier one).
311+
assert "nf-core/sortmerna:4.3.7--6502243397c065ba" in result
312+
assert "quay.io/nf-core/sortmerna:4.3.7--6502243397c065ba" not in result
313+
314+
# Verify that different versions of the same tool and different build numbers are retained
315+
assert "https://depot.galaxyproject.org/singularity/sortmerna:4.3.7--hdbdd923_1" in result
316+
assert "https://depot.galaxyproject.org/singularity/sortmerna:4.3.7--hdbdd923_0" in result
317+
assert "https://depot.galaxyproject.org/singularity/sortmerna:4.2.0--h9ee0642_1" in result
318+
319+
# Verify that Seqera containers are not deduplicated
320+
assert (
321+
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data"
322+
in result
323+
)
324+
assert (
325+
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data"
326+
in result
327+
)
328+
275329
#
276330
# Tests for 'singularity_pull_image'
277331
#

0 commit comments

Comments
 (0)