Skip to content

Commit 65db460

Browse files
Merge pull request #3244 from MatthiasZepper/seqera_containers_example_mod
Download: Seqera container support - Patch 1
2 parents eb4c237 + 32a7e6b commit 65db460

File tree

5 files changed

+238
-24
lines changed

5 files changed

+238
-24
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
### Download
1313

14+
- First steps towards fixing [#3179](https://github.com/nf-core/tools/issues/3179): Modify `prioritize_direct_download()` to retain Seqera Singularity https:// Container URIs and hardcode Seqera Containers into `gather_registries()` ([#3244](https://github.com/nf-core/tools/pull/3244)).
15+
1416
### Linting
1517

1618
### Modules

nf_core/pipeline-template/.github/workflows/download_pipeline.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ jobs:
6969
--outdir ./${{ env.REPOTITLE_LOWERCASE }} \
7070
--compress "none" \
7171
--container-system 'singularity' \
72-
--container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io" \
72+
--container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io/library/" \
7373
--container-cache-utilisation 'amend' \
7474
--download-configuration 'yes'
7575

nf_core/pipelines/download.py

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,7 @@ def rectify_raw_container_matches(self, raw_findings):
970970
"""
971971
return self.prioritize_direct_download(cleaned_matches)
972972

973-
def prioritize_direct_download(self, container_list):
973+
def prioritize_direct_download(self, container_list: List[str]) -> List[str]:
974974
"""
975975
Helper function that takes a list of container images (URLs and Docker URIs),
976976
eliminates all Docker URIs for which also a URL is contained and returns the
@@ -993,13 +993,31 @@ def prioritize_direct_download(self, container_list):
993993
we want to keep it and not replace it with whatever we have now (which might be the Docker URI).
994994
995995
A regex that matches http, r"^$|^http" could thus be used to prioritize the Docker URIs over http Downloads
996+
997+
We also need to handle a special case: The https:// Singularity downloads from Seqera Containers all end in 'data', although
998+
they are not equivalent, e.g.:
999+
1000+
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data'
1001+
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data'
1002+
9961003
"""
997-
d = {}
1004+
d: Dict[str, str] = {}
1005+
seqera_containers: List[str] = []
1006+
all_others: List[str] = []
1007+
9981008
for c in container_list:
1009+
if bool(re.search(r"/data$", c)):
1010+
seqera_containers.append(c)
1011+
else:
1012+
all_others.append(c)
1013+
1014+
for c in all_others:
9991015
if re.match(r"^$|(?!^http)", d.get(k := re.sub(".*/(.*)", "\\1", c), "")):
10001016
log.debug(f"{c} matches and will be saved as {k}")
10011017
d[k] = c
1002-
return sorted(list(d.values()))
1018+
1019+
# combine deduplicated others and Seqera containers
1020+
return sorted(list(d.values()) + seqera_containers)
10031021

10041022
def gather_registries(self, workflow_directory: str) -> None:
10051023
"""Fetch the registries from the pipeline config and CLI arguments and store them in a set.
@@ -1023,7 +1041,13 @@ def gather_registries(self, workflow_directory: str) -> None:
10231041
self.registry_set.add(self.nf_config[registry])
10241042

10251043
# add depot.galaxyproject.org to the set, because it is the default registry for singularity hardcoded in modules
1026-
self.registry_set.add("depot.galaxyproject.org")
1044+
self.registry_set.add("depot.galaxyproject.org/singularity")
1045+
1046+
# add community.wave.seqera.io/library to the set to support the new Seqera Docker container registry
1047+
self.registry_set.add("community.wave.seqera.io/library")
1048+
1049+
# add https://community-cr-prod.seqera.io/docker/registry/v2/ to the set to support the new Seqera Singularity container registry
1050+
self.registry_set.add("community-cr-prod.seqera.io/docker/registry/v2")
10271051

10281052
def symlink_singularity_images(self, image_out_path: str) -> None:
10291053
"""Create a symlink for each registry in the registry set that points to the image.
@@ -1040,10 +1064,13 @@ def symlink_singularity_images(self, image_out_path: str) -> None:
10401064

10411065
if self.registry_set:
10421066
# Create a regex pattern from the set, in case trimming is needed.
1043-
trim_pattern = "|".join(f"^{re.escape(registry)}-?" for registry in self.registry_set)
1067+
trim_pattern = "|".join(f"^{re.escape(registry)}-?".replace("/", "[/-]") for registry in self.registry_set)
10441068

10451069
for registry in self.registry_set:
1046-
if not os.path.basename(image_out_path).startswith(registry):
1070+
# Nextflow will convert it like this as well, so we need to mimic its behavior
1071+
registry = registry.replace("/", "-")
1072+
1073+
if not bool(re.search(trim_pattern, os.path.basename(image_out_path))):
10471074
symlink_name = os.path.join("./", f"{registry}-{os.path.basename(image_out_path)}")
10481075
else:
10491076
trimmed_name = re.sub(f"{trim_pattern}", "", os.path.basename(image_out_path))
@@ -1263,7 +1290,7 @@ def singularity_image_filenames(self, container: str) -> Tuple[str, Optional[str
12631290
# if docker.registry / singularity.registry are set to empty strings at runtime, which can be included in the HPC config profiles easily.
12641291
if self.registry_set:
12651292
# Create a regex pattern from the set of registries
1266-
trim_pattern = "|".join(f"^{re.escape(registry)}-?" for registry in self.registry_set)
1293+
trim_pattern = "|".join(f"^{re.escape(registry)}-?".replace("/", "[/-]") for registry in self.registry_set)
12671294
# Use the pattern to trim the string
12681295
out_name = re.sub(f"{trim_pattern}", "", out_name)
12691296

@@ -1345,9 +1372,10 @@ def singularity_download_image(
13451372
log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'")
13461373
progress.update(task, description="Copying from cache to target directory")
13471374
shutil.copyfile(cache_path, out_path)
1375+
self.symlink_singularity_images(cache_path) # symlinks inside the cache directory
13481376

13491377
# Create symlinks to ensure that the images are found even with different registries being used.
1350-
self.symlink_singularity_images(output_path)
1378+
self.symlink_singularity_images(out_path)
13511379

13521380
progress.remove_task(task)
13531381

@@ -1456,9 +1484,10 @@ def singularity_pull_image(
14561484
log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'")
14571485
progress.update(task, current_log="Copying from cache to target directory")
14581486
shutil.copyfile(cache_path, out_path)
1487+
self.symlink_singularity_images(cache_path) # symlinks inside the cache directory
14591488

14601489
# Create symlinks to ensure that the images are found even with different registries being used.
1461-
self.symlink_singularity_images(output_path)
1490+
self.symlink_singularity_images(out_path)
14621491

14631492
progress.remove_task(task)
14641493

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
process CAT_FASTQ {
2+
label 'process_single'
3+
4+
conda "${moduleDir}/environment.yml"
5+
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
6+
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data' :
7+
'community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264' }"
8+
9+
// truncated
10+
11+
}

0 commit comments

Comments
 (0)