@@ -970,7 +970,7 @@ def rectify_raw_container_matches(self, raw_findings):
970970 """
971971 return self .prioritize_direct_download (cleaned_matches )
972972
973- def prioritize_direct_download (self , container_list ) :
973+ def prioritize_direct_download (self , container_list : List [ str ]) -> List [ str ] :
974974 """
975975 Helper function that takes a list of container images (URLs and Docker URIs),
976976 eliminates all Docker URIs for which also a URL is contained and returns the
@@ -993,13 +993,31 @@ def prioritize_direct_download(self, container_list):
993993 we want to keep it and not replace it with whatever we have now (which might be the Docker URI).
994994
995995 A regex that matches http, r"^$|^http" could thus be used to prioritize the Docker URIs over http Downloads
996+
997+ We also need to handle a special case: The https:// Singularity downloads from Seqera Containers all end in 'data', although
998+ they are not equivalent, e.g.:
999+
1000+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data'
1001+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data'
1002+
9961003 """
997- d = {}
1004+ d : Dict [str , str ] = {}
1005+ seqera_containers : List [str ] = []
1006+ all_others : List [str ] = []
1007+
9981008 for c in container_list :
1009+ if bool (re .search (r"/data$" , c )):
1010+ seqera_containers .append (c )
1011+ else :
1012+ all_others .append (c )
1013+
1014+ for c in all_others :
9991015 if re .match (r"^$|(?!^http)" , d .get (k := re .sub (".*/(.*)" , "\\ 1" , c ), "" )):
10001016 log .debug (f"{ c } matches and will be saved as { k } " )
10011017 d [k ] = c
1002- return sorted (list (d .values ()))
1018+
1019+ # combine deduplicated others and Seqera containers
1020+ return sorted (list (d .values ()) + seqera_containers )
10031021
10041022 def gather_registries (self , workflow_directory : str ) -> None :
10051023 """Fetch the registries from the pipeline config and CLI arguments and store them in a set.
@@ -1023,7 +1041,13 @@ def gather_registries(self, workflow_directory: str) -> None:
10231041 self .registry_set .add (self .nf_config [registry ])
10241042
10251043 # add depot.galaxyproject.org to the set, because it is the default registry for singularity hardcoded in modules
1026- self .registry_set .add ("depot.galaxyproject.org" )
1044+ self .registry_set .add ("depot.galaxyproject.org/singularity" )
1045+
1046+ # add community.wave.seqera.io/library to the set to support the new Seqera Docker container registry
1047+ self .registry_set .add ("community.wave.seqera.io/library" )
1048+
1049+ # add https://community-cr-prod.seqera.io/docker/registry/v2/ to the set to support the new Seqera Singularity container registry
1050+ self .registry_set .add ("community-cr-prod.seqera.io/docker/registry/v2" )
10271051
10281052 def symlink_singularity_images (self , image_out_path : str ) -> None :
10291053 """Create a symlink for each registry in the registry set that points to the image.
@@ -1040,10 +1064,13 @@ def symlink_singularity_images(self, image_out_path: str) -> None:
10401064
10411065 if self .registry_set :
10421066 # Create a regex pattern from the set, in case trimming is needed.
1043- trim_pattern = "|" .join (f"^{ re .escape (registry )} -?" for registry in self .registry_set )
1067+ trim_pattern = "|" .join (f"^{ re .escape (registry )} -?" . replace ( "/" , "[/-]" ) for registry in self .registry_set )
10441068
10451069 for registry in self .registry_set :
1046- if not os .path .basename (image_out_path ).startswith (registry ):
1070+ # Nextflow will convert it like this as well, so we need to mimic its behavior
1071+ registry = registry .replace ("/" , "-" )
1072+
1073+ if not bool (re .search (trim_pattern , os .path .basename (image_out_path ))):
10471074 symlink_name = os .path .join ("./" , f"{ registry } -{ os .path .basename (image_out_path )} " )
10481075 else :
10491076 trimmed_name = re .sub (f"{ trim_pattern } " , "" , os .path .basename (image_out_path ))
@@ -1263,7 +1290,7 @@ def singularity_image_filenames(self, container: str) -> Tuple[str, Optional[str
12631290 # if docker.registry / singularity.registry are set to empty strings at runtime, which can be included in the HPC config profiles easily.
12641291 if self .registry_set :
12651292 # Create a regex pattern from the set of registries
1266- trim_pattern = "|" .join (f"^{ re .escape (registry )} -?" for registry in self .registry_set )
1293+ trim_pattern = "|" .join (f"^{ re .escape (registry )} -?" . replace ( "/" , "[/-]" ) for registry in self .registry_set )
12671294 # Use the pattern to trim the string
12681295 out_name = re .sub (f"{ trim_pattern } " , "" , out_name )
12691296
@@ -1345,9 +1372,10 @@ def singularity_download_image(
13451372 log .debug (f"Copying { container } from cache: '{ os .path .basename (out_path )} '" )
13461373 progress .update (task , description = "Copying from cache to target directory" )
13471374 shutil .copyfile (cache_path , out_path )
1375+ self .symlink_singularity_images (cache_path ) # symlinks inside the cache directory
13481376
13491377 # Create symlinks to ensure that the images are found even with different registries being used.
1350- self .symlink_singularity_images (output_path )
1378+ self .symlink_singularity_images (out_path )
13511379
13521380 progress .remove_task (task )
13531381
@@ -1456,9 +1484,10 @@ def singularity_pull_image(
14561484 log .debug (f"Copying { container } from cache: '{ os .path .basename (out_path )} '" )
14571485 progress .update (task , current_log = "Copying from cache to target directory" )
14581486 shutil .copyfile (cache_path , out_path )
1487+ self .symlink_singularity_images (cache_path ) # symlinks inside the cache directory
14591488
14601489 # Create symlinks to ensure that the images are found even with different registries being used.
1461- self .symlink_singularity_images (output_path )
1490+ self .symlink_singularity_images (out_path )
14621491
14631492 progress .remove_task (task )
14641493
0 commit comments