1717import shutil
1818import sys
1919import tempfile
20+ from pathlib import Path
2021
2122import DIRAC
2223from DIRAC import S_ERROR , S_OK , gConfig , gLogger
6263echo "Finishing inner container wrapper scripts at `date`."
6364
6465"""
65- # Path to a directory on CVMFS to use as a fallback if no
66- # other version found: Only used if node has user namespaces
67- FALLBACK_SINGULARITY = "/cvmfs/oasis.opensciencegrid.org/mis/singularity/current/bin"
68-
6966CONTAINER_WRAPPER_NO_INSTALL = """#!/bin/bash
7067
7168echo "Starting inner container wrapper scripts (no install) at `date`."
@@ -110,7 +107,6 @@ def __init__(self, ceUniqueID):
110107 self .__root = self .ceParameters ["ContainerRoot" ]
111108 self .__workdir = CONTAINER_WORKDIR
112109 self .__innerdir = CONTAINER_INNERDIR
113- self .__singularityBin = "singularity"
114110 self .__installDIRACInContainer = self .ceParameters .get ("InstallDIRACInContainer" , False )
115111 if isinstance (self .__installDIRACInContainer , str ) and self .__installDIRACInContainer .lower () in (
116112 "false" ,
@@ -120,47 +116,6 @@ def __init__(self, ceUniqueID):
120116
121117 self .processors = int (self .ceParameters .get ("NumberOfProcessors" , 1 ))
122118
123- def __hasUserNS (self ):
124- """Detect if this node has user namespaces enabled.
125- Returns True if they are enabled, False otherwise.
126- """
127- try :
128- with open ("/proc/sys/user/max_user_namespaces" ) as proc_fd :
129- maxns = int (proc_fd .readline ().strip ())
130- # Any "reasonable number" of namespaces is sufficient
131- return maxns > 100
132- except Exception :
133- # On any failure (missing file, contents that are not a number, etc.) we
134- # assume they are disabled.
135- return False
136-
137- def __hasSingularity (self ):
138- """Search the current PATH for an executable named singularity.
139- Returns True if it is found, False otherwise.
140- """
141- if self .ceParameters .get ("ContainerBin" ):
142- binPath = self .ceParameters ["ContainerBin" ]
143- if os .path .isfile (binPath ) and os .access (binPath , os .X_OK ):
144- self .__singularityBin = binPath
145- self .log .debug (f'Use singularity from "{ self .__singularityBin } "' )
146- return True
147- if "PATH" not in os .environ :
148- return False # Hmm, PATH not set? How unusual...
149- searchPaths = os .environ ["PATH" ].split (os .pathsep )
150- # We can use CVMFS as a last resort if userNS is enabled
151- if self .__hasUserNS ():
152- searchPaths .append (FALLBACK_SINGULARITY )
153- for searchPath in searchPaths :
154- binPath = os .path .join (searchPath , "singularity" )
155- if os .path .isfile (binPath ):
156- # File found, check it's executable to be certain:
157- if os .access (binPath , os .X_OK ):
158- self .log .debug (f'Found singularity at "{ binPath } "' )
159- self .__singularityBin = binPath
160- return True
161- # No suitable binaries found
162- return False
163-
164119 @staticmethod
165120 def __findInstallBaseDir ():
166121 """Find the path to root of the current DIRAC installation"""
@@ -321,11 +276,12 @@ def __getEnv(self):
321276 We blank almost everything to prevent contamination from the host system.
322277 """
323278
324- if not self .__installDIRACInContainer :
325- payloadEnv = {k : v for k , v in os .environ .items () if ENV_VAR_WHITELIST .match (k )}
326- else :
279+ if self .__installDIRACInContainer :
327280 payloadEnv = {}
281+ else :
282+ payloadEnv = {k : v for k , v in os .environ .items () if ENV_VAR_WHITELIST .match (k )}
328283
284+ payloadEnv ["PATH" ] = str (Path (sys .executable ).parent )
329285 payloadEnv ["TMP" ] = "/tmp"
330286 payloadEnv ["TMPDIR" ] = "/tmp"
331287 payloadEnv ["X509_USER_PROXY" ] = os .path .join (self .__innerdir , "proxy" )
@@ -356,10 +312,6 @@ def submitJob(self, executableFile, proxy=None, **kwargs):
356312 """
357313 rootImage = self .__root
358314 renewTask = None
359- # Check that singularity is available
360- if not self .__hasSingularity ():
361- self .log .error ("Singularity is not installed on PATH." )
362- return S_ERROR ("Failed to find singularity" )
363315
364316 self .log .info ("Creating singularity container" )
365317
@@ -391,19 +343,19 @@ def submitJob(self, executableFile, proxy=None, **kwargs):
391343 # Mount /cvmfs in if it exists on the host
392344 withCVMFS = os .path .isdir ("/cvmfs" )
393345 innerCmd = os .path .join (self .__innerdir , "dirac_container.sh" )
394- cmd = [self .__singularityBin , "exec" ]
395- cmd .extend (["--contain" ]) # use minimal /dev and empty other directories (e.g. /tmp and $HOME)
396- cmd .extend (["--ipc" ]) # run container in a new IPC namespace
397- cmd .extend (["--workdir" , baseDir ]) # working directory to be used for /tmp, /var/tmp and $HOME
398- cmd .extend (["--home" , "/tmp" ]) # Avoid using small tmpfs for default $HOME and use scratch /tmp instead
399- if self .__hasUserNS ():
400- cmd .append ("--userns" )
346+ outerCmd = ["apptainer" , "exec" ]
347+ outerCmd .extend (["--contain" ]) # use minimal /dev and empty other directories (e.g. /tmp and $HOME)
348+ outerCmd .extend (["--ipc" ]) # run container in a new IPC namespace
349+ outerCmd .extend (["--workdir" , baseDir ]) # working directory to be used for /tmp, /var/tmp and $HOME
350+ outerCmd .extend (["--home" , "/tmp" ]) # Avoid using small tmpfs for default $HOME and use scratch /tmp instead
351+ outerCmd .append ("--userns" )
401352 if withCVMFS :
402- cmd .extend (["--bind" , "/cvmfs" ])
353+ outerCmd .extend (["--bind" , "/cvmfs" ])
403354 if not self .__installDIRACInContainer :
404- cmd .extend (["--bind" , "{0}:{0}:ro" .format (self .__findInstallBaseDir ())])
355+ outerCmd .extend (["--bind" , "{0}:{0}:ro" .format (self .__findInstallBaseDir ())])
405356
406- bindPaths = self .ceParameters .get ("ContainerBind" , "" ).split ("," )
357+ rawBindPaths = self .ceParameters .get ("ContainerBind" , "" )
358+ bindPaths = rawBindPaths .split ("," ) if rawBindPaths else []
407359 siteName = gConfig .getValue ("/LocalSite/Site" , "" )
408360 ceName = gConfig .getValue ("/LocalSite/GridCE" , "" )
409361 if siteName and ceName :
@@ -436,20 +388,20 @@ def submitJob(self, executableFile, proxy=None, **kwargs):
436388
437389 for bindPath in bindPaths :
438390 if len (bindPath .split (":::" )) == 1 :
439- cmd .extend (["--bind" , bindPath .strip ()])
391+ outerCmd .extend (["--bind" , bindPath .strip ()])
440392 elif len (bindPath .split (":::" )) in [2 , 3 ]:
441- cmd .extend (["--bind" , ":" .join ([bp .strip () for bp in bindPath .split (":::" )])])
393+ outerCmd .extend (["--bind" , ":" .join ([bp .strip () for bp in bindPath .split (":::" )])])
442394
443395 if "ContainerOptions" in self .ceParameters :
444396 containerOpts = self .ceParameters ["ContainerOptions" ].split ("," )
445397 for opt in containerOpts :
446- cmd .extend ([opt .strip ()])
447- if os .path .isdir (rootImage ) or os .path .isfile (rootImage ):
448- cmd .extend ([rootImage , innerCmd ])
449- else :
398+ outerCmd .extend ([opt .strip ()])
399+ if not (os .path .isdir (rootImage ) or os .path .isfile (rootImage )):
450400 # If we are here, it is because there is no image, or it is not accessible (e.g. not on CVMFS)
451401 self .log .error ("Singularity image to exec not found: " , rootImage )
452402 return S_ERROR ("Failed to find singularity image to exec" )
403+ outerCmd .append (rootImage )
404+ cmd = outerCmd + [innerCmd ]
453405
454406 self .log .debug (f"Execute singularity command: { cmd } " )
455407 self .log .debug (f"Execute singularity env: { self .__getEnv ()} " )
@@ -459,6 +411,13 @@ def submitJob(self, executableFile, proxy=None, **kwargs):
459411
460412 if not result ["OK" ]:
461413 self .log .error ("Fail to run Singularity" , result ["Message" ])
414+ # If we fail to run the container try to run it again with verbose output
415+ # to help with debugging.
416+ self .log .error ("Singularity command was: " , cmd )
417+ self .log .error (f"Singularity env was: { self .__getEnv ()} " )
418+ debugCmd = [outerCmd [0 ], "--debug" ] + outerCmd [1 :] + ["echo" , "All okay" ]
419+ self .log .error ("Running with debug output to facilitate debugging" , debugCmd )
420+ result = systemCall (0 , debugCmd , callbackFunction = self .sendOutput , env = self .__getEnv ())
462421 if proxy and renewTask :
463422 gThreadScheduler .removeTask (renewTask )
464423 self .__deleteWorkArea (baseDir )
0 commit comments