11import  logging 
2- import  time 
3- 
4- from  parsl .providers .kubernetes .template  import  template_string 
5- 
6- logger  =  logging .getLogger (__name__ )
7- 
2+ import  uuid 
83from  typing  import  Any , Dict , List , Optional , Tuple 
94
105import  typeguard 
116
127from  parsl .errors  import  OptionalModuleMissing 
138from  parsl .jobs .states  import  JobState , JobStatus 
149from  parsl .providers .base  import  ExecutionProvider 
15- from  parsl .utils  import  RepresentationMixin 
10+ from  parsl .providers .kubernetes .template  import  template_string 
11+ from  parsl .utils  import  RepresentationMixin , sanitize_dns_subdomain_rfc1123 
1612
1713try :
1814    from  kubernetes  import  client , config 
1915    _kubernetes_enabled  =  True 
2016except  (ImportError , NameError , FileNotFoundError ):
2117    _kubernetes_enabled  =  False 
2218
19+ logger  =  logging .getLogger (__name__ )
20+ 
2321translate_table  =  {
2422    'Running' : JobState .RUNNING ,
2523    'Pending' : JobState .PENDING ,
@@ -161,7 +159,7 @@ def __init__(self,
161159        self .resources : Dict [object , Dict [str , Any ]]
162160        self .resources  =  {}
163161
164-     def  submit (self , cmd_string , tasks_per_node , job_name = "parsl" ):
162+     def  submit (self , cmd_string :  str , tasks_per_node :  int , job_name :  str   =   "parsl.kube " ):
165163        """ Submit a job 
166164        Args: 
167165             - cmd_string  :(String) - Name of the container to initiate 
@@ -173,30 +171,34 @@ def submit(self, cmd_string, tasks_per_node, job_name="parsl"):
173171        Returns: 
174172             - job_id: (string) Identifier for the job 
175173        """ 
174+         job_id  =  uuid .uuid4 ().hex [:8 ]
176175
177-         cur_timestamp  =  str (time .time () *  1000 ).split ("." )[0 ]
178-         job_name  =  "{0}-{1}" .format (job_name , cur_timestamp )
179- 
180-         if  not  self .pod_name :
181-             pod_name  =  '{}' .format (job_name )
182-         else :
183-             pod_name  =  '{}-{}' .format (self .pod_name ,
184-                                       cur_timestamp )
176+         pod_name  =  self .pod_name  or  job_name 
177+         try :
178+             pod_name  =  sanitize_dns_subdomain_rfc1123 (pod_name )
179+         except  ValueError :
180+             logger .warning (
181+                 f"Invalid pod name '{ pod_name }  ' for job '{ job_id }  ', falling back to 'parsl.kube'" 
182+             )
183+             pod_name  =  "parsl.kube" 
184+         pod_name  =  pod_name [:253  -  1  -  len (job_id )]  # Leave room for the job ID 
185+         pod_name  =  pod_name .rstrip (".-" )  # Remove trailing dot or hyphen after trim 
186+         pod_name  =  f"{ pod_name }  .{ job_id }  " 
185187
186188        formatted_cmd  =  template_string .format (command = cmd_string ,
187189                                               worker_init = self .worker_init )
188190
189191        logger .debug ("Pod name: %s" , pod_name )
190192        self ._create_pod (image = self .image ,
191193                         pod_name = pod_name ,
192-                          job_name = job_name ,
194+                          job_id = job_id ,
193195                         cmd_string = formatted_cmd ,
194196                         volumes = self .persistent_volumes ,
195197                         service_account_name = self .service_account_name ,
196198                         annotations = self .annotations )
197-         self .resources [pod_name ] =  {'status' : JobStatus (JobState .RUNNING )}
199+         self .resources [job_id ] =  {'status' : JobStatus (JobState .RUNNING ),  'pod_name' :  pod_name }
198200
199-         return  pod_name 
201+         return  job_id 
200202
201203    def  status (self , job_ids ):
202204        """ Get the status of a list of jobs identified by the job identifiers 
@@ -212,6 +214,9 @@ def status(self, job_ids):
212214            self ._status ()
213215        return  [self .resources [jid ]['status' ] for  jid  in  job_ids ]
214216
217+     def  _get_pod_name (self , job_id : str ) ->  str :
218+         return  self .resources [job_id ]['pod_name' ]
219+ 
215220    def  cancel (self , job_ids ):
216221        """ Cancels the jobs specified by a list of job ids 
217222        Args: 
@@ -221,7 +226,8 @@ def cancel(self, job_ids):
221226        """ 
222227        for  job  in  job_ids :
223228            logger .debug ("Terminating job/pod: {0}" .format (job ))
224-             self ._delete_pod (job )
229+             pod_name  =  self ._get_pod_name (job )
230+             self ._delete_pod (pod_name )
225231
226232            self .resources [job ]['status' ] =  JobStatus (JobState .CANCELLED )
227233        rets  =  [True  for  i  in  job_ids ]
@@ -242,7 +248,8 @@ def _status(self):
242248        for  jid  in  to_poll_job_ids :
243249            phase  =  None 
244250            try :
245-                 pod  =  self .kube_client .read_namespaced_pod (name = jid , namespace = self .namespace )
251+                 pod_name  =  self ._get_pod_name (jid )
252+                 pod  =  self .kube_client .read_namespaced_pod (name = pod_name , namespace = self .namespace )
246253            except  Exception :
247254                logger .exception ("Failed to poll pod {} status, most likely because pod was terminated" .format (jid ))
248255                if  self .resources [jid ]['status' ] is  JobStatus (JobState .RUNNING ):
@@ -257,10 +264,10 @@ def _status(self):
257264                self .resources [jid ]['status' ] =  JobStatus (status )
258265
259266    def  _create_pod (self ,
260-                     image ,
261-                     pod_name ,
262-                     job_name ,
263-                     port = 80 ,
267+                     image :  str ,
268+                     pod_name :  str ,
269+                     job_id :  str ,
270+                     port :  int   =   80 ,
264271                    cmd_string = None ,
265272                    volumes = [],
266273                    service_account_name = None ,
@@ -269,7 +276,7 @@ def _create_pod(self,
269276        Args: 
270277              - image (string) : Docker image to launch 
271278              - pod_name (string) : Name of the pod 
272-               - job_name  (string) : App label  
279+               - job_id  (string) : Job ID  
273280        KWargs: 
274281             - port (integer) : Container port 
275282        Returns: 
@@ -299,7 +306,7 @@ def _create_pod(self,
299306                                                  )
300307        # Configure Pod template container 
301308        container  =  client .V1Container (
302-             name = pod_name ,
309+             name = job_id ,
303310            image = image ,
304311            resources = resources ,
305312            ports = [client .V1ContainerPort (container_port = port )],
@@ -322,7 +329,7 @@ def _create_pod(self,
322329                                                   claim_name = volume [0 ])))
323330
324331        metadata  =  client .V1ObjectMeta (name = pod_name ,
325-                                        labels = {"app " : job_name },
332+                                        labels = {"parsl-job-id " : job_id },
326333                                       annotations = annotations )
327334        spec  =  client .V1PodSpec (containers = [container ],
328335                                image_pull_secrets = [secret ],
0 commit comments