File tree (expand / collapse): 1 file changed, +8 -0 lines changed.
File tree (expand / collapse): 1 file changed, +8 -0 lines changed.
Columns: original file line number | diff line number | diff line change
@@ -225,6 +225,12 @@ def create_training_job(
225225 if self .nodes < 1 :
226226 raise ValueError ("Node count must be at least 1" )
227227
228+ if len (name ) >= 35 :
229+ logger .warning (
230+ "Training name can only be max 35 characters. Shortening name to 35 characters..."
231+ )
232+ name = name [:34 ]
233+
228234 # Common payload elements
229235 common_payload = {
230236 "name" : name ,
@@ -265,6 +271,7 @@ def create_training_job(
265271 headers = self ._default_headers (token = token )
266272 response = requests .post (url , json = payload , headers = headers )
267273
274+ logger .info (json .dumps (payload ))
268275 logger .debug (
269276 "Created %s job; response code=%s, content=%s" ,
270277 "distributed" if self .nodes > 1 else "training" ,
@@ -276,6 +283,7 @@ def create_training_job(
276283
277284 def launch (self , name : str , cmd : list [str ]) -> tuple [str , str ]:
278285 name = name .replace ("_" , "-" ).replace ("." , "-" ).lower () # to meet K8s requirements
286+ logger .info (f"workload name:{ name } " )
279287 token = self .get_auth_token ()
280288 if not token :
281289 raise RuntimeError ("Failed to get auth token" )
You can’t perform that action at this time.
0 commit comments