@@ -60,13 +60,14 @@ class DGXRequest:
6060 Wrapper around the torchx AppDef and the DGX executor.
6161 This object is used to store job submission info for the scheduler.
6262 """
63+
6364 app : AppDef
6465 executor : DGXCloudExecutor
6566 cmd : list [str ]
6667 name : str
6768
6869
69- class DGXCloudScheduler (SchedulerMixin , Scheduler [dict [str , str ]]): # type: ignore
70+ class DGXCloudScheduler (SchedulerMixin , Scheduler [dict [str , str ]]): # type: ignore
7071 def __init__ (self , session_name : str ) -> None :
7172 super ().__init__ ("dgx" , session_name )
7273
@@ -76,11 +77,11 @@ def _run_opts(self) -> runopts:
7677 "job_dir" ,
7778 type_ = str ,
7879 help = "The directory to place the job code and outputs."
79- " The directory must not exist and will be created." ,
80+ " The directory must not exist and will be created." ,
8081 )
8182 return opts
8283
83- def _submit_dryrun ( # type: ignore
84+ def _submit_dryrun ( # type: ignore
8485 self ,
8586 app : AppDef ,
8687 cfg : Executor ,
@@ -100,7 +101,7 @@ def _submit_dryrun( # type: ignore
100101 return AppDryRunInfo (
101102 DGXRequest (app = app , executor = executor , cmd = cmd , name = role .name ),
102103 # Minimal function to show the config, if any
103- lambda req : f"DGX job for app: { req .app .name } , cmd: { ' ' .join (cmd )} , executor: { executor } "
104+ lambda req : f"DGX job for app: { req .app .name } , cmd: { ' ' .join (cmd )} , executor: { executor } " ,
104105 )
105106
106107 def schedule (self , dryrun_info : AppDryRunInfo [DGXRequest ]) -> str :
@@ -148,20 +149,15 @@ def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
148149 RoleStatus (
149150 role_name ,
150151 replicas = [
151- ReplicaStatus (
152- id = 0 ,
153- role = role_name ,
154- state = AppState .SUBMITTED ,
155- hostname = ""
156- )
152+ ReplicaStatus (id = 0 , role = role_name , state = AppState .SUBMITTED , hostname = "" )
157153 ],
158154 )
159155 ]
160156
161157 if not job_info :
162158 return None
163159
164- executor : DGXCloudExecutor = job_info .get ("executor" , None ) # type: ignore
160+ executor : DGXCloudExecutor = job_info .get ("executor" , None ) # type: ignore
165161 if not executor :
166162 return None
167163
@@ -175,7 +171,7 @@ def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
175171 roles_statuses = roles_statuses ,
176172 state = app_state ,
177173 msg = "" ,
178- ui_url = f"{ executor .base_url } /workloads/distributed/{ job_id } "
174+ ui_url = f"{ executor .base_url } /workloads/distributed/{ job_id } " ,
179175 )
180176
181177 def _cancel_existing (self , app_id : str ) -> None :
@@ -185,7 +181,7 @@ def _cancel_existing(self, app_id: str) -> None:
185181 stored_data = _get_job_dirs ()
186182 job_info = stored_data .get (app_id )
187183 _ , _ , job_id = app_id .split ("___" )
188- executor : DGXCloudExecutor = job_info .get ("executor" , None ) # type: ignore
184+ executor : DGXCloudExecutor = job_info .get ("executor" , None ) # type: ignore
189185 if not executor :
190186 return None
191187 executor .delete (job_id )
@@ -219,7 +215,9 @@ def _save_job_dir(app_id: str, job_status: str, executor: DGXCloudExecutor) -> N
219215
220216 app = {
221217 "job_status" : job_status ,
222- "executor" : serializer .serialize (fdl_dc .convert_dataclasses_to_configs (executor , allow_post_init = True )),
218+ "executor" : serializer .serialize (
219+ fdl_dc .convert_dataclasses_to_configs (executor , allow_post_init = True )
220+ ),
223221 }
224222 original_apps [app_id ] = app
225223
0 commit comments