@@ -178,7 +178,9 @@ def _update_nodelist(self, job, nodespec):
178178 job ._nodelist = [x .split ('/' )[0 ] for x in nodespec .split ('+' )]
179179 job ._nodelist .sort ()
180180
181- def poll (self , * jobs ):
181+ # The `is_pbs_pro` argument enables the code paths that apply only to
182+ # PBS Pro and must not run for Torque.
183+ def _poll (self , is_pbs_pro , * jobs ):
182184 def output_ready (job ):
183185 # We report a job as finished only when its stdout/stderr are
184186 # written back to the working directory
@@ -209,6 +211,19 @@ def output_ready(job):
209211 if job .cancelled or output_ready (job ):
210212 self .log (f'Assuming job { job .jobid } completed' )
211213 job ._completed = True
214+ if is_pbs_pro :
215+ # With PBS Pro we can obtain the exit status of the job,
216+ # in case it actually failed.
217+ extended_info = osext .run_command (
218+ f'qstat -xf { job .jobid } '
219+ )
220+ exit_status_match = re .search (
221+ r'^ *Exit_status *= *(?P<exit_status>\d+)' ,
222+ extended_info .stdout ,
223+ flags = re .MULTILINE ,
224+ )
225+ if exit_status_match :
226+ job ._exitcode = int (exit_status_match .group ('exit_status' ))
212227
213228 return
214229
@@ -277,7 +292,13 @@ def output_ready(job):
277292 job ._exception = JobError ('maximum pending time exceeded' ,
278293 job .jobid )
279294
295+ def poll (self , * job ):
296+ self ._poll (True , * job )
297+
280298
@register_scheduler('torque')
class TorqueJobScheduler(PbsJobScheduler):
    '''Scheduler backend registered under the name ``torque``.

    It reuses the PBS backend, but overrides the task placement option and
    polls jobs with the PBS Pro-only code paths disabled.
    '''

    # Resource request template, filled in with the number of nodes and the
    # number of CPUs per node.
    TASKS_OPT = '-l nodes={num_nodes}:ppn={num_cpus_per_node}'

    def poll(self, *jobs):
        '''Poll the given jobs with the PBS Pro-only code paths disabled.

        This is a thin wrapper around :func:`_poll` that passes ``False`` as
        its ``is_pbs_pro`` flag, so PBS Pro-specific steps (such as the
        ``qstat -xf`` exit status lookup) are skipped.

        :arg jobs: The jobs to poll; zero or more may be given.
        '''

        # `is_pbs_pro` must be passed positionally, because it precedes the
        # variadic job arguments in `_poll()`'s signature.
        self._poll(False, *jobs)
0 commit comments