Skip to content

Commit b778a76

Browse files
committed
Check exit status of PBS Pro jobs
1 parent fc54c42 commit b778a76

File tree

1 file changed

+22
-1
lines changed

1 file changed

+22
-1
lines changed

reframe/core/schedulers/pbs.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,9 @@ def _update_nodelist(self, job, nodespec):
178178
job._nodelist = [x.split('/')[0] for x in nodespec.split('+')]
179179
job._nodelist.sort()
180180

181-
def poll(self, *jobs):
181+
# `is_pbs_pro` enables the code paths that must run for PBS Pro only, but
182+
# not Torque.
183+
def _poll(self, is_pbs_pro, *jobs):
182184
def output_ready(job):
183185
# We report a job as finished only when its stdout/stderr are
184186
# written back to the working directory
@@ -209,6 +211,19 @@ def output_ready(job):
209211
if job.cancelled or output_ready(job):
210212
self.log(f'Assuming job {job.jobid} completed')
211213
job._completed = True
214+
if is_pbs_pro:
215+
# With PBS Pro we can obtain the exit status of the job,
216+
# in case it actually failed.
217+
extended_info = osext.run_command(
218+
f'qstat -xf {job.jobid}'
219+
)
220+
exit_status_match = re.search(
221+
r'^ *Exit_status *= *(?P<exit_status>\d+)',
222+
extended_info.stdout,
223+
flags=re.MULTILINE,
224+
)
225+
if exit_status_match:
226+
job._exitcode = int(exit_status_match.group('exit_status'))
212227

213228
return
214229

@@ -277,7 +292,13 @@ def output_ready(job):
277292
job._exception = JobError('maximum pending time exceeded',
278293
job.jobid)
279294

295+
def poll(self, *job):
296+
self._poll(True, *job)
297+
280298

281299
@register_scheduler('torque')
282300
class TorqueJobScheduler(PbsJobScheduler):
283301
TASKS_OPT = '-l nodes={num_nodes}:ppn={num_cpus_per_node}'
302+
303+
def poll(self, *job):
304+
self._poll(False, *job)

0 commit comments

Comments
 (0)