Skip to content

Commit df18164

Browse files
authored
Merge pull request #2993 from giordano/mg/pbs-job-status
[bugfix] Properly retrieve exit status for PBS Pro jobs
2 parents 6cdf0bb + 519b70f commit df18164

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

reframe/core/schedulers/pbs.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,20 @@ def _update_nodelist(self, job, nodespec):
178178
job._nodelist = [x.split('/')[0] for x in nodespec.split('+')]
179179
job._nodelist.sort()
180180

181+
def _query_exit_code(self, job):
    '''Try to retrieve the exit code of a past job.

    :arg job: The job to query; its ``jobid`` is passed to ``qstat -xf``.
    :returns: The integer value of the ``Exit_status`` field found in the
        ``qstat`` output, or :obj:`None` if no such field is reported.
    '''

    # With PBS Pro we can obtain the exit status of a past job
    completed = osext.run_command(f'qstat -xf {job.jobid}')

    # PBS uses negative `Exit_status` values for jobs that could not be
    # executed, so an optional leading minus sign must be accepted;
    # otherwise such failures would be reported as "exit code unknown"
    match = re.search(
        r'^ *Exit_status *= *(?P<exit_status>-?\d+)', completed.stdout,
        flags=re.MULTILINE,
    )
    if match is None:
        return None

    return int(match.group('exit_status'))
194+
181195
def poll(self, *jobs):
182196
def output_ready(job):
183197
# We report a job as finished only when its stdout/stderr are
@@ -209,6 +223,7 @@ def output_ready(job):
209223
if job.cancelled or output_ready(job):
210224
self.log(f'Assuming job {job.jobid} completed')
211225
job._completed = True
226+
job._exitcode = self._query_exit_code(job)
212227

213228
return
214229

@@ -281,3 +296,9 @@ def output_ready(job):
281296
@register_scheduler('torque')
class TorqueJobScheduler(PbsJobScheduler):
    # Option template; its placeholders are named `num_nodes` and
    # `num_cpus_per_node`
    TASKS_OPT = '-l nodes={num_nodes}:ppn={num_cpus_per_node}'

    def _query_exit_code(self, job):
        '''Report that the exit code of a past job is unavailable.

        :arg job: The job whose exit code is requested (not inspected).
        :returns: Always :obj:`None`.
        '''

        # Torque does not provide a way to retrieve the history of jobs
        return None

0 commit comments

Comments
 (0)