Skip to content

Commit 25196f2

Browse files
committed
Fix PBS stdout/stderr bug
1 parent 681440b commit 25196f2

File tree

1 file changed

+21
-6
lines changed

1 file changed

+21
-6
lines changed

reframe/core/schedulers/pbs.py

Lines changed: 21 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,13 @@ def _update_nodelist(self, job, nodespec):
181181
job._nodelist.sort()
182182

183183
def poll(self, *jobs):
184+
def out_ready(job):
185+
# We report a job as finished only when its stdout/stderr are
186+
# written back to the working directory
187+
stdout = os.path.join(job.workdir, job.stdout)
188+
stderr = os.path.join(job.workdir, job.stderr)
189+
return os.path.exists(stdout) and os.path.exists(stderr)
190+
184191
if jobs:
185192
# Filter out non-jobs
186193
jobs = [job for job in jobs if job is not None]
@@ -202,7 +209,12 @@ def poll(self, *jobs):
202209
f'assuming all jobs completed')
203210
for job in jobs:
204211
job._state = 'COMPLETED'
205-
job._completed = True
212+
if job.cancelled or out_ready(job):
213+
job._completed = True
214+
else:
215+
self.log(f'Job {job.jobid} output has not been written '
216+
f'back to working directory yet; will poll '
217+
f'again later')
206218

207219
return
208220

@@ -227,7 +239,13 @@ def poll(self, *jobs):
227239
self.log(f'Job {job.jobid} not known to scheduler, '
228240
f'assuming job completed')
229241
job._state = 'COMPLETED'
230-
job._completed = True
242+
if job.cancelled or out_ready(job):
243+
job._completed = True
244+
else:
245+
self.log(f'Job {job.jobid} output has not been written '
246+
f'back to working directory yet; will poll '
247+
f'again later')
248+
231249
continue
232250

233251
info = jobinfo[job.jobid]
@@ -259,10 +277,7 @@ def poll(self, *jobs):
259277

260278
# We report a job as finished only when its stdout/stderr are
261279
# written back to the working directory
262-
stdout = os.path.join(job.workdir, job.stdout)
263-
stderr = os.path.join(job.workdir, job.stderr)
264-
out_ready = os.path.exists(stdout) and os.path.exists(stderr)
265-
done = job.cancelled or out_ready
280+
done = job.cancelled or out_ready(job)
266281
if done:
267282
job._completed = True
268283
elif (job.state in ['QUEUED', 'HELD', 'WAITING'] and

0 commit comments

Comments
 (0)