@@ -231,8 +231,26 @@ def select_resources(self, request, runtime_context): # pylint: disable=unused-
231
231
232
232
return result
233
233
234
+ def _runner (self , job , runtime_context ):
235
+ """ Job running thread. """
236
+ try :
237
+ job .run (runtime_context )
238
+ except WorkflowException as err :
239
+ _logger .exception ("Got workflow error" )
240
+ self .exceptions .append (err )
241
+ except Exception as err : # pylint: disable=broad-except
242
+ _logger .exception ("Got workflow error" )
243
+ self .exceptions .append (WorkflowException (Text (err )))
244
+ finally :
245
+ with runtime_context .workflow_eval_lock :
246
+ self .threads .remove (threading .current_thread ())
247
+ if isinstance (job , JobBase ):
248
+ self .allocated_ram -= job .builder .resources ["ram" ]
249
+ self .allocated_cores -= job .builder .resources ["cores" ]
250
+ runtime_context .workflow_eval_lock .notifyAll ()
251
+
234
252
def run_job (self ,
235
- job , # type: Union[JobBase, WorkflowJob]
253
+ job , # type: Union[JobBase, WorkflowJob, None ]
236
254
runtime_context # type: RuntimeContext
237
255
): # type: (...) -> None
238
256
""" Execute a single Job in a seperate thread. """
@@ -241,50 +259,51 @@ def run_job(self,
241
259
with self .pending_jobs_lock :
242
260
self .pending_jobs .append (job )
243
261
244
- while self .pending_jobs :
245
- with self .pending_jobs_lock :
246
- job = self .pending_jobs [0 ]
247
- if isinstance (job , JobBase ) \
248
- and \
249
- ((self .allocated_ram + job .builder .resources ["ram" ])
250
- > self .max_ram
251
- or (self .allocated_cores + job .builder .resources ["cores" ])
252
- > self .max_cores ):
253
- _logger .warning (
254
- 'Job "%s" requested more resources (%s) than are '
255
- 'available (max ram is %f, max cores is %f)' ,
256
- job .name , job .builder .resources , self .max_ram ,
257
- self .max_cores )
258
- return
262
+ with self .pending_jobs_lock :
263
+ n = 0
264
+ while (n + 1 ) <= len (self .pending_jobs ):
265
+ job = self .pending_jobs [n ]
266
+ if isinstance (job , JobBase ):
267
+ if ((job .builder .resources ["ram" ])
268
+ > self .max_ram
269
+ or (job .builder .resources ["cores" ])
270
+ > self .max_cores ):
271
+ _logger .error (
272
+ 'Job "%s" cannot be run, requests more resources (%s) '
273
+ 'than available on this host (max ram %d, max cores %d' ,
274
+ job .name , job .builder .resources ,
275
+ self .allocated_ram ,
276
+ self .allocated_cores ,
277
+ self .max_ram ,
278
+ self .max_cores )
279
+ self .pending_jobs .remove (job )
280
+ return
281
+
282
+ if ((self .allocated_ram + job .builder .resources ["ram" ])
283
+ > self .max_ram
284
+ or (self .allocated_cores + job .builder .resources ["cores" ])
285
+ > self .max_cores ):
286
+ _logger .debug (
287
+ 'Job "%s" cannot run yet, resources (%s) are not '
288
+ 'available (already allocated ram is %d, allocated cores is %d, '
289
+ 'max ram %d, max cores %d' ,
290
+ job .name , job .builder .resources ,
291
+ self .allocated_ram ,
292
+ self .allocated_cores ,
293
+ self .max_ram ,
294
+ self .max_cores )
295
+ n += 1
296
+ continue
297
+
298
+ thread = threading .Thread (target = self ._runner , args = (job , runtime_context ))
299
+ thread .daemon = True
300
+ self .threads .add (thread )
301
+ if isinstance (job , JobBase ):
302
+ self .allocated_ram += job .builder .resources ["ram" ]
303
+ self .allocated_cores += job .builder .resources ["cores" ]
304
+ thread .start ()
259
305
self .pending_jobs .remove (job )
260
306
261
- def runner ():
262
- """ Job running thread. """
263
- try :
264
- job .run (runtime_context )
265
- except WorkflowException as err :
266
- _logger .exception ("Got workflow error" )
267
- self .exceptions .append (err )
268
- except Exception as err : # pylint: disable=broad-except
269
- _logger .exception ("Got workflow error" )
270
- self .exceptions .append (WorkflowException (Text (err )))
271
- finally :
272
- with runtime_context .workflow_eval_lock :
273
- self .threads .remove (threading .current_thread ())
274
- if isinstance (job , JobBase ):
275
- self .allocated_ram -= job .builder .resources ["ram" ]
276
- self .allocated_cores -= job .builder .resources ["cores" ]
277
- runtime_context .workflow_eval_lock .notifyAll ()
278
-
279
- thread = threading .Thread (target = runner )
280
- thread .daemon = True
281
- self .threads .add (thread )
282
- if isinstance (job , JobBase ):
283
- self .allocated_ram += job .builder .resources ["ram" ]
284
- self .allocated_cores += job .builder .resources ["cores" ]
285
- thread .start ()
286
-
287
-
288
307
def wait_for_next_completion (self , runtime_context ):
289
308
# type: (RuntimeContext) -> None
290
309
""" Wait for jobs to finish. """
@@ -324,6 +343,9 @@ def run_jobs(self,
324
343
logger .error ("Workflow cannot make any more progress." )
325
344
break
326
345
346
+ self .run_job (None , runtime_context )
327
347
while self .threads :
328
348
self .wait_for_next_completion (runtime_context )
349
+ self .run_job (None , runtime_context )
350
+
329
351
runtime_context .workflow_eval_lock .release ()
0 commit comments