Skip to content

Commit e38b31f

Browse files
matthdsmjagednabhi18av
authored
Allow 1 restart per task (#82)
* Allow 1 restart per task This should fix a concurrency issue with the CSI driver ceph/ceph-csi#3511 hashicorp/nomad#15197 * expose the reschedule and restart config vars * remove unused import --------- Co-authored-by: Jorge <[email protected]> Co-authored-by: Abhinav Sharma <[email protected]>
1 parent 42f44d3 commit e38b31f

File tree

2 files changed

+9
-7
lines changed

2 files changed

+9
-7
lines changed

plugins/nf-nomad/src/main/nextflow/nomad/config/NomadJobOpts.groovy

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,11 @@ class NomadJobOpts{
4444
JobVolume[] volumeSpec
4545
JobAffinity affinitySpec
4646
JobConstraint constraintSpec
47-
4847
JobConstraints constraintsSpec
4948

49+
Integer rescheduleAttempts
50+
Integer restartAttempts
51+
5052
NomadSecretOpts secretOpts
5153

5254
NomadJobOpts(Map nomadJobOpts, Map<String,String> env=null){
@@ -69,6 +71,10 @@ class NomadJobOpts{
6971
region = nomadJobOpts.region ?: sysEnv.get('NOMAD_REGION')
7072
namespace = nomadJobOpts.namespace ?: sysEnv.get('NOMAD_NAMESPACE')
7173

74+
//NOTE: Default to a single attempt per nomad job definition
75+
rescheduleAttempts = nomadJobOpts.rescheduleAttempts as Integer ?: 1
76+
restartAttempts = nomadJobOpts.restartAttempts as Integer ?: 1
77+
7278
dockerVolume = nomadJobOpts.dockerVolume ?: null
7379
if( dockerVolume ){
7480
log.info "dockerVolume config will be deprecated, use volume type:'docker' name:'name' instead"

plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,12 +125,8 @@ class NomadService implements Closeable{
125125
}
126126

127127
TaskGroup createTaskGroup(TaskRun taskRun, List<String> args, Map<String, String>env){
128-
//NOTE: Force a single-allocation with no-retries per nomad job definition
129-
final TASK_RESCHEDULE_ATTEMPTS = 0
130-
final TASK_RESTART_ATTEMPTS = 0
131-
132-
final ReschedulePolicy taskReschedulePolicy = new ReschedulePolicy().attempts(TASK_RESCHEDULE_ATTEMPTS)
133-
final RestartPolicy taskRestartPolicy = new RestartPolicy().attempts(TASK_RESTART_ATTEMPTS)
128+
final ReschedulePolicy taskReschedulePolicy = new ReschedulePolicy().attempts(this.config.jobOpts().rescheduleAttempts)
129+
final RestartPolicy taskRestartPolicy = new RestartPolicy().attempts(this.config.jobOpts().restartAttempts)
134130

135131
def task = createTask(taskRun, args, env)
136132
def taskGroup = new TaskGroup(

0 commit comments

Comments
 (0)