Skip to content

Commit b14674d

Browse files
Add deleteTasksOnCompletion to Azure Batch configuration (#4114)
Deleting Azure Tasks was checking the configuration object deleteJobsOnCompletion which was incorrect since a task belongs to a job. This adds the equivalent configuration for tasks which is checked before deleting the tasks. Signed-off-by: Adam Talbot <[email protected]> Signed-off-by: Ben Sherman <[email protected]> Signed-off-by: Adam Talbot <[email protected]> Co-authored-by: Ben Sherman <[email protected]>
1 parent 22638d4 commit b14674d

File tree

6 files changed

+61
-35
lines changed

6 files changed

+61
-35
lines changed

docs/config.md

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -330,21 +330,32 @@ The following settings are available:
330330
`azure.batch.copyToolInstallMode`
331331
: Specify where the `azcopy` tool used by Nextflow. When `node` is specified it's copied once during the pool creation. When `task` is provider, it's installed for each task execution (default: `node`).
332332

333-
`azure.batch.terminateJobsOnCompletion`
334-
: Enables the Batch Job to automatically terminate a job once all tasks have completed (default: `true`).
335-
336333
`azure.batch.deleteJobsOnCompletion`
337-
: Enable the automatic deletion of jobs created by the pipeline execution (default: `true`).
334+
: Delete all jobs when the workflow completes (default: `false`).
335+
: :::{versionchanged} 23.08.0-edge
336+
Default value was changed from `true` to `false`.
337+
:::
338338

339339
`azure.batch.deletePoolsOnCompletion`
340-
: Enable the automatic deletion of compute node pools upon pipeline completion (default: `false`).
340+
: Delete all compute node pools when the workflow completes (default: `false`).
341+
342+
`azure.batch.deleteTasksOnCompletion`
343+
: :::{versionadded} 23.08.0-edge
344+
:::
345+
: Delete each task when it completes (default: `true`).
346+
: Although this setting is enabled by default, failed tasks will not be deleted unless it is explicitly enabled. This way, the default behavior is that successful tasks are deleted while failed tasks are preserved for debugging purposes.
341347

342348
`azure.batch.endpoint`
343349
: The batch service endpoint e.g. `https://nfbatch1.westeurope.batch.azure.com`.
344350

345351
`azure.batch.location`
346352
: The name of the batch service region, e.g. `westeurope` or `eastus2`. This is not needed when the endpoint is specified.
347353

354+
`azure.batch.terminateJobsOnCompletion`
355+
: :::{versionadded} 23.05.0-edge
356+
:::
357+
: When the workflow completes, set all jobs to terminate on task completion. (default: `true`).
358+
348359
`azure.batch.pools.<name>.autoScale`
349360
: Enable autoscaling feature for the pool identified with `<name>`.
350361

plugins/nf-azure/src/main/nextflow/cloud/azure/batch/AzBatchService.groovy

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -785,17 +785,13 @@ class AzBatchService implements Closeable {
785785
apply(() -> client.taskOperations().deleteTask(key.jobId, key.taskId))
786786
}
787787

788+
/**
789+
* Set all jobs to terminate on completion.
790+
*/
788791
protected void terminateJobs() {
789-
/*
790-
We set the job to terminate when all tasks are complete rather than directly terminating, this allows Azure Batch to handle the termination for us.
791-
*/
792-
793-
for( Map.Entry<TaskProcessor,String> entry : allJobIds ) {
794-
final proc = entry.key
795-
final jobId = entry.value
796-
792+
for( String jobId : allJobIds.values() ) {
797793
try {
798-
log.trace "Terminating Azure job ${jobId}"
794+
log.trace "Setting Azure job ${jobId} to terminate on completion"
799795

800796
CloudJob job = apply(() -> client.jobOperations().getJob(jobId))
801797
final poolInfo = job.poolInfo()
@@ -813,10 +809,7 @@ class AzBatchService implements Closeable {
813809
}
814810

815811
protected void cleanupJobs() {
816-
for( Map.Entry<TaskProcessor,String> entry : allJobIds ) {
817-
final proc = entry.key
818-
final jobId = entry.value
819-
812+
for( String jobId : allJobIds.values() ) {
820813
try {
821814
log.trace "Deleting Azure job ${jobId}"
822815
apply(() -> client.jobOperations().deleteJob(jobId))
@@ -828,7 +821,7 @@ class AzBatchService implements Closeable {
828821
}
829822

830823
protected void cleanupPools() {
831-
for( String poolId : allPools.keySet()) {
824+
for( String poolId : allPools.keySet() ) {
832825
try {
833826
apply(() -> client.poolOperations().deletePool(poolId))
834827
}
@@ -849,17 +842,20 @@ class AzBatchService implements Closeable {
849842
}
850843
return identity
851844
}
845+
852846
@Override
853847
void close() {
854-
// Terminate existing jobs to prevent them occupying quota
855-
if( config.batch().terminateJobsOnCompletion!=Boolean.FALSE ) {
848+
// terminate all jobs to prevent them from occupying quota
849+
if( config.batch().terminateJobsOnCompletion ) {
856850
terminateJobs()
857851
}
858852

859-
// cleanup app successful jobs
860-
if( config.batch().deleteJobsOnCompletion!=Boolean.FALSE ) {
853+
// delete all jobs
854+
if( config.batch().deleteJobsOnCompletion ) {
861855
cleanupJobs()
862856
}
857+
858+
// delete all autopools
863859
if( config.batch().canCreatePool() && config.batch().deletePoolsOnCompletion ) {
864860
cleanupPools()
865861
}

plugins/nf-azure/src/main/nextflow/cloud/azure/batch/AzBatchTaskHandler.groovy

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,15 +130,15 @@ class AzBatchTaskHandler extends TaskHandler implements FusionAwareTask {
130130
}
131131

132132
private Boolean shouldDelete() {
133-
executor.config.batch().deleteJobsOnCompletion
133+
executor.config.batch().deleteTasksOnCompletion
134134
}
135135

136136
protected void deleteTask(AzTaskKey taskKey, TaskRun task) {
137137
if( !taskKey || shouldDelete()==Boolean.FALSE )
138138
return
139139

140140
if( !task.isSuccess() && shouldDelete()==null ) {
141-
// do not delete successfully executed pods for debugging purpose
141+
// preserve failed tasks for debugging purposes, unless deletion is explicitly enabled
142142
return
143143
}
144144

plugins/nf-azure/src/main/nextflow/cloud/azure/config/AzBatchOpts.groovy

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class AzBatchOpts implements CloudTransferOptions {
5050
Boolean terminateJobsOnCompletion
5151
Boolean deleteJobsOnCompletion
5252
Boolean deletePoolsOnCompletion
53+
Boolean deleteTasksOnCompletion
5354
CopyToolInstallMode copyToolInstallMode
5455

5556
Map<String,AzPoolOpts> pools
@@ -63,9 +64,10 @@ class AzBatchOpts implements CloudTransferOptions {
6364
location = config.location
6465
autoPoolMode = config.autoPoolMode
6566
allowPoolCreation = config.allowPoolCreation
66-
terminateJobsOnCompletion = config.terminateJobsOnCompletion
67+
terminateJobsOnCompletion = config.terminateJobsOnCompletion != Boolean.FALSE
6768
deleteJobsOnCompletion = config.deleteJobsOnCompletion
6869
deletePoolsOnCompletion = config.deletePoolsOnCompletion
70+
deleteTasksOnCompletion = config.deleteTasksOnCompletion
6971
pools = parsePools(config.pools instanceof Map ? config.pools as Map<String,Map> : Collections.<String,Map>emptyMap())
7072
maxParallelTransfers = config.maxParallelTransfers ? config.maxParallelTransfers as int : MAX_TRANSFER
7173
maxTransferAttempts = config.maxTransferAttempts ? config.maxTransferAttempts as int : MAX_TRANSFER_ATTEMPTS

plugins/nf-azure/src/test/nextflow/cloud/azure/batch/AzBatchServiceTest.groovy

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -350,21 +350,20 @@ class AzBatchServiceTest extends Specification {
350350

351351
}
352352

353-
354-
def 'should cleanup jobs by default' () {
353+
def 'should set jobs to automatically terminate by default' () {
355354
given:
356355
def CONFIG = [:]
357356
def exec = Mock(AzBatchExecutor) {getConfig() >> new AzConfig(CONFIG) }
358357
AzBatchService svc = Spy(AzBatchService, constructorArgs:[exec])
359358
when:
360359
svc.close()
361360
then:
362-
1 * svc.cleanupJobs() >> null
361+
1 * svc.terminateJobs() >> null
363362
}
364363

365-
def 'should cleanup jobs no cleanup jobs' () {
364+
def 'should not cleanup jobs by default' () {
366365
given:
367-
def CONFIG = [batch:[deleteJobsOnCompletion: false]]
366+
def CONFIG = [:]
368367
def exec = Mock(AzBatchExecutor) {getConfig() >> new AzConfig(CONFIG) }
369368
AzBatchService svc = Spy(AzBatchService, constructorArgs:[exec])
370369
when:
@@ -373,7 +372,18 @@ class AzBatchServiceTest extends Specification {
373372
0 * svc.cleanupJobs() >> null
374373
}
375374

376-
def 'should cleanup not cleanup pools by default' () {
375+
def 'should cleanup jobs if specified' () {
376+
given:
377+
def CONFIG = [batch:[deleteJobsOnCompletion: true]]
378+
def exec = Mock(AzBatchExecutor) {getConfig() >> new AzConfig(CONFIG) }
379+
AzBatchService svc = Spy(AzBatchService, constructorArgs:[exec])
380+
when:
381+
svc.close()
382+
then:
383+
1 * svc.cleanupJobs() >> null
384+
}
385+
386+
def 'should not cleanup pools by default' () {
377387
given:
378388
def CONFIG = [:]
379389
def exec = Mock(AzBatchExecutor) {getConfig() >> new AzConfig(CONFIG) }
@@ -395,7 +405,7 @@ class AzBatchServiceTest extends Specification {
395405
1 * svc.cleanupPools() >> null
396406
}
397407

398-
def 'should cleanup cleanup pools with allowPoolCreation' () {
408+
def 'should cleanup pools with allowPoolCreation' () {
399409
given:
400410
def CONFIG = [batch:[allowPoolCreation: true, deletePoolsOnCompletion: true]]
401411
def exec = Mock(AzBatchExecutor) {getConfig() >> new AzConfig(CONFIG) }

plugins/nf-azure/src/test/nextflow/cloud/azure/config/AzureConfigTest.groovy

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,10 @@ class AzureConfigTest extends Specification {
7070

7171
and:
7272
cfg.batch().endpoint == null
73+
cfg.batch().terminateJobsOnCompletion == true
7374
cfg.batch().deleteJobsOnCompletion == null
7475
cfg.batch().deletePoolsOnCompletion == null
76+
cfg.batch().deleteTasksOnCompletion == null
7577
cfg.batch().location == null
7678
cfg.batch().autoPoolMode == null
7779
cfg.batch().allowPoolCreation == null
@@ -99,8 +101,11 @@ class AzureConfigTest extends Specification {
99101
endpoint: ENDPOINT,
100102
location: LOCATION,
101103
autoPoolMode: true,
102-
allowPoolCreation: true, deleteJobsOnCompletion: false,
104+
allowPoolCreation: true,
105+
terminateJobsOnCompletion: false,
106+
deleteJobsOnCompletion: true,
103107
deletePoolsOnCompletion: true,
108+
deleteTasksOnCompletion: false,
104109
pools: [ myPool: [
105110
vmType: 'Foo_A1',
106111
autoScale: true,
@@ -124,8 +129,10 @@ class AzureConfigTest extends Specification {
124129
cfg.batch().location == LOCATION
125130
cfg.batch().autoPoolMode == true
126131
cfg.batch().allowPoolCreation == true
127-
cfg.batch().deleteJobsOnCompletion == false
132+
cfg.batch().terminateJobsOnCompletion == false
133+
cfg.batch().deleteJobsOnCompletion == true
128134
cfg.batch().deletePoolsOnCompletion == true
135+
cfg.batch().deleteTasksOnCompletion == false
129136
cfg.batch().canCreatePool()
130137
and:
131138
cfg.batch().pool('myPool').vmType == 'Foo_A1'

0 commit comments

Comments
 (0)