Skip to content

Commit 5730679

Browse files
authored
Fix K8s job fallback to not return incorrect zero exit code (#6746)
1 parent 24b858d commit 5730679

File tree

4 files changed: 49 additions (+49) and 5 deletions (-5).

plugins/nf-k8s/src/main/nextflow/k8s/K8sTaskHandler.groovy

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,8 @@ class K8sTaskHandler extends TaskHandler implements FusionAwareTask {
151151
final workDir = Escape.path(task.workDir)
152152

153153
final result = new ArrayList(BashWrapperBuilder.BASH)
154+
result.add('-o')
155+
result.add('pipefail')
154156
result.add('-c')
155157
result.add("bash ${workDir}/${TaskRun.CMD_RUN} 2>&1 | tee ${workDir}/${TaskRun.CMD_LOG}")
156158
return result

plugins/nf-k8s/src/main/nextflow/k8s/client/K8sClient.groovy

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -419,7 +419,6 @@ class K8sClient {
419419
log.warn1("Job $jobName already completed and Pod is gone")
420420
final dummyPodStatus = [
421421
terminated: [
422-
exitcode: 0,
423422
reason: "Completed",
424423
startedAt: jobStatus.startTime,
425424
finishedAt: jobStatus.completionTime,

plugins/nf-k8s/src/test/nextflow/k8s/K8sTaskHandlerTest.groovy

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ class K8sTaskHandlerTest extends Specification {
9696
containers: [[
9797
name:'nf-123',
9898
image:'debian:latest',
99-
args:['/bin/bash', '-ue','-c','bash /some/work/dir/.command.run 2>&1 | tee /some/work/dir/.command.log']
99+
args:['/bin/bash', '-ue', '-o', 'pipefail', '-c', 'bash /some/work/dir/.command.run 2>&1 | tee /some/work/dir/.command.log']
100100
]]
101101
]
102102
]
@@ -123,7 +123,7 @@ class K8sTaskHandlerTest extends Specification {
123123
and:
124124
result.metadata.labels == [sessionId: 'xxx']
125125
result.metadata.annotations == [evict: 'false']
126-
result.spec.containers[0].command == ['/bin/bash', '-ue', '-c','bash /some/work/dir/.command.run 2>&1 | tee /some/work/dir/.command.log']
126+
result.spec.containers[0].command == ['/bin/bash', '-ue', '-o', 'pipefail', '-c', 'bash /some/work/dir/.command.run 2>&1 | tee /some/work/dir/.command.log']
127127
result.spec.containers[0].resources == [ requests: [cpu:1] ]
128128
result.spec.containers[0].env == [ [name:'NXF_OWNER', value:'501:502'] ]
129129

@@ -148,7 +148,7 @@ class K8sTaskHandlerTest extends Specification {
148148
and:
149149
result.metadata.namespace == 'namespace-x'
150150
result.spec.containers[0].image == 'user/alpine:1.0'
151-
result.spec.containers[0].command == ['/bin/bash', '-ue', '-c','bash /some/work/dir/.command.run 2>&1 | tee /some/work/dir/.command.log']
151+
result.spec.containers[0].command == ['/bin/bash', '-ue', '-o', 'pipefail', '-c', 'bash /some/work/dir/.command.run 2>&1 | tee /some/work/dir/.command.log']
152152
result.spec.containers[0].resources == [ requests: [cpu:4, memory:'16384Mi'], limits: [memory:'16384Mi'] ]
153153

154154
}
@@ -428,7 +428,7 @@ class K8sTaskHandlerTest extends Specification {
428428
containers: [[
429429
name: 'nf-123',
430430
image: 'debian:latest',
431-
command: ['/bin/bash', '-ue','-c','bash /some/work/dir/.command.run 2>&1 | tee /some/work/dir/.command.log']
431+
command: ['/bin/bash', '-ue', '-o', 'pipefail', '-c', 'bash /some/work/dir/.command.run 2>&1 | tee /some/work/dir/.command.log']
432432
]]
433433
]
434434
]

plugins/nf-k8s/src/test/nextflow/k8s/client/K8sClientTest.groovy

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1059,4 +1059,47 @@ class K8sClientTest extends Specification {
10591059
def e = thrown(PodUnschedulableException)
10601060
e.message == "K8s pod in Failed state"
10611061
}
1062+
1063+
def 'should fallback to job status when pod is gone and not return hardcoded exit code' () {
1064+
given:
1065+
def JOB_STATUS_JSON = '''
1066+
{
1067+
"apiVersion": "batch/v1",
1068+
"kind": "Job",
1069+
"metadata": {
1070+
"name": "test-job"
1071+
},
1072+
"status": {
1073+
"succeeded": 1,
1074+
"startTime": "2025-01-15T10:00:00Z",
1075+
"completionTime": "2025-01-15T10:05:00Z",
1076+
"conditions": [
1077+
{
1078+
"type": "Complete",
1079+
"status": "True",
1080+
"lastProbeTime": "2025-01-15T10:05:00Z",
1081+
"lastTransitionTime": "2025-01-15T10:05:00Z"
1082+
}
1083+
]
1084+
}
1085+
}
1086+
'''
1087+
def client = Spy(K8sClient)
1088+
final JOB_NAME = 'test-job'
1089+
1090+
when:
1091+
def result = client.jobStateFallback0(JOB_NAME)
1092+
1093+
then:
1094+
1 * client.jobStatus(JOB_NAME) >> new K8sResponseJson(JOB_STATUS_JSON)
1095+
1096+
and:
1097+
result.terminated != null
1098+
result.terminated.reason == 'Completed'
1099+
result.terminated.startedAt == '2025-01-15T10:00:00Z'
1100+
result.terminated.finishedAt == '2025-01-15T10:05:00Z'
1101+
// The key assertion: exitCode should not be present (null) so fallback to .exitcode file works
1102+
result.terminated.exitCode == null
1103+
result.terminated.exitcode == null
1104+
}
10621105
}

0 commit comments

Comments
 (0)