Skip to content

Commit 86ca369

Browse files
jagednabhi18av
authored andcommitted
refactor job status query logic
Signed-off-by: Jorge Aguilera <[email protected]>
1 parent 133a611 commit 86ca369

File tree

6 files changed

+149
-148
lines changed

6 files changed

+149
-148
lines changed

plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import nextflow.nomad.builders.JobBuilder
2828
import nextflow.nomad.config.NomadConfig
2929
import nextflow.processor.TaskRun
3030
import nextflow.exception.ProcessSubmitException
31+
import org.threeten.bp.OffsetDateTime
3132

3233
import java.nio.file.Path
3334

@@ -125,11 +126,11 @@ class NomadService implements Closeable{
125126
it.modifyIndex
126127
}?.last() : null
127128
TaskState currentState = last?.taskStates?.values()?.last()
128-
log.debug "Task $jobId , state=${currentState.state}"
129-
currentState
129+
log.debug "Task $jobId , state=${currentState?.state}"
130+
currentState ?: new TaskState(state: "unknown", failed: true, finishedAt: OffsetDateTime.now())
130131
}catch(Exception e){
131132
log.debug("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e)
132-
new TaskState(state: "unknow")
133+
new TaskState(state: "unknown", failed: true, finishedAt: OffsetDateTime.now())
133134
}
134135
}
135136

@@ -145,7 +146,7 @@ class NomadService implements Closeable{
145146
job.status
146147
}catch (Exception e){
147148
log.debug("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e)
148-
"Unknow"
149+
"unknown"
149150
}
150151
}
151152

plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadTaskHandler.groovy

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class NomadTaskHandler extends TaskHandler implements FusionAwareTask {
7878
if(isSubmitted()) {
7979
def state = taskState0()
8080
// include `terminated` state to allow the handler status to progress
81-
if( state && ( "running" == state.state || "terminated" == state.state)){
81+
if( state && ( ["running","terminated"].contains(state.state))){
8282
status = TaskStatus.RUNNING
8383
determineClientNode()
8484
return true
@@ -93,7 +93,7 @@ class NomadTaskHandler extends TaskHandler implements FusionAwareTask {
9393

9494
def state = taskState0()
9595

96-
final isFinished = state && state.finishedAt != null
96+
final isFinished = state && (state.finishedAt != null || state.state == "unknow")
9797

9898
log.debug "[NOMAD] checkIfCompleted task.name=$task.name; state=${state?.state} completed=$isFinished"
9999

plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ class NomadDSLSpec extends Dsl2Spec{
110110
if( recordedRequest.path.endsWith("/allocations")) {
111111
summary = true
112112
return new MockResponse().setResponseCode(200)
113-
.setBody('{"Summary": {"test":{"Complete":1}}}').addHeader("Content-Type", "application/json")
113+
.setBody(this.getClass().getResourceAsStream("/allocations.json").text).addHeader("Content-Type", "application/json")
114114
}else {
115115
return new MockResponse().setResponseCode(200)
116116
.setBody('{"Status": "dead"}').addHeader("Content-Type", "application/json")

plugins/nf-nomad/src/test/nextflow/nomad/executor/NomadServiceSpec.groovy

Lines changed: 3 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -73,145 +73,11 @@ class NomadServiceSpec extends Specification{
7373
recordedRequest.path == "/v1/job/theId/allocations"
7474

7575
and:
76-
state == "Unknown"
76+
state.state == "unknown"
7777

7878
when:
7979
mockWebServer.enqueue(new MockResponse()
80-
.setBody("""
81-
[
82-
{
83-
"ID": "ed344e0a-7290-d117-41d3-a64f853ca3c2",
84-
"EvalID": "a9c5effc-2242-51b2-f1fe-054ee11ab189",
85-
"Name": "example.cache[0]",
86-
"NodeID": "cb1f6030-a220-4f92-57dc-7baaabdc3823",
87-
"PreviousAllocation": "516d2753-0513-cfc7-57ac-2d6fac18b9dc",
88-
"NextAllocation": "cd13d9b9-4f97-7184-c88b-7b451981616b",
89-
"RescheduleTracker": {
90-
"Events": [
91-
{
92-
"PrevAllocID": "516d2753-0513-cfc7-57ac-2d6fac18b9dc",
93-
"PrevNodeID": "9230cd3b-3bda-9a3f-82f9-b2ea8dedb20e",
94-
"RescheduleTime": 1517434161192946200,
95-
"Delay": 5000000000
96-
}
97-
]
98-
},
99-
"JobID": "example",
100-
"TaskGroup": "cache",
101-
"DesiredStatus": "run",
102-
"DesiredDescription": "",
103-
"ClientStatus": "running",
104-
"ClientDescription": "",
105-
"TaskStates": {
106-
"redis": {
107-
"State": "running",
108-
"Failed": false,
109-
"StartedAt": "2017-05-25T23:41:23.240184101Z",
110-
"FinishedAt": "0001-01-01T00:00:00Z",
111-
"Events": [
112-
{
113-
"Type": "Received",
114-
"Time": 1495755675956923000,
115-
"FailsTask": false,
116-
"RestartReason": "",
117-
"SetupError": "",
118-
"DriverError": "",
119-
"ExitCode": 0,
120-
"Signal": 0,
121-
"Message": "",
122-
"KillTimeout": 0,
123-
"KillError": "",
124-
"KillReason": "",
125-
"StartDelay": 0,
126-
"DownloadError": "",
127-
"ValidationError": "",
128-
"DiskLimit": 0,
129-
"FailedSibling": "",
130-
"VaultError": "",
131-
"TaskSignalReason": "",
132-
"TaskSignal": "",
133-
"DriverMessage": ""
134-
},
135-
{
136-
"Type": "Task Setup",
137-
"Time": 1495755675957466400,
138-
"FailsTask": false,
139-
"RestartReason": "",
140-
"SetupError": "",
141-
"DriverError": "",
142-
"ExitCode": 0,
143-
"Signal": 0,
144-
"Message": "Building Task Directory",
145-
"KillTimeout": 0,
146-
"KillError": "",
147-
"KillReason": "",
148-
"StartDelay": 0,
149-
"DownloadError": "",
150-
"ValidationError": "",
151-
"DiskLimit": 0,
152-
"FailedSibling": "",
153-
"VaultError": "",
154-
"TaskSignalReason": "",
155-
"TaskSignal": "",
156-
"DriverMessage": ""
157-
},
158-
{
159-
"Type": "Driver",
160-
"Time": 1495755675970286800,
161-
"FailsTask": false,
162-
"RestartReason": "",
163-
"SetupError": "",
164-
"DriverError": "",
165-
"ExitCode": 0,
166-
"Signal": 0,
167-
"Message": "",
168-
"KillTimeout": 0,
169-
"KillError": "",
170-
"KillReason": "",
171-
"StartDelay": 0,
172-
"DownloadError": "",
173-
"ValidationError": "",
174-
"DiskLimit": 0,
175-
"FailedSibling": "",
176-
"VaultError": "",
177-
"TaskSignalReason": "",
178-
"TaskSignal": "",
179-
"DriverMessage": "Downloading image redis:7"
180-
},
181-
{
182-
"Type": "Started",
183-
"Time": 1495755683227522000,
184-
"FailsTask": false,
185-
"RestartReason": "",
186-
"SetupError": "",
187-
"DriverError": "",
188-
"ExitCode": 0,
189-
"Signal": 0,
190-
"Message": "",
191-
"KillTimeout": 0,
192-
"KillError": "",
193-
"KillReason": "",
194-
"StartDelay": 0,
195-
"DownloadError": "",
196-
"ValidationError": "",
197-
"DiskLimit": 0,
198-
"FailedSibling": "",
199-
"VaultError": "",
200-
"TaskSignalReason": "",
201-
"TaskSignal": "",
202-
"DriverMessage": ""
203-
}
204-
]
205-
}
206-
},
207-
"CreateIndex": 9,
208-
"ModifyIndex": 13,
209-
"CreateTime": 1495755675944527600,
210-
"ModifyTime": 1495755675944527600
211-
}
212-
]
213-
214-
""")
80+
.setBody(this.getClass().getResourceAsStream("/allocations.json").text)
21581
.addHeader("Content-Type", "application/json"));
21682

21783
state = service.getTaskState("theId")
@@ -222,7 +88,7 @@ class NomadServiceSpec extends Specification{
22288
recordedRequest.path == "/v1/job/theId/allocations"
22389

22490
and:
225-
state == "running"
91+
state.state == "running"
22692

22793
}
22894

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
[
2+
{
3+
"ID": "ed344e0a-7290-d117-41d3-a64f853ca3c2",
4+
"EvalID": "a9c5effc-2242-51b2-f1fe-054ee11ab189",
5+
"Name": "example.cache[0]",
6+
"NodeID": "cb1f6030-a220-4f92-57dc-7baaabdc3823",
7+
"PreviousAllocation": "516d2753-0513-cfc7-57ac-2d6fac18b9dc",
8+
"NextAllocation": "cd13d9b9-4f97-7184-c88b-7b451981616b",
9+
"RescheduleTracker": {
10+
"Events": [
11+
{
12+
"PrevAllocID": "516d2753-0513-cfc7-57ac-2d6fac18b9dc",
13+
"PrevNodeID": "9230cd3b-3bda-9a3f-82f9-b2ea8dedb20e",
14+
"RescheduleTime": 1517434161192946200,
15+
"Delay": 5000000000
16+
}
17+
]
18+
},
19+
"JobID": "example",
20+
"TaskGroup": "cache",
21+
"DesiredStatus": "run",
22+
"DesiredDescription": "",
23+
"ClientStatus": "running",
24+
"ClientDescription": "",
25+
"TaskStates": {
26+
"redis": {
27+
"State": "running",
28+
"Failed": false,
29+
"StartedAt": "2017-05-25T23:41:23.240184101Z",
30+
"FinishedAt": "0001-01-01T00:00:00Z",
31+
"Events": [
32+
{
33+
"Type": "Received",
34+
"Time": 1495755675956923000,
35+
"FailsTask": false,
36+
"RestartReason": "",
37+
"SetupError": "",
38+
"DriverError": "",
39+
"ExitCode": 0,
40+
"Signal": 0,
41+
"Message": "",
42+
"KillTimeout": 0,
43+
"KillError": "",
44+
"KillReason": "",
45+
"StartDelay": 0,
46+
"DownloadError": "",
47+
"ValidationError": "",
48+
"DiskLimit": 0,
49+
"FailedSibling": "",
50+
"VaultError": "",
51+
"TaskSignalReason": "",
52+
"TaskSignal": "",
53+
"DriverMessage": ""
54+
},
55+
{
56+
"Type": "Task Setup",
57+
"Time": 1495755675957466400,
58+
"FailsTask": false,
59+
"RestartReason": "",
60+
"SetupError": "",
61+
"DriverError": "",
62+
"ExitCode": 0,
63+
"Signal": 0,
64+
"Message": "Building Task Directory",
65+
"KillTimeout": 0,
66+
"KillError": "",
67+
"KillReason": "",
68+
"StartDelay": 0,
69+
"DownloadError": "",
70+
"ValidationError": "",
71+
"DiskLimit": 0,
72+
"FailedSibling": "",
73+
"VaultError": "",
74+
"TaskSignalReason": "",
75+
"TaskSignal": "",
76+
"DriverMessage": ""
77+
},
78+
{
79+
"Type": "Driver",
80+
"Time": 1495755675970286800,
81+
"FailsTask": false,
82+
"RestartReason": "",
83+
"SetupError": "",
84+
"DriverError": "",
85+
"ExitCode": 0,
86+
"Signal": 0,
87+
"Message": "",
88+
"KillTimeout": 0,
89+
"KillError": "",
90+
"KillReason": "",
91+
"StartDelay": 0,
92+
"DownloadError": "",
93+
"ValidationError": "",
94+
"DiskLimit": 0,
95+
"FailedSibling": "",
96+
"VaultError": "",
97+
"TaskSignalReason": "",
98+
"TaskSignal": "",
99+
"DriverMessage": "Downloading image redis:7"
100+
},
101+
{
102+
"Type": "Started",
103+
"Time": 1495755683227522000,
104+
"FailsTask": false,
105+
"RestartReason": "",
106+
"SetupError": "",
107+
"DriverError": "",
108+
"ExitCode": 0,
109+
"Signal": 0,
110+
"Message": "",
111+
"KillTimeout": 0,
112+
"KillError": "",
113+
"KillReason": "",
114+
"StartDelay": 0,
115+
"DownloadError": "",
116+
"ValidationError": "",
117+
"DiskLimit": 0,
118+
"FailedSibling": "",
119+
"VaultError": "",
120+
"TaskSignalReason": "",
121+
"TaskSignal": "",
122+
"DriverMessage": ""
123+
}
124+
]
125+
}
126+
},
127+
"CreateIndex": 9,
128+
"ModifyIndex": 13,
129+
"CreateTime": 1495755675944527600,
130+
"ModifyTime": 1495755675944527600
131+
}
132+
]

validation/run-all.sh

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,15 @@ if [ "$SKIPLOCAL" == 0 ]; then
4343
-r dev -profile test,docker \
4444
--outdir $(pwd)/nomad_temp/scratchdir/out
4545

46-
./run-pipeline.sh -c basic/nextflow.config bactopia/bactopia \
47-
--accession SRX4563634 --coverage 100 --genome_size 2800000 \
48-
-profile test,docker --outdir $(pwd)/nomad_temp/scratchdir/bactopia/outdir \
49-
--datasets_cache $(pwd)/nomad_temp/scratchdir/bactopia/datasets
46+
# Batctopia is failing with current version of nextflow due some bug in --max_cpus but cant find a fix
47+
# ./run-pipeline.sh -c basic/nextflow.config bactopia/bactopia \
48+
# --accession SRX4563634 --coverage 100 --genome_size median --max_cpus 2 \
49+
# -profile test,docker --outdir $(pwd)/nomad_temp/scratchdir/bactopia/outdir \
50+
# --datasets_cache $(pwd)/nomad_temp/scratchdir/bactopia/datasets
5051

5152
./run-pipeline.sh -c secrets/nextflow.config secrets/main.nf
5253

54+
./run-pipeline.sh -c tower/nextflow.config tower/main.nf
5355
else
5456
echo "skip local"
5557
fi

0 commit comments

Comments
 (0)