Skip to content

Commit d0bc4cb

Browse files
authored
Ensure jobs are created if task is in waiting state (#6176)
1 parent 1af3999 commit d0bc4cb

File tree

3 files changed

+86
-4
lines changed

3 files changed

+86
-4
lines changed

changes.d/6176.fix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix bug where jobs which fail to submit are not shown in GUI/TUI if submission retries are set.

cylc/flow/task_events_mgr.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1535,11 +1535,21 @@ def _insert_task_job(
15351535
else:
15361536
job_conf = itask.jobs[-1]
15371537

1538+
# Job status should be task status unless task is awaiting a
1539+
# retry:
1540+
if itask.state.status == TASK_STATUS_WAITING and itask.try_timers:
1541+
job_status = (
1542+
TASK_STATUS_SUBMITTED if submit_status == 0
1543+
else TASK_STATUS_SUBMIT_FAILED
1544+
)
1545+
else:
1546+
job_status = itask.state.status
1547+
15381548
# insert job into data store
15391549
self.data_store_mgr.insert_job(
15401550
itask.tdef.name,
15411551
itask.point,
1542-
itask.state.status,
1552+
job_status,
15431553
{
15441554
**job_conf,
15451555
# NOTE: the platform name may have changed since task

tests/integration/test_task_events_mgr.py

Lines changed: 74 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,19 @@
1414
# You should have received a copy of the GNU General Public License
1515
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1616

17-
from cylc.flow.task_events_mgr import TaskJobLogsRetrieveContext
18-
from cylc.flow.scheduler import Scheduler
19-
17+
from itertools import product
2018
import logging
2119
from typing import Any as Fixture
2220

21+
from cylc.flow.task_events_mgr import TaskJobLogsRetrieveContext
22+
from cylc.flow.scheduler import Scheduler
23+
from cylc.flow.data_store_mgr import (
24+
JOBS,
25+
TASK_STATUSES_ORDERED,
26+
TASK_STATUS_WAITING,
27+
TASK_STATUS_SUBMIT_FAILED,
28+
)
29+
2330

2431
async def test_process_job_logs_retrieval_warns_no_platform(
2532
one_conf: Fixture, flow: Fixture, scheduler: Fixture, run: Fixture,
@@ -99,3 +106,67 @@ async def test__insert_task_job(flow, one_conf, scheduler, start, validate):
99106
i.submit_num for i
100107
in schd.data_store_mgr.added['jobs'].values()
101108
] == [1, 2]
109+
110+
111+
async def test__always_insert_task_job(
112+
flow, scheduler, mock_glbl_cfg, start, run
113+
):
114+
"""Insert Task Job _Always_ inserts a task into the data store.
115+
116+
Bug https://github.com/cylc/cylc-flow/issues/6172 was caused
117+
by passing task state to data_store_mgr.insert_job: Where
118+
a submission retry was in progress the task state would be
119+
"waiting" which caused the data_store_mgr.insert_job
120+
to return without adding the task to the data store.
121+
This is testing two different cases:
122+
123+
* Could not select host from platform
124+
* Could not select host from platform group
125+
"""
126+
global_config = """
127+
[platforms]
128+
[[broken1]]
129+
hosts = no-such-host-1
130+
[[broken2]]
131+
hosts = no-such-host-2
132+
[platform groups]
133+
[[broken]]
134+
platforms = broken1
135+
"""
136+
mock_glbl_cfg('cylc.flow.platforms.glbl_cfg', global_config)
137+
138+
id_ = flow({
139+
'scheduling': {'graph': {'R1': 'broken & broken2'}},
140+
'runtime': {
141+
'root': {'submission retry delays': 'PT10M'},
142+
'broken': {'platform': 'broken'},
143+
'broken2': {'platform': 'broken2'}
144+
}
145+
})
146+
147+
schd = scheduler(id_, run_mode='live')
148+
schd.bad_hosts = {'no-such-host-1', 'no-such-host-2'}
149+
async with start(schd):
150+
schd.task_job_mgr.submit_task_jobs(
151+
schd.workflow,
152+
schd.pool.get_tasks(),
153+
schd.server.curve_auth,
154+
schd.server.client_pub_key_dir,
155+
is_simulation=False
156+
)
157+
158+
# Both tasks are in a waiting state:
159+
assert all(
160+
i.state.status == TASK_STATUS_WAITING
161+
for i in schd.pool.get_tasks())
162+
163+
# Both tasks have updated the data store with info
164+
# about a failed job:
165+
updates = {
166+
k.split('//')[-1]: v.state
167+
for k, v in schd.data_store_mgr.updated[JOBS].items()
168+
}
169+
assert updates == {
170+
'1/broken/01': 'submit-failed',
171+
'1/broken2/01': 'submit-failed'
172+
}

0 commit comments

Comments
 (0)