Skip to content

Commit 24f89fc

Browse files
authored
🤖 Merge 8.5.x-sync into master (#6996)
2 parents 67a0864 + 424782b commit 24f89fc

File tree

8 files changed

+162
-36
lines changed

8 files changed

+162
-36
lines changed

CHANGES.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@ $ towncrier create <PR-number>.<break|feat|fix>.md --content "Short description"
1111

1212
<!-- towncrier release notes start -->
1313

14+
## __cylc-8.5.4 (Released 2025-09-19)__
15+
16+
### 🔧 Fixes
17+
18+
[#6817](https://github.com/cylc/cylc-flow/pull/6817) - Fixes two rare bugs associated with reloading the workflow configuration after removing tasks or xtriggers.
19+
20+
[#6990](https://github.com/cylc/cylc-flow/pull/6990) - Fix a bug where setting 'batch system' and not setting host could lead to the wrong platform being selected.
21+
1422
## __cylc-8.5.3 (Released 2025-09-09)__
1523

1624
### 🔧 Fixes

changes.d/6990.fix.md

Lines changed: 0 additions & 1 deletion
This file was deleted.

cylc/flow/data_store_mgr.py

Lines changed: 77 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,37 @@
215215
WORKFLOW: {'log_records': 10}
216216
}
217217

218+
# internal runtime to protobuf field name mapping
219+
RUNTIME_CFG_MAP_TO_FIELD = {
220+
'completion': 'completion',
221+
'directives': 'directives',
222+
'environment': 'environment',
223+
'env-script': 'env_script',
224+
'err-script': 'err_script',
225+
'execution polling intervals': 'execution_polling_intervals',
226+
'execution retry delays': 'execution_retry_delays',
227+
'execution time limit': 'execution_time_limit',
228+
'exit-script': 'exit_script',
229+
'init-script': 'init_script',
230+
'outputs': 'outputs',
231+
'post-script': 'post_script',
232+
'platform': 'platform',
233+
'pre-script': 'pre_script',
234+
'run mode': 'run_mode',
235+
'script': 'script',
236+
'submission polling intervals': 'submission_polling_intervals',
237+
'submission retry delays': 'submission_retry_delays',
238+
'work sub-directory': 'work_sub_dir',
239+
}
240+
RUNTIME_LIST_JOINS = {
241+
'execution polling intervals',
242+
'execution retry delays',
243+
'submission polling intervals',
244+
'submission retry delays',
245+
}
246+
RUNTIME_JSON_DUMPS = {'directives', 'environment', 'outputs'}
247+
RUNTIME_STRINGIFYS = {'execution time limit'}
248+
218249

219250
def setbuff(obj, key, value):
220251
"""Set an attribute on a protobuf object.
@@ -319,6 +350,41 @@ def runtime_from_config(rtconfig):
319350
)
320351

321352

353+
def runtime_from_partial(rtconfig, runtimeold: Optional[PbRuntime] = None):
354+
"""Populate runtime object from partial/full config.
355+
356+
Potentially slower than the non-partial one, due to the setattr calls,
357+
but does not require any particular fields to be present.
358+
"""
359+
runtime = PbRuntime()
360+
if runtimeold is not None:
361+
runtime.CopyFrom(runtimeold)
362+
for key, val in rtconfig.items():
363+
if val is None or key not in RUNTIME_CFG_MAP_TO_FIELD:
364+
continue
365+
elif key in RUNTIME_LIST_JOINS:
366+
setattr(runtime, RUNTIME_CFG_MAP_TO_FIELD[key], listjoin(val))
367+
elif key in RUNTIME_JSON_DUMPS:
368+
setattr(
369+
runtime,
370+
RUNTIME_CFG_MAP_TO_FIELD[key],
371+
json.dumps(
372+
[
373+
{'key': k, 'value': v}
374+
for k, v in val.items()
375+
]
376+
)
377+
)
378+
elif key == 'platform' and isinstance(val, dict):
379+
with suppress(KeyError, TypeError):
380+
setattr(runtime, RUNTIME_CFG_MAP_TO_FIELD[key], val['name'])
381+
elif key in RUNTIME_STRINGIFYS:
382+
setattr(runtime, RUNTIME_CFG_MAP_TO_FIELD[key], str(val or ''))
383+
else:
384+
setattr(runtime, RUNTIME_CFG_MAP_TO_FIELD[key], val)
385+
return runtime
386+
387+
322388
def reset_protobuf_object(msg_class, msg_orig):
323389
"""Reset object to clear memory build-up."""
324390
# See: https://github.com/protocolbuffers/protobuf/issues/19674
@@ -1574,6 +1640,9 @@ def _process_internal_task_proxy(
15741640
ext_trig.satisfied = satisfied
15751641

15761642
for label, satisfied in itask.state.xtriggers.items():
1643+
# Reload may have removed xtrigger of orphan task
1644+
if label not in self.schd.xtrigger_mgr.xtriggers.functx_map:
1645+
continue
15771646
sig = self.schd.xtrigger_mgr.get_xtrig_ctx(
15781647
itask, label).get_signature()
15791648
xtrig = tproxy.xtriggers[f'{label}={sig}']
@@ -1657,13 +1726,9 @@ def insert_job(
16571726
)
16581727
# Not all fields are populated with some submit-failures,
16591728
# so use task cfg as base.
1660-
j_cfg = pdeepcopy(self._apply_broadcasts_to_runtime(
1661-
tp_tokens,
1662-
self.schd.config.cfg['runtime'][tproxy.name]
1663-
))
1664-
for key, val in job_conf.items():
1665-
j_cfg[key] = val
1666-
j_buf.runtime.CopyFrom(runtime_from_config(j_cfg))
1729+
j_buf.runtime.CopyFrom(
1730+
runtime_from_partial(job_conf, tproxy.runtime)
1731+
)
16671732

16681733
# Add in log files.
16691734
j_buf.job_log_dir = get_task_job_log(
@@ -2327,16 +2392,16 @@ def delta_broadcast(self):
23272392
self.updates_pending = True
23282393

23292394
def _generate_broadcast_node_deltas(self, node_data, node_type):
2330-
cfg = self.schd.config.cfg
2395+
rt_cfg = self.schd.config.cfg['runtime']
23312396
# NOTE: node_data may change during operation so make a copy
23322397
# see https://github.com/cylc/cylc-flow/pull/6397
23332398
for node_id, node in list(node_data.items()):
2399+
# Avoid removed tasks with deltas queued during reload.
2400+
if node.name not in rt_cfg:
2401+
continue
23342402
tokens = Tokens(node_id)
23352403
new_runtime = runtime_from_config(
2336-
self._apply_broadcasts_to_runtime(
2337-
tokens,
2338-
cfg['runtime'][node.name]
2339-
)
2404+
self._apply_broadcasts_to_runtime(tokens, rt_cfg[node.name])
23402405
)
23412406
new_sruntime = new_runtime.SerializeToString(
23422407
deterministic=True

cylc/flow/network/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ class ResponseDict(TypedDict, total=False):
6565
different versions of Cylc 8.
6666
"""
6767
data: object
68-
"""For most Cylc commands that issue GQL mutations, the data field will
69-
look like:
68+
"""For most Cylc commands that issue GraphQL mutations, the data field
69+
will look like:
7070
data: {
7171
<mutationName1>: {
7272
result: [

cylc/flow/network/schema.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
FAMILIES,
6363
FAMILY_PROXIES,
6464
JOBS,
65+
RUNTIME_CFG_MAP_TO_FIELD,
6566
TASK_PROXIES,
6667
TASKS,
6768
)
@@ -874,23 +875,14 @@ class Meta:
874875

875876

876877
RUNTIME_FIELD_TO_CFG_MAP = {
877-
**{
878-
k: k.replace('_', ' ') for k in Runtime.__dict__
879-
if not k.startswith('_')
880-
},
881-
'init_script': 'init-script',
882-
'env_script': 'env-script',
883-
'err_script': 'err-script',
884-
'exit_script': 'exit-script',
885-
'pre_script': 'pre-script',
886-
'post_script': 'post-script',
887-
'work_sub_dir': 'work sub-directory',
878+
v: k
879+
for k, v in RUNTIME_CFG_MAP_TO_FIELD.items()
888880
}
889-
"""Map GQL Runtime fields' names to workflow config setting names."""
881+
"""Map Pb/GraphQL Runtime fields' names to workflow config setting names."""
890882

891883

892884
def runtime_schema_to_cfg(runtime: dict) -> dict:
893-
"""Covert GQL Runtime field names to workflow config setting names and
885+
"""Convert GraphQL Runtime field names to workflow config setting names and
894886
perform any necessary processing on the values."""
895887
# We have to manually lowercase the run_mode field because we don't define
896888
# a proper schema for BroadcastSetting (it's just GenericScalar) so

tests/integration/test_data_store_mgr.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,11 @@ def job_config(schd):
8282
'script': 'sleep 5; echo "I come in peace"',
8383
'work_d': None,
8484
'directives': {},
85-
'environment': {},
85+
'environment': {"FOO": "foo"},
8686
'param_var': {},
8787
'platform': {'name': 'platform'},
88+
'execution retry delays': [10.0, 20.0],
89+
'execution time limit': 30.0,
8890
}
8991

9092

tests/integration/test_reload.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,69 @@ async def test_reload_global_platform_group(
308308
assert platform['meta']['x'] == '2'
309309

310310

311+
async def test_orphan_reload(
312+
flow,
313+
scheduler,
314+
start,
315+
log_filter,
316+
):
317+
"""Reload should not fail because of orphaned tasks.
318+
319+
The following aspects of reload-with-orphans are tested:
320+
- Broadcast deltas generated after reload.
321+
https://github.com/cylc/cylc-flow/issues/6814
322+
- Removal of both xtrigger and associated active/incomplete task.
323+
https://github.com/cylc/cylc-flow/issues/6815
324+
325+
(Orphans being active/incomplete tasks removed from reloaded workflow cfg.)
326+
"""
327+
before = {
328+
'scheduling': {
329+
'initial cycle point': '20010101T0000Z',
330+
'graph': {
331+
'R1': '''
332+
foo => bar
333+
@wall_clock => bar
334+
'''
335+
}
336+
}
337+
}
338+
after = {
339+
'scheduling': {
340+
'initial cycle point': '20010101T0000Z',
341+
'graph': {
342+
'R1': 'foo'
343+
}
344+
}
345+
}
346+
id_ = flow(before)
347+
schd = scheduler(id_)
348+
async with start(schd):
349+
# spawn in bar
350+
foo = schd.pool._get_task_by_id('20010101T0000Z/foo')
351+
schd.pool.task_events_mgr.process_message(foo, 'INFO', 'succeeded')
352+
bar = schd.pool._get_task_by_id('20010101T0000Z/bar')
353+
# set bar to failed
354+
schd.pool.task_events_mgr.process_message(bar, 'INFO', 'failed')
355+
356+
# Save our progress
357+
schd.workflow_db_mgr.put_task_pool(schd.pool)
358+
359+
# Change workflow to one without bar and xtrigger
360+
flow(after, workflow_id=id_)
361+
362+
# reload the workflow
363+
await commands.run_cmd(commands.reload_workflow(schd))
364+
365+
# test broadcast delta over orphaned task
366+
schd.data_store_mgr.delta_broadcast()
367+
368+
# the reload should have completed successfully
369+
assert log_filter(
370+
contains=('Reload completed')
371+
)
372+
373+
311374
async def test_data_store_tproxy(flow, scheduler, start):
312375
"""Check N>0 task proxy in data store has correct info on reload.
313376

tests/unit/network/test_schema.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,12 @@ def test_sort_args(elements, sort_args, expected_result):
9696
assert elements == expected_result
9797

9898

99-
@pytest.mark.parametrize(
100-
'field_name', RUNTIME_FIELD_TO_CFG_MAP.keys()
101-
)
102-
def test_runtime_field_to_cfg_map(field_name: str):
99+
def test_runtime_field_to_cfg_map():
103100
"""Ensure the Runtime type's fields can be mapped back to the workflow
104101
config."""
105-
cfg_name = RUNTIME_FIELD_TO_CFG_MAP[field_name]
106-
assert field_name in Runtime.__dict__
107-
assert WORKFLOW_SPEC.get('runtime', '__MANY__', cfg_name)
102+
assert set(RUNTIME_FIELD_TO_CFG_MAP) == set(Runtime._meta.fields)
103+
for cfg_name in RUNTIME_FIELD_TO_CFG_MAP.values():
104+
assert WORKFLOW_SPEC.get('runtime', '__MANY__', cfg_name)
108105

109106

110107
@pytest.mark.parametrize('runtime_dict,expected', [

0 commit comments

Comments
 (0)