Skip to content

Commit fe43c7c

Browse files
authored
Merge pull request #5872 from MetRonnie/cylc-clean
`cylc clean` remote timeout improvements
2 parents ec8eec8 + 5518b48 commit fe43c7c

File tree

6 files changed

+135
-19
lines changed

6 files changed

+135
-19
lines changed

changes.d/5872.feat.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improvements to `cylc clean` remote timeout handling.

cylc/flow/clean.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def init_clean(id_: str, opts: 'Values') -> None:
187187

188188
if platform_names and platform_names != {'localhost'}:
189189
remote_clean(
190-
id_, platform_names, opts.rm_dirs, opts.remote_timeout
190+
id_, platform_names, opts.remote_timeout, opts.rm_dirs
191191
)
192192

193193
if not opts.remote_only:
@@ -338,8 +338,8 @@ def _clean_using_glob(
338338
def remote_clean(
339339
id_: str,
340340
platform_names: Iterable[str],
341+
timeout: str,
341342
rm_dirs: Optional[List[str]] = None,
342-
timeout: str = '120'
343343
) -> None:
344344
"""Run subprocesses to clean a workflow on its remote install targets
345345
(skip localhost), given a set of platform names to look up.
@@ -348,8 +348,9 @@ def remote_clean(
348348
id_: Workflow name.
349349
platform_names: List of platform names to look up in the global
350350
config, in order to determine the install targets to clean on.
351+
timeout: ISO 8601 duration or number of seconds to wait before
352+
cancelling.
351353
rm_dirs: Sub dirs to remove instead of the whole run dir.
352-
timeout: Number of seconds to wait before cancelling.
353354
"""
354355
try:
355356
install_targets_map = (
@@ -358,6 +359,7 @@ def remote_clean(
358359
raise PlatformLookupError(
359360
f"Cannot clean {id_} on remote platforms as the workflow database "
360361
f"is out of date/inconsistent with the global config - {exc}")
362+
361363
queue: Deque[RemoteCleanQueueTuple] = deque()
362364
remote_clean_cmd = partial(
363365
_remote_clean_cmd, id_=id_, rm_dirs=rm_dirs, timeout=timeout
@@ -376,7 +378,7 @@ def remote_clean(
376378
remote_clean_cmd(platform=platforms[0]), target, platforms
377379
)
378380
)
379-
failed_targets: Dict[str, PlatformError] = {}
381+
failed_targets: Dict[str, Union[PlatformError, str]] = {}
380382
# Handle subproc pool results almost concurrently:
381383
while queue:
382384
item = queue.popleft()
@@ -387,7 +389,12 @@ def remote_clean(
387389
out, err = item.proc.communicate()
388390
if out:
389391
LOG.info(f"[{item.install_target}]\n{out}")
390-
if ret_code:
392+
if ret_code == 124:
393+
failed_targets[item.install_target] = (
394+
f"cylc clean timed out after {timeout}s. You can increase "
395+
"this timeout using the --timeout option."
396+
)
397+
elif ret_code:
391398
this_platform = item.platforms.pop(0)
392399
excp = PlatformError(
393400
PlatformError.MSG_TIDY,
@@ -415,9 +422,9 @@ def remote_clean(
415422
LOG.debug(f"[{item.install_target}]\n{err}")
416423
sleep(0.2)
417424
if failed_targets:
418-
for target, excp in failed_targets.items():
425+
for target, info in failed_targets.items():
419426
LOG.error(
420-
f"Could not clean {id_} on install target: {target}\n{excp}"
427+
f"Could not clean {id_} on install target: {target}\n{info}"
421428
)
422429
raise CylcError(f"Remote clean failed for {id_}")
423430

cylc/flow/scripts/clean.py

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@
6464
import sys
6565
from typing import TYPE_CHECKING, Iterable, List, Tuple
6666

67+
from metomi.isodatetime.exceptions import ISO8601SyntaxError
68+
from metomi.isodatetime.parsers import DurationParser
69+
6770
from cylc.flow import LOG
6871
from cylc.flow.clean import init_clean, get_contained_workflows
6972
from cylc.flow.exceptions import CylcError, InputError
@@ -120,9 +123,11 @@ def get_option_parser():
120123

121124
parser.add_option(
122125
'--timeout',
123-
help=("The number of seconds to wait for cleaning to take place on "
124-
r"remote hosts before cancelling. Default: %default."),
125-
action='store', default='120', dest='remote_timeout'
126+
help=(
127+
"The length of time to wait for cleaning to take place on "
128+
r"remote hosts before cancelling. Default: %default."
129+
),
130+
action='store', default='PT5M', dest='remote_timeout'
126131
)
127132

128133
parser.add_option(
@@ -138,6 +143,24 @@ def get_option_parser():
138143
CleanOptions = Options(get_option_parser())
139144

140145

146+
def parse_timeout(opts: 'Values') -> None:
    """Parse timeout as ISO 8601 duration or number of seconds.

    Normalise ``opts.remote_timeout`` in place so that it holds a string
    containing an integer number of seconds. A falsy value (e.g. an empty
    string) is left untouched.

    Args:
        opts: Parsed command line options. The ``remote_timeout`` attribute
            is read and, if set, overwritten with the normalised value.

    Raises:
        InputError: If the value is neither a valid ISO 8601 duration nor
            an integer number of seconds.
    """
    if not opts.remote_timeout:
        return
    try:
        timeout = int(
            DurationParser().parse(opts.remote_timeout).get_seconds()
        )
    except ISO8601SyntaxError:
        # Not an ISO 8601 duration; fall back to a plain number of seconds.
        try:
            timeout = int(opts.remote_timeout)
        except ValueError:
            # The user only needs to see the InputError; the parser
            # exceptions that led here are implementation detail, so
            # suppress the implicit exception context.
            raise InputError(
                f"Invalid timeout: {opts.remote_timeout}. Must be "
                "an ISO 8601 duration or number of seconds."
            ) from None
    opts.remote_timeout = str(timeout)
162+
163+
141164
def prompt(workflows: Iterable[str]) -> None:
142165
"""Ask user if they want to clean the given set of workflows."""
143166
print("Would clean the following workflows:")
@@ -218,7 +241,15 @@ async def run(*ids: str, opts: 'Values') -> None:
218241

219242

220243
@cli_function(get_option_parser)
221-
def main(_, opts: 'Values', *ids: str):
244+
def main(_parser, opts: 'Values', *ids: str):
245+
_main(opts, *ids)
246+
247+
248+
def _main(opts: 'Values', *ids: str):
249+
"""Run the clean command.
250+
251+
This is a separate function for ease of testing.
252+
"""
222253
if cylc.flow.flags.verbosity < 2:
223254
set_timestamps(LOG, False)
224255

@@ -227,4 +258,6 @@ def main(_, opts: 'Values', *ids: str):
227258
"--local and --remote options are mutually exclusive"
228259
)
229260

261+
parse_timeout(opts)
262+
230263
asyncio.run(run(*ids, opts=opts))

tests/functional/cylc-clean/01-remote.t

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,9 @@ __TREE__
108108

109109
# -----------------------------------------------------------------------------
110110

111-
TEST_NAME="cylc-clean"
112-
run_ok "$TEST_NAME" cylc clean "$WORKFLOW_NAME"
111+
TEST_NAME="cylc-clean-ok"
112+
run_ok "$TEST_NAME" cylc clean "$WORKFLOW_NAME" --timeout PT2M
113+
# (timeout opt is covered by unit tests but no harm double-checking here)
113114
dump_std "$TEST_NAME"
114115

115116
TEST_NAME="run-dir-not-exist-post-clean.local"

tests/unit/scripts/test_clean.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,14 @@
1616
# You should have received a copy of the GNU General Public License
1717
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1818

19-
from typing import Callable, List
19+
from typing import Callable, List, Type, Union
2020

2121
import pytest
2222

23-
from cylc.flow.scripts.clean import CleanOptions, scan, run
23+
from cylc.flow.exceptions import InputError
24+
from cylc.flow.scripts.clean import (
25+
CleanOptions, _main, parse_timeout, scan, run
26+
)
2427

2528

2629
async def test_scan(tmp_run_dir):
@@ -88,3 +91,40 @@ async def test_multi(tmp_run_dir: Callable, mute: List[str]):
8891
mute.clear()
8992
await run('*', opts=opts)
9093
assert mute == ['bar/pub/beer', 'baz/run1', 'foo']
94+
95+
96+
@pytest.mark.parametrize(
    'timeout, expected',
    [
        ('100', '100'),
        ('PT1M2S', '62'),
        ('', ''),
        ('oopsie', InputError),
        (' ', InputError),
    ]
)
def test_parse_timeout(
    timeout: str,
    expected: Union[str, Type[InputError]]
):
    """Check parse_timeout() against valid and invalid timeout strings.

    For string expectations, opts.remote_timeout must equal the expected
    value after parsing; for InputError, parsing must raise it.
    """
    opts = CleanOptions(remote_timeout=timeout)

    if expected is not InputError:
        # Valid input: the option is rewritten in place.
        parse_timeout(opts)
        assert opts.remote_timeout == expected
        return

    # Invalid input: parsing must be rejected.
    with pytest.raises(expected):
        parse_timeout(opts)
117+
118+
119+
@pytest.mark.parametrize(
    'opts, expected_msg',
    [
        ({'local_only': True, 'remote_only': True}, "mutually exclusive"),
        ({'remote_timeout': 'oops'}, "Invalid timeout"),
    ]
)
def test_bad_user_input(opts: dict, expected_msg: str, mute):
    """Check that _main() raises InputError with the expected message
    for each set of bad options."""
    with pytest.raises(InputError) as exc_info:
        bad_opts = CleanOptions(**opts)
        _main(bad_opts, 'blah')
    error_text = str(exc_info.value)
    assert expected_msg in error_text

tests/unit/test_clean.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import shutil
2020
from glob import iglob
2121
from pathlib import Path
22+
from subprocess import Popen
2223
from typing import (
2324
Any,
2425
Callable,
@@ -274,7 +275,8 @@ def test_init_clean__rm_dirs(
274275
init_clean(id_, opts=opts)
275276
mock_clean.assert_called_with(id_, run_dir, expected_clean)
276277
mock_remote_clean.assert_called_with(
277-
id_, platforms, expected_remote_clean, opts.remote_timeout)
278+
id_, platforms, opts.remote_timeout, expected_remote_clean
279+
)
278280

279281

280282
@pytest.mark.parametrize(
@@ -920,7 +922,7 @@ def test_remote_clean(
920922
# Remove randomness:
921923
monkeymock('cylc.flow.clean.shuffle')
922924

923-
def mocked_remote_clean_cmd_side_effect(id_, platform, rm_dirs, timeout):
925+
def mocked_remote_clean_cmd_side_effect(id_, platform, timeout, rm_dirs):
924926
proc_ret_code = 0
925927
if failed_platforms and platform['name'] in failed_platforms:
926928
proc_ret_code = failed_platforms[platform['name']]
@@ -942,11 +944,13 @@ def mocked_remote_clean_cmd_side_effect(id_, platform, rm_dirs, timeout):
942944
if exc_expected:
943945
with pytest.raises(CylcError) as exc:
944946
cylc_clean.remote_clean(
945-
id_, platform_names, rm_dirs, timeout='irrelevant')
947+
id_, platform_names, timeout='irrelevant', rm_dirs=rm_dirs
948+
)
946949
assert "Remote clean failed" in str(exc.value)
947950
else:
948951
cylc_clean.remote_clean(
949-
id_, platform_names, rm_dirs, timeout='irrelevant')
952+
id_, platform_names, timeout='irrelevant', rm_dirs=rm_dirs
953+
)
950954
for msg in expected_err_msgs:
951955
assert log_filter(caplog, level=logging.ERROR, contains=msg)
952956
if expected_platforms:
@@ -960,6 +964,36 @@ def mocked_remote_clean_cmd_side_effect(id_, platform, rm_dirs, timeout):
960964
assert f"{p_name} - {PlatformError.MSG_TIDY}" in caplog.text
961965

962966

967+
def test_remote_clean__timeout(
    monkeymock: MonkeyMock,
    monkeypatch: pytest.MonkeyPatch,
    caplog: pytest.LogCaptureFixture,
):
    """Check remote_clean() reports a timeout clearly for return code 124.

    The remote clean subprocess is replaced by a mock whose poll() returns
    124 and which produces no output.
    """
    caplog.set_level(logging.ERROR, CYLC_LOG)

    # Stand-in for the subprocess: already finished, exit code 124,
    # empty stdout and stderr.
    timed_out_proc = mock.Mock(
        spec=Popen, poll=lambda: 124, communicate=lambda: ('', '')
    )
    monkeymock(
        'cylc.flow.clean._remote_clean_cmd',
        spec=_remote_clean_cmd,
        return_value=timed_out_proc,
    )
    # Single install target served by a single platform.
    monkeypatch.setattr(
        'cylc.flow.clean.get_install_target_to_platforms_map',
        lambda *a, **k: {'picard': [PLATFORMS['stargazer']]}
    )

    with pytest.raises(CylcError):
        cylc_clean.remote_clean(
            'blah', platform_names=['blah'], timeout='blah'
        )

    logged = caplog.text
    assert "cylc clean timed out" in logged
    # No need to log the remote clean cmd etc. for timeout
    logged_lower = logged.lower()
    assert "ssh" not in logged_lower
    assert "stderr" not in logged_lower
995+
996+
963997
@pytest.mark.parametrize(
964998
'rm_dirs, expected_args',
965999
[

0 commit comments

Comments
 (0)