Skip to content

Commit 8b2feae

Browse files
log_vc_info: handle long command output (#5821)
* Log the commands run by log_vc_info (debug level). * Handle the risk of large command output clogging up the buffer causing commands to hang.
1 parent e692a06 commit 8b2feae

File tree

4 files changed

+127
-9
lines changed

4 files changed

+127
-9
lines changed

changes.d/5821.fix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed issue where large uncommitted changes could cause `cylc install` to hang.

cylc/flow/install_plugins/log_vc_info.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,22 @@
6363
from pathlib import Path
6464
from subprocess import Popen, DEVNULL, PIPE
6565
from typing import (
66-
Any, Dict, Iterable, List, Optional, TYPE_CHECKING, TextIO, Union, overload
66+
Any,
67+
Dict,
68+
Iterable,
69+
List,
70+
Optional,
71+
TYPE_CHECKING,
72+
TextIO,
73+
Union,
74+
overload,
6775
)
6876

6977
from cylc.flow import LOG as _LOG, LoggerAdaptor
7078
from cylc.flow.exceptions import CylcError
7179
import cylc.flow.flags
80+
from cylc.flow.pipe_poller import pipe_poller
81+
from cylc.flow.util import format_cmd
7282
from cylc.flow.workflow_files import WorkflowFiles
7383

7484
if TYPE_CHECKING:
@@ -171,7 +181,7 @@ def get_vc_info(path: Union[Path, str]) -> Optional[Dict[str, Any]]:
171181
):
172182
LOG.debug(f"Source dir {path} is not a {vcs} repository")
173183
elif cylc.flow.flags.verbosity > -1:
174-
LOG.warning(f"$ {vcs} {' '.join(args)}\n{exc}")
184+
LOG.warning(f"$ {vcs} {format_cmd(args)}\n{exc}")
175185
continue
176186

177187
info['version control system'] = vcs
@@ -217,9 +227,7 @@ def _run_cmd(
217227
args: The args to pass to the version control command.
218228
cwd: Directory to run the command in.
219229
stdout: Where to redirect output (either PIPE or a
220-
text stream/file object). Note: only use PIPE for
221-
commands that will not generate a large output, otherwise
222-
the pipe might get blocked.
230+
text stream/file object).
223231
224232
Returns:
225233
Stdout output if stdout=PIPE, else None as the output has been
@@ -231,6 +239,7 @@ def _run_cmd(
231239
OSError: Non-zero return code for VCS command.
232240
"""
233241
cmd = [vcs, *args]
242+
LOG.debug(f'$ {format_cmd(cmd)}')
234243
try:
235244
proc = Popen( # nosec
236245
cmd,
@@ -245,13 +254,15 @@ def _run_cmd(
245254
# This will only be raised if the VCS command is not installed,
246255
# otherwise Popen() will succeed with a non-zero return code
247256
raise VCSNotInstalledError(vcs, exc)
248-
ret_code = proc.wait()
249-
out, err = proc.communicate()
250-
if ret_code:
257+
if stdout == PIPE:
258+
out, err = pipe_poller(proc, proc.stdout, proc.stderr)
259+
else:
260+
out, err = proc.communicate()
261+
if proc.returncode:
251262
if any(err.lower().startswith(msg) for msg in NO_BASE_ERRS[vcs]):
252263
# No base commit in repo
253264
raise VCSMissingBaseError(vcs, cwd)
254-
raise OSError(ret_code, err)
265+
raise OSError(proc.returncode, err)
255266
return out
256267

257268

cylc/flow/pipe_poller.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE.
2+
# Copyright (C) NIWA & British Crown (Met Office) & Contributors.
3+
#
4+
# This program is free software: you can redistribute it and/or modify
5+
# it under the terms of the GNU General Public License as published by
6+
# the Free Software Foundation, either version 3 of the License, or
7+
# (at your option) any later version.
8+
#
9+
# This program is distributed in the hope that it will be useful,
10+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
# GNU General Public License for more details.
13+
#
14+
# You should have received a copy of the GNU General Public License
15+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
16+
17+
"""Utility for preventing pipes from getting clogged up.
18+
19+
If you're reading files from Popen (i.e. to extract command output) where the
20+
command output has the potential to be long-ish, then you should use this
21+
function to protect against the buffer filling up.
22+
23+
Note, there is a more advanced version of this baked into the subprocpool.
24+
"""
25+
26+
from select import select
27+
28+
29+
def pipe_poller(proc, *files, chunk_size=4096):
30+
"""Read from a process without hitting buffer issues.
31+
32+
Standin for subprocess.Popen.communicate.
33+
34+
When PIPE'ing from subprocesses, the output goes into a buffer. If the
35+
buffer gets full, the subprocess will hang trying to write to it.
36+
37+
This function polls the process, reading output from the buffers into
38+
memory to prevent them from filling up.
39+
40+
Args:
41+
proc:
42+
The process to poll.
43+
files:
44+
The files you want to read from, likely anything you've directed to
45+
PIPE.
46+
chunk_size:
47+
The amount of text to read from the buffer on each pass.
48+
49+
Returns:
50+
tuple - The text read from each of the files in the order they were
51+
specified.
52+
53+
"""
54+
_files = {
55+
file: b'' if 'b' in getattr(file, 'mode', 'r') else ''
56+
for file in files
57+
}
58+
59+
def _read(timeout=1.0):
60+
# read any data from files
61+
nonlocal chunk_size, files
62+
for file in select(list(files), [], [], timeout)[0]:
63+
buffer = file.read(chunk_size)
64+
if len(buffer) > 0:
65+
_files[file] += buffer
66+
67+
while proc.poll() is None:
68+
# read from the buffers
69+
_read()
70+
# double check the buffers now that the process has finished
71+
_read(timeout=0.01)
72+
73+
return tuple(_files.values())

tests/unit/test_pipe_poller.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE.
2+
# Copyright (C) NIWA & British Crown (Met Office) & Contributors.
3+
#
4+
# This program is free software: you can redistribute it and/or modify
5+
# it under the terms of the GNU General Public License as published by
6+
# the Free Software Foundation, either version 3 of the License, or
7+
# (at your option) any later version.
8+
#
9+
# This program is distributed in the hope that it will be useful,
10+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
# GNU General Public License for more details.
13+
#
14+
# You should have received a copy of the GNU General Public License
15+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
16+
17+
from subprocess import Popen, PIPE
18+
19+
from cylc.flow.pipe_poller import pipe_poller
20+
21+
22+
def test_pipe_poller_str():
23+
proc = Popen(['echo', 'Hello World!'], stdout=PIPE, text=True)
24+
(stdout,) = pipe_poller(proc, proc.stdout)
25+
assert proc.returncode == 0
26+
assert stdout == 'Hello World!\n'
27+
28+
29+
def test_pipe_poller_bytes():
30+
proc = Popen(['echo', 'Hello World!'], stdout=PIPE, text=False)
31+
(stdout,) = pipe_poller(proc, proc.stdout)
32+
assert proc.returncode == 0
33+
assert stdout == b'Hello World!\n'

0 commit comments

Comments
 (0)