From 75fbed7f6365bb97de058ce35fbdb08a3038d190 Mon Sep 17 00:00:00 2001 From: Charlie Denton Date: Sat, 20 Jan 2024 17:59:31 +0000 Subject: [PATCH 1/2] Only sleep when input stream is waiting This change means that we don't delay reading the input stream when there is still data available to read from it. This provides a significant speed improvement to scripts which are passing a populated stream of data, rather than awaiting user input from stdin. See https://github.com/pyinvoke/invoke/issues/774 --- invoke/runners.py | 4 +++- tests/runners.py | 47 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/invoke/runners.py b/invoke/runners.py index f1c888f44..0d87eeb8c 100644 --- a/invoke/runners.py +++ b/invoke/runners.py @@ -894,8 +894,10 @@ def handle_stdin( # race conditions re: unread stdin.) if self.program_finished.is_set() and not data: break + # When data is None, we're waiting for input on stdin. # Take a nap so we're not chewing CPU. - time.sleep(self.input_sleep) + if data is None: + time.sleep(self.input_sleep) def should_echo_stdin(self, input_: IO, output: IO) -> bool: """ diff --git a/tests/runners.py b/tests/runners.py index 94c63d8b3..e55218aeb 100644 --- a/tests/runners.py +++ b/tests/runners.py @@ -8,6 +8,7 @@ import types from io import StringIO from io import BytesIO +from io import TextIOBase from itertools import chain, repeat from pytest import raises, skip @@ -1098,16 +1099,52 @@ def subclasses_can_override_input_sleep(self): class MyRunner(_Dummy): input_sleep = 0.007 + def fake_stdin_stream(): + # The value "foo" is eventually returned. + yield "f" + # None values simulate waiting for input on stdin. + yield None + yield "o" + yield None + yield "o" + yield None + # Once the stream is closed, stdin returns empty strings. + while True: + yield "" + + class FakeStdin(TextIOBase): + def __init__(self, stdin): + self.stream = stdin + + def read(self, size): + return next(self.stream) + with patch("invoke.runners.time") as mock_time: MyRunner(Context()).run( _, - in_stream=StringIO("foo"), + in_stream=FakeStdin(fake_stdin_stream()), out_stream=StringIO(), # null output to not pollute tests ) - # Just make sure the first few sleeps all look good. Can't know - # exact length of list due to stdin worker hanging out til end of - # process. Still worth testing more than the first tho. - assert mock_time.sleep.call_args_list[:3] == [call(0.007)] * 3 + # Just make sure the sleeps all look good. + # There are three calls because of the Nones in fake_stdin_stream. + assert mock_time.sleep.call_args_list == [call(0.007)] * 3 + + @mock_subprocess() + def populated_streams_do_not_sleep(self): + class MyRunner(_Dummy): + read_chunk_size = 1 + + runner = MyRunner(Context()) + with patch("invoke.runners.time") as mock_time: + with patch.object(runner, "wait"): + runner.run( + _, + in_stream=StringIO("lots of bytes to read"), + # null output to not pollute tests + out_stream=StringIO(), + ) + # Sleep should not be called before we break. + assert len(mock_time.sleep.call_args_list) == 0 class stdin_mirroring: def _test_mirroring(self, expect_mirroring, **kwargs): From 6f67de7a3f0c467f037f35d56b176d660a01d228 Mon Sep 17 00:00:00 2001 From: Charlie Denton Date: Mon, 22 Jan 2024 10:53:22 +0000 Subject: [PATCH 2/2] Read chunks in Python's default buffer size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I observed minor performance improvements by using this larger chunk read size. Python's docs describe `io.DEFAULT_BUFFER_SIZE` as: > An int containing the default buffer size used by the module’s > buffered I/O classes. open() uses the file’s blksize ... The docs on `blksize` say: > “Preferred” blocksize for efficient file system I/O. Writing to a file > in smaller chunks may cause an inefficient read-modify-rewrite. References: - https://docs.python.org/3/library/io.html#io.DEFAULT_BUFFER_SIZE - https://docs.python.org/3/library/os.html#os.stat_result.st_blksize --- invoke/runners.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/invoke/runners.py b/invoke/runners.py index 0d87eeb8c..04699ca4f 100644 --- a/invoke/runners.py +++ b/invoke/runners.py @@ -1,5 +1,6 @@ import errno import locale +import io import os import struct import sys @@ -71,7 +72,7 @@ class Runner: opts: Dict[str, Any] using_pty: bool - read_chunk_size = 1000 + read_chunk_size = io.DEFAULT_BUFFER_SIZE input_sleep = 0.01 def __init__(self, context: "Context") -> None: