Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 23 additions & 14 deletions Lib/_pyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1454,6 +1454,17 @@ def write(self, b):
return BufferedWriter.write(self, b)


def _new_buffersize(bytes_read):
# Parallels _io/fileio.c new_buffersize
if bytes_read > 65536:
addend = bytes_read >> 3
else:
addend = 256 + bytes_read
if addend < DEFAULT_BUFFER_SIZE:
addend = DEFAULT_BUFFER_SIZE
return bytes_read + addend


class FileIO(RawIOBase):
_fd = -1
_created = False
Expand Down Expand Up @@ -1672,22 +1683,20 @@ def readall(self):
except OSError:
pass

result = bytearray()
while True:
if len(result) >= bufsize:
bufsize = len(result)
bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
n = bufsize - len(result)
try:
chunk = os.read(self._fd, n)
except BlockingIOError:
if result:
break
result = bytearray(bufsize)
bytes_read = 0
try:
while n := os.readinto(self._fd, memoryview(result)[bytes_read:]):
bytes_read += n
if bytes_read >= len(result):
result.resize(_new_buffersize(bytes_read))
except BlockingIOError:
if not bytes_read:
return None
if not chunk: # reached the end of the file
break
result += chunk

assert len(result) - bytes_read >= 1, \
"os.readinto buffer size 0 will result in erroneous EOF / returns 0"
result.resize(bytes_read)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

result = memoryview(result)[bytes_read:] would avoid a truncation which can imply a memory copy in the worst case, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the resize "shrink" in bytearray doesn't actually resize unless the buffer's "capacity" is 2x the requested size (https://github.com/python/cpython/blob/main/Objects/bytearrayobject.c#L201-L214). Just updates its internal "this is how long the bytes is" counter (which for things like full-file readall with known size, this should already be just one byte over the right size).

My plan currently is to make it so bytes(bytearray(10)) and bytearray(b'\0' * 10) both don't copy (Ongoing discussion in https://discuss.python.org/t/add-zero-copy-conversion-of-bytearray-to-bytes-by-providing-bytes/79164). Having a memoryview would mean there's more than one reference to the bytearray, and I couldn't do / use that optimization.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I'm fine with using result.resize() here.

return bytes(result)

def readinto(self, buffer):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
``_pyio.FileIO.readall()`` now allocates, resizes, and fills a data buffer
using the same algorithm ``_io.FileIO.readall()`` uses.
Loading