Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Doc/library/os.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1659,6 +1659,27 @@
:exc:`InterruptedError` exception (see :pep:`475` for the rationale).


.. function:: readinto(fd, buffer, /)

Read from a file descriptor *fd* into a mutable
:ref:`buffer protocol <bufferobjects>` *buffer*.

The *buffer* should be mutable and :term:`bytes-like <bytes-like object>`. On
success, returns the number of bytes read. Less bytes may be read than the
size of the buffer. Will retry the underlying system call when interrupted by
a signal. For other errors, the system call will not be retried.

.. note::

This function is intended for low-level I/O and must be applied to a file

Check warning on line 1674 in Doc/library/os.rst

View workflow job for this annotation

GitHub Actions / Docs / Docs

py:meth reference target not found: file.readinto [ref.meth]

Check warning on line 1674 in Doc/library/os.rst

View workflow job for this annotation

GitHub Actions / Docs / Docs

py:meth reference target not found: file.read [ref.meth]
descriptor as returned by :func:`os.open` or :func:`pipe`. To read a
"file object" returned by the built-in function :func:`open` or by
:func:`popen` or :func:`fdopen`, or :data:`sys.stdin`, use its
:meth:`~file.readinto` or :meth:`~file.read`.

.. versionadded:: 3.14


.. function:: sendfile(out_fd, in_fd, offset, count)
sendfile(out_fd, in_fd, offset, count, headers=(), trailers=(), flags=0)

Expand Down
3 changes: 3 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,9 @@ os
to the :mod:`os` module.
(Contributed by James Roy in :gh:`127688`.)

* Add the :func:`os.readinto` function to read into a
:ref:`buffer protocol <bufferobjects>` from a file descriptor.
(Contributed by Cody Maloney in :gh:`129205`.)

pathlib
-------
Expand Down
31 changes: 31 additions & 0 deletions Lib/test/_test_eintr.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,37 @@ def test_read(self):
self.assertEqual(data, os.read(rd, len(data)))
self.assertEqual(proc.wait(), 0)

def test_readinto(self):
rd, wr = os.pipe()
self.addCleanup(os.close, rd)
# wr closed explicitly by parent

# the payload below are smaller than PIPE_BUF, hence the writes will be
# atomic
datas = [b"hello", b"world", b"spam"]
bufs = [bytearray(5), bytearray(5), bytearray(4)]

code = '\n'.join((
'import os, sys, time',
'',
'wr = int(sys.argv[1])',
'datas = %r' % datas,
'sleep_time = %r' % self.sleep_time,
'',
'for data in datas:',
' # let the parent block on read()',
' time.sleep(sleep_time)',
' os.write(wr, data)',
))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
datas = [b"hello", b"world", b"spam"]
bufs = [bytearray(5), bytearray(5), bytearray(4)]
code = '\n'.join((
'import os, sys, time',
'',
'wr = int(sys.argv[1])',
'datas = %r' % datas,
'sleep_time = %r' % self.sleep_time,
'',
'for data in datas:',
' # let the parent block on read()',
' time.sleep(sleep_time)',
' os.write(wr, data)',
))
data = [b"hello", b"world", b"spam"]
bufs = [bytearray(5), bytearray(5), bytearray(4)]
code = '\n'.join((
'import os, sys, time',
'',
'wr = int(sys.argv[1])',
f'data = {data!r}',
f'sleep_time = {self.sleep_time!r}',
'',
'for datum in data:',
' # let the parent block on read()',
' time.sleep(sleep_time)',
' os.write(wr, datum)',
))

Strictly speaking, data is already the plural form of datum.

Copy link
Contributor Author

@cmaloney cmaloney Jan 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For data -> datum the code is copied from test_read just before with minor tweaks; I think the two being very close in code is useful for maintenance more than naming. I think I can get rid of bufs and zip though which will remove the ambiguities / make the code read better


proc = self.subprocess(code, str(wr), pass_fds=[wr])
with kill_on_error(proc):
os.close(wr)
for data, buffer in zip(datas, bufs):
os.readinto(rd, buffer)
self.assertEqual(data, buffer)
self.assertEqual(proc.wait(), 0)

def test_write(self):
rd, wr = os.pipe()
self.addCleanup(os.close, wr)
Expand Down
75 changes: 75 additions & 0 deletions Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,54 @@ def test_read(self):
self.assertEqual(type(s), bytes)
self.assertEqual(s, b"spam")

def test_readinto(self):
with open(os_helper.TESTFN, "w+b") as fobj:
fobj.write(b"spam")
fobj.flush()
fd = fobj.fileno()
os.lseek(fd, 0, 0)
buffer = bytearray(7)
s = os.readinto(fd, buffer)
self.assertEqual(type(s), int)
self.assertEqual(s, 4)
# Should overwrite the first 4 bytes of the buffer.
self.assertEqual(buffer[:4], b"spam")

# Readinto at EOF should return 0 and not touch buffer.
buffer[:] = b"notspam"
s = os.readinto(fd, buffer)
self.assertEqual(type(s), int)
self.assertEqual(s, 0)
self.assertEqual(bytes(buffer), b"notspam")
s = os.readinto(fd, buffer)
self.assertEqual(s, 0)
self.assertEqual(bytes(buffer), b"notspam")

# Readinto a 0 length bytearray when at EOF should return 0
self.assertEqual(os.readinto(fd, bytearray()), 0)

# Readinto a 0 length bytearray with data available should return 0.
os.lseek(fd, 0, 0)
self.assertEqual(os.readinto(fd, bytearray()), 0)

def test_readinto_badbuffer(self):
with open(os_helper.TESTFN, "w+b") as fobj:
fobj.write(b"spam")
fobj.flush()
fd = fobj.fileno()
os.lseek(fd, 0, 0)

for bad_arg in ("test", bytes(), 14):
with self.subTest(f"{type(bad_arg)}"):
with self.assertRaises(TypeError):
os.readinto(fd, bad_arg)

# No data should have been read with the bad arguments.
buffer = bytearray(8)
s = os.readinto(fd, buffer)
self.assertEqual(s, 4)
self.assertEqual(bytes(buffer), b"spam\0\0\0\0")

@support.cpython_only
# Skip the test on 32-bit platforms: the number of bytes must fit in a
# Py_ssize_t type
Expand All @@ -249,6 +297,29 @@ def test_large_read(self, size):
# operating system is free to return less bytes than requested.
self.assertEqual(data, b'test')


@support.cpython_only
# Skip the test on 32-bit platforms: the number of bytes must fit in a
# Py_ssize_t type
@unittest.skipUnless(INT_MAX < PY_SSIZE_T_MAX,
"needs INT_MAX < PY_SSIZE_T_MAX")
@support.bigmemtest(size=INT_MAX + 10, memuse=1, dry_run=False)
def test_large_readinto(self, size):
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
create_file(os_helper.TESTFN, b'test')

# Issue #21932: For readinto the buffer contains the length rather than
# a length being passed explicitly to read, should still get capped to a
# valid size / not raise an OverflowError for sizes larger than INT_MAX.
buffer = bytearray(INT_MAX+10)
with open(os_helper.TESTFN, "rb") as fp:
length = os.readinto(fp.fileno(), buffer)

# The test does not try to read more than 2 GiB at once because the
# operating system is free to return less bytes than requested.
self.assertEqual(length, 4)
self.assertEqual(buffer[:4], b'test')

def test_write(self):
# os.write() accepts bytes- and buffer-like objects but not strings
fd = os.open(os_helper.TESTFN, os.O_CREAT | os.O_WRONLY)
Expand Down Expand Up @@ -2467,6 +2538,10 @@ def test_lseek(self):
def test_read(self):
self.check(os.read, 1)

@unittest.skipUnless(hasattr(os, 'readinto'), 'test needs os.readinto()')
def test_readinto(self):
self.check(os.readinto, bytearray(5))

@unittest.skipUnless(hasattr(os, 'readv'), 'test needs os.readv()')
def test_readv(self):
buf = bytearray(10)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add :func:`os.readinto` to read into a :ref:`buffer protocol <bufferobjects>` from a file descriptor.
53 changes: 52 additions & 1 deletion Modules/clinic/posixmodule.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions Modules/posixmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -11433,6 +11433,32 @@ os_read_impl(PyObject *module, int fd, Py_ssize_t length)
return buffer;
}

/*[clinic input]
os.readinto -> Py_ssize_t
fd: int
buffer: Py_buffer(accept={rwbuffer})
/

Read into a Buffer Protocol object from a file descriptor.

The buffer should be mutable and bytes-like. On success, returns the number of
bytes read. Less bytes may be read than the size of the buffer. Will retry the
underlying system call when interrupted by a signal. For other errors, the
system call will not be retried.
[clinic start generated code]*/

static Py_ssize_t
os_readinto_impl(PyObject *module, int fd, Py_buffer *buffer)
/*[clinic end generated code: output=8091a3513c683a80 input=2d815e709ab6a85b]*/
{
if (buffer->len < 0) {
errno = EINVAL;
PyErr_SetFromErrno(PyExc_OSError);
return -1;
}
return _Py_read(fd, buffer->buf, buffer->len);
}

#if (defined(HAVE_SENDFILE) && (defined(__FreeBSD__) || defined(__DragonFly__) \
|| defined(__APPLE__))) \
|| defined(HAVE_READV) || defined(HAVE_PREADV) || defined (HAVE_PREADV2) \
Expand Down Expand Up @@ -16973,6 +16999,7 @@ static PyMethodDef posix_methods[] = {
OS_LOCKF_METHODDEF
OS_LSEEK_METHODDEF
OS_READ_METHODDEF
OS_READINTO_METHODDEF
OS_READV_METHODDEF
OS_PREAD_METHODDEF
OS_PREADV_METHODDEF
Expand Down
Loading