Skip to content
Draft
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Lib/shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,23 @@ def _fastcopy_sendfile(fsrc, fdst):
_USE_CP_SENDFILE = False
raise _GiveupOnFastCopy(err)

if err.errno == errno.ENODATA:
# In rare cases, sendfile() on Linux Lustre returns
# ENODATA.
_USE_CP_SENDFILE = False

dstpos = os.lseek(outfd, 0, os.SEEK_CUR)
if dstpos > 0:
# Some data has already been written but we use
# sendfile in a mode that does not update the
# input fd position when reading. Hence seek the
# input fd to the correct position before falling
# back on POSIX read/write method. Since sendfile
# requires mmapable infd, it should also be seekable
os.lseek(infd, dstpos, os.SEEK_SET)
Comment on lines 223 to 230
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
dstpos = os.lseek(outfd, 0, os.SEEK_CUR)
if dstpos > 0:
# Some data has already been written but we use
# sendfile in a mode that does not update the
# input fd position when reading. Hence seek the
# input fd to the correct position before falling
# back on POSIX read/write method. Since sendfile
# requires mmapable infd, it should also be seekable
os.lseek(infd, dstpos, os.SEEK_SET)
# 'infd' and 'outfd' are assumed to be seekable,
# as they are checked to be "regular" files.
if offset > 0:
# Some data has already been written but we use
# sendfile in a mode that does not update the
# input fd position when reading. Hence seek the
# input fd to the correct position before falling
# back on POSIX read/write method.
os.lseek(infd, offset, os.SEEK_SET)

AFAIK, offset would match dstpos right? (or maybe offset + 1, I didn't think about it much here)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will check this but I think this would not work, because offset is updated only at the bottom with the return value of sendfile() (which is the number of bytes actually sent). If are catching the exception then no return value (I think it would be available in C). So this is why the offset is not used in in the next condition but rather the return of lseek, even if it is just to check no data have been written.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the comments. I think I would leave the dstpos = os.lseek(outfd, 0, os.SEEK_CUR) in case sendfile has failed after writing some data. This would be consistent with the usage below (Give up on first call and if no data was copied and subsequent line) which was I think done for same reason.


raise _GiveupOnFastCopy(err)

if err.errno == errno.ENOSPC: # filesystem is full
raise err from None

Expand Down
34 changes: 34 additions & 0 deletions Lib/test/test_shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3365,6 +3365,40 @@ def test_file2file_not_supported(self):
finally:
shutil._USE_CP_SENDFILE = True

def test_exception_on_enodata_call(self):
# Test logic when sendfile(2) call returns ENODATA error on
# the not-first call on the file and we need to fall back to
# traditional POSIX while preserving the position of where we
# got to in writing
def syscall(*args, **kwargs):
if not flag:
flag.append(None)
return orig_syscall(*args, **kwargs)
else:
raise OSError(errno.ENODATA, "yo")

flag = []
orig_syscall = eval(self.PATCHPOINT)
# Reduce block size so that multiple syscalls are needed
mock = unittest.mock.Mock()
mock.st_size = 65536 + 1
with unittest.mock.patch('os.fstat', return_value=mock) as m:
with unittest.mock.patch(self.PATCHPOINT, create=True,
side_effect=syscall) as m2:
with self.get_files() as (src, dst):
with self.assertRaises(_GiveupOnFastCopy) as cm:
self.zerocopy_fun(src, dst)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with (
    unittest.mock.patch('os.fstat', return_value=mock):
    unittest.mock.patch(self.PATCHPOINT, create=True,
                        side_effect=syscall):
    self.get_files() as (src, dst)
):
    self.assertRaises(_GiveupOnFastCopy, self.zerocopy_func, src, dst)

You'll need to dedent the rest of the code as well. Also, don't bind context managers if they are not used.


# Reset flag so that second syscall fails again
flag = []
with unittest.mock.patch(self.PATCHPOINT, create=True,
side_effect=syscall) as m2:
shutil._USE_CP_SENDFILE = True
shutil.copyfile(TESTFN, TESTFN2)
assert m2.called
shutil._USE_CP_SENDFILE = True
assert flag
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

@unittest.skipUnless(shutil._USE_CP_COPY_FILE_RANGE, "os.copy_file_range() not supported")
class TestZeroCopyCopyFileRange(_ZeroCopyFileLinuxTest, unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:func:`shutil.copyfile`: Detect problem seen on Lustre filesystems
giving "Errno 61" due to the sendfile(2) optimised implementation and
fall back to the Posix standard read/write implementation.
Loading