From ee336eb8fda78d7e74eb7d42cbfaba6028aeab05 Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Mon, 29 Sep 2025 12:33:46 +0000 Subject: [PATCH 01/18] GH-139416: Fix copyfile failure due to sendfile + Lustre --- Lib/shutil.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Lib/shutil.py b/Lib/shutil.py index 8d8fe145567822..d366fb37e27150 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -213,6 +213,12 @@ def _fastcopy_sendfile(fsrc, fdst): _USE_CP_SENDFILE = False raise _GiveupOnFastCopy(err) + if err.errno == errno.ENODATA: + # In rare cases sendfile() on Linux Lsture call + # returns ENODATA. + _USE_CP_SENDFILE = False + raise _GiveupOnFastCopy(err) + if err.errno == errno.ENOSPC: # filesystem is full raise err from None From 646868af3d8b09bac2231339fac37ed5069bba40 Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Mon, 29 Sep 2025 13:24:56 +0000 Subject: [PATCH 02/18] Combine error check into existing ENOTSOCK check --- Lib/shutil.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index d366fb37e27150..c4b73084723865 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -206,16 +206,12 @@ def _fastcopy_sendfile(fsrc, fdst): err.filename = fsrc.name err.filename2 = fdst.name - if err.errno == errno.ENOTSOCK: - # sendfile() on this platform (probably Linux < 2.6.33) - # does not support copies between regular files (only - # sockets). - _USE_CP_SENDFILE = False - raise _GiveupOnFastCopy(err) - - if err.errno == errno.ENODATA: - # In rare cases sendfile() on Linux Lsture call - # returns ENODATA. + if err.errno in (errno.ENOTSOCK, errno.ENODATA): + # ENOTSOCK: sendfile() on this platform (probably + # Linux < 2.6.33) does not support copies between + # regular files (only sockets). 
+ # ENODATA: In rare cases sendfile() on Linux Lustre call + # returns ENODATA _USE_CP_SENDFILE = False raise _GiveupOnFastCopy(err) From 520ee095244914eb98ed4a58068f16e56a972d0e Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Mon, 29 Sep 2025 13:35:09 +0000 Subject: [PATCH 03/18] Add News entry --- .../Library/2025-09-29-14-30-00.gh-issue-139416.bnzz33.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-09-29-14-30-00.gh-issue-139416.bnzz33.rst diff --git a/Misc/NEWS.d/next/Library/2025-09-29-14-30-00.gh-issue-139416.bnzz33.rst b/Misc/NEWS.d/next/Library/2025-09-29-14-30-00.gh-issue-139416.bnzz33.rst new file mode 100644 index 00000000000000..8c7a26aece601b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-09-29-14-30-00.gh-issue-139416.bnzz33.rst @@ -0,0 +1,3 @@ +:func:`shutil.copyfile`: Detect problem seen on Lustre filesystems +giving "Errno 61" due to the sendfile(2) optimised implementation and +fall back to the Posix standard read/write implementation. From a09ff4d5fd236de54c6acf7734c4f69891f53334 Mon Sep 17 00:00:00 2001 From: bnikolic Date: Mon, 29 Sep 2025 16:04:50 +0100 Subject: [PATCH 04/18] Update Lib/shutil.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve whitespace Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/shutil.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index c4b73084723865..c4c7ab4868254d 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -210,8 +210,9 @@ def _fastcopy_sendfile(fsrc, fdst): # ENOTSOCK: sendfile() on this platform (probably # Linux < 2.6.33) does not support copies between # regular files (only sockets). - # ENODATA: In rare cases sendfile() on Linux Lustre call - # returns ENODATA + # + # ENODATA: In rare cases, sendfile() on Linux Lustre + # returns ENODATA. 
_USE_CP_SENDFILE = False raise _GiveupOnFastCopy(err) From 5917a6b1268bf59cb38768921d41d785e745a9a2 Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Tue, 30 Sep 2025 08:30:06 +0000 Subject: [PATCH 05/18] Handle if ENODATA error and some data already written --- Lib/shutil.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Lib/shutil.py b/Lib/shutil.py index c4c7ab4868254d..3759a10b73349c 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -214,6 +214,16 @@ def _fastcopy_sendfile(fsrc, fdst): # ENODATA: In rare cases, sendfile() on Linux Lustre # returns ENODATA. _USE_CP_SENDFILE = False + + dstpos = os.lseek(outfd, 0, os.SEEK_CUR) + if dstpos > 0: + # Some data has already been written but we use + # sendfile in a mode that does not update the + # input fd position when reading. Hence seek the + # input fd to the correct position before falling + # back on POSIX read/write method + os.lseek(infd, dstpos, os.SEEK_SET) + raise _GiveupOnFastCopy(err) if err.errno == errno.ENOSPC: # filesystem is full From 452c8771c3805e34265683204a9ae2d8dcf7cd7e Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Tue, 30 Sep 2025 08:31:01 +0000 Subject: [PATCH 06/18] Add test for ENODATA handling --- Lib/shutil.py | 2 +- Lib/test/test_shutil.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 3759a10b73349c..dcc45157a28a37 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -223,7 +223,7 @@ def _fastcopy_sendfile(fsrc, fdst): # input fd to the correct position before falling # back on POSIX read/write method os.lseek(infd, dstpos, os.SEEK_SET) - + raise _GiveupOnFastCopy(err) if err.errno == errno.ENOSPC: # filesystem is full diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index ebb6cf88336249..4060dc3284ceae 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -3365,6 +3365,42 @@ def test_file2file_not_supported(self): finally: shutil._USE_CP_SENDFILE 
= True + def test_exception_on_enodata_call(self): + # Test logic when sendfile(2) call returns ENODATA error on + # the not-first call on the file and we need to fall back to + # traditional POSIX while preserving the position of where we + # got to in writing + def syscall(*args, **kwargs): + if not flag: + flag.append(None) + return orig_syscall(*args, **kwargs) + else: + raise OSError(errno.ENODATA, "yo") + + flag = [] + orig_syscall = eval(self.PATCHPOINT) + # Reduce block size so that multiple syscalls are needed + mock = unittest.mock.Mock() + mock.st_size = 65536 + 1 + with unittest.mock.patch('os.fstat', return_value=mock) as m: + with unittest.mock.patch(self.PATCHPOINT, create=True, + side_effect=syscall) as m2: + with self.get_files() as (src, dst): + with self.assertRaises(_GiveupOnFastCopy) as cm: + self.zerocopy_fun(src, dst) + + # Reset flag so that second syscall fails again + flag = [] + with unittest.mock.patch(self.PATCHPOINT, create=True, + side_effect=syscall) as m2: + shutil._USE_CP_SENDFILE = True + shutil.copyfile(TESTFN, TESTFN2) + assert m2.called + shutil._USE_CP_SENDFILE = True + assert flag + self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + + @unittest.skipUnless(shutil._USE_CP_COPY_FILE_RANGE, "os.copy_file_range() not supported") class TestZeroCopyCopyFileRange(_ZeroCopyFileLinuxTest, unittest.TestCase): From faa922983d2671b1091ba8e660ed97741d85a5c7 Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Tue, 30 Sep 2025 08:58:16 +0000 Subject: [PATCH 07/18] fixup whitespace --- Lib/test/test_shutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 4060dc3284ceae..2ef857ade68f57 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -3392,7 +3392,7 @@ def syscall(*args, **kwargs): # Reset flag so that second syscall fails again flag = [] with unittest.mock.patch(self.PATCHPOINT, create=True, - side_effect=syscall) as m2: + 
side_effect=syscall) as m2: shutil._USE_CP_SENDFILE = True shutil.copyfile(TESTFN, TESTFN2) assert m2.called From 482fb09e80414167562b2c8df008446c38291f1a Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Tue, 30 Sep 2025 09:03:23 +0000 Subject: [PATCH 08/18] Further whitespace fixup --- Lib/test/test_shutil.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 2ef857ade68f57..6f3ad5ffb5322a 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -3399,8 +3399,6 @@ def syscall(*args, **kwargs): shutil._USE_CP_SENDFILE = True assert flag self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) - - @unittest.skipUnless(shutil._USE_CP_COPY_FILE_RANGE, "os.copy_file_range() not supported") class TestZeroCopyCopyFileRange(_ZeroCopyFileLinuxTest, unittest.TestCase): From 883f158ae2f43632eb8caaf0e7b2b3ec859dcfe2 Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Tue, 30 Sep 2025 09:49:02 +0000 Subject: [PATCH 09/18] Split handling of ENOTSOCK and ENODATA --- Lib/shutil.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index dcc45157a28a37..fc243fb56a6ce3 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -206,13 +206,16 @@ def _fastcopy_sendfile(fsrc, fdst): err.filename = fsrc.name err.filename2 = fdst.name - if err.errno in (errno.ENOTSOCK, errno.ENODATA): + if err.errno == errno.ENOTSOCK: # ENOTSOCK: sendfile() on this platform (probably # Linux < 2.6.33) does not support copies between # regular files (only sockets). - # - # ENODATA: In rare cases, sendfile() on Linux Lustre - # returns ENODATA. + _USE_CP_SENDFILE = False + raise _GiveupOnFastCopy(err) + + if err.errno == errno.ENODATA: + # In rare cases, sendfile() on Linux Lustre returns + # ENODATA. 
_USE_CP_SENDFILE = False dstpos = os.lseek(outfd, 0, os.SEEK_CUR) @@ -224,7 +227,7 @@ def _fastcopy_sendfile(fsrc, fdst): # back on POSIX read/write method os.lseek(infd, dstpos, os.SEEK_SET) - raise _GiveupOnFastCopy(err) + raise _GiveupOnFastCopy(err) if err.errno == errno.ENOSPC: # filesystem is full raise err from None From 1316d6af90522e25818cc7660f7a6aa3e512894e Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Tue, 30 Sep 2025 09:51:55 +0000 Subject: [PATCH 10/18] clean comment and space --- Lib/shutil.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index fc243fb56a6ce3..d43cc0387714e1 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -207,9 +207,9 @@ def _fastcopy_sendfile(fsrc, fdst): err.filename2 = fdst.name if err.errno == errno.ENOTSOCK: - # ENOTSOCK: sendfile() on this platform (probably - # Linux < 2.6.33) does not support copies between - # regular files (only sockets). + # sendfile() on this platform (probably Linux < 2.6.33) + # does not support copies between regular files (only + # sockets). _USE_CP_SENDFILE = False raise _GiveupOnFastCopy(err) @@ -227,7 +227,7 @@ def _fastcopy_sendfile(fsrc, fdst): # back on POSIX read/write method os.lseek(infd, dstpos, os.SEEK_SET) - raise _GiveupOnFastCopy(err) + raise _GiveupOnFastCopy(err) if err.errno == errno.ENOSPC: # filesystem is full raise err from None From 3426f7b4396c0edcad1ca4acf7f16ef02c8a3203 Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Tue, 30 Sep 2025 09:58:13 +0000 Subject: [PATCH 11/18] Add comment on seekable property of infd --- Lib/shutil.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index d43cc0387714e1..e2d1956a35b054 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -224,7 +224,8 @@ def _fastcopy_sendfile(fsrc, fdst): # sendfile in a mode that does not update the # input fd position when reading. 
Hence seek the # input fd to the correct position before falling - # back on POSIX read/write method + # back on POSIX read/write method. Since sendfile + # requires mmapable infd, it should also be seekable os.lseek(infd, dstpos, os.SEEK_SET) raise _GiveupOnFastCopy(err) From 1b882694aece2b3486e5832280fb6ece2820c9ac Mon Sep 17 00:00:00 2001 From: bnikolic Date: Tue, 30 Sep 2025 14:18:40 +0100 Subject: [PATCH 12/18] Update Lib/test/test_shutil.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_shutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 6f3ad5ffb5322a..470c6d409708ca 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -3395,7 +3395,7 @@ def syscall(*args, **kwargs): side_effect=syscall) as m2: shutil._USE_CP_SENDFILE = True shutil.copyfile(TESTFN, TESTFN2) - assert m2.called + m2.assert_called() shutil._USE_CP_SENDFILE = True assert flag self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) From 1513ea3bc35d8cadf2f8eccf6dfda4ca77fcdee5 Mon Sep 17 00:00:00 2001 From: bnikolic Date: Tue, 30 Sep 2025 14:18:54 +0100 Subject: [PATCH 13/18] Update Lib/test/test_shutil.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_shutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 470c6d409708ca..e55fd98c6bde03 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -3390,7 +3390,7 @@ def syscall(*args, **kwargs): self.zerocopy_fun(src, dst) # Reset flag so that second syscall fails again - flag = [] + flag.clear() with unittest.mock.patch(self.PATCHPOINT, create=True, side_effect=syscall) as m2: 
shutil._USE_CP_SENDFILE = True From f59253b7580e36cc0f522b944439a1c64b1dbd4c Mon Sep 17 00:00:00 2001 From: bnikolic Date: Wed, 1 Oct 2025 09:33:48 +0100 Subject: [PATCH 14/18] Update Lib/test/test_shutil.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_shutil.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index e55fd98c6bde03..2b5a951537e4bc 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -3371,11 +3371,10 @@ def test_exception_on_enodata_call(self): # traditional POSIX while preserving the position of where we # got to in writing def syscall(*args, **kwargs): - if not flag: - flag.append(None) - return orig_syscall(*args, **kwargs) - else: + if flag: raise OSError(errno.ENODATA, "yo") + flag.append(None) + return eval(self.PATCHPOINT)(*args, **kwargs) flag = [] orig_syscall = eval(self.PATCHPOINT) From 44c0184890dd5e3f4da9e33b0e40d5072fcf4649 Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Wed, 1 Oct 2025 08:42:54 +0000 Subject: [PATCH 15/18] Pull comment on seekability ahead of dstpos lseek --- Lib/shutil.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index e2d1956a35b054..b946ac16ed3099 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -218,14 +218,15 @@ def _fastcopy_sendfile(fsrc, fdst): # ENODATA. _USE_CP_SENDFILE = False + # 'infd' and 'outfd' are assumed to be seekable, as + # they are checked to be "regular" files. dstpos = os.lseek(outfd, 0, os.SEEK_CUR) if dstpos > 0: # Some data has already been written but we use # sendfile in a mode that does not update the # input fd position when reading. Hence seek the # input fd to the correct position before falling - # back on POSIX read/write method. 
Since sendfile - # requires mmapable infd, it should also be seekable + # back on POSIX read/write method. os.lseek(infd, dstpos, os.SEEK_SET) raise _GiveupOnFastCopy(err) From ef900b01ff98493e98a815538be8a18d842b45a6 Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Wed, 1 Oct 2025 08:43:46 +0000 Subject: [PATCH 16/18] Update test case style as per review --- Lib/test/test_shutil.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 2b5a951537e4bc..a2ebceb60c5312 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -3379,14 +3379,16 @@ def syscall(*args, **kwargs): flag = [] orig_syscall = eval(self.PATCHPOINT) # Reduce block size so that multiple syscalls are needed - mock = unittest.mock.Mock() - mock.st_size = 65536 + 1 - with unittest.mock.patch('os.fstat', return_value=mock) as m: - with unittest.mock.patch(self.PATCHPOINT, create=True, - side_effect=syscall) as m2: - with self.get_files() as (src, dst): - with self.assertRaises(_GiveupOnFastCopy) as cm: - self.zerocopy_fun(src, dst) + fstat_mock = unittest.mock.Mock() + fstat_mock.st_size = 65536 + 1 + with unittest.mock.patch('os.fstat', return_value=fstat_mock): + with ( + unittest.mock.patch(self.PATCHPOINT, create=True, + side_effect=syscall), + self.get_files() as (src, dst) + ): + self.assertRaises(_GiveupOnFastCopy, + self.zerocopy_fun, src, dst) # Reset flag so that second syscall fails again flag.clear() @@ -3396,9 +3398,10 @@ def syscall(*args, **kwargs): shutil.copyfile(TESTFN, TESTFN2) m2.assert_called() shutil._USE_CP_SENDFILE = True - assert flag + self.assertEqual(flag, [None]) self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + @unittest.skipUnless(shutil._USE_CP_COPY_FILE_RANGE, "os.copy_file_range() not supported") class TestZeroCopyCopyFileRange(_ZeroCopyFileLinuxTest, unittest.TestCase): PATCHPOINT = "os.copy_file_range" From 
4987596b5765c3c6b6ec66debf003b5870958d29 Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Wed, 1 Oct 2025 08:52:05 +0000 Subject: [PATCH 17/18] Whitespace fixup --- Lib/test/test_shutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index a2ebceb60c5312..fff24cfdfbc714 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -3373,7 +3373,7 @@ def test_exception_on_enodata_call(self): def syscall(*args, **kwargs): if flag: raise OSError(errno.ENODATA, "yo") - flag.append(None) + flag.append(None) return eval(self.PATCHPOINT)(*args, **kwargs) flag = [] From e624cb5598a34a7b6b304ea2b30b0896cb6c0344 Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Wed, 1 Oct 2025 08:58:10 +0000 Subject: [PATCH 18/18] Another whitespace --- Lib/shutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index b946ac16ed3099..2da3b9e0478242 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -226,7 +226,7 @@ def _fastcopy_sendfile(fsrc, fdst): # sendfile in a mode that does not update the # input fd position when reading. Hence seek the # input fd to the correct position before falling - # back on POSIX read/write method. + # back on POSIX read/write method. os.lseek(infd, dstpos, os.SEEK_SET) raise _GiveupOnFastCopy(err)