From 3405a332f0939454c319475bd9f8860d52d232a2 Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:18:08 -0700 Subject: [PATCH 01/14] gh-113924: add tests for existing exceptions in zipfile stl (#113924) --- Lib/test/test_zipfile/test_core.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 36f7f542872897..f79dd1023b8356 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -608,14 +608,15 @@ def test_io_on_closed_zipextfile(self): self.assertRaises(ValueError, fid.seek, 0) self.assertRaises(ValueError, fid.tell) - def test_write_to_readonly(self): - """Check that trying to call write() on a readonly ZipFile object + def test_writing_to_readonly(self): + """Check that trying to write to a readonly ZipFile object raises a ValueError.""" with zipfile.ZipFile(TESTFN2, mode="w") as zipfp: zipfp.writestr("somefile.txt", "bogus") with zipfile.ZipFile(TESTFN2, mode="r") as zipfp: self.assertRaises(ValueError, zipfp.write, TESTFN) + self.assertRaises(ValueError, zipfp.writestr, TESTFN, "data") with zipfile.ZipFile(TESTFN2, mode="r") as zipfp: with self.assertRaises(ValueError): @@ -2020,6 +2021,7 @@ def test_closed_zip_raises_ValueError(self): # and report that the first file in the archive was corrupt. 
self.assertRaises(ValueError, zipf.read, "foo.txt") self.assertRaises(ValueError, zipf.open, "foo.txt") + self.assertRaises(ValueError, zipf.open, "foo.txt", "w") self.assertRaises(ValueError, zipf.testzip) self.assertRaises(ValueError, zipf.writestr, "bogus.txt", "bogus") with open(TESTFN, 'w', encoding='utf-8') as f: @@ -2242,6 +2244,16 @@ def test_zipfile_with_short_extra_field(self): # testzip returns the name of the first corrupt file, or None self.assertIsNone(zipf.testzip()) + def test_open_for_write_issues_exception_when_pwd_provided(self): + with zipfile.ZipFile(TESTFN2, 'w') as zipf: + with self.assertRaises(ValueError): + zipf.open("foo.txt", mode='w', pwd="password") + + def test_open_for_write_issues_exception_when_force_zip_not_allowed(self): + with zipfile.ZipFile(TESTFN2, 'w', allowZip64=False) as zipf: + with self.assertRaises(ValueError): + zipf.open("foo.txt", mode='w', force_zip64=True) + def test_open_conflicting_handles(self): # It's only possible to open one writable file handle at a time msg1 = b"It's fun to charter an accountant!" 
@@ -2255,6 +2267,8 @@ def test_open_conflicting_handles(self): zipf.open('handle', mode='w') with self.assertRaises(ValueError): zipf.open('foo', mode='r') + with self.assertRaises(ValueError): + zipf.read('foo') with self.assertRaises(ValueError): zipf.writestr('str', 'abcde') with self.assertRaises(ValueError): From 7102f293baa941c0fb7aa8f7c9eeb8df03e8d46d Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:33:55 -0700 Subject: [PATCH 02/14] gh-113924: decouple ZipFile write mode from read mode (#113924) --- Lib/zipfile/__init__.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index e2aaf8bab4913d..f08b75ded711a4 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1622,21 +1622,14 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): raise ValueError( "Attempt to use ZIP archive that was already closed") - # Make sure we have an info object + if mode == 'w': + return self._open_to_write(name, force_zip64=force_zip64) + if isinstance(name, ZipInfo): - # 'name' is already an info object zinfo = name - elif mode == 'w': - zinfo = ZipInfo(name) - zinfo.compress_type = self.compression - zinfo.compress_level = self.compresslevel else: - # Get info object for name zinfo = self.getinfo(name) - if mode == 'w': - return self._open_to_write(zinfo, force_zip64=force_zip64) - if self._writing: raise ValueError("Can't read from the ZIP file while there " "is an open writing handle on it. 
" @@ -1700,7 +1693,7 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): zef_file.close() raise - def _open_to_write(self, zinfo, force_zip64=False): + def _open_to_write(self, name, force_zip64=False): if force_zip64 and not self._allowZip64: raise ValueError( "force_zip64 is True, but allowZip64 was False when opening " @@ -1711,6 +1704,13 @@ def _open_to_write(self, zinfo, force_zip64=False): "another write handle open on it. " "Close the first handle before opening another.") + if isinstance(name, ZipInfo): + zinfo = name + else: + zinfo = ZipInfo(name) + zinfo.compress_type = self.compression + zinfo._compresslevel = self.compresslevel + # Size and CRC are overwritten with correct data after processing the file zinfo.compress_size = 0 zinfo.CRC = 0 From be07ad7495be91247d875e08d9c88521f031d8f0 Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Thu, 17 Oct 2024 23:36:49 -0700 Subject: [PATCH 03/14] gh-113924: extract method for ZipFile reading logic (#113924) --- Lib/zipfile/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index f08b75ded711a4..05e4dd58e4b75a 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1624,7 +1624,10 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): if mode == 'w': return self._open_to_write(name, force_zip64=force_zip64) + if mode == "r": + return self._open_to_read(name, pwd) + def _open_to_read(self, name, pwd=None): if isinstance(name, ZipInfo): zinfo = name else: @@ -1688,7 +1691,7 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): else: pwd = None - return ZipExtFile(zef_file, mode + 'b', zinfo, pwd, True) + return ZipExtFile(zef_file, 'rb', zinfo, pwd, True) except: zef_file.close() raise From 8bdfab48ad6fbf83bc4117a9426994b7ba5df955 Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla 
<179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:48:31 -0700 Subject: [PATCH 04/14] gh-113924: extract method to raise if cannot write to zipfile (#113924) --- Lib/zipfile/__init__.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 05e4dd58e4b75a..505f09039f9350 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1627,6 +1627,20 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): if mode == "r": return self._open_to_read(name, pwd) + def _raise_if_cannot_write(self, force_zip64): + if not self.fp: + raise ValueError( + "Attempt to use ZIP archive that was already closed") + if force_zip64 and not self._allowZip64: + raise ValueError( + "force_zip64 is True, but allowZip64 was False when opening " + "the ZIP file." + ) + if self._writing: + raise ValueError("Can't write to the ZIP file while there is " + "another write handle open on it. " + "Close the first handle before opening another.") + def _open_to_read(self, name, pwd=None): if isinstance(name, ZipInfo): zinfo = name @@ -1697,15 +1711,7 @@ def _open_to_read(self, name, pwd=None): raise def _open_to_write(self, name, force_zip64=False): - if force_zip64 and not self._allowZip64: - raise ValueError( - "force_zip64 is True, but allowZip64 was False when opening " - "the ZIP file." - ) - if self._writing: - raise ValueError("Can't write to the ZIP file while there is " - "another write handle open on it. 
" - "Close the first handle before opening another.") + self._raise_if_cannot_write(force_zip64) if isinstance(name, ZipInfo): zinfo = name From 437917a6062231eb8e05993e3600edda0aac2f44 Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:49:57 -0700 Subject: [PATCH 05/14] gh-113924: extract method to setup writing to zipfile (#113924) --- Lib/zipfile/__init__.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 505f09039f9350..a78cebd3c1d9be 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1641,6 +1641,18 @@ def _raise_if_cannot_write(self, force_zip64): "another write handle open on it. " "Close the first handle before opening another.") + def _setup_for_writing(self, zinfo, is_zip64): + if self._seekable: + self.fp.seek(self.start_dir) + zinfo.header_offset = self.fp.tell() + + self._writecheck(zinfo) + self._didModify = True + + self.fp.write(zinfo.FileHeader(is_zip64)) + + self._writing = True + def _open_to_read(self, name, pwd=None): if isinstance(name, ZipInfo): zinfo = name @@ -1739,16 +1751,7 @@ def _open_to_write(self, name, force_zip64=False): if not self._allowZip64 and zip64: raise LargeZipFile("Filesize would require ZIP64 extensions") - if self._seekable: - self.fp.seek(self.start_dir) - zinfo.header_offset = self.fp.tell() - - self._writecheck(zinfo) - self._didModify = True - - self.fp.write(zinfo.FileHeader(zip64)) - - self._writing = True + self._setup_for_writing(zinfo, zip64) return _ZipWriteFile(self, zinfo, zip64) def extract(self, member, path=None, pwd=None): From 36a4a33804d2d92d4fe7613e43ccde8891627907 Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:54:28 -0700 Subject: [PATCH 06/14] gh-113924: make ZipFile encrypted files reusable in tests (#113924) --- 
Lib/test/test_zipfile/test_core.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index f79dd1023b8356..f13677144e06f1 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -2503,12 +2503,11 @@ class LzmaBadCrcTests(AbstractBadCrcTests, unittest.TestCase): b'\x00>\x00\x00\x00\x00\x00') -class DecryptionTests(unittest.TestCase): - """Check that ZIP decryption works. Since the library does not - support encryption at the moment, we use a pre-generated encrypted - ZIP file.""" +class EncryptedFiles: + """ Since the library does not support encryption at the moment, + we use pre-generated encrypted ZIP files.""" - data = ( + encrypted_zip1_data = ( b'PK\x03\x04\x14\x00\x01\x00\x00\x00n\x92i.#y\xef?&\x00\x00\x00\x1a\x00' b'\x00\x00\x08\x00\x00\x00test.txt\xfa\x10\xa0gly|\xfa-\xc5\xc0=\xf9y' b'\x18\xe0\xa8r\xb3Z}Lg\xbc\xae\xf9|\x9b\x19\xe4\x8b\xba\xbb)\x8c\xb0\xdbl' @@ -2516,7 +2515,10 @@ class DecryptionTests(unittest.TestCase): b'\x1a\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x01\x00 \x00\xb6\x81' b'\x00\x00\x00\x00test.txtPK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x006\x00' b'\x00\x00L\x00\x00\x00\x00\x00' ) - data2 = ( + + zip1_filename = "test.txt" + + encrypted_zip2_data = ( b'PK\x03\x04\x14\x00\t\x00\x08\x00\xcf}38xu\xaa\xb2\x14\x00\x00\x00\x00\x02' b'\x00\x00\x04\x00\x15\x00zeroUT\t\x00\x03\xd6\x8b\x92G\xda\x8b\x92GUx\x04' b'\x00\xe8\x03\xe8\x03\xc7 Date: Fri, 18 Oct 2024 14:18:54 -0700 Subject: [PATCH 07/14] gh-113924: add method and tests to copy file from ZipFile (#113924) --- Lib/test/test_zipfile/test_core.py | 147 +++++++++++++++++++++++++++++ Lib/zipfile/__init__.py | 112 +++++++++++++++++----- 2 files changed, 237 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index f13677144e06f1..d20b7d56adec73 100644 --- 
a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -12,6 +12,7 @@ import unittest import unittest.mock as mock import zipfile +import filecmp from tempfile import TemporaryFile @@ -2625,6 +2626,152 @@ def test_seek_tell(self): fp.read() +class AbstractCopyFileTests(EncryptedFiles): + @classmethod + def write_small_file(cls, destination): + destination.writestr(cls.small_file, cls.small_data, + compress_type=cls.compression) + + @classmethod + def write_large_file(cls, destination): + destination.writestr(cls.large_file, cls.large_data, + compress_type=cls.compression) + + @classmethod + def setUpClass(cls): + cls.small_zip = TESTFN + "_small.zip" + cls.large_zip = TESTFN + "_large.zip" + cls.small_large_zip = TESTFN + "_small_large.zip" + cls.emtpy_dir_zip = TESTFN + "_empty_dir.zip" + cls.encrypted_zip = TESTFN + "_encrypted.zip" + + # compressed size is larger than the source contents + cls.small_data = "a" + # compressed size is smaller than the source contents + cls.large_data = "b" * 400 + + cls.small_file = "small.txt" + cls.large_file = "large.txt" + cls.emtpy_dir_name = "directory/" + + # create zipfiles to compare against + # these zipfiles should never be written to outside of this method + with zipfile.ZipFile(cls.small_zip, "w") as destination: + cls.write_small_file(destination) + + with zipfile.ZipFile(cls.large_zip, "w") as destination: + cls.write_large_file(destination) + + with zipfile.ZipFile(cls.small_large_zip, "w") as destination: + cls.write_small_file(destination) + cls.write_large_file(destination) + + with zipfile.ZipFile(cls.emtpy_dir_zip, "w") as destination: + # use mode other than default to check that mode is copied + destination.mkdir(cls.emtpy_dir_name, mode=123) + + with open(cls.encrypted_zip, "wb") as destination: + destination.write(cls.encrypted_zip1_data) + + def tearDown(cls): + if os.path.exists(TESTFN): + unlink(TESTFN) + + @classmethod + def tearDownClass(cls): + unlink(cls.small_zip) + 
unlink(cls.large_zip) + unlink(cls.small_large_zip) + unlink(cls.emtpy_dir_zip) + unlink(cls.encrypted_zip) + + def assertIdentical(self, file1, file2): + self.assertTrue(filecmp.cmp(file1, file2, shallow=False)) + + def _test_copy_file(self, source_zipfile, source_filename): + with zipfile.ZipFile(TESTFN, 'w') as destination: + destination.copy_file(source_zipfile, source_filename) + self.assertIdentical(TESTFN, source_zipfile) + + # A compressed file can be larger than its uncompressed form, + # which are two different states we need to test + + # Copying tests with one small file + def test_copy_file__copy_one_small_file_to_new_ZipFile(self): + self._test_copy_file(self.small_zip, self.small_file) + + # Copying tests with one large file + def test_copy_file__copy_one_large_file_to_new_ZipFile(self): + self._test_copy_file(self.large_zip, self.large_file) + + # Copying tests with empty directory + def test_copy_file__copy_directory(self): + self._test_copy_file(self.emtpy_dir_zip, self.emtpy_dir_name) + + # Copying tests with encrypted file + def test_copy_file__copy_encrypted_file(self): + self._test_copy_file(self.encrypted_zip, self.zip1_filename) + + # Copying tests with nonempty destination zipfile + def _test_nonempty_zipfile(self, method_to_copy_large_file): + with zipfile.ZipFile(TESTFN, 'w') as destination: + self.write_small_file(destination) + method_to_copy_large_file(destination) + self.assertIdentical(TESTFN, self.small_large_zip) + + def test_copy_file__copy_to_nonempty_ZipFile(self): + copy_file = lambda x: x.copy_file(self.large_zip, self.large_file) + self._test_nonempty_zipfile(copy_file) + +class StoredCopyFileTests(AbstractCopyFileTests, unittest.TestCase): + compression = zipfile.ZIP_STORED + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.exceptions_zip = TESTFN + "_exception" + + with zipfile.ZipFile(cls.exceptions_zip, + "w") as destination: + cls.write_small_file(destination) + + @classmethod + def tearDownClass(cls): 
+ super().tearDownClass() + unlink(cls.exceptions_zip) + + def copy_file(self, destination): + destination.copy_file(self.small_zip, self.small_file) + + def test_copy_methods_issue_exception_when_zipfile_not_open_for_write(self): + with zipfile.ZipFile(self.exceptions_zip, 'r') as destination: + self.assertRaises(ValueError, self.copy_file, destination) + + def test_copy_file__issues_exception_when_already_writing(self): + with zipfile.ZipFile(self.exceptions_zip, 'w') as destination: + with destination.open('foo', mode='w') as open_file: + self.assertRaises(ValueError, self.copy_file, destination) + + def test_copy_file__issues_exception_when_file_closed(self): + with zipfile.ZipFile(self.exceptions_zip, 'w') as destination: + # write a file to create the zipfile + destination.writestr("filename.txt", "file contents") + + self.assertRaises(ValueError, self.copy_file, destination) + +@requires_zlib() +class DeflateCopyFileTests(AbstractCopyFileTests, unittest.TestCase): + compression = zipfile.ZIP_DEFLATED + +@requires_bz2() +class Bzip2CopyFileTests(AbstractCopyFileTests, unittest.TestCase): + compression = zipfile.ZIP_BZIP2 + +@requires_lzma() +class LzmaCopyFileTests(AbstractCopyFileTests, unittest.TestCase): + compression = zipfile.ZIP_LZMA + + class AbstractTestsWithRandomBinaryFiles: @classmethod def setUpClass(cls): diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index a78cebd3c1d9be..dd5a8af7910602 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -874,16 +874,28 @@ class ZipExtFile(io.BufferedIOBase): MAX_SEEK_READ = 1 << 24 def __init__(self, fileobj, mode, zipinfo, pwd=None, - close_fileobj=False): + close_fileobj=False, decompress=True): self._fileobj = fileobj self._pwd = pwd self._close_fileobj = close_fileobj - self._compress_type = zipinfo.compress_type + if decompress: + self._compress_type = zipinfo.compress_type + else: + # if we aren't decompressing the data, + # we pretend that the data wasn't compressed + # 
so that we don't process the data + self._compress_type = ZIP_STORED self._compress_left = zipinfo.compress_size - self._left = zipinfo.file_size + if decompress: + self._left = zipinfo.file_size + else: + self._left = zipinfo.compress_size - self._decompressor = _get_decompressor(self._compress_type) + if decompress: + self._decompressor = _get_decompressor(self._compress_type) + else: + self._decompressor = None self._eof = False self._readbuffer = b'' @@ -894,7 +906,7 @@ def __init__(self, fileobj, mode, zipinfo, pwd=None, self.mode = mode self.name = zipinfo.filename - if hasattr(zipinfo, 'CRC'): + if hasattr(zipinfo, 'CRC') and decompress: self._expected_crc = zipinfo.CRC self._running_crc = crc32(b'') else: @@ -1203,15 +1215,24 @@ def tell(self): class _ZipWriteFile(io.BufferedIOBase): - def __init__(self, zf, zinfo, zip64): + def __init__(self, zf, zinfo, zip64, *, precompressed=False): self._zinfo = zinfo self._zip64 = zip64 self._zipfile = zf - self._compressor = _get_compressor(zinfo.compress_type, - zinfo.compress_level) - self._file_size = 0 + if precompressed: + self._compute_crc = False # Precomputed in zinfo. + self._crc = zinfo.CRC + self._compute_file_size = False # Precomputed in zinfo. 
+ self._file_size = zinfo.file_size + self._compressor = None + else: + self._compute_crc = True + self._crc = 0 + self._compute_file_size = True + self._file_size = 0 + self._compressor = _get_compressor(zinfo.compress_type, + zinfo._compresslevel) self._compress_size = 0 - self._crc = 0 @property def _fileobj(self): @@ -1238,12 +1259,14 @@ def write(self, data): else: data = memoryview(data) nbytes = data.nbytes - self._file_size += nbytes + if self._compute_file_size: + self._file_size += nbytes - self._crc = crc32(data, self._crc) + if self._compute_crc: + self._crc = crc32(data, self._crc) if self._compressor: data = self._compressor.compress(data) - self._compress_size += len(data) + self._compress_size += len(data) self._fileobj.write(data) return nbytes @@ -1257,11 +1280,11 @@ def close(self): buf = self._compressor.flush() self._compress_size += len(buf) self._fileobj.write(buf) - self._zinfo.compress_size = self._compress_size - else: - self._zinfo.compress_size = self._file_size - self._zinfo.CRC = self._crc - self._zinfo.file_size = self._file_size + self._zinfo.compress_size = self._compress_size + if self._compute_crc: + self._zinfo.CRC = self._crc + if self._compute_file_size: + self._zinfo.file_size = self._file_size if not self._zip64: if self._file_size > ZIP64_LIMIT: @@ -1293,7 +1316,6 @@ def close(self): self._zipfile._writing = False - class ZipFile: """ Class with methods to open, read, write, close, list zip files. 
@@ -1653,7 +1675,7 @@ def _setup_for_writing(self, zinfo, is_zip64): self._writing = True - def _open_to_read(self, name, pwd=None): + def _open_to_read(self, name, pwd=None, *, decompress=True): if isinstance(name, ZipInfo): zinfo = name else: @@ -1706,7 +1728,7 @@ def _open_to_read(self, name, pwd=None): # check for encrypted flag & handle password is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED - if is_encrypted: + if is_encrypted and decompress: if not pwd: pwd = self.pwd if pwd and not isinstance(pwd, bytes): @@ -1717,11 +1739,21 @@ def _open_to_read(self, name, pwd=None): else: pwd = None - return ZipExtFile(zef_file, 'rb', zinfo, pwd, True) + return ZipExtFile(zef_file, 'rb', zinfo, pwd, True, decompress) except: zef_file.close() raise + def _open_to_write_precompressed(self, zinfo, force_zip64=False): + self._raise_if_cannot_write(force_zip64) + + zip64 = force_zip64 or (zinfo.compress_size > ZIP64_LIMIT) + if not self._allowZip64 and zip64: + raise LargeZipFile("Filesize would require ZIP64 extensions") + + self._setup_for_writing(zinfo, zip64) + return _ZipWriteFile(self, zinfo, zip64, precompressed=True) + def _open_to_write(self, name, force_zip64=False): self._raise_if_cannot_write(force_zip64) @@ -1873,6 +1905,25 @@ def _writecheck(self, zinfo): raise LargeZipFile(requires_zip64 + " would require ZIP64 extensions") + def _raise_if_archive_not_available_for_writing(self): + if not self.fp: + raise ValueError( + "Attempt to write to ZIP archive that was already closed") + if self._writing: + raise ValueError( + "Can't write to ZIP archive while an open writing handle exists" + ) + + def _write_precompressed(self, zinfo, file_contents): + self._raise_if_archive_not_available_for_writing() + + if zinfo.is_dir(): + self.mkdir(zinfo) + return + + with self._open_to_write_precompressed(zinfo) as destination: + shutil.copyfileobj(file_contents, destination, 1024*8) + def write(self, filename, arcname=None, compress_type=None, compresslevel=None): """Put the 
bytes from filename into the archive under the name @@ -1982,6 +2033,23 @@ def mkdir(self, zinfo_or_directory_name, mode=511): self.fp.write(zinfo.FileHeader(False)) self.start_dir = self.fp.tell() + def _raise_if_archive_not_in_writing_mode(self, caller_name): + if self.mode not in ('w', 'x', 'a'): + raise ValueError( + f"{caller_name}() requires mode 'w', 'x', or 'a', but " + f"mode is '{self.mode}'") + + def _copy_file(self, source_zipfile, source_zinfo): + with source_zipfile._open_to_read(source_zinfo, 'r', + decompress=False) as source_file_contents: + self._write_precompressed(source_zinfo, source_file_contents) + + def copy_file(self, source_zipfile, file): + self._raise_if_archive_not_in_writing_mode("copy_file") + with ZipFile(source_zipfile, 'r') as source: + source_zinfo = source.getinfo(file) + self._copy_file(source, source_zinfo) + def __del__(self): """Call the "close()" method in case the user forgot.""" self.close() From 7e9501e9856956a6671a6795f001ef2b00c77031 Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Fri, 18 Oct 2024 14:24:19 -0700 Subject: [PATCH 08/14] gh-113924: add method and tests to copy many files from ZipFile (#113924) --- Lib/test/test_zipfile/test_core.py | 32 ++++++++++++++++++++++++++++++ Lib/zipfile/__init__.py | 7 +++++++ 2 files changed, 39 insertions(+) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index d20b7d56adec73..49206203fa4106 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -2693,6 +2693,11 @@ def _test_copy_file(self, source_zipfile, source_filename): destination.copy_file(source_zipfile, source_filename) self.assertIdentical(TESTFN, source_zipfile) + def _test_copy_files(self, source_zipfile, source_filenames): + with zipfile.ZipFile(TESTFN, 'w') as destination: + destination.copy_files(source_zipfile, source_filenames) + self.assertIdentical(TESTFN, source_zipfile) + 
# A compressed file can be larger than its uncompressed form, # which are two different states we need to test @@ -2700,18 +2705,30 @@ def _test_copy_file(self, source_zipfile, source_filename): def test_copy_file__copy_one_small_file_to_new_ZipFile(self): self._test_copy_file(self.small_zip, self.small_file) + def test_copy_files__copy_one_small_file_to_new_ZipFile(self): + self._test_copy_files(self.small_zip, [self.small_file]) + # Copying tests with one large file def test_copy_file__copy_one_large_file_to_new_ZipFile(self): self._test_copy_file(self.large_zip, self.large_file) + def test_copy_files__copy_one_large_file_to_new_ZipFile(self): + self._test_copy_files(self.large_zip, [self.large_file]) + # Copying tests with empty directory def test_copy_file__copy_directory(self): self._test_copy_file(self.emtpy_dir_zip, self.emtpy_dir_name) + def test_copy_files__copy_directory(self): + self._test_copy_files(self.emtpy_dir_zip, [self.emtpy_dir_name]) + # Copying tests with encrypted file def test_copy_file__copy_encrypted_file(self): self._test_copy_file(self.encrypted_zip, self.zip1_filename) + def test_copy_files__copy_encrypted_file(self): + self._test_copy_files(self.encrypted_zip, [self.zip1_filename]) + # Copying tests with nonempty destination zipfile def _test_nonempty_zipfile(self, method_to_copy_large_file): with zipfile.ZipFile(TESTFN, 'w') as destination: @@ -2723,6 +2740,15 @@ def test_copy_file__copy_to_nonempty_ZipFile(self): copy_file = lambda x: x.copy_file(self.large_zip, self.large_file) self._test_nonempty_zipfile(copy_file) + def test_copy_files__copy_to_nonempty_ZipFile(self): + copy_files = lambda x: x.copy_files(self.large_zip, [self.large_file]) + self._test_nonempty_zipfile(copy_files) + + # Copying tests with two files + def test_copy_files__copy_two_files(self): + self._test_copy_files(self.small_large_zip, + [self.small_file, self.large_file]) + class StoredCopyFileTests(AbstractCopyFileTests, unittest.TestCase): compression = 
zipfile.ZIP_STORED @@ -2743,14 +2769,19 @@ def tearDownClass(cls): def copy_file(self, destination): destination.copy_file(self.small_zip, self.small_file) + def copy_files(self, destination): + destination.copy_files(self.small_zip, [self.small_file]) + def test_copy_methods_issue_exception_when_zipfile_not_open_for_write(self): with zipfile.ZipFile(self.exceptions_zip, 'r') as destination: self.assertRaises(ValueError, self.copy_file, destination) + self.assertRaises(ValueError, self.copy_files, destination) def test_copy_file__issues_exception_when_already_writing(self): with zipfile.ZipFile(self.exceptions_zip, 'w') as destination: with destination.open('foo', mode='w') as open_file: self.assertRaises(ValueError, self.copy_file, destination) + self.assertRaises(ValueError, self.copy_files, destination) def test_copy_file__issues_exception_when_file_closed(self): with zipfile.ZipFile(self.exceptions_zip, 'w') as destination: @@ -2758,6 +2789,7 @@ def test_copy_file__issues_exception_when_file_closed(self): destination.writestr("filename.txt", "file contents") self.assertRaises(ValueError, self.copy_file, destination) + self.assertRaises(ValueError, self.copy_files, destination) @requires_zlib() class DeflateCopyFileTests(AbstractCopyFileTests, unittest.TestCase): diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index dd5a8af7910602..5c2e82b546c71d 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -2050,6 +2050,13 @@ def copy_file(self, source_zipfile, file): source_zinfo = source.getinfo(file) self._copy_file(source, source_zinfo) + def copy_files(self, source_zipfile, files): + self._raise_if_archive_not_in_writing_mode("copy_files") + with ZipFile(source_zipfile, 'r') as source: + for file in files: + source_zinfo = source.getinfo(file) + self._copy_file(source, source_zinfo) + def __del__(self): """Call the "close()" method in case the user forgot.""" self.close() From ea8e00f05ab0b8abfa9b5f9c427a8347a2d25a1f Mon Sep 17 
00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Fri, 18 Oct 2024 14:28:16 -0700 Subject: [PATCH 09/14] gh-113924: add method and tests to copy all files from ZipFile (#113924) --- Lib/test/test_zipfile/test_core.py | 30 ++++++++++++++++++++++++++++++ Lib/zipfile/__init__.py | 7 +++++++ 2 files changed, 37 insertions(+) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 49206203fa4106..0127dbc302b8e9 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -2698,6 +2698,11 @@ def _test_copy_files(self, source_zipfile, source_filenames): destination.copy_files(source_zipfile, source_filenames) self.assertIdentical(TESTFN, source_zipfile) + def _test_copy_all_files(self, source_zipfile): + with zipfile.ZipFile(TESTFN, 'w') as destination: + destination.copy_all_files(source_zipfile) + self.assertIdentical(TESTFN, source_zipfile) + # A compressed file can be larger than its uncompressed form, # which are two different states we need to test @@ -2708,6 +2713,9 @@ def test_copy_file__copy_one_small_file_to_new_ZipFile(self): def test_copy_files__copy_one_small_file_to_new_ZipFile(self): self._test_copy_files(self.small_zip, [self.small_file]) + def test_copy_all_files__copy_one_small_file_to_new_ZipFile(self): + self._test_copy_all_files(self.small_zip) + # Copying tests with one large file def test_copy_file__copy_one_large_file_to_new_ZipFile(self): self._test_copy_file(self.large_zip, self.large_file) @@ -2715,6 +2723,9 @@ def test_copy_file__copy_one_large_file_to_new_ZipFile(self): def test_copy_files__copy_one_large_file_to_new_ZipFile(self): self._test_copy_files(self.large_zip, [self.large_file]) + def test_copy_all_files__copy_one_large_file_to_new_ZipFile(self): + self._test_copy_all_files(self.large_zip) + # Copying tests with empty directory def test_copy_file__copy_directory(self): self._test_copy_file(self.emtpy_dir_zip, 
self.emtpy_dir_name) @@ -2722,6 +2733,9 @@ def test_copy_file__copy_directory(self): def test_copy_files__copy_directory(self): self._test_copy_files(self.emtpy_dir_zip, [self.emtpy_dir_name]) + def test_copy_all_files__copy_directory(self): + self._test_copy_all_files(self.emtpy_dir_zip) + # Copying tests with encrypted file def test_copy_file__copy_encrypted_file(self): self._test_copy_file(self.encrypted_zip, self.zip1_filename) @@ -2729,6 +2743,9 @@ def test_copy_file__copy_encrypted_file(self): def test_copy_files__copy_encrypted_file(self): self._test_copy_files(self.encrypted_zip, [self.zip1_filename]) + def test_copy_all_files__copy_encrypted_file(self): + self._test_copy_all_files(self.encrypted_zip) + # Copying tests with nonempty destination zipfile def _test_nonempty_zipfile(self, method_to_copy_large_file): with zipfile.ZipFile(TESTFN, 'w') as destination: @@ -2744,11 +2761,18 @@ def test_copy_files__copy_to_nonempty_ZipFile(self): copy_files = lambda x: x.copy_files(self.large_zip, [self.large_file]) self._test_nonempty_zipfile(copy_files) + def test_copy_all_files__copy_to_nonempty_ZipFile(self): + copy_all_files = lambda x: x.copy_all_files(self.large_zip) + self._test_nonempty_zipfile(copy_all_files) + # Copying tests with two files def test_copy_files__copy_two_files(self): self._test_copy_files(self.small_large_zip, [self.small_file, self.large_file]) + def test_copy_all_files__copy_two_files(self): + self._test_copy_all_files(self.small_large_zip) + class StoredCopyFileTests(AbstractCopyFileTests, unittest.TestCase): compression = zipfile.ZIP_STORED @@ -2772,16 +2796,21 @@ def copy_file(self, destination): def copy_files(self, destination): destination.copy_files(self.small_zip, [self.small_file]) + def copy_all_files(self, destination): + destination.copy_all_files(self.small_zip) + def test_copy_methods_issue_exception_when_zipfile_not_open_for_write(self): with zipfile.ZipFile(self.exceptions_zip, 'r') as destination: 
self.assertRaises(ValueError, self.copy_file, destination) self.assertRaises(ValueError, self.copy_files, destination) + self.assertRaises(ValueError, self.copy_all_files, destination) def test_copy_file__issues_exception_when_already_writing(self): with zipfile.ZipFile(self.exceptions_zip, 'w') as destination: with destination.open('foo', mode='w') as open_file: self.assertRaises(ValueError, self.copy_file, destination) self.assertRaises(ValueError, self.copy_files, destination) + self.assertRaises(ValueError, self.copy_all_files, destination) def test_copy_file__issues_exception_when_file_closed(self): with zipfile.ZipFile(self.exceptions_zip, 'w') as destination: @@ -2790,6 +2819,7 @@ def test_copy_file__issues_exception_when_file_closed(self): self.assertRaises(ValueError, self.copy_file, destination) self.assertRaises(ValueError, self.copy_files, destination) + self.assertRaises(ValueError, self.copy_all_files, destination) @requires_zlib() class DeflateCopyFileTests(AbstractCopyFileTests, unittest.TestCase): diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 5c2e82b546c71d..2a1274fb3a459a 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -2057,6 +2057,13 @@ def copy_files(self, source_zipfile, files): source_zinfo = source.getinfo(file) self._copy_file(source, source_zinfo) + def copy_all_files(self, source_zipfile): + self._raise_if_archive_not_in_writing_mode("copy_all_files") + with ZipFile(source_zipfile, 'r') as source: + source_zinfos = source.infolist() + for source_zinfo in source_zinfos: + self._copy_file(source, source_zinfo) + def __del__(self): """Call the "close()" method in case the user forgot.""" self.close() From 6c5cb7ee57336c4503a69189086e300437396168 Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Fri, 18 Oct 2024 20:27:10 -0700 Subject: [PATCH 10/14] add entry to Misc/NEWS.d --- .../Library/2024-10-18-20-26-01.gh-issue-113924.ObB1IG.rst 
| 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-10-18-20-26-01.gh-issue-113924.ObB1IG.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-18-20-26-01.gh-issue-113924.ObB1IG.rst b/Misc/NEWS.d/next/Library/2024-10-18-20-26-01.gh-issue-113924.ObB1IG.rst new file mode 100644 index 00000000000000..f6e35f239f6bfe --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-18-20-26-01.gh-issue-113924.ObB1IG.rst @@ -0,0 +1,3 @@ +Add ``copy_file``, ``copy_files``, and ``copy_all_files`` to the ZipFile +class. These methods allow copying from a zipfile without the overhead of +decompressing and recompressing the data. From 2da2c4bc6d6b5a7609b7b64e15d44300b8acda44 Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Mon, 21 Oct 2024 00:10:19 -0700 Subject: [PATCH 11/14] remove copy_file and copy_all_files and repurpose copy_files --- Lib/test/test_zipfile/test_core.py | 106 ++++++++++------------------- Lib/zipfile/__init__.py | 27 +++----- 2 files changed, 47 insertions(+), 86 deletions(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 0127dbc302b8e9..1c639c9323e9de 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -2688,90 +2688,62 @@ def tearDownClass(cls): def assertIdentical(self, file1, file2): self.assertTrue(filecmp.cmp(file1, file2, shallow=False)) - def _test_copy_file(self, source_zipfile, source_filename): + def _test_copy_files(self, source_zipfile, filenames=None): with zipfile.ZipFile(TESTFN, 'w') as destination: - destination.copy_file(source_zipfile, source_filename) - self.assertIdentical(TESTFN, source_zipfile) - - def _test_copy_files(self, source_zipfile, source_filenames): - with zipfile.ZipFile(TESTFN, 'w') as destination: - destination.copy_files(source_zipfile, source_filenames) - self.assertIdentical(TESTFN, source_zipfile) - - def _test_copy_all_files(self, source_zipfile): 
- with zipfile.ZipFile(TESTFN, 'w') as destination: - destination.copy_all_files(source_zipfile) + destination.copy_files(source_zipfile, filenames) self.assertIdentical(TESTFN, source_zipfile) # A compressed file can be larger than its uncompressed form, # which are two different states we need to test # Copying tests with one small file - def test_copy_file__copy_one_small_file_to_new_ZipFile(self): - self._test_copy_file(self.small_zip, self.small_file) - - def test_copy_files__copy_one_small_file_to_new_ZipFile(self): + def test_copy_one_small_file_via_iterable(self): self._test_copy_files(self.small_zip, [self.small_file]) - def test_copy_all_files__copy_one_small_file_to_new_ZipFile(self): - self._test_copy_all_files(self.small_zip) + def test_copy_one_small_file_via_whole_zipfile(self): + self._test_copy_files(self.small_zip) # Copying tests with one large file - def test_copy_file__copy_one_large_file_to_new_ZipFile(self): - self._test_copy_file(self.large_zip, self.large_file) - - def test_copy_files__copy_one_large_file_to_new_ZipFile(self): + def test_copy_one_large_file_via_iterable(self): self._test_copy_files(self.large_zip, [self.large_file]) - def test_copy_all_files__copy_one_large_file_to_new_ZipFile(self): - self._test_copy_all_files(self.large_zip) + def test_copy_one_large_file_via_whole_zipfile(self): + self._test_copy_files(self.large_zip) # Copying tests with empty directory - def test_copy_file__copy_directory(self): - self._test_copy_file(self.emtpy_dir_zip, self.emtpy_dir_name) - - def test_copy_files__copy_directory(self): + def test_copy_directory_via_iterable(self): self._test_copy_files(self.emtpy_dir_zip, [self.emtpy_dir_name]) - def test_copy_all_files__copy_directory(self): - self._test_copy_all_files(self.emtpy_dir_zip) + def test_copy_directory_via_whole_zipfile(self): + self._test_copy_files(self.emtpy_dir_zip) # Copying tests with encrypted file - def test_copy_file__copy_encrypted_file(self): - 
self._test_copy_file(self.encrypted_zip, self.zip1_filename) - - def test_copy_files__copy_encrypted_file(self): + def test_copy_encrypted_file_via_iterable(self): self._test_copy_files(self.encrypted_zip, [self.zip1_filename]) - def test_copy_all_files__copy_encrypted_file(self): - self._test_copy_all_files(self.encrypted_zip) + def test_copy_encrypted_file_via_whole_zipfile(self): + self._test_copy_files(self.encrypted_zip) # Copying tests with nonempty destination zipfile - def _test_nonempty_zipfile(self, method_to_copy_large_file): + def _test_nonempty_zipfile(self, source_zipfile, filenames=None): with zipfile.ZipFile(TESTFN, 'w') as destination: self.write_small_file(destination) - method_to_copy_large_file(destination) + destination.copy_files(source_zipfile, filenames) self.assertIdentical(TESTFN, self.small_large_zip) - def test_copy_file__copy_to_nonempty_ZipFile(self): - copy_file = lambda x: x.copy_file(self.large_zip, self.large_file) - self._test_nonempty_zipfile(copy_file) - - def test_copy_files__copy_to_nonempty_ZipFile(self): - copy_files = lambda x: x.copy_files(self.large_zip, [self.large_file]) - self._test_nonempty_zipfile(copy_files) + def test_copy_to_nonempty_zipfile_via_iterable(self): + self._test_nonempty_zipfile(self.large_zip, [self.large_file]) - def test_copy_all_files__copy_to_nonempty_ZipFile(self): - copy_all_files = lambda x: x.copy_all_files(self.large_zip) - self._test_nonempty_zipfile(copy_all_files) + def test_copy_to_nonempty_zipfile_via_whole_zipfile(self): + self._test_nonempty_zipfile(self.large_zip) # Copying tests with two files - def test_copy_files__copy_two_files(self): + def test_copy_two_files_via_iterable(self): self._test_copy_files(self.small_large_zip, [self.small_file, self.large_file]) - def test_copy_all_files__copy_two_files(self): - self._test_copy_all_files(self.small_large_zip) + def test_copy_two_files_via_whole_zipfile(self): + self._test_copy_files(self.small_large_zip) class 
StoredCopyFileTests(AbstractCopyFileTests, unittest.TestCase): compression = zipfile.ZIP_STORED @@ -2790,36 +2762,30 @@ def tearDownClass(cls): super().tearDownClass() unlink(cls.exceptions_zip) - def copy_file(self, destination): - destination.copy_file(self.small_zip, self.small_file) - - def copy_files(self, destination): + def copy_via_iterable(self, destination): destination.copy_files(self.small_zip, [self.small_file]) - def copy_all_files(self, destination): - destination.copy_all_files(self.small_zip) + def copy_entire_zipfile(self, destination): + destination.copy_files(self.small_zip) def test_copy_methods_issue_exception_when_zipfile_not_open_for_write(self): with zipfile.ZipFile(self.exceptions_zip, 'r') as destination: - self.assertRaises(ValueError, self.copy_file, destination) - self.assertRaises(ValueError, self.copy_files, destination) - self.assertRaises(ValueError, self.copy_all_files, destination) + self.assertRaises(ValueError, self.copy_via_iterable, destination) + self.assertRaises(ValueError, self.copy_entire_zipfile, destination) def test_copy_file__issues_exception_when_already_writing(self): with zipfile.ZipFile(self.exceptions_zip, 'w') as destination: with destination.open('foo', mode='w') as open_file: - self.assertRaises(ValueError, self.copy_file, destination) - self.assertRaises(ValueError, self.copy_files, destination) - self.assertRaises(ValueError, self.copy_all_files, destination) + self.assertRaises(ValueError, self.copy_via_iterable, + destination) + self.assertRaises(ValueError, self.copy_entire_zipfile, + destination) def test_copy_file__issues_exception_when_file_closed(self): - with zipfile.ZipFile(self.exceptions_zip, 'w') as destination: - # write a file to create the zipfile - destination.writestr("filename.txt", "file contents") - - self.assertRaises(ValueError, self.copy_file, destination) - self.assertRaises(ValueError, self.copy_files, destination) - self.assertRaises(ValueError, self.copy_all_files, destination) + 
destination = zipfile.ZipFile(self.exceptions_zip, 'w') + destination.close() + self.assertRaises(ValueError, self.copy_via_iterable, destination) + self.assertRaises(ValueError, self.copy_entire_zipfile, destination) @requires_zlib() class DeflateCopyFileTests(AbstractCopyFileTests, unittest.TestCase): diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 2a1274fb3a459a..441b35d775e2a6 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -2044,25 +2044,20 @@ def _copy_file(self, source_zipfile, source_zinfo): decompress=False) as source_file_contents: self._write_precompressed(source_zinfo, source_file_contents) - def copy_file(self, source_zipfile, file): - self._raise_if_archive_not_in_writing_mode("copy_file") - with ZipFile(source_zipfile, 'r') as source: - source_zinfo = source.getinfo(file) - self._copy_file(source, source_zinfo) + def copy_files(self, source_zipfile, files=None): + if self.mode not in ('w', 'x', 'a'): + raise ValueError( + "copy_files() requires mode 'w', 'x', or 'a', but " + f"mode is '{self.mode}'") - def copy_files(self, source_zipfile, files): - self._raise_if_archive_not_in_writing_mode("copy_files") with ZipFile(source_zipfile, 'r') as source: - for file in files: - source_zinfo = source.getinfo(file) - self._copy_file(source, source_zinfo) + if files is None: + zinfos = source.infolist() + else: + zinfos = [source.getinfo(file) for file in files] - def copy_all_files(self, source_zipfile): - self._raise_if_archive_not_in_writing_mode("copy_all_files") - with ZipFile(source_zipfile, 'r') as source: - source_zinfos = source.infolist() - for source_zinfo in source_zinfos: - self._copy_file(source, source_zinfo) + for zinfo in zinfos: + self._copy_file(source, zinfo) def __del__(self): """Call the "close()" method in case the user forgot.""" From 871f6410bad9dce8a2efc18a8c26dbde39145be8 Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Mon, 21 
Oct 2024 00:11:00 -0700 Subject: [PATCH 12/14] update NEWS.d --- .../Library/2024-10-18-20-26-01.gh-issue-113924.ObB1IG.rst | 3 --- .../Library/2024-10-21-00-08-11.gh-issue-113924.IuVDzq.rst | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2024-10-18-20-26-01.gh-issue-113924.ObB1IG.rst create mode 100644 Misc/NEWS.d/next/Library/2024-10-21-00-08-11.gh-issue-113924.IuVDzq.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-18-20-26-01.gh-issue-113924.ObB1IG.rst b/Misc/NEWS.d/next/Library/2024-10-18-20-26-01.gh-issue-113924.ObB1IG.rst deleted file mode 100644 index f6e35f239f6bfe..00000000000000 --- a/Misc/NEWS.d/next/Library/2024-10-18-20-26-01.gh-issue-113924.ObB1IG.rst +++ /dev/null @@ -1,3 +0,0 @@ -Add ``copy_file``, ``copy_files``, and ``copy_all_files`` to the ZipFile -class. These methods allow copying from a zipfile without the overhead of -decompressing and recompressing the data. diff --git a/Misc/NEWS.d/next/Library/2024-10-21-00-08-11.gh-issue-113924.IuVDzq.rst b/Misc/NEWS.d/next/Library/2024-10-21-00-08-11.gh-issue-113924.IuVDzq.rst new file mode 100644 index 00000000000000..0b657c30c53e75 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-21-00-08-11.gh-issue-113924.IuVDzq.rst @@ -0,0 +1 @@ +Add :meth:`zipfile.ZipFile.copy_files`, which copies files from another ZIP archive without the overhead of decompressing and recompressing the data.
From 1a58aac4396347d308f3826d3c85a81e1bf75a6d Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Mon, 21 Oct 2024 00:19:07 -0700 Subject: [PATCH 13/14] add documentation for copy_files --- Lib/zipfile/__init__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 441b35d775e2a6..ee8515e87db436 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -2045,6 +2045,17 @@ def _copy_file(self, source_zipfile, source_zinfo): self._write_precompressed(source_zinfo, source_file_contents) def copy_files(self, source_zipfile, files=None): + """ + Copy files from source_zipfile into this ZIP archive. This method + copies the files as is, avoiding the overhead of decompressing and + recompressing the data. + + source_zipfile is the path of the ZIP archive to copy from. + + files is an iterable of strings, where each string is a file name found + within source_zipfile. If files=None, all files from source_zipfile + are copied.
+ """ if self.mode not in ('w', 'x', 'a'): raise ValueError( "copy_files() requires mode 'w', 'x', or 'a', but " From 86a0b825f8013a6b34ae3f2baf51f6117b8aa9cb Mon Sep 17 00:00:00 2001 From: sunrisesarsaparilla <179303609+sunrisesarsaparilla@users.noreply.github.com> Date: Mon, 21 Oct 2024 04:13:14 -0700 Subject: [PATCH 14/14] remove unused method --- Lib/zipfile/__init__.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index ee8515e87db436..3a9ef31e436a71 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -2033,12 +2033,6 @@ def mkdir(self, zinfo_or_directory_name, mode=511): self.fp.write(zinfo.FileHeader(False)) self.start_dir = self.fp.tell() - def _raise_if_archive_not_in_writing_mode(self, caller_name): - if self.mode not in ('w', 'x', 'a'): - raise ValueError( - f"{caller_name}() requires mode 'w', 'x', or 'a', but " - f"mode is '{self.mode}'") - def _copy_file(self, source_zipfile, source_zinfo): with source_zipfile._open_to_read(source_zinfo, 'r', decompress=False) as source_file_contents: