diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index a1261ec471c92e..1d803b7b1fc046 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -171,7 +171,8 @@ ZipFile Objects .. class:: ZipFile(file, mode='r', compression=ZIP_STORED, allowZip64=True, \ compresslevel=None, *, strict_timestamps=True, \ - metadata_encoding=None) + metadata_encoding=None, \ + zipinfo_class=ZipInfo, zipextfile_class=ZipExtFile) Open a ZIP file, where *file* can be a path to a file (a string), a file-like object or a :term:`path-like object`. @@ -228,6 +229,9 @@ ZipFile Objects :meth:`closed ` without adding any files to the archive, the appropriate ZIP structures for an empty archive will be written to the file. + The *zipinfo_class* and *zipextfile_class* arguments can be used to replace + the default :class:`ZipInfo` and :class:`!ZipExtFile` classes with different ones. + ZipFile is also a context manager and therefore supports the :keyword:`with` statement. In the example, *myzip* is closed after the :keyword:`!with` statement's suite is finished---even if an exception occurs:: @@ -278,6 +282,9 @@ ZipFile Objects Added support for specifying member name encoding for reading metadata in the zipfile's directory and file headers. + .. versionchanged:: next + Added the *zipinfo_class* and *zipextfile_class* parameters. + .. method:: ZipFile.close() diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 9f01b52f1aff3b..eb919e82d05d22 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -372,6 +372,14 @@ unittest (Contributed by Garry Cairns in :gh:`134567`.) +zipfile +------- + +* :class:`zipfile.ZipFile` now accepts the keyword-only arguments *zipinfo_class* + and *zipextfile_class* to make it easier to subclass and extend. + (Contributed by Adi Roiban in :gh:`81719`.) 
+ + zlib ---- diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index c033059a515db6..f409d96c42f5dc 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -15,7 +15,7 @@ import unittest.mock as mock import zipfile - +from pathlib import Path from tempfile import TemporaryFile from random import randint, random, randbytes @@ -675,6 +675,112 @@ def test_add_file_after_2107(self): zinfo = zipfp.getinfo(TESTFN) self.assertEqual(zinfo.date_time, (2107, 12, 31, 23, 59, 59)) + class CustomZipInfo(zipfile.ZipInfo): + pass + + class CustomZipExtFile(zipfile.ZipExtFile): + pass + + def test_read_custom_zipinfo_and_zipextfile(self): + """ + A subclass of ZipFile can be implemented to read and handle the + archive content using custom ZipInfo and ZipExtFile implementations. + """ + # Create the file using the default Zipfile. + source = io.BytesIO() + with zipfile.ZipFile(source, 'w', zipfile.ZIP_STORED) as zipfp: + zipfp.writestr('test.txt', 'some-text-content') + source.seek(0) + + with zipfile.ZipFile( + source, 'r', + zipinfo_class=self.CustomZipInfo, + zipextfile_class=self.CustomZipExtFile, + ) as zipfp: + # Archive content returns the custom ZipInfo + members = zipfp.infolist() + self.assertEqual(1, len(members)) + self.assertIsInstance(members[0], self.CustomZipInfo) + + # Archive members can be opened using the custom ZipInfo + target_member = members[0] + with zipfp.open(target_member, mode='r') as memberfp: + self.assertIsInstance(memberfp, self.CustomZipExtFile) + self.assertEqual(b'some-text-content', memberfp.read()) + + def test_write_custom_zipinfo(self): + """ + A subclass of ZipFile can be implemented to write and handle the + archive content using custom ZipInfo implementation. + """ + destination = io.BytesIO() + with zipfile.ZipFile( + destination, 'w', zipinfo_class=self.CustomZipInfo) as zipfp: + # It can write using the specific custom class. 
+ new_member = self.CustomZipInfo('new-member.txt') + with zipfp.open(new_member, mode='w') as memberfp: + self.assertIs(new_member, memberfp._zinfo) + + # When creating a new member using just the name, + # the custom ZipInfo is used internally. + with zipfp.open('other-member.txt', mode='w') as memberfp: + memberfp.write(b'some-content') + self.assertIsInstance( + zipfp.NameToInfo['other-member.txt'], self.CustomZipInfo) + + # ZipFile.writestr can handle the custom class or just the + # archive name as text. + custom_member = self.CustomZipInfo('some-member.txt') + zipfp.writestr(custom_member, b'some-new-content') + zipfp.writestr('some-name.txt', b'other-content') + self.assertIsInstance( + zipfp.NameToInfo['some-name.txt'], self.CustomZipInfo) + + # ZipFile.mkdir can handle the custom class or just text. + custom_dir = self.CustomZipInfo('some-directory/') + custom_dir.CRC = 0 + zipfp.mkdir(custom_dir) + zipfp.mkdir('dir-as-text/') + self.assertIsInstance( + zipfp.NameToInfo['dir-as-text/'], self.CustomZipInfo) + + # When writing from an external file, the file is created using + # the custom ZipInfo + with temp_dir() as source_dir: + source_file = Path(source_dir) / 'source.txt' + with open(source_file, 'wb') as fp: + fp.write(b'some-content') + zipfp.write(source_file, arcname='newly-file.txt') + self.assertIsInstance( + zipfp.NameToInfo['newly-file.txt'], self.CustomZipInfo) + + def test_extract_custom_zipinfo(self): + """ + A subclass of ZipFile can be implemented to extract the + archive content using a custom ZipInfo implementation. 
+ """ + + destination = io.BytesIO() + with zipfile.ZipFile(destination, 'w') as zipfp: + zipfp.mkdir('dir-as-text/') + zipfp.writestr('test.txt', b'new file content') + + destination.seek(0) + with zipfile.ZipFile( + destination, 'r', zipinfo_class=self.CustomZipInfo) as zipfp: + with temp_dir() as extract_dir: + expected_dir = Path(extract_dir) / 'dir-as-text' + expected_file = Path(extract_dir) / 'test.txt' + + # Check extracting using custom ZipInfo + dir_info = zipfp.NameToInfo['dir-as-text/'] + #zipfp.extract(dir_info, path=extract_dir) + #self.assertTrue(expected_dir.is_dir()) + # Check extracting using file name. + zipfp.extract('test.txt', path=extract_dir) + with expected_file.open('rb') as fp: + self.assertEqual(b'new file content', fp.read()) + @requires_zlib() class DeflateTestsWithSourceFile(AbstractTestsWithSourceFile, diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 2969f735e8abb9..e5b04795e61d9f 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -869,6 +869,10 @@ def _get_decompressor(compress_type): class _SharedFile: + """ + Protect an already opened member of the archive from being read or written + at the same time. + """ def __init__(self, file, pos, close, lock, writing): self._file = file self._pos = pos @@ -1372,7 +1376,7 @@ class ZipFile: """ Class with methods to open, read, write, close, list zip files. z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, - compresslevel=None) + compresslevel=None, zipinfo_class=ZipInfo, zipextfile_class=ZipExtFile) file: Either the path to the file, or a file-like object. If it is a path, the file will be opened and closed by ZipFile. @@ -1392,21 +1396,30 @@ class ZipFile: When using ZIP_ZSTANDARD integers -7 though 22 are common, see the CompressionParameter enum in compression.zstd for details. - + zipinfo_class: A class that can replace ZipInfo. This is designed to help + extend ZipFile. 
+ For example, to implement other encryption or compression + methods. + zipextfile_class: A class that can replace ZipExtFile. This is designed to + help extend ZipFile. + For example to implement other encryption or compression + methods. """ fp = None # Set here since __del__ checks it _windows_illegal_name_trans_table = None def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, - compresslevel=None, *, strict_timestamps=True, metadata_encoding=None): + compresslevel=None, *, strict_timestamps=True, metadata_encoding=None, + zipinfo_class=ZipInfo, zipextfile_class=ZipExtFile): """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', or append 'a'.""" if mode not in ('r', 'w', 'x', 'a'): raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") _check_compression(compression) - + self._ZipInfo = zipinfo_class + self._ZipExtFile = zipextfile_class self._allowZip64 = allowZip64 self._didModify = False self.debug = 0 # Level of printing: 0 through 3 @@ -1558,7 +1571,7 @@ def _RealGetContents(self): # Historical ZIP filename encoding filename = filename.decode(self.metadata_encoding or 'cp437') # Create ZipInfo instance to store file information - x = ZipInfo(filename) + x = self._ZipInfo(filename) x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] @@ -1693,11 +1706,11 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): "Attempt to use ZIP archive that was already closed") # Make sure we have an info object - if isinstance(name, ZipInfo): + if isinstance(name, self._ZipInfo): # 'name' is already an info object zinfo = name elif mode == 'w': - zinfo = ZipInfo(name) + zinfo = self._ZipInfo(name) zinfo.compress_type = self.compression zinfo.compress_level = self.compresslevel else: @@ -1774,7 +1787,7 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): else: pwd = None - return ZipExtFile(zef_file, 
mode + 'b', zinfo, pwd, True) + return self._ZipExtFile(zef_file, mode + 'b', zinfo, pwd, True) except: zef_file.close() raise @@ -1872,7 +1885,7 @@ def _extract_member(self, member, targetpath, pwd): """Extract the ZipInfo object 'member' to a physical file on the path targetpath. """ - if not isinstance(member, ZipInfo): + if not isinstance(member, self._ZipInfo): member = self.getinfo(member) # build the destination pathname, replacing @@ -1952,7 +1965,7 @@ def write(self, filename, arcname=None, "Can't write to ZIP archive while an open writing handle exists" ) - zinfo = ZipInfo.from_file(filename, arcname, + zinfo = self._ZipInfo.from_file(filename, arcname, strict_timestamps=self._strict_timestamps) if zinfo.is_dir(): @@ -1982,10 +1995,10 @@ def writestr(self, zinfo_or_arcname, data, the name of the file in the archive.""" if isinstance(data, str): data = data.encode("utf-8") - if isinstance(zinfo_or_arcname, ZipInfo): + if isinstance(zinfo_or_arcname, self._ZipInfo): zinfo = zinfo_or_arcname else: - zinfo = ZipInfo(zinfo_or_arcname)._for_archive(self) + zinfo = self._ZipInfo(zinfo_or_arcname)._for_archive(self) if not self.fp: raise ValueError( @@ -2008,7 +2021,7 @@ def writestr(self, zinfo_or_arcname, data, def mkdir(self, zinfo_or_directory_name, mode=511): """Creates a directory inside the zip archive.""" - if isinstance(zinfo_or_directory_name, ZipInfo): + if isinstance(zinfo_or_directory_name, self._ZipInfo): zinfo = zinfo_or_directory_name if not zinfo.is_dir(): raise ValueError("The given ZipInfo does not describe a directory") @@ -2016,7 +2029,7 @@ def mkdir(self, zinfo_or_directory_name, mode=511): directory_name = zinfo_or_directory_name if not directory_name.endswith("/"): directory_name += "/" - zinfo = ZipInfo(directory_name) + zinfo = self._ZipInfo(directory_name) zinfo.compress_size = 0 zinfo.CRC = 0 zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16 diff --git a/Misc/NEWS.d/next/Library/2025-07-25-14-27-29.gh-issue-81719.hWp7Mn.rst 
b/Misc/NEWS.d/next/Library/2025-07-25-14-27-29.gh-issue-81719.hWp7Mn.rst new file mode 100644 index 00000000000000..b79aafba6a06c8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-25-14-27-29.gh-issue-81719.hWp7Mn.rst @@ -0,0 +1 @@ +:class:`zipfile.ZipFile` now accepts the keyword-only arguments *zipinfo_class* and *zipextfile_class* to make it easier to subclass and extend it.