Skip to content
Open
9 changes: 9 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,15 @@ unittest
(Contributed by Garry Cairns in :gh:`134567`.)


zipfile
-------

* :class:`zipfile.ZipFile` now has the ``zipinfo_class``
and ``zipextfile_class`` init arguments to make it easier to subclass and
extend it.
(Contributed by Adi Roiban in :gh:`81719`.)


zlib
----

Expand Down
102 changes: 102 additions & 0 deletions Lib/test/test_zipfile/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import io
import itertools
import os
import pathlib
import posixpath
import stat
import struct
Expand Down Expand Up @@ -485,6 +486,9 @@ def tearDown(self):

class StoredTestsWithSourceFile(AbstractTestsWithSourceFile,
unittest.TestCase):
"""
Test in which the files inside the archive are not compressed.
"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This docstring isn't needed.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I have removed it. No problem.

I had to read the other tests to decide where I should add the new tests and I thought that this comment can help.

compression = zipfile.ZIP_STORED
test_low_compression = None

Expand Down Expand Up @@ -676,6 +680,104 @@ def test_add_file_after_2107(self):
self.assertEqual(zinfo.date_time, (2107, 12, 31, 23, 59, 59))


class CustomZipInfo(zipfile.ZipInfo):
"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here; please just avoid adding docstrings. This is all private code and keeping them up to date tends to be extra maintenance that we don't want.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is to document why this class was used and to help decide whether it can be reused in another test.

I have removed the docstring.

Support for testing extending and subclassing ZipFile.
"""

class CustomZipExtFile(zipfile.ZipExtFile):
"""
Support for testing extending and subclassing ZipFile.
"""

def test_read_custom_zipinfo_and_zipextfile(self):
"""
A ZipFile opened for reading with ``zipinfo_class`` and
``zipextfile_class`` describes its members with the custom ZipInfo
and opens them as the custom ZipExtFile.
"""
# Create the archive in memory using the default ZipFile classes.
source = io.BytesIO()
with zipfile.ZipFile(source, 'w', zipfile.ZIP_STORED) as zipfp:
zipfp.writestr('test.txt', 'some-text-content')

# Re-open the same buffer for reading, this time passing the custom classes.
with zipfile.ZipFile(
source, 'r',
zipinfo_class=self.CustomZipInfo,
zipextfile_class=self.CustomZipExtFile,
) as zipfp:
# infolist() returns instances of the custom ZipInfo.
members = zipfp.infolist()
self.assertEqual(1, len(members))
self.assertIsInstance(members[0], self.CustomZipInfo)

# Opening a member via its custom ZipInfo yields the custom ZipExtFile.
target_member = members[0]
with zipfp.open(target_member, mode='r') as memberfp:
self.assertIsInstance(memberfp, self.CustomZipExtFile)

def test_write_custom_zipinfo(self):
"""
A subclass of ZipFile can be implemented to write and handle the
archive content using custom ZipInfo implementation.
"""
destination = io.BytesIO()
with zipfile.ZipFile(
destination, 'w', zipinfo_class=self.CustomZipInfo) as zipfp:
# It can write using the specific custom class.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo:

Suggested change
# It can write using the specific custom classe.
# It can write using the specific custom class.

new_member = self.CustomZipInfo('new-member.txt')
with zipfp.open(new_member, mode='w') as memberfp:
self.assertIs(new_member, memberfp._zinfo)

# When creating a new member using just the name,
# the custom ZipInfo is used internally.
with zipfp.open('other-member.txt', mode='w') as memberfp:
self.assertIsInstance(memberfp._zinfo, self.CustomZipInfo)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please avoid using private members in tests. If we change how they work, it adds additional maintenance.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Makes sense.

self.assertIsInstance(
zipfp.NameToInfo['other-member.txt'], self.CustomZipInfo)

# ZipFile.writestr can handle the custom class or just the
# archive name as text.
custom_member = self.CustomZipInfo('some-member.txt')
zipfp.writestr(custom_member, b'some-new-content')
zipfp.writestr('some-name.txt', b'other-content')
self.assertIsInstance(
zipfp.NameToInfo['some-name.txt'], self.CustomZipInfo)

# ZipFile.mkdir can handle the custom class or just text.
custom_dir = self.CustomZipInfo('some-directory/')
custom_dir.CRC = 0
zipfp.mkdir(custom_dir)
zipfp.mkdir('dir-as-text/')
self.assertIsInstance(
zipfp.NameToInfo['dir-as-text/'], self.CustomZipInfo)

# When writing from an external file, the file is created using
# the custom ZipInfo
with temp_dir() as source_dir:
source_file = pathlib.Path(source_dir).joinpath('source.txt')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should be able to just use Path(source_dir) / 'source.txt' here.

with open(source_file, 'wb') as fp:
fp.write(b'some-content')
zipfp.write(source_file, arcname='newly-file.txt')
self.assertIsInstance(
zipfp.NameToInfo['newly-file.txt'], self.CustomZipInfo)

def test_extract_custom_zipinfo(self):
"""
A subclass of ZipFile can be implemented to extract the
archive content using a custom ZipInfo implementation.
"""
destination = io.BytesIO()
with zipfile.ZipFile(
destination, 'w', zipinfo_class=self.CustomZipInfo) as zipfp:
zipfp.mkdir('dir-as-text/')
dir_info = zipfp.NameToInfo['dir-as-text/']
self.assertIsInstance(dir_info, self.CustomZipInfo)

with temp_dir() as extract_dir:
zipfp.extract(dir_info, path=extract_dir)
zipfp.extract('dir-as-text/', path=extract_dir)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this part of the test is necessary. We aren't stressing anything on the custom classes.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to test this change

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.
        """
-        if not isinstance(member, ZipInfo):
+        if not isinstance(member, self._ZipInfo):
            member = self.getinfo(member)

There is not much public API to check for this method.

I have updated the assertions as an end to end test.



@requires_zlib()
class DeflateTestsWithSourceFile(AbstractTestsWithSourceFile,
unittest.TestCase):
Expand Down
41 changes: 27 additions & 14 deletions Lib/zipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,10 @@ def _get_decompressor(compress_type):


class _SharedFile:
"""
Protect an already opened member of the archive from being read or written
at the same time.
"""
def __init__(self, file, pos, close, lock, writing):
self._file = file
self._pos = pos
Expand Down Expand Up @@ -1372,7 +1376,7 @@ class ZipFile:
""" Class with methods to open, read, write, close, list zip files.

z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
compresslevel=None)
compresslevel=None, zipinfo_class=ZipInfo, zipextfile_class=ZipExtFile)

file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile.
Expand All @@ -1392,21 +1396,30 @@ class ZipFile:
When using ZIP_ZSTANDARD integers -7 through 22 are common,
see the CompressionParameter enum in compression.zstd for
details.

zipinfo_class: A class that can replace ZipInfo. This is designed to help
extend ZipFile.
For example, to implement other encryption or compression
methods.
zipextfile_class: A class that can replace ZipExtFile. This is designed to
help extend ZipFile.
For example, to implement other encryption or compression
methods.
"""

fp = None # Set here since __del__ checks it
_windows_illegal_name_trans_table = None

def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
compresslevel=None, *, strict_timestamps=True, metadata_encoding=None,
zipinfo_class=ZipInfo, zipextfile_class=ZipExtFile):
"""Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
or append 'a'."""
if mode not in ('r', 'w', 'x', 'a'):
raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

_check_compression(compression)

self._ZipInfo = zipinfo_class
self._ZipExtFile = zipextfile_class
self._allowZip64 = allowZip64
self._didModify = False
self.debug = 0 # Level of printing: 0 through 3
Expand Down Expand Up @@ -1558,7 +1571,7 @@ def _RealGetContents(self):
# Historical ZIP filename encoding
filename = filename.decode(self.metadata_encoding or 'cp437')
# Create ZipInfo instance to store file information
x = ZipInfo(filename)
x = self._ZipInfo(filename)
x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Expand Down Expand Up @@ -1693,11 +1706,11 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
"Attempt to use ZIP archive that was already closed")

# Make sure we have an info object
if isinstance(name, ZipInfo):
if isinstance(name, self._ZipInfo):
# 'name' is already an info object
zinfo = name
elif mode == 'w':
zinfo = ZipInfo(name)
zinfo = self._ZipInfo(name)
zinfo.compress_type = self.compression
zinfo.compress_level = self.compresslevel
else:
Expand Down Expand Up @@ -1774,7 +1787,7 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
else:
pwd = None

return ZipExtFile(zef_file, mode + 'b', zinfo, pwd, True)
return self._ZipExtFile(zef_file, mode + 'b', zinfo, pwd, True)
except:
zef_file.close()
raise
Expand Down Expand Up @@ -1872,7 +1885,7 @@ def _extract_member(self, member, targetpath, pwd):
"""Extract the ZipInfo object 'member' to a physical
file on the path targetpath.
"""
if not isinstance(member, ZipInfo):
if not isinstance(member, self._ZipInfo):
member = self.getinfo(member)

# build the destination pathname, replacing
Expand Down Expand Up @@ -1952,7 +1965,7 @@ def write(self, filename, arcname=None,
"Can't write to ZIP archive while an open writing handle exists"
)

zinfo = ZipInfo.from_file(filename, arcname,
zinfo = self._ZipInfo.from_file(filename, arcname,
strict_timestamps=self._strict_timestamps)

if zinfo.is_dir():
Expand Down Expand Up @@ -1982,10 +1995,10 @@ def writestr(self, zinfo_or_arcname, data,
the name of the file in the archive."""
if isinstance(data, str):
data = data.encode("utf-8")
if isinstance(zinfo_or_arcname, ZipInfo):
if isinstance(zinfo_or_arcname, self._ZipInfo):
zinfo = zinfo_or_arcname
else:
zinfo = ZipInfo(zinfo_or_arcname)._for_archive(self)
zinfo = self._ZipInfo(zinfo_or_arcname)._for_archive(self)

if not self.fp:
raise ValueError(
Expand All @@ -2008,15 +2021,15 @@ def writestr(self, zinfo_or_arcname, data,

def mkdir(self, zinfo_or_directory_name, mode=511):
"""Creates a directory inside the zip archive."""
if isinstance(zinfo_or_directory_name, ZipInfo):
if isinstance(zinfo_or_directory_name, self._ZipInfo):
zinfo = zinfo_or_directory_name
if not zinfo.is_dir():
raise ValueError("The given ZipInfo does not describe a directory")
elif isinstance(zinfo_or_directory_name, str):
directory_name = zinfo_or_directory_name
if not directory_name.endswith("/"):
directory_name += "/"
zinfo = ZipInfo(directory_name)
zinfo = self._ZipInfo(directory_name)
zinfo.compress_size = 0
zinfo.CRC = 0
zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:class:`zipfile.ZipFile` was given the ``zipinfo_class`` and ``zipextfile_class`` arguments to make it easier to subclass and
extend it.
Loading