Skip to content

Commit 208de43

Browse files
gpshead authored and lkollar committed
pythongh-61206: Support zstandard compression in the zipimport module (pythonGH-138254)
* pythongh-61206: support zstd in zipimport
* NEWS entry
* versionchanged doc
1 parent 08f73fa commit 208de43

File tree

4 files changed

+112
-25
lines changed

4 files changed

+112
-25
lines changed

Doc/library/zipimport.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ Any files may be present in the ZIP archive, but importers are only invoked for
3030
corresponding :file:`.pyc` file, meaning that if a ZIP archive
3131
doesn't contain :file:`.pyc` files, importing may be rather slow.
3232

33+
.. versionchanged:: next
34+
Zstandard (*zstd*) compressed zip file entries are supported.
35+
3336
.. versionchanged:: 3.13
3437
ZIP64 is supported
3538

Lib/test/test_zipimport.py

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from test.support import import_helper
1515
from test.support import os_helper
1616

17-
from zipfile import ZipFile, ZipInfo, ZIP_STORED, ZIP_DEFLATED
17+
from zipfile import ZipFile, ZipInfo, ZIP_STORED, ZIP_DEFLATED, ZIP_ZSTANDARD
1818

1919
import zipimport
2020
import linecache
@@ -193,19 +193,38 @@ def testAFakeZlib(self):
193193
# occur in that case (builtin modules are always found first),
194194
# so we'll simply skip it then. Bug #765456.
195195
#
196-
if "zlib" in sys.builtin_module_names:
197-
self.skipTest('zlib is a builtin module')
198-
if "zlib" in sys.modules:
199-
del sys.modules["zlib"]
200-
files = {"zlib.py": test_src}
196+
if self.compression == ZIP_DEFLATED:
197+
mod_name = "zlib"
198+
if zipimport._zlib_decompress: # validate attr name
199+
# reset the cached import to avoid test order dependencies
200+
zipimport._zlib_decompress = None # reset cache
201+
elif self.compression == ZIP_ZSTANDARD:
202+
mod_name = "_zstd"
203+
if zipimport._zstd_decompressor_class: # validate attr name
204+
# reset the cached import to avoid test order dependencies
205+
zipimport._zstd_decompressor_class = None
206+
else:
207+
mod_name = "zlib" # the ZIP_STORED case below
208+
209+
if mod_name in sys.builtin_module_names:
210+
self.skipTest(f"{mod_name} is a builtin module")
211+
if mod_name in sys.modules:
212+
del sys.modules[mod_name]
213+
files = {f"{mod_name}.py": test_src}
201214
try:
202-
self.doTest(".py", files, "zlib")
215+
self.doTest(".py", files, mod_name)
203216
except ImportError:
204-
if self.compression != ZIP_DEFLATED:
205-
self.fail("expected test to not raise ImportError")
206-
else:
207217
if self.compression != ZIP_STORED:
208-
self.fail("expected test to raise ImportError")
218+
# Expected - fake compression module can't decompress
219+
pass
220+
else:
221+
self.fail("expected test to not raise ImportError for uncompressed")
222+
else:
223+
if self.compression == ZIP_STORED:
224+
# Expected - no compression needed, so fake module works
225+
pass
226+
else:
227+
self.fail("expected test to raise ImportError for compressed zip with fake compression module")
209228

210229
def testPy(self):
211230
files = {TESTMOD + ".py": test_src}
@@ -1000,10 +1019,15 @@ def assertDataEntry(name):
10001019

10011020

10021021
@support.requires_zlib()
1003-
class CompressedZipImportTestCase(UncompressedZipImportTestCase):
1022+
class DeflateCompressedZipImportTestCase(UncompressedZipImportTestCase):
10041023
compression = ZIP_DEFLATED
10051024

10061025

1026+
@support.requires_zstd()
1027+
class ZStdCompressedZipImportTestCase(UncompressedZipImportTestCase):
1028+
compression = ZIP_ZSTANDARD
1029+
1030+
10071031
class BadFileZipImportTestCase(unittest.TestCase):
10081032
def assertZipFailure(self, filename):
10091033
self.assertRaises(zipimport.ZipImportError,

Lib/zipimport.py

Lines changed: 72 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -557,11 +557,16 @@ def _read_directory(archive):
557557
)
558558

559559
_importing_zlib = False
560+
_zlib_decompress = None
560561

561562
# Return the zlib.decompress function object, or raise ZipImportError if zlib couldn't
562563
# be imported. The function is cached when found, so subsequent calls
563564
# don't import zlib again.
564-
def _get_decompress_func():
565+
def _get_zlib_decompress_func():
566+
global _zlib_decompress
567+
if _zlib_decompress:
568+
return _zlib_decompress
569+
565570
global _importing_zlib
566571
if _importing_zlib:
567572
# Someone has a zlib.py[co] in their Zip file
@@ -571,15 +576,62 @@ def _get_decompress_func():
571576

572577
_importing_zlib = True
573578
try:
574-
from zlib import decompress
579+
from zlib import decompress as _zlib_decompress
575580
except Exception:
576581
_bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
577582
raise ZipImportError("can't decompress data; zlib not available")
578583
finally:
579584
_importing_zlib = False
580585

581586
_bootstrap._verbose_message('zipimport: zlib available')
582-
return decompress
587+
return _zlib_decompress
588+
589+
590+
_importing_zstd = False
591+
_zstd_decompressor_class = None
592+
593+
# Return the _zstd.ZstdDecompressor class, or raise ZipImportError if _zstd couldn't
594+
# be imported. The result is cached when found.
595+
def _get_zstd_decompressor_class():
596+
global _zstd_decompressor_class
597+
if _zstd_decompressor_class:
598+
return _zstd_decompressor_class
599+
600+
global _importing_zstd
601+
if _importing_zstd:
602+
# Someone has a _zstd.py[co] in their Zip file
603+
# let's avoid a stack overflow.
604+
_bootstrap._verbose_message("zipimport: zstd UNAVAILABLE")
605+
raise ZipImportError("can't decompress data; zstd not available")
606+
607+
_importing_zstd = True
608+
try:
609+
from _zstd import ZstdDecompressor as _zstd_decompressor_class
610+
except Exception:
611+
_bootstrap._verbose_message("zipimport: zstd UNAVAILABLE")
612+
raise ZipImportError("can't decompress data; zstd not available")
613+
finally:
614+
_importing_zstd = False
615+
616+
_bootstrap._verbose_message("zipimport: zstd available")
617+
return _zstd_decompressor_class
618+
619+
620+
def _zstd_decompress(data):
621+
# A simple version of compression.zstd.decompress(); we cannot import
621+
# that here because the stdlib itself may be in the middle of being zipimported.
623+
results = []
624+
while True:
625+
decomp = _get_zstd_decompressor_class()()
626+
results.append(decomp.decompress(data))
627+
if not decomp.eof:
628+
raise ZipImportError("zipimport: zstd compressed data ended before "
629+
"the end-of-stream marker")
630+
data = decomp.unused_data
631+
if not data:
632+
break
633+
return b"".join(results)
634+
583635

584636
# Given a path to a Zip file and a toc_entry, return the (uncompressed) data.
585637
def _get_data(archive, toc_entry):
@@ -613,16 +665,23 @@ def _get_data(archive, toc_entry):
613665
if len(raw_data) != data_size:
614666
raise OSError("zipimport: can't read data")
615667

616-
if compress == 0:
617-
# data is not compressed
618-
return raw_data
619-
620-
# Decompress with zlib
621-
try:
622-
decompress = _get_decompress_func()
623-
except Exception:
624-
raise ZipImportError("can't decompress data; zlib not available")
625-
return decompress(raw_data, -15)
668+
match compress:
669+
case 0: # stored
670+
return raw_data
671+
case 8: # deflate aka zlib
672+
try:
673+
decompress = _get_zlib_decompress_func()
674+
except Exception:
675+
raise ZipImportError("can't decompress data; zlib not available")
676+
return decompress(raw_data, -15)
677+
case 93: # zstd
678+
try:
679+
return _zstd_decompress(raw_data)
680+
except Exception:
681+
raise ZipImportError("could not decompress zstd data")
682+
# bz2 and lzma could be added, but are largely obsolete.
683+
case _:
684+
raise ZipImportError(f"zipimport: unsupported compression {compress}")
626685

627686

628687
# Lenient date/time comparison function. The precision of the mtime
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
:mod:`zipimport` now supports zstandard compressed zip file entries.

0 commit comments

Comments
 (0)