Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Lib/test/test_zipfile/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1991,6 +1991,25 @@ def test_is_zip_erroneous_file(self):
self.assertFalse(zipfile.is_zipfile(fp))
fp.seek(0, 0)
self.assertFalse(zipfile.is_zipfile(fp))
# - passing non-zipfile with ZIP header elements
# data created using pyPNG like so:
# d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)]
# w = png.Writer(1,2,alpha=True,compression=0)
# f = open('onepix.png', 'wb')
# w.write(f, d)
# w.close()
data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00"
b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I"
b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07"
b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82")
# - passing a filename
with open(TESTFN, "wb") as fp:
fp.write(data)
self.assertFalse(zipfile.is_zipfile(TESTFN))
# - passing a file-like object
fp = io.BytesIO()
fp.write(data)
self.assertFalse(zipfile.is_zipfile(fp))

def test_damaged_zipfile(self):
"""Check that zipfiles with missing bytes at the end raise BadZipFile."""
Expand Down
48 changes: 34 additions & 14 deletions Lib/zipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,19 @@ def strip(cls, data, xids):

def _check_zipfile(fp):
try:
if _EndRecData(fp):
return True # file has correct magic number
endrec = _EndRecData(fp)
if endrec:
if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0:
return True # Empty zipfiles are still zipfiles
elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]:
# Central directory is on the same disk
fp.seek(sum(_handle_prepended_data(endrec)))
if endrec[_ECD_SIZE] >= sizeCentralDir:
data = fp.read(sizeCentralDir) # CD is where we expect it to be
if len(data) == sizeCentralDir:
centdir = struct.unpack(structCentralDir, data) # CD is the right size
if centdir[_CD_SIGNATURE] == stringCentralDir:
return True # First central directory entry has correct magic number
except OSError:
pass
return False
Expand All @@ -258,6 +269,22 @@ def is_zipfile(filename):
pass
return result

def _handle_prepended_data(endrec, debug=0):
size_cd = endrec[_ECD_SIZE] # bytes in central directory
offset_cd = endrec[_ECD_OFFSET] # offset of central directory

# "concat" is zero, unless zip was concatenated to another file
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
# If Zip64 extension structures are present, account for them
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

if debug > 2:
inferred = concat + offset_cd
print("given, inferred, offset", offset_cd, inferred, concat)

return offset_cd, concat

def _EndRecData64(fpin, offset, endrec):
"""
Read the ZIP64 end-of-archive records and use that to update endrec
Expand Down Expand Up @@ -1501,28 +1528,21 @@ def _RealGetContents(self):
raise BadZipFile("File is not a zip file")
if self.debug > 1:
print(endrec)
size_cd = endrec[_ECD_SIZE] # bytes in central directory
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
self._comment = endrec[_ECD_COMMENT] # archive comment

# "concat" is zero, unless zip was concatenated to another file
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
# If Zip64 extension structures are present, account for them
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
offset_cd, concat = _handle_prepended_data(endrec, self.debug)

# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat

# store the offset to the beginning of data for the
# .data_offset property
self._data_offset = concat

if self.debug > 2:
inferred = concat + offset_cd
print("given, inferred, offset", offset_cd, inferred, concat)
# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat
if self.start_dir < 0:
raise BadZipFile("Bad offset for central directory")
fp.seek(self.start_dir, 0)
size_cd = endrec[_ECD_SIZE]
data = fp.read(size_cd)
fp = io.BytesIO(data)
total = 0
Expand Down
13 changes: 13 additions & 0 deletions Misc/NEWS.d/next/Library/2017-12-30-18-21-00.bpo-28494.Dt_Wks.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Improve Zip file validation in :func:`zipfile.is_zipfile`.

Before this change :func:`zipfile.is_zipfile` only checked the End Central Directory
signature. If the signature could be found in the last 64k of the file,
success! This produced false positives on any file with ``'PK\x05\x06'`` in the
last 64k of the file - including PDFs and PNGs.

This is now corrected by actually validating the Central Directory location
and size based on the information provided by the End of Central Directory
along with verifying the Central Directory signature of the first entry.

This should be sufficient for the vast number of Zip files with fewer
false positives.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a very beefy NEWS, could it be condensed. We generally keep them quite brief, details go in issues or docs.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest to move most of the NEWS entry into the commit message, and keep the NEWS entry short.

Loading