Skip to content

Commit 8392b2f

Browse files
[3.12] gh-139700: Check consistency of the zip64 end of central directory record (GH-139702) (GH-139708) (GH-139712)
(cherry picked from commit 333d4a6)

Support records with "zip64 extensible data" if there are no bytes prepended to the ZIP file.

(cherry picked from commit 162997b)

Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent dea7e3d commit 8392b2f

File tree

3 files changed

+113
-23
lines changed

3 files changed

+113
-23
lines changed

Lib/test/test_zipfile/test_core.py

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -885,6 +885,8 @@ def make_zip64_file(
885885
self, file_size_64_set=False, file_size_extra=False,
886886
compress_size_64_set=False, compress_size_extra=False,
887887
header_offset_64_set=False, header_offset_extra=False,
888+
extensible_data=b'',
889+
end_of_central_dir_size=None, offset_to_end_of_central_dir=None,
888890
):
889891
"""Generate bytes sequence for a zip with (incomplete) zip64 data.
890892
@@ -938,6 +940,12 @@ def make_zip64_file(
938940

939941
central_dir_size = struct.pack('<Q', 58 + 8 * len(central_zip64_fields))
940942
offset_to_central_dir = struct.pack('<Q', 50 + 8 * len(local_zip64_fields))
943+
if end_of_central_dir_size is None:
944+
end_of_central_dir_size = 44 + len(extensible_data)
945+
if offset_to_end_of_central_dir is None:
946+
offset_to_end_of_central_dir = (108
947+
+ 8 * len(local_zip64_fields)
948+
+ 8 * len(central_zip64_fields))
941949

942950
local_extra_length = struct.pack("<H", 4 + 8 * len(local_zip64_fields))
943951
central_extra_length = struct.pack("<H", 4 + 8 * len(central_zip64_fields))
@@ -966,14 +974,17 @@ def make_zip64_file(
966974
+ filename
967975
+ central_extra
968976
# Zip64 end of central directory
969-
+ b"PK\x06\x06,\x00\x00\x00\x00\x00\x00\x00-\x00-"
970-
+ b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00"
977+
+ b"PK\x06\x06"
978+
+ struct.pack('<Q', end_of_central_dir_size)
979+
+ b"-\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00"
971980
+ b"\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00"
972981
+ central_dir_size
973982
+ offset_to_central_dir
983+
+ extensible_data
974984
# Zip64 end of central directory locator
975-
+ b"PK\x06\x07\x00\x00\x00\x00l\x00\x00\x00\x00\x00\x00\x00\x01"
976-
+ b"\x00\x00\x00"
985+
+ b"PK\x06\x07\x00\x00\x00\x00"
986+
+ struct.pack('<Q', offset_to_end_of_central_dir)
987+
+ b"\x01\x00\x00\x00"
977988
# end of central directory
978989
+ b"PK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00:\x00\x00\x002\x00"
979990
+ b"\x00\x00\x00\x00"
@@ -1004,6 +1015,7 @@ def test_bad_zip64_extra(self):
10041015
with self.assertRaises(zipfile.BadZipFile) as e:
10051016
zipfile.ZipFile(io.BytesIO(missing_file_size_extra))
10061017
self.assertIn('file size', str(e.exception).lower())
1018+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_file_size_extra)))
10071019

10081020
# zip64 file size present, zip64 compress size present, one field in
10091021
# extra, expecting two, equals missing compress size.
@@ -1015,6 +1027,7 @@ def test_bad_zip64_extra(self):
10151027
with self.assertRaises(zipfile.BadZipFile) as e:
10161028
zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
10171029
self.assertIn('compress size', str(e.exception).lower())
1030+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_compress_size_extra)))
10181031

10191032
# zip64 compress size present, no fields in extra, expecting one,
10201033
# equals missing compress size.
@@ -1024,6 +1037,7 @@ def test_bad_zip64_extra(self):
10241037
with self.assertRaises(zipfile.BadZipFile) as e:
10251038
zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
10261039
self.assertIn('compress size', str(e.exception).lower())
1040+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_compress_size_extra)))
10271041

10281042
# zip64 file size present, zip64 compress size present, zip64 header
10291043
# offset present, two fields in extra, expecting three, equals missing
@@ -1038,6 +1052,7 @@ def test_bad_zip64_extra(self):
10381052
with self.assertRaises(zipfile.BadZipFile) as e:
10391053
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
10401054
self.assertIn('header offset', str(e.exception).lower())
1055+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
10411056

10421057
# zip64 compress size present, zip64 header offset present, one field
10431058
# in extra, expecting two, equals missing header offset
@@ -1050,6 +1065,7 @@ def test_bad_zip64_extra(self):
10501065
with self.assertRaises(zipfile.BadZipFile) as e:
10511066
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
10521067
self.assertIn('header offset', str(e.exception).lower())
1068+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
10531069

10541070
# zip64 file size present, zip64 header offset present, one field in
10551071
# extra, expecting two, equals missing header offset
@@ -1062,6 +1078,7 @@ def test_bad_zip64_extra(self):
10621078
with self.assertRaises(zipfile.BadZipFile) as e:
10631079
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
10641080
self.assertIn('header offset', str(e.exception).lower())
1081+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
10651082

10661083
# zip64 header offset present, no fields in extra, expecting one,
10671084
# equals missing header offset
@@ -1073,6 +1090,63 @@ def test_bad_zip64_extra(self):
10731090
with self.assertRaises(zipfile.BadZipFile) as e:
10741091
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
10751092
self.assertIn('header offset', str(e.exception).lower())
1093+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
1094+
1095+
def test_bad_zip64_end_of_central_dir(self):
1096+
zipdata = self.make_zip64_file(end_of_central_dir_size=0)
1097+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'):
1098+
zipfile.ZipFile(io.BytesIO(zipdata))
1099+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1100+
1101+
zipdata = self.make_zip64_file(end_of_central_dir_size=100)
1102+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'):
1103+
zipfile.ZipFile(io.BytesIO(zipdata))
1104+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1105+
1106+
zipdata = self.make_zip64_file(offset_to_end_of_central_dir=0)
1107+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'):
1108+
zipfile.ZipFile(io.BytesIO(zipdata))
1109+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1110+
1111+
zipdata = self.make_zip64_file(offset_to_end_of_central_dir=1000)
1112+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*locator'):
1113+
zipfile.ZipFile(io.BytesIO(zipdata))
1114+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1115+
1116+
def test_zip64_end_of_central_dir_record_not_found(self):
1117+
zipdata = self.make_zip64_file()
1118+
zipdata = zipdata.replace(b"PK\x06\x06", b'\x00'*4)
1119+
with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'):
1120+
zipfile.ZipFile(io.BytesIO(zipdata))
1121+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1122+
1123+
zipdata = self.make_zip64_file(
1124+
extensible_data=b'\xca\xfe\x04\x00\x00\x00data')
1125+
zipdata = zipdata.replace(b"PK\x06\x06", b'\x00'*4)
1126+
with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'):
1127+
zipfile.ZipFile(io.BytesIO(zipdata))
1128+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1129+
1130+
def test_zip64_extensible_data(self):
1131+
# These values are what is set in the make_zip64_file method.
1132+
expected_file_size = 8
1133+
expected_compress_size = 8
1134+
expected_header_offset = 0
1135+
expected_content = b"test1234"
1136+
1137+
zipdata = self.make_zip64_file(
1138+
extensible_data=b'\xca\xfe\x04\x00\x00\x00data')
1139+
with zipfile.ZipFile(io.BytesIO(zipdata)) as zf:
1140+
zinfo = zf.infolist()[0]
1141+
self.assertEqual(zinfo.file_size, expected_file_size)
1142+
self.assertEqual(zinfo.compress_size, expected_compress_size)
1143+
self.assertEqual(zinfo.header_offset, expected_header_offset)
1144+
self.assertEqual(zf.read(zinfo), expected_content)
1145+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(zipdata)))
1146+
1147+
with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'):
1148+
zipfile.ZipFile(io.BytesIO(b'prepended' + zipdata))
1149+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(b'prepended' + zipdata)))
10761150

10771151
def test_generated_valid_zip64_extra(self):
10781152
# These values are what is set in the make_zip64_file method.

Lib/zipfile/__init__.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -231,41 +231,57 @@ def is_zipfile(filename):
231231
else:
232232
with open(filename, "rb") as fp:
233233
result = _check_zipfile(fp)
234-
except OSError:
234+
except (OSError, BadZipFile):
235235
pass
236236
return result
237237

238238
def _EndRecData64(fpin, offset, endrec):
239239
"""
240240
Read the ZIP64 end-of-archive records and use that to update endrec
241241
"""
242-
try:
243-
fpin.seek(offset - sizeEndCentDir64Locator, 2)
244-
except OSError:
245-
# If the seek fails, the file is not large enough to contain a ZIP64
242+
offset -= sizeEndCentDir64Locator
243+
if offset < 0:
244+
# The file is not large enough to contain a ZIP64
246245
# end-of-archive record, so just return the end record we were given.
247246
return endrec
248-
247+
fpin.seek(offset)
249248
data = fpin.read(sizeEndCentDir64Locator)
250249
if len(data) != sizeEndCentDir64Locator:
251-
return endrec
250+
raise OSError("Unknown I/O error")
252251
sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
253252
if sig != stringEndArchive64Locator:
254253
return endrec
255254

256255
if diskno != 0 or disks > 1:
257256
raise BadZipFile("zipfiles that span multiple disks are not supported")
258257

259-
# Assume no 'zip64 extensible data'
260-
fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
258+
offset -= sizeEndCentDir64
259+
if reloff > offset:
260+
raise BadZipFile("Corrupt zip64 end of central directory locator")
261+
# First, check the assumption that there is no prepended data.
262+
fpin.seek(reloff)
263+
extrasz = offset - reloff
261264
data = fpin.read(sizeEndCentDir64)
262265
if len(data) != sizeEndCentDir64:
263-
return endrec
266+
raise OSError("Unknown I/O error")
267+
if not data.startswith(stringEndArchive64) and reloff != offset:
268+
# Since we already have seen the Zip64 EOCD Locator, it's
269+
# possible we got here because there is prepended data.
270+
# Assume no 'zip64 extensible data'
271+
fpin.seek(offset)
272+
extrasz = 0
273+
data = fpin.read(sizeEndCentDir64)
274+
if len(data) != sizeEndCentDir64:
275+
raise OSError("Unknown I/O error")
276+
if not data.startswith(stringEndArchive64):
277+
raise BadZipFile("Zip64 end of central directory record not found")
278+
264279
sig, sz, create_version, read_version, disk_num, disk_dir, \
265280
dircount, dircount2, dirsize, diroffset = \
266281
struct.unpack(structEndArchive64, data)
267-
if sig != stringEndArchive64:
268-
return endrec
282+
if (diroffset + dirsize != reloff or
283+
sz + 12 != sizeEndCentDir64 + extrasz):
284+
raise BadZipFile("Corrupt zip64 end of central directory record")
269285

270286
# Update the original endrec using data from the ZIP64 record
271287
endrec[_ECD_SIGNATURE] = sig
@@ -275,6 +291,7 @@ def _EndRecData64(fpin, offset, endrec):
275291
endrec[_ECD_ENTRIES_TOTAL] = dircount2
276292
endrec[_ECD_SIZE] = dirsize
277293
endrec[_ECD_OFFSET] = diroffset
294+
endrec[_ECD_LOCATION] = offset - extrasz
278295
return endrec
279296

280297

@@ -308,7 +325,7 @@ def _EndRecData(fpin):
308325
endrec.append(filesize - sizeEndCentDir)
309326

310327
# Try to read the "Zip64 end of central directory" structure
311-
return _EndRecData64(fpin, -sizeEndCentDir, endrec)
328+
return _EndRecData64(fpin, filesize - sizeEndCentDir, endrec)
312329

313330
# Either this is not a ZIP file, or it is a ZIP file with an archive
314331
# comment. Search the end of the file for the "end of central directory"
@@ -332,8 +349,7 @@ def _EndRecData(fpin):
332349
endrec.append(maxCommentStart + start)
333350

334351
# Try to read the "Zip64 end of central directory" structure
335-
return _EndRecData64(fpin, maxCommentStart + start - filesize,
336-
endrec)
352+
return _EndRecData64(fpin, maxCommentStart + start, endrec)
337353

338354
# Unable to find a valid end of central directory structure
339355
return None
@@ -1427,9 +1443,6 @@ def _RealGetContents(self):
14271443

14281444
# "concat" is zero, unless zip was concatenated to another file
14291445
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1430-
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1431-
# If Zip64 extension structures are present, account for them
1432-
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
14331446

14341447
if self.debug > 2:
14351448
inferred = concat + offset_cd
@@ -2047,7 +2060,7 @@ def _write_end_record(self):
20472060
" would require ZIP64 extensions")
20482061
zip64endrec = struct.pack(
20492062
structEndArchive64, stringEndArchive64,
2050-
44, 45, 45, 0, 0, centDirCount, centDirCount,
2063+
sizeEndCentDir64 - 12, 45, 45, 0, 0, centDirCount, centDirCount,
20512064
centDirSize, centDirOffset)
20522065
self.fp.write(zip64endrec)
20532066

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Check consistency of the zip64 end of central directory record. Support
2+
records with "zip64 extensible data" if there are no bytes prepended to the
3+
ZIP file.

0 commit comments

Comments
 (0)