Skip to content

Commit 1d29afb

Browse files
[3.11] gh-139700: Check consistency of the zip64 end of central directory record (GH-139702) (GH-139708) (GH-139713)
(cherry picked from commit 333d4a6)

Support records with "zip64 extensible data" if there are no bytes prepended to the ZIP file.

(cherry picked from commit 162997b)

Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent 22d5724 commit 1d29afb

File tree

3 files changed

+113
-23
lines changed

3 files changed

+113
-23
lines changed

Lib/test/test_zipfile.py

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,8 @@ def make_zip64_file(
887887
self, file_size_64_set=False, file_size_extra=False,
888888
compress_size_64_set=False, compress_size_extra=False,
889889
header_offset_64_set=False, header_offset_extra=False,
890+
extensible_data=b'',
891+
end_of_central_dir_size=None, offset_to_end_of_central_dir=None,
890892
):
891893
"""Generate bytes sequence for a zip with (incomplete) zip64 data.
892894
@@ -940,6 +942,12 @@ def make_zip64_file(
940942

941943
central_dir_size = struct.pack('<Q', 58 + 8 * len(central_zip64_fields))
942944
offset_to_central_dir = struct.pack('<Q', 50 + 8 * len(local_zip64_fields))
945+
if end_of_central_dir_size is None:
946+
end_of_central_dir_size = 44 + len(extensible_data)
947+
if offset_to_end_of_central_dir is None:
948+
offset_to_end_of_central_dir = (108
949+
+ 8 * len(local_zip64_fields)
950+
+ 8 * len(central_zip64_fields))
943951

944952
local_extra_length = struct.pack("<H", 4 + 8 * len(local_zip64_fields))
945953
central_extra_length = struct.pack("<H", 4 + 8 * len(central_zip64_fields))
@@ -968,14 +976,17 @@ def make_zip64_file(
968976
+ filename
969977
+ central_extra
970978
# Zip64 end of central directory
971-
+ b"PK\x06\x06,\x00\x00\x00\x00\x00\x00\x00-\x00-"
972-
+ b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00"
979+
+ b"PK\x06\x06"
980+
+ struct.pack('<Q', end_of_central_dir_size)
981+
+ b"-\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00"
973982
+ b"\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00"
974983
+ central_dir_size
975984
+ offset_to_central_dir
985+
+ extensible_data
976986
# Zip64 end of central directory locator
977-
+ b"PK\x06\x07\x00\x00\x00\x00l\x00\x00\x00\x00\x00\x00\x00\x01"
978-
+ b"\x00\x00\x00"
987+
+ b"PK\x06\x07\x00\x00\x00\x00"
988+
+ struct.pack('<Q', offset_to_end_of_central_dir)
989+
+ b"\x01\x00\x00\x00"
979990
# end of central directory
980991
+ b"PK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00:\x00\x00\x002\x00"
981992
+ b"\x00\x00\x00\x00"
@@ -1006,6 +1017,7 @@ def test_bad_zip64_extra(self):
10061017
with self.assertRaises(zipfile.BadZipFile) as e:
10071018
zipfile.ZipFile(io.BytesIO(missing_file_size_extra))
10081019
self.assertIn('file size', str(e.exception).lower())
1020+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_file_size_extra)))
10091021

10101022
# zip64 file size present, zip64 compress size present, one field in
10111023
# extra, expecting two, equals missing compress size.
@@ -1017,6 +1029,7 @@ def test_bad_zip64_extra(self):
10171029
with self.assertRaises(zipfile.BadZipFile) as e:
10181030
zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
10191031
self.assertIn('compress size', str(e.exception).lower())
1032+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_compress_size_extra)))
10201033

10211034
# zip64 compress size present, no fields in extra, expecting one,
10221035
# equals missing compress size.
@@ -1026,6 +1039,7 @@ def test_bad_zip64_extra(self):
10261039
with self.assertRaises(zipfile.BadZipFile) as e:
10271040
zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
10281041
self.assertIn('compress size', str(e.exception).lower())
1042+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_compress_size_extra)))
10291043

10301044
# zip64 file size present, zip64 compress size present, zip64 header
10311045
# offset present, two fields in extra, expecting three, equals missing
@@ -1040,6 +1054,7 @@ def test_bad_zip64_extra(self):
10401054
with self.assertRaises(zipfile.BadZipFile) as e:
10411055
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
10421056
self.assertIn('header offset', str(e.exception).lower())
1057+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
10431058

10441059
# zip64 compress size present, zip64 header offset present, one field
10451060
# in extra, expecting two, equals missing header offset
@@ -1052,6 +1067,7 @@ def test_bad_zip64_extra(self):
10521067
with self.assertRaises(zipfile.BadZipFile) as e:
10531068
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
10541069
self.assertIn('header offset', str(e.exception).lower())
1070+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
10551071

10561072
# zip64 file size present, zip64 header offset present, one field in
10571073
# extra, expecting two, equals missing header offset
@@ -1064,6 +1080,7 @@ def test_bad_zip64_extra(self):
10641080
with self.assertRaises(zipfile.BadZipFile) as e:
10651081
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
10661082
self.assertIn('header offset', str(e.exception).lower())
1083+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
10671084

10681085
# zip64 header offset present, no fields in extra, expecting one,
10691086
# equals missing header offset
@@ -1075,6 +1092,63 @@ def test_bad_zip64_extra(self):
10751092
with self.assertRaises(zipfile.BadZipFile) as e:
10761093
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
10771094
self.assertIn('header offset', str(e.exception).lower())
1095+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
1096+
1097+
def test_bad_zip64_end_of_central_dir(self):
1098+
zipdata = self.make_zip64_file(end_of_central_dir_size=0)
1099+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'):
1100+
zipfile.ZipFile(io.BytesIO(zipdata))
1101+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1102+
1103+
zipdata = self.make_zip64_file(end_of_central_dir_size=100)
1104+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'):
1105+
zipfile.ZipFile(io.BytesIO(zipdata))
1106+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1107+
1108+
zipdata = self.make_zip64_file(offset_to_end_of_central_dir=0)
1109+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'):
1110+
zipfile.ZipFile(io.BytesIO(zipdata))
1111+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1112+
1113+
zipdata = self.make_zip64_file(offset_to_end_of_central_dir=1000)
1114+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*locator'):
1115+
zipfile.ZipFile(io.BytesIO(zipdata))
1116+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1117+
1118+
def test_zip64_end_of_central_dir_record_not_found(self):
1119+
zipdata = self.make_zip64_file()
1120+
zipdata = zipdata.replace(b"PK\x06\x06", b'\x00'*4)
1121+
with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'):
1122+
zipfile.ZipFile(io.BytesIO(zipdata))
1123+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1124+
1125+
zipdata = self.make_zip64_file(
1126+
extensible_data=b'\xca\xfe\x04\x00\x00\x00data')
1127+
zipdata = zipdata.replace(b"PK\x06\x06", b'\x00'*4)
1128+
with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'):
1129+
zipfile.ZipFile(io.BytesIO(zipdata))
1130+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
1131+
1132+
def test_zip64_extensible_data(self):
1133+
# These values are what is set in the make_zip64_file method.
1134+
expected_file_size = 8
1135+
expected_compress_size = 8
1136+
expected_header_offset = 0
1137+
expected_content = b"test1234"
1138+
1139+
zipdata = self.make_zip64_file(
1140+
extensible_data=b'\xca\xfe\x04\x00\x00\x00data')
1141+
with zipfile.ZipFile(io.BytesIO(zipdata)) as zf:
1142+
zinfo = zf.infolist()[0]
1143+
self.assertEqual(zinfo.file_size, expected_file_size)
1144+
self.assertEqual(zinfo.compress_size, expected_compress_size)
1145+
self.assertEqual(zinfo.header_offset, expected_header_offset)
1146+
self.assertEqual(zf.read(zinfo), expected_content)
1147+
self.assertTrue(zipfile.is_zipfile(io.BytesIO(zipdata)))
1148+
1149+
with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'):
1150+
zipfile.ZipFile(io.BytesIO(b'prepended' + zipdata))
1151+
self.assertFalse(zipfile.is_zipfile(io.BytesIO(b'prepended' + zipdata)))
10781152

10791153
def test_generated_valid_zip64_extra(self):
10801154
# These values are what is set in the make_zip64_file method.

Lib/zipfile.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -236,41 +236,57 @@ def is_zipfile(filename):
236236
else:
237237
with open(filename, "rb") as fp:
238238
result = _check_zipfile(fp)
239-
except OSError:
239+
except (OSError, BadZipFile):
240240
pass
241241
return result
242242

243243
def _EndRecData64(fpin, offset, endrec):
244244
"""
245245
Read the ZIP64 end-of-archive records and use that to update endrec
246246
"""
247-
try:
248-
fpin.seek(offset - sizeEndCentDir64Locator, 2)
249-
except OSError:
250-
# If the seek fails, the file is not large enough to contain a ZIP64
247+
offset -= sizeEndCentDir64Locator
248+
if offset < 0:
249+
# The file is not large enough to contain a ZIP64
251250
# end-of-archive record, so just return the end record we were given.
252251
return endrec
253-
252+
fpin.seek(offset)
254253
data = fpin.read(sizeEndCentDir64Locator)
255254
if len(data) != sizeEndCentDir64Locator:
256-
return endrec
255+
raise OSError("Unknown I/O error")
257256
sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
258257
if sig != stringEndArchive64Locator:
259258
return endrec
260259

261260
if diskno != 0 or disks > 1:
262261
raise BadZipFile("zipfiles that span multiple disks are not supported")
263262

264-
# Assume no 'zip64 extensible data'
265-
fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
263+
offset -= sizeEndCentDir64
264+
if reloff > offset:
265+
raise BadZipFile("Corrupt zip64 end of central directory locator")
266+
# First, check the assumption that there is no prepended data.
267+
fpin.seek(reloff)
268+
extrasz = offset - reloff
266269
data = fpin.read(sizeEndCentDir64)
267270
if len(data) != sizeEndCentDir64:
268-
return endrec
271+
raise OSError("Unknown I/O error")
272+
if not data.startswith(stringEndArchive64) and reloff != offset:
273+
# Since we already have seen the Zip64 EOCD Locator, it's
274+
# possible we got here because there is prepended data.
275+
# Assume no 'zip64 extensible data'
276+
fpin.seek(offset)
277+
extrasz = 0
278+
data = fpin.read(sizeEndCentDir64)
279+
if len(data) != sizeEndCentDir64:
280+
raise OSError("Unknown I/O error")
281+
if not data.startswith(stringEndArchive64):
282+
raise BadZipFile("Zip64 end of central directory record not found")
283+
269284
sig, sz, create_version, read_version, disk_num, disk_dir, \
270285
dircount, dircount2, dirsize, diroffset = \
271286
struct.unpack(structEndArchive64, data)
272-
if sig != stringEndArchive64:
273-
return endrec
287+
if (diroffset + dirsize != reloff or
288+
sz + 12 != sizeEndCentDir64 + extrasz):
289+
raise BadZipFile("Corrupt zip64 end of central directory record")
274290

275291
# Update the original endrec using data from the ZIP64 record
276292
endrec[_ECD_SIGNATURE] = sig
@@ -280,6 +296,7 @@ def _EndRecData64(fpin, offset, endrec):
280296
endrec[_ECD_ENTRIES_TOTAL] = dircount2
281297
endrec[_ECD_SIZE] = dirsize
282298
endrec[_ECD_OFFSET] = diroffset
299+
endrec[_ECD_LOCATION] = offset - extrasz
283300
return endrec
284301

285302

@@ -313,7 +330,7 @@ def _EndRecData(fpin):
313330
endrec.append(filesize - sizeEndCentDir)
314331

315332
# Try to read the "Zip64 end of central directory" structure
316-
return _EndRecData64(fpin, -sizeEndCentDir, endrec)
333+
return _EndRecData64(fpin, filesize - sizeEndCentDir, endrec)
317334

318335
# Either this is not a ZIP file, or it is a ZIP file with an archive
319336
# comment. Search the end of the file for the "end of central directory"
@@ -337,8 +354,7 @@ def _EndRecData(fpin):
337354
endrec.append(maxCommentStart + start)
338355

339356
# Try to read the "Zip64 end of central directory" structure
340-
return _EndRecData64(fpin, maxCommentStart + start - filesize,
341-
endrec)
357+
return _EndRecData64(fpin, maxCommentStart + start, endrec)
342358

343359
# Unable to find a valid end of central directory structure
344360
return None
@@ -1386,9 +1402,6 @@ def _RealGetContents(self):
13861402

13871403
# "concat" is zero, unless zip was concatenated to another file
13881404
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1389-
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1390-
# If Zip64 extension structures are present, account for them
1391-
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
13921405

13931406
if self.debug > 2:
13941407
inferred = concat + offset_cd
@@ -1989,7 +2002,7 @@ def _write_end_record(self):
19892002
" would require ZIP64 extensions")
19902003
zip64endrec = struct.pack(
19912004
structEndArchive64, stringEndArchive64,
1992-
44, 45, 45, 0, 0, centDirCount, centDirCount,
2005+
sizeEndCentDir64 - 12, 45, 45, 0, 0, centDirCount, centDirCount,
19932006
centDirSize, centDirOffset)
19942007
self.fp.write(zip64endrec)
19952008

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Check consistency of the zip64 end of central directory record. Support
2+
records with "zip64 extensible data" if there are no bytes prepended to the
3+
ZIP file.

0 commit comments

Comments
 (0)