|
| 1 | +From 3a5f8e17b419124092a9e3524c3b0d49d9b7bcbb Mon Sep 17 00:00:00 2001 |
| 2 | +From: "Miss Islington (bot)" |
| 3 | + |
| 4 | +Date: Wed, 8 Oct 2025 13:46:28 +0200 |
| 5 | +Subject: [PATCH] gh-139700: Check consistency of the zip64 end of central |
| 6 | + directory record (GH-139702) (GH-139708) (GH-139712) |
| 7 | + |
| 8 | +(cherry picked from commit 333d4a6f4967d3ace91492a39ededbcf3faa76a6) |
| 9 | + |
| 10 | +Support records with "zip64 extensible data" if there are no bytes |
| 11 | +prepended to the ZIP file. |
| 12 | +(cherry picked from commit 162997bb70e067668c039700141770687bc8f267) |
| 13 | + |
| 14 | +Co-authored-by: Serhiy Storchaka < [email protected]> |
| 15 | +Signed-off-by: Azure Linux Security Servicing Account < [email protected]> |
| 16 | +Upstream-reference: https://github.com/python/cpython/commit/8392b2f0d35678407d9ce7d95655a5b77de161b4.patch |
| 17 | +--- |
| 18 | + Lib/test/test_zipfile/test_core.py | 82 ++++++++++++++++++- |
| 19 | + Lib/zipfile/__init__.py | 51 +++++++----- |
| 20 | + ...-10-07-19-31-34.gh-issue-139700.vNHU1O.rst | 3 + |
| 21 | + 3 files changed, 113 insertions(+), 23 deletions(-) |
| 22 | + create mode 100644 Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst |
| 23 | + |
| 24 | +diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py |
| 25 | +index 03520e5..ba6a37e 100644 |
| 26 | +--- a/Lib/test/test_zipfile/test_core.py |
| 27 | ++++ b/Lib/test/test_zipfile/test_core.py |
| 28 | +@@ -885,6 +885,8 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles, |
| 29 | + self, file_size_64_set=False, file_size_extra=False, |
| 30 | + compress_size_64_set=False, compress_size_extra=False, |
| 31 | + header_offset_64_set=False, header_offset_extra=False, |
| 32 | ++ extensible_data=b'', |
| 33 | ++ end_of_central_dir_size=None, offset_to_end_of_central_dir=None, |
| 34 | + ): |
| 35 | + """Generate bytes sequence for a zip with (incomplete) zip64 data. |
| 36 | + |
| 37 | +@@ -938,6 +940,12 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles, |
| 38 | + |
| 39 | + central_dir_size = struct.pack('<Q', 58 + 8 * len(central_zip64_fields)) |
| 40 | + offset_to_central_dir = struct.pack('<Q', 50 + 8 * len(local_zip64_fields)) |
| 41 | ++ if end_of_central_dir_size is None: |
| 42 | ++ end_of_central_dir_size = 44 + len(extensible_data) |
| 43 | ++ if offset_to_end_of_central_dir is None: |
| 44 | ++ offset_to_end_of_central_dir = (108 |
| 45 | ++ + 8 * len(local_zip64_fields) |
| 46 | ++ + 8 * len(central_zip64_fields)) |
| 47 | + |
| 48 | + local_extra_length = struct.pack("<H", 4 + 8 * len(local_zip64_fields)) |
| 49 | + central_extra_length = struct.pack("<H", 4 + 8 * len(central_zip64_fields)) |
| 50 | +@@ -966,14 +974,17 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles, |
| 51 | + + filename |
| 52 | + + central_extra |
| 53 | + # Zip64 end of central directory |
| 54 | +- + b"PK\x06\x06,\x00\x00\x00\x00\x00\x00\x00-\x00-" |
| 55 | +- + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00" |
| 56 | ++ + b"PK\x06\x06" |
| 57 | ++ + struct.pack('<Q', end_of_central_dir_size) |
| 58 | ++ + b"-\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00" |
| 59 | + + b"\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00" |
| 60 | + + central_dir_size |
| 61 | + + offset_to_central_dir |
| 62 | ++ + extensible_data |
| 63 | + # Zip64 end of central directory locator |
| 64 | +- + b"PK\x06\x07\x00\x00\x00\x00l\x00\x00\x00\x00\x00\x00\x00\x01" |
| 65 | +- + b"\x00\x00\x00" |
| 66 | ++ + b"PK\x06\x07\x00\x00\x00\x00" |
| 67 | ++ + struct.pack('<Q', offset_to_end_of_central_dir) |
| 68 | ++ + b"\x01\x00\x00\x00" |
| 69 | + # end of central directory |
| 70 | + + b"PK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00:\x00\x00\x002\x00" |
| 71 | + + b"\x00\x00\x00\x00" |
| 72 | +@@ -1004,6 +1015,7 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles, |
| 73 | + with self.assertRaises(zipfile.BadZipFile) as e: |
| 74 | + zipfile.ZipFile(io.BytesIO(missing_file_size_extra)) |
| 75 | + self.assertIn('file size', str(e.exception).lower()) |
| 76 | ++ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_file_size_extra))) |
| 77 | + |
| 78 | + # zip64 file size present, zip64 compress size present, one field in |
| 79 | + # extra, expecting two, equals missing compress size. |
| 80 | +@@ -1015,6 +1027,7 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles, |
| 81 | + with self.assertRaises(zipfile.BadZipFile) as e: |
| 82 | + zipfile.ZipFile(io.BytesIO(missing_compress_size_extra)) |
| 83 | + self.assertIn('compress size', str(e.exception).lower()) |
| 84 | ++ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_compress_size_extra))) |
| 85 | + |
| 86 | + # zip64 compress size present, no fields in extra, expecting one, |
| 87 | + # equals missing compress size. |
| 88 | +@@ -1024,6 +1037,7 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles, |
| 89 | + with self.assertRaises(zipfile.BadZipFile) as e: |
| 90 | + zipfile.ZipFile(io.BytesIO(missing_compress_size_extra)) |
| 91 | + self.assertIn('compress size', str(e.exception).lower()) |
| 92 | ++ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_compress_size_extra))) |
| 93 | + |
| 94 | + # zip64 file size present, zip64 compress size present, zip64 header |
| 95 | + # offset present, two fields in extra, expecting three, equals missing |
| 96 | +@@ -1038,6 +1052,7 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles, |
| 97 | + with self.assertRaises(zipfile.BadZipFile) as e: |
| 98 | + zipfile.ZipFile(io.BytesIO(missing_header_offset_extra)) |
| 99 | + self.assertIn('header offset', str(e.exception).lower()) |
| 100 | ++ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra))) |
| 101 | + |
| 102 | + # zip64 compress size present, zip64 header offset present, one field |
| 103 | + # in extra, expecting two, equals missing header offset |
| 104 | +@@ -1050,6 +1065,7 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles, |
| 105 | + with self.assertRaises(zipfile.BadZipFile) as e: |
| 106 | + zipfile.ZipFile(io.BytesIO(missing_header_offset_extra)) |
| 107 | + self.assertIn('header offset', str(e.exception).lower()) |
| 108 | ++ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra))) |
| 109 | + |
| 110 | + # zip64 file size present, zip64 header offset present, one field in |
| 111 | + # extra, expecting two, equals missing header offset |
| 112 | +@@ -1062,6 +1078,7 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles, |
| 113 | + with self.assertRaises(zipfile.BadZipFile) as e: |
| 114 | + zipfile.ZipFile(io.BytesIO(missing_header_offset_extra)) |
| 115 | + self.assertIn('header offset', str(e.exception).lower()) |
| 116 | ++ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra))) |
| 117 | + |
| 118 | + # zip64 header offset present, no fields in extra, expecting one, |
| 119 | + # equals missing header offset |
| 120 | +@@ -1073,6 +1090,63 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles, |
| 121 | + with self.assertRaises(zipfile.BadZipFile) as e: |
| 122 | + zipfile.ZipFile(io.BytesIO(missing_header_offset_extra)) |
| 123 | + self.assertIn('header offset', str(e.exception).lower()) |
| 124 | ++ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra))) |
| 125 | ++ |
| 126 | ++ def test_bad_zip64_end_of_central_dir(self): |
| 127 | ++ zipdata = self.make_zip64_file(end_of_central_dir_size=0) |
| 128 | ++ with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'): |
| 129 | ++ zipfile.ZipFile(io.BytesIO(zipdata)) |
| 130 | ++ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) |
| 131 | ++ |
| 132 | ++ zipdata = self.make_zip64_file(end_of_central_dir_size=100) |
| 133 | ++ with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'): |
| 134 | ++ zipfile.ZipFile(io.BytesIO(zipdata)) |
| 135 | ++ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) |
| 136 | ++ |
| 137 | ++ zipdata = self.make_zip64_file(offset_to_end_of_central_dir=0) |
| 138 | ++ with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'): |
| 139 | ++ zipfile.ZipFile(io.BytesIO(zipdata)) |
| 140 | ++ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) |
| 141 | ++ |
| 142 | ++ zipdata = self.make_zip64_file(offset_to_end_of_central_dir=1000) |
| 143 | ++ with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*locator'): |
| 144 | ++ zipfile.ZipFile(io.BytesIO(zipdata)) |
| 145 | ++ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) |
| 146 | ++ |
| 147 | ++ def test_zip64_end_of_central_dir_record_not_found(self): |
| 148 | ++ zipdata = self.make_zip64_file() |
| 149 | ++ zipdata = zipdata.replace(b"PK\x06\x06", b'\x00'*4) |
| 150 | ++ with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'): |
| 151 | ++ zipfile.ZipFile(io.BytesIO(zipdata)) |
| 152 | ++ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) |
| 153 | ++ |
| 154 | ++ zipdata = self.make_zip64_file( |
| 155 | ++ extensible_data=b'\xca\xfe\x04\x00\x00\x00data') |
| 156 | ++ zipdata = zipdata.replace(b"PK\x06\x06", b'\x00'*4) |
| 157 | ++ with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'): |
| 158 | ++ zipfile.ZipFile(io.BytesIO(zipdata)) |
| 159 | ++ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) |
| 160 | ++ |
| 161 | ++ def test_zip64_extensible_data(self): |
| 162 | ++ # These values are what is set in the make_zip64_file method. |
| 163 | ++ expected_file_size = 8 |
| 164 | ++ expected_compress_size = 8 |
| 165 | ++ expected_header_offset = 0 |
| 166 | ++ expected_content = b"test1234" |
| 167 | ++ |
| 168 | ++ zipdata = self.make_zip64_file( |
| 169 | ++ extensible_data=b'\xca\xfe\x04\x00\x00\x00data') |
| 170 | ++ with zipfile.ZipFile(io.BytesIO(zipdata)) as zf: |
| 171 | ++ zinfo = zf.infolist()[0] |
| 172 | ++ self.assertEqual(zinfo.file_size, expected_file_size) |
| 173 | ++ self.assertEqual(zinfo.compress_size, expected_compress_size) |
| 174 | ++ self.assertEqual(zinfo.header_offset, expected_header_offset) |
| 175 | ++ self.assertEqual(zf.read(zinfo), expected_content) |
| 176 | ++ self.assertTrue(zipfile.is_zipfile(io.BytesIO(zipdata))) |
| 177 | ++ |
| 178 | ++ with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'): |
| 179 | ++ zipfile.ZipFile(io.BytesIO(b'prepended' + zipdata)) |
| 180 | ++ self.assertFalse(zipfile.is_zipfile(io.BytesIO(b'prepended' + zipdata))) |
| 181 | + |
| 182 | + def test_generated_valid_zip64_extra(self): |
| 183 | + # These values are what is set in the make_zip64_file method. |
| 184 | +diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py |
| 185 | +index 91b2e03..aa9750e 100644 |
| 186 | +--- a/Lib/zipfile/__init__.py |
| 187 | ++++ b/Lib/zipfile/__init__.py |
| 188 | +@@ -231,7 +231,7 @@ def is_zipfile(filename): |
| 189 | + else: |
| 190 | + with open(filename, "rb") as fp: |
| 191 | + result = _check_zipfile(fp) |
| 192 | +- except OSError: |
| 193 | ++ except (OSError, BadZipFile): |
| 194 | + pass |
| 195 | + return result |
| 196 | + |
| 197 | +@@ -239,16 +239,15 @@ def _EndRecData64(fpin, offset, endrec): |
| 198 | + """ |
| 199 | + Read the ZIP64 end-of-archive records and use that to update endrec |
| 200 | + """ |
| 201 | +- try: |
| 202 | +- fpin.seek(offset - sizeEndCentDir64Locator, 2) |
| 203 | +- except OSError: |
| 204 | +- # If the seek fails, the file is not large enough to contain a ZIP64 |
| 205 | ++ offset -= sizeEndCentDir64Locator |
| 206 | ++ if offset < 0: |
| 207 | ++ # The file is not large enough to contain a ZIP64 |
| 208 | + # end-of-archive record, so just return the end record we were given. |
| 209 | + return endrec |
| 210 | +- |
| 211 | ++ fpin.seek(offset) |
| 212 | + data = fpin.read(sizeEndCentDir64Locator) |
| 213 | + if len(data) != sizeEndCentDir64Locator: |
| 214 | +- return endrec |
| 215 | ++ raise OSError("Unknown I/O error") |
| 216 | + sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) |
| 217 | + if sig != stringEndArchive64Locator: |
| 218 | + return endrec |
| 219 | +@@ -256,16 +255,33 @@ def _EndRecData64(fpin, offset, endrec): |
| 220 | + if diskno != 0 or disks > 1: |
| 221 | + raise BadZipFile("zipfiles that span multiple disks are not supported") |
| 222 | + |
| 223 | +- # Assume no 'zip64 extensible data' |
| 224 | +- fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) |
| 225 | ++ offset -= sizeEndCentDir64 |
| 226 | ++ if reloff > offset: |
| 227 | ++ raise BadZipFile("Corrupt zip64 end of central directory locator") |
| 228 | ++ # First, check the assumption that there is no prepended data. |
| 229 | ++ fpin.seek(reloff) |
| 230 | ++ extrasz = offset - reloff |
| 231 | + data = fpin.read(sizeEndCentDir64) |
| 232 | + if len(data) != sizeEndCentDir64: |
| 233 | +- return endrec |
| 234 | ++ raise OSError("Unknown I/O error") |
| 235 | ++ if not data.startswith(stringEndArchive64) and reloff != offset: |
| 236 | ++ # Since we already have seen the Zip64 EOCD Locator, it's |
| 237 | ++ # possible we got here because there is prepended data. |
| 238 | ++ # Assume no 'zip64 extensible data' |
| 239 | ++ fpin.seek(offset) |
| 240 | ++ extrasz = 0 |
| 241 | ++ data = fpin.read(sizeEndCentDir64) |
| 242 | ++ if len(data) != sizeEndCentDir64: |
| 243 | ++ raise OSError("Unknown I/O error") |
| 244 | ++ if not data.startswith(stringEndArchive64): |
| 245 | ++ raise BadZipFile("Zip64 end of central directory record not found") |
| 246 | ++ |
| 247 | + sig, sz, create_version, read_version, disk_num, disk_dir, \ |
| 248 | + dircount, dircount2, dirsize, diroffset = \ |
| 249 | + struct.unpack(structEndArchive64, data) |
| 250 | +- if sig != stringEndArchive64: |
| 251 | +- return endrec |
| 252 | ++ if (diroffset + dirsize != reloff or |
| 253 | ++ sz + 12 != sizeEndCentDir64 + extrasz): |
| 254 | ++ raise BadZipFile("Corrupt zip64 end of central directory record") |
| 255 | + |
| 256 | + # Update the original endrec using data from the ZIP64 record |
| 257 | + endrec[_ECD_SIGNATURE] = sig |
| 258 | +@@ -275,6 +291,7 @@ def _EndRecData64(fpin, offset, endrec): |
| 259 | + endrec[_ECD_ENTRIES_TOTAL] = dircount2 |
| 260 | + endrec[_ECD_SIZE] = dirsize |
| 261 | + endrec[_ECD_OFFSET] = diroffset |
| 262 | ++ endrec[_ECD_LOCATION] = offset - extrasz |
| 263 | + return endrec |
| 264 | + |
| 265 | + |
| 266 | +@@ -308,7 +325,7 @@ def _EndRecData(fpin): |
| 267 | + endrec.append(filesize - sizeEndCentDir) |
| 268 | + |
| 269 | + # Try to read the "Zip64 end of central directory" structure |
| 270 | +- return _EndRecData64(fpin, -sizeEndCentDir, endrec) |
| 271 | ++ return _EndRecData64(fpin, filesize - sizeEndCentDir, endrec) |
| 272 | + |
| 273 | + # Either this is not a ZIP file, or it is a ZIP file with an archive |
| 274 | + # comment. Search the end of the file for the "end of central directory" |
| 275 | +@@ -332,8 +349,7 @@ def _EndRecData(fpin): |
| 276 | + endrec.append(maxCommentStart + start) |
| 277 | + |
| 278 | + # Try to read the "Zip64 end of central directory" structure |
| 279 | +- return _EndRecData64(fpin, maxCommentStart + start - filesize, |
| 280 | +- endrec) |
| 281 | ++ return _EndRecData64(fpin, maxCommentStart + start, endrec) |
| 282 | + |
| 283 | + # Unable to find a valid end of central directory structure |
| 284 | + return None |
| 285 | +@@ -1427,9 +1443,6 @@ class ZipFile: |
| 286 | + |
| 287 | + # "concat" is zero, unless zip was concatenated to another file |
| 288 | + concat = endrec[_ECD_LOCATION] - size_cd - offset_cd |
| 289 | +- if endrec[_ECD_SIGNATURE] == stringEndArchive64: |
| 290 | +- # If Zip64 extension structures are present, account for them |
| 291 | +- concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) |
| 292 | + |
| 293 | + if self.debug > 2: |
| 294 | + inferred = concat + offset_cd |
| 295 | +@@ -2039,7 +2052,7 @@ class ZipFile: |
| 296 | + " would require ZIP64 extensions") |
| 297 | + zip64endrec = struct.pack( |
| 298 | + structEndArchive64, stringEndArchive64, |
| 299 | +- 44, 45, 45, 0, 0, centDirCount, centDirCount, |
| 300 | ++ sizeEndCentDir64 - 12, 45, 45, 0, 0, centDirCount, centDirCount, |
| 301 | + centDirSize, centDirOffset) |
| 302 | + self.fp.write(zip64endrec) |
| 303 | + |
| 304 | +diff --git a/Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst b/Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst |
| 305 | +new file mode 100644 |
| 306 | +index 0000000..a8e7a1f |
| 307 | +--- /dev/null |
| 308 | ++++ b/Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst |
| 309 | +@@ -0,0 +1,3 @@ |
| 310 | ++Check consistency of the zip64 end of central directory record. Support |
| 311 | ++records with "zip64 extensible data" if there are no bytes prepended to the |
| 312 | ++ZIP file. |
| 313 | +-- |
| 314 | +2.45.4 |
| 315 | + |
0 commit comments