| 
 | 1 | +From f6e8423fca302558e250b6538d092678b2a27003 Mon Sep 17 00:00:00 2001  | 
 | 2 | +From: Alexander Urieles < [email protected]>  | 
 | 3 | +Date: Mon, 28 Jul 2025 17:37:26 +0200  | 
 | 4 | +Subject: [PATCH] gh-130577: tarfile now validates archives to ensure member  | 
 | 5 | + offsets are non-negative (GH-137027) (cherry picked from commit  | 
 | 6 | + 7040aa54f14676938970e10c5f74ea93cd56aa38)  | 
 | 7 | + | 
 | 8 | +Co-authored-by: Alexander Urieles < [email protected]>  | 
 | 9 | +Co-authored-by: Gregory P. Smith < [email protected]>  | 
 | 10 | +Signed-off-by: Azure Linux Security Servicing Account < [email protected]>  | 
 | 11 | +Upstream-reference: https://github.com/python/cpython/pull/137171.patch  | 
 | 12 | +---  | 
 | 13 | + Lib/tarfile.py                                |   3 +  | 
 | 14 | + Lib/test/test_tarfile.py                      | 156 ++++++++++++++++++  | 
 | 15 | + ...-07-23-00-35-29.gh-issue-130577.c7EITy.rst |   3 +  | 
 | 16 | + 3 files changed, 162 insertions(+)  | 
 | 17 | + create mode 100644 Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst  | 
 | 18 | + | 
 | 19 | +diff --git a/Lib/tarfile.py b/Lib/tarfile.py  | 
 | 20 | +index 9999a99..59d3f6e 100755  | 
 | 21 | +--- a/Lib/tarfile.py  | 
 | 22 | ++++ b/Lib/tarfile.py  | 
 | 23 | +@@ -1615,6 +1615,9 @@ class TarInfo(object):  | 
 | 24 | +         """Round up a byte count by BLOCKSIZE and return it,  | 
 | 25 | +            e.g. _block(834) => 1024.  | 
 | 26 | +         """  | 
 | 27 | ++        # Only non-negative offsets are allowed  | 
 | 28 | ++        if count < 0:  | 
 | 29 | ++            raise InvalidHeaderError("invalid offset")  | 
 | 30 | +         blocks, remainder = divmod(count, BLOCKSIZE)  | 
 | 31 | +         if remainder:  | 
 | 32 | +             blocks += 1  | 
 | 33 | +diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py  | 
 | 34 | +index 1c598e1..a6925bf 100644  | 
 | 35 | +--- a/Lib/test/test_tarfile.py  | 
 | 36 | ++++ b/Lib/test/test_tarfile.py  | 
 | 37 | +@@ -50,6 +50,7 @@ bz2name = os.path.join(TEMPDIR, "testtar.tar.bz2")  | 
 | 38 | + xzname = os.path.join(TEMPDIR, "testtar.tar.xz")  | 
 | 39 | + tmpname = os.path.join(TEMPDIR, "tmp.tar")  | 
 | 40 | + dotlessname = os.path.join(TEMPDIR, "testtar")  | 
 | 41 | ++SPACE = b" "  | 
 | 42 | +   | 
 | 43 | + sha256_regtype = (  | 
 | 44 | +     "e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce"  | 
 | 45 | +@@ -4485,6 +4486,161 @@ class OverwriteTests(archiver_tests.OverwriteTests, unittest.TestCase):  | 
 | 46 | +         ar.extractall(self.testdir, filter='fully_trusted')  | 
 | 47 | +   | 
 | 48 | +   | 
 | 49 | ++class OffsetValidationTests(unittest.TestCase):  | 
 | 50 | ++    tarname = tmpname  | 
 | 51 | ++    invalid_posix_header = (  | 
 | 52 | ++        # name: 100 bytes  | 
 | 53 | ++        tarfile.NUL * tarfile.LENGTH_NAME  | 
 | 54 | ++        # mode, space, null terminator: 8 bytes  | 
 | 55 | ++        + b"000755" + SPACE + tarfile.NUL  | 
 | 56 | ++        # uid, space, null terminator: 8 bytes  | 
 | 57 | ++        + b"000001" + SPACE + tarfile.NUL  | 
 | 58 | ++        # gid, space, null terminator: 8 bytes  | 
 | 59 | ++        + b"000001" + SPACE + tarfile.NUL  | 
 | 60 | ++        # size, space: 12 bytes  | 
 | 61 | ++        + b"\xff" * 11 + SPACE  | 
 | 62 | ++        # mtime, space: 12 bytes  | 
 | 63 | ++        + tarfile.NUL * 11 + SPACE  | 
 | 64 | ++        # chksum: 8 bytes  | 
 | 65 | ++        + b"0011407" + tarfile.NUL  | 
 | 66 | ++        # type: 1 byte  | 
 | 67 | ++        + tarfile.REGTYPE  | 
 | 68 | ++        # linkname: 100 bytes  | 
 | 69 | ++        + tarfile.NUL * tarfile.LENGTH_LINK  | 
 | 70 | ++        # magic: 6 bytes, version: 2 bytes  | 
 | 71 | ++        + tarfile.POSIX_MAGIC  | 
 | 72 | ++        # uname: 32 bytes  | 
 | 73 | ++        + tarfile.NUL * 32  | 
 | 74 | ++        # gname: 32 bytes  | 
 | 75 | ++        + tarfile.NUL * 32  | 
 | 76 | ++        # devmajor, space, null terminator: 8 bytes  | 
 | 77 | ++        + tarfile.NUL * 6 + SPACE + tarfile.NUL  | 
 | 78 | ++        # devminor, space, null terminator: 8 bytes  | 
 | 79 | ++        + tarfile.NUL * 6 + SPACE + tarfile.NUL  | 
 | 80 | ++        # prefix: 155 bytes  | 
 | 81 | ++        + tarfile.NUL * tarfile.LENGTH_PREFIX  | 
 | 82 | ++        # padding: 12 bytes  | 
 | 83 | ++        + tarfile.NUL * 12  | 
 | 84 | ++    )  | 
 | 85 | ++    invalid_gnu_header = (  | 
 | 86 | ++        # name: 100 bytes  | 
 | 87 | ++        tarfile.NUL * tarfile.LENGTH_NAME  | 
 | 88 | ++        # mode, null terminator: 8 bytes  | 
 | 89 | ++        + b"0000755" + tarfile.NUL  | 
 | 90 | ++        # uid, null terminator: 8 bytes  | 
 | 91 | ++        + b"0000001" + tarfile.NUL  | 
 | 92 | +        # gid, null terminator: 8 bytes  | 
 | 93 | ++        + b"0000001" + tarfile.NUL  | 
 | 94 | ++        # size, space: 12 bytes  | 
 | 95 | ++        + b"\xff" * 11 + SPACE  | 
 | 96 | ++        # mtime, space: 12 bytes  | 
 | 97 | ++        + tarfile.NUL * 11 + SPACE  | 
 | 98 | ++        # chksum: 8 bytes  | 
 | 99 | ++        + b"0011327" + tarfile.NUL  | 
 | 100 | ++        # type: 1 byte  | 
 | 101 | ++        + tarfile.REGTYPE  | 
 | 102 | ++        # linkname: 100 bytes  | 
 | 103 | ++        + tarfile.NUL * tarfile.LENGTH_LINK  | 
 | 104 | ++        # magic: 8 bytes  | 
 | 105 | ++        + tarfile.GNU_MAGIC  | 
 | 106 | ++        # uname: 32 bytes  | 
 | 107 | ++        + tarfile.NUL * 32  | 
 | 108 | ++        # gname: 32 bytes  | 
 | 109 | ++        + tarfile.NUL * 32  | 
 | 110 | ++        # devmajor, null terminator: 8 bytes  | 
 | 111 | ++        + tarfile.NUL * 8  | 
 | 112 | ++        # devminor, null terminator: 8 bytes  | 
 | 113 | ++        + tarfile.NUL * 8  | 
 | 114 | ++        # padding: 167 bytes  | 
 | 115 | ++        + tarfile.NUL * 167  | 
 | 116 | ++    )  | 
 | 117 | ++    invalid_v7_header = (  | 
 | 118 | ++        # name: 100 bytes  | 
 | 119 | ++        tarfile.NUL * tarfile.LENGTH_NAME  | 
 | 120 | ++        # mode, space, null terminator: 8 bytes  | 
 | 121 | ++        + b"000755" + SPACE + tarfile.NUL  | 
 | 122 | ++        # uid, space, null terminator: 8 bytes  | 
 | 123 | ++        + b"000001" + SPACE + tarfile.NUL  | 
 | 124 | ++        # gid, space, null terminator: 8 bytes  | 
 | 125 | ++        + b"000001" + SPACE + tarfile.NUL  | 
 | 126 | ++        # size, space: 12 bytes  | 
 | 127 | ++        + b"\xff" * 11 + SPACE  | 
 | 128 | ++        # mtime, space: 12 bytes  | 
 | 129 | ++        + tarfile.NUL * 11 + SPACE  | 
 | 130 | ++        # chksum: 8 bytes  | 
 | 131 | ++        + b"0010070" + tarfile.NUL  | 
 | 132 | ++        # type: 1 byte  | 
 | 133 | ++        + tarfile.REGTYPE  | 
 | 134 | ++        # linkname: 100 bytes  | 
 | 135 | ++        + tarfile.NUL * tarfile.LENGTH_LINK  | 
 | 136 | ++        # padding: 255 bytes  | 
 | 137 | ++        + tarfile.NUL * 255  | 
 | 138 | ++    )  | 
 | 139 | ++    valid_gnu_header = tarfile.TarInfo("filename").tobuf(tarfile.GNU_FORMAT)  | 
 | 140 | ++    data_block = b"\xff" * tarfile.BLOCKSIZE  | 
 | 141 | ++  | 
 | 142 | ++    def _write_buffer(self, buffer):  | 
 | 143 | ++        with open(self.tarname, "wb") as f:  | 
 | 144 | ++            f.write(buffer)  | 
 | 145 | ++  | 
 | 146 | ++    def _get_members(self, ignore_zeros=None):  | 
 | 147 | ++        with open(self.tarname, "rb") as f:  | 
 | 148 | ++            with tarfile.open(  | 
 | 149 | ++                mode="r", fileobj=f, ignore_zeros=ignore_zeros  | 
 | 150 | ++            ) as tar:  | 
 | 151 | ++                return tar.getmembers()  | 
 | 152 | ++  | 
 | 153 | ++    def _assert_raises_read_error_exception(self):  | 
 | 154 | ++        with self.assertRaisesRegex(  | 
 | 155 | ++            tarfile.ReadError, "file could not be opened successfully"  | 
 | 156 | ++        ):  | 
 | 157 | ++            self._get_members()  | 
 | 158 | ++  | 
 | 159 | ++    def test_invalid_offset_header_validations(self):  | 
 | 160 | ++        for tar_format, invalid_header in (  | 
 | 161 | ++            ("posix", self.invalid_posix_header),  | 
 | 162 | ++            ("gnu", self.invalid_gnu_header),  | 
 | 163 | ++            ("v7", self.invalid_v7_header),  | 
 | 164 | ++        ):  | 
 | 165 | ++            with self.subTest(format=tar_format):  | 
 | 166 | ++                self._write_buffer(invalid_header)  | 
 | 167 | ++                self._assert_raises_read_error_exception()  | 
 | 168 | ++  | 
 | 169 | ++    def test_early_stop_at_invalid_offset_header(self):  | 
 | 170 | ++        buffer = self.valid_gnu_header + self.invalid_gnu_header + self.valid_gnu_header  | 
 | 171 | ++        self._write_buffer(buffer)  | 
 | 172 | ++        members = self._get_members()  | 
 | 173 | ++        self.assertEqual(len(members), 1)  | 
 | 174 | ++        self.assertEqual(members[0].name, "filename")  | 
 | 175 | ++        self.assertEqual(members[0].offset, 0)  | 
 | 176 | ++  | 
 | 177 | ++    def test_ignore_invalid_archive(self):  | 
 | 178 | ++        # 3 invalid headers with their respective data  | 
 | 179 | ++        buffer = (self.invalid_gnu_header + self.data_block) * 3  | 
 | 180 | ++        self._write_buffer(buffer)  | 
 | 181 | ++        members = self._get_members(ignore_zeros=True)  | 
 | 182 | ++        self.assertEqual(len(members), 0)  | 
 | 183 | ++  | 
 | 184 | ++    def test_ignore_invalid_offset_headers(self):  | 
 | 185 | ++        for first_block, second_block, expected_offset in (  | 
 | 186 | ++            (  | 
 | 187 | ++                (self.valid_gnu_header),  | 
 | 188 | ++                (self.invalid_gnu_header + self.data_block),  | 
 | 189 | ++                0,  | 
 | 190 | ++            ),  | 
 | 191 | ++            (  | 
 | 192 | ++                (self.invalid_gnu_header + self.data_block),  | 
 | 193 | ++                (self.valid_gnu_header),  | 
 | 194 | ++                1024,  | 
 | 195 | ++            ),  | 
 | 196 | ++        ):  | 
 | 197 | ++            self._write_buffer(first_block + second_block)  | 
 | 198 | ++            members = self._get_members(ignore_zeros=True)  | 
 | 199 | ++            self.assertEqual(len(members), 1)  | 
 | 200 | ++            self.assertEqual(members[0].name, "filename")  | 
 | 201 | ++            self.assertEqual(members[0].offset, expected_offset)  | 
 | 202 | ++  | 
 | 203 | ++  | 
 | 204 | + def setUpModule():  | 
 | 205 | +     os_helper.unlink(TEMPDIR)  | 
 | 206 | +     os.makedirs(TEMPDIR)  | 
 | 207 | +diff --git a/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst  | 
 | 208 | +new file mode 100644  | 
 | 209 | +index 0000000..342cabb  | 
 | 210 | +--- /dev/null  | 
 | 211 | ++++ b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst  | 
 | 212 | +@@ -0,0 +1,3 @@  | 
 | 213 | ++:mod:`tarfile` now validates archives to ensure member offsets are  | 
 | 214 | ++non-negative.  (Contributed by Alexander Enrique Urieles Nieto in  | 
 | 215 | ++:gh:`130577`.)  | 
 | 216 | +--   | 
 | 217 | +2.45.4  | 
 | 218 | + | 
0 commit comments