Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ tarfile
:func:`~tarfile.TarFile.errorlevel` is zero.
(Contributed by Matt Prodani and Petr Viktorin in :gh:`112887`
and :cve:`2025-4435`.)
* :mod:`tarfile` now validates archives to ensure member offsets are non-negative.
(Contributed by Alexander Enrique Urieles Nieto in :gh:`130577`.)


types
Expand Down
3 changes: 3 additions & 0 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1647,6 +1647,9 @@ def _block(self, count):
"""Round up a byte count by BLOCKSIZE and return it,
e.g. _block(834) => 1024.
"""
# Only non-negative offsets are allowed
if count < 0:
raise InvalidHeaderError("invalid offset")
blocks, remainder = divmod(count, BLOCKSIZE)
if remainder:
blocks += 1
Expand Down
156 changes: 156 additions & 0 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def sha256sum(data):
zstname = os.path.join(TEMPDIR, "testtar.tar.zst")
tmpname = os.path.join(TEMPDIR, "tmp.tar")
dotlessname = os.path.join(TEMPDIR, "testtar")
SPACE = b" "

sha256_regtype = (
"e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce"
Expand Down Expand Up @@ -4602,6 +4603,161 @@ def extractall(self, ar):
ar.extractall(self.testdir, filter='fully_trusted')


class OffsetValidationTests(unittest.TestCase):
    """Regression tests for gh-130577: reject tar headers whose size field
    decodes to a negative value, which would yield a negative member offset.

    Each ``invalid_*_header`` constant below is a hand-built 512-byte tar
    header (POSIX/ustar, GNU, and old V7 layouts) whose ``size`` field is
    filled with ``0xff`` bytes so it parses as a negative number.  The
    ``chksum`` fields are precomputed for these exact byte sequences — do
    not change any surrounding bytes without recomputing the checksum.
    """

    # Scratch file the tests write generated archives into.
    tarname = tmpname
    invalid_posix_header = (
        # name: 100 bytes
        tarfile.NUL * tarfile.LENGTH_NAME
        # mode, space, null terminator: 8 bytes
        + b"000755" + SPACE + tarfile.NUL
        # uid, space, null terminator: 8 bytes
        + b"000001" + SPACE + tarfile.NUL
        # gid, space, null terminator: 8 bytes
        + b"000001" + SPACE + tarfile.NUL
        # size, space: 12 bytes  (all 0xff -> decodes negative)
        + b"\xff" * 11 + SPACE
        # mtime, space: 12 bytes
        + tarfile.NUL * 11 + SPACE
        # chksum: 8 bytes
        + b"0011407" + tarfile.NUL
        # type: 1 byte
        + tarfile.REGTYPE
        # linkname: 100 bytes
        + tarfile.NUL * tarfile.LENGTH_LINK
        # magic: 6 bytes, version: 2 bytes
        + tarfile.POSIX_MAGIC
        # uname: 32 bytes
        + tarfile.NUL * 32
        # gname: 32 bytes
        + tarfile.NUL * 32
        # devmajor, space, null terminator: 8 bytes
        + tarfile.NUL * 6 + SPACE + tarfile.NUL
        # devminor, space, null terminator: 8 bytes
        + tarfile.NUL * 6 + SPACE + tarfile.NUL
        # prefix: 155 bytes
        + tarfile.NUL * tarfile.LENGTH_PREFIX
        # padding: 12 bytes
        + tarfile.NUL * 12
    )
    invalid_gnu_header = (
        # name: 100 bytes
        tarfile.NUL * tarfile.LENGTH_NAME
        # mode, null terminator: 8 bytes
        + b"0000755" + tarfile.NUL
        # uid, null terminator: 8 bytes
        + b"0000001" + tarfile.NUL
        # gid, null terminator: 8 bytes
        + b"0000001" + tarfile.NUL
        # size, space: 12 bytes  (all 0xff -> decodes negative)
        + b"\xff" * 11 + SPACE
        # mtime, space: 12 bytes
        + tarfile.NUL * 11 + SPACE
        # chksum: 8 bytes
        + b"0011327" + tarfile.NUL
        # type: 1 byte
        + tarfile.REGTYPE
        # linkname: 100 bytes
        + tarfile.NUL * tarfile.LENGTH_LINK
        # magic: 8 bytes
        + tarfile.GNU_MAGIC
        # uname: 32 bytes
        + tarfile.NUL * 32
        # gname: 32 bytes
        + tarfile.NUL * 32
        # devmajor, null terminator: 8 bytes
        + tarfile.NUL * 8
        # devminor, null terminator: 8 bytes
        + tarfile.NUL * 8
        # padding: 167 bytes
        + tarfile.NUL * 167
    )
    invalid_v7_header = (
        # name: 100 bytes
        tarfile.NUL * tarfile.LENGTH_NAME
        # mode, space, null terminator: 8 bytes
        + b"000755" + SPACE + tarfile.NUL
        # uid, space, null terminator: 8 bytes
        + b"000001" + SPACE + tarfile.NUL
        # gid, space, null terminator: 8 bytes
        + b"000001" + SPACE + tarfile.NUL
        # size, space: 12 bytes  (all 0xff -> decodes negative)
        + b"\xff" * 11 + SPACE
        # mtime, space: 12 bytes
        + tarfile.NUL * 11 + SPACE
        # chksum: 8 bytes
        + b"0010070" + tarfile.NUL
        # type: 1 byte
        + tarfile.REGTYPE
        # linkname: 100 bytes
        + tarfile.NUL * tarfile.LENGTH_LINK
        # padding: 255 bytes
        + tarfile.NUL * 255
    )
    # A well-formed GNU header used as a "good" neighbor in mixed archives.
    valid_gnu_header = tarfile.TarInfo("filename").tobuf(tarfile.GNU_FORMAT)
    # One block of filler bytes standing in for member data.
    data_block = b"\xff" * tarfile.BLOCKSIZE

    def _write_buffer(self, buffer):
        """Overwrite the scratch tar file with the raw bytes in *buffer*."""
        with open(self.tarname, "wb") as f:
            f.write(buffer)

    def _get_members(self, ignore_zeros=None):
        """Open the scratch file as a tar archive and return its members.

        *ignore_zeros* is forwarded to :func:`tarfile.open` so tests can
        exercise both the strict and the skip-over-garbage reading modes.
        """
        with open(self.tarname, "rb") as f:
            with tarfile.open(
                mode="r", fileobj=f, ignore_zeros=ignore_zeros
            ) as tar:
                return tar.getmembers()

    def _assert_raises_read_error_exception(self):
        """Assert that reading the scratch file fails with ReadError."""
        with self.assertRaisesRegex(
            tarfile.ReadError, "file could not be opened successfully"
        ):
            self._get_members()

    def test_invalid_offset_header_validations(self):
        """An archive starting with a negative-size header must not open."""
        for tar_format, invalid_header in (
            ("posix", self.invalid_posix_header),
            ("gnu", self.invalid_gnu_header),
            ("v7", self.invalid_v7_header),
        ):
            with self.subTest(format=tar_format):
                self._write_buffer(invalid_header)
                self._assert_raises_read_error_exception()

    def test_early_stop_at_invalid_offset_header(self):
        """Parsing stops at the first invalid header; prior members survive."""
        buffer = self.valid_gnu_header + self.invalid_gnu_header + self.valid_gnu_header
        self._write_buffer(buffer)
        members = self._get_members()
        self.assertEqual(len(members), 1)
        self.assertEqual(members[0].name, "filename")
        self.assertEqual(members[0].offset, 0)

    def test_ignore_invalid_archive(self):
        """With ignore_zeros=True, an all-invalid archive yields no members."""
        # 3 invalid headers with their respective data
        buffer = (self.invalid_gnu_header + self.data_block) * 3
        self._write_buffer(buffer)
        members = self._get_members(ignore_zeros=True)
        self.assertEqual(len(members), 0)

    def test_ignore_invalid_offset_headers(self):
        """With ignore_zeros=True, invalid headers are skipped and the valid
        member is found at the expected offset on either side of them."""
        for first_block, second_block, expected_offset in (
            (
                (self.valid_gnu_header),
                (self.invalid_gnu_header + self.data_block),
                0,
            ),
            (
                (self.invalid_gnu_header + self.data_block),
                (self.valid_gnu_header),
                1024,
            ),
        ):
            self._write_buffer(first_block + second_block)
            members = self._get_members(ignore_zeros=True)
            self.assertEqual(len(members), 1)
            self.assertEqual(members[0].name, "filename")
            self.assertEqual(members[0].offset, expected_offset)


def setUpModule():
os_helper.unlink(TEMPDIR)
os.makedirs(TEMPDIR)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:mod:`tarfile` now validates archives to ensure member offsets are
non-negative.
Loading