Skip to content

Commit 7afa2fe

Browse files
sandarshmeta-codesync[bot]
authored andcommitted
Fix tarfile.extractall path traversal vulnerability
Summary: `tarfile.extractall()` is called without member validation in two places, allowing a malicious archive to write files outside the target directory via crafted paths like `../../etc/passwd`. Added `safe_extractall()` which: 1. Pre-validates all member paths to block absolute paths and `..` traversal 2. Uses Python 3.12+ `filter='data'` when available (blocks symlinks, device files, setuid bits) 3. Handles both tar and zip archives Patched both call sites: - `fetcher.py` — dependency source archive extraction - `getdeps.py` — build cache artifact extraction (also fixed resource leak: tarfile now uses `with` block) GitHub Issue: facebook/mvfst#430 Reviewed By: bigfootjon Differential Revision: D99477527 fbshipit-source-id: 2572f3c15a3d99d0c1621edb39be9bf1f8ea2a57
1 parent 879832a commit 7afa2fe

File tree

3 files changed

+144
-6
lines changed

3 files changed

+144
-6
lines changed

build/fbcode_builder/getdeps.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
file_name_is_cmake_file,
2525
is_public_commit,
2626
list_files_under_dir_newer_than_timestamp,
27+
safe_extractall,
2728
SystemPackageFetcher,
2829
)
2930
from getdeps.load import ManifestLoader
@@ -283,11 +284,12 @@ def download(self):
283284
try:
284285
target_file_name = os.path.join(dl_dir, self.cache_file_name)
285286
if self.cache.download_to_file(self.cache_file_name, target_file_name):
286-
tf = tarfile.open(target_file_name, "r")
287-
print(
288-
"Extracting %s -> %s..." % (self.cache_file_name, self.inst_dir)
289-
)
290-
tf.extractall(self.inst_dir)
287+
with tarfile.open(target_file_name, "r") as tf:
288+
print(
289+
"Extracting %s -> %s..."
290+
% (self.cache_file_name, self.inst_dir)
291+
)
292+
safe_extractall(tf, self.inst_dir)
291293

292294
cached_marker = os.path.join(self.inst_dir, ".getdeps-cached-build")
293295
with open(cached_marker, "w") as f:

build/fbcode_builder/getdeps/fetcher.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,33 @@
3939
from .manifest import ManifestContext, ManifestParser
4040

4141

42+
def _validate_archive_members(names: list[str], dest_dir: str) -> None:
43+
"""Validate archive member paths to prevent path traversal (Zip Slip) attacks."""
44+
dest_dir = os.path.realpath(dest_dir)
45+
for name in names:
46+
if os.path.isabs(name):
47+
raise ValueError(f"Blocked absolute path in archive: {name!r}")
48+
member_path = os.path.realpath(os.path.join(dest_dir, name))
49+
if not member_path.startswith(dest_dir + os.sep) and member_path != dest_dir:
50+
raise ValueError(f"Blocked path traversal in archive: {name!r}")
51+
52+
53+
def safe_extractall(archive: tarfile.TarFile | zipfile.ZipFile, dest: str) -> None:
54+
"""Safely extract a tar or zip archive with path traversal protection."""
55+
if isinstance(archive, tarfile.TarFile):
56+
_validate_archive_members(archive.getnames(), dest)
57+
try:
58+
archive.extractall(dest, filter="data")
59+
except TypeError:
60+
# Python < 3.12 without filter support
61+
archive.extractall(dest)
62+
elif isinstance(archive, zipfile.ZipFile):
63+
_validate_archive_members(archive.namelist(), dest)
64+
archive.extractall(dest)
65+
else:
66+
raise TypeError(f"Unsupported archive type: {type(archive)}")
67+
68+
4269
def file_name_is_cmake_file(file_name: str) -> bool:
4370
file_name = file_name.lower()
4471
base = os.path.basename(file_name)
@@ -1132,7 +1159,7 @@ def update(self) -> ChangeStatus:
11321159
# the boost tarball it makes some assumptions and tries to convert
11331160
# a non-ascii path to ascii and throws.
11341161
src = str(src)
1135-
t.extractall(src)
1162+
safe_extractall(t, src)
11361163

11371164
if is_windows():
11381165
subdir = self.manifest.get("build", "subdir")
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
#
3+
# This source code is licensed under the MIT license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
# pyre-strict
7+
8+
9+
import io
10+
import os
11+
import tarfile
12+
import tempfile
13+
import unittest
14+
import zipfile
15+
16+
from ..fetcher import _validate_archive_members, safe_extractall
17+
18+
19+
class ValidateArchiveMembersTest(unittest.TestCase):
20+
def test_valid_paths(self) -> None:
21+
with tempfile.TemporaryDirectory() as dest:
22+
_validate_archive_members(["foo.txt", "subdir/bar.txt", "a/b/c.txt"], dest)
23+
24+
def test_blocks_absolute_path(self) -> None:
25+
with tempfile.TemporaryDirectory() as dest:
26+
with self.assertRaises(ValueError) as ctx:
27+
_validate_archive_members(["/etc/passwd"], dest)
28+
self.assertIn("absolute path", str(ctx.exception))
29+
30+
def test_blocks_path_traversal(self) -> None:
31+
with tempfile.TemporaryDirectory() as dest:
32+
with self.assertRaises(ValueError) as ctx:
33+
_validate_archive_members(["../../etc/passwd"], dest)
34+
self.assertIn("path traversal", str(ctx.exception))
35+
36+
def test_allows_dotdot_that_stays_within_dest(self) -> None:
37+
with tempfile.TemporaryDirectory() as dest:
38+
_validate_archive_members(["a/../b.txt"], dest)
39+
40+
41+
class SafeExtractallTarTest(unittest.TestCase):
42+
def _create_tar(self, members: dict[str, bytes]) -> str:
43+
fd, path = tempfile.mkstemp(suffix=".tar.gz")
44+
os.close(fd)
45+
with tarfile.open(path, "w:gz") as tar:
46+
for name, data in members.items():
47+
info = tarfile.TarInfo(name=name)
48+
info.size = len(data)
49+
tar.addfile(info, io.BytesIO(data))
50+
return path
51+
52+
def test_extracts_valid_tar(self) -> None:
53+
archive = self._create_tar(
54+
{"hello.txt": b"hello world", "sub/nested.txt": b"nested"}
55+
)
56+
try:
57+
with tempfile.TemporaryDirectory() as dest:
58+
with tarfile.open(archive) as tar:
59+
safe_extractall(tar, dest)
60+
self.assertTrue(os.path.isfile(os.path.join(dest, "hello.txt")))
61+
self.assertTrue(os.path.isfile(os.path.join(dest, "sub/nested.txt")))
62+
with open(os.path.join(dest, "hello.txt")) as f:
63+
self.assertEqual(f.read(), "hello world")
64+
finally:
65+
os.unlink(archive)
66+
67+
def test_blocks_traversal_tar(self) -> None:
68+
archive = self._create_tar({"../../evil.txt": b"pwned"})
69+
try:
70+
with tempfile.TemporaryDirectory() as dest:
71+
with tarfile.open(archive) as tar:
72+
with self.assertRaises(ValueError):
73+
safe_extractall(tar, dest)
74+
finally:
75+
os.unlink(archive)
76+
77+
78+
class SafeExtractallZipTest(unittest.TestCase):
79+
def _create_zip(self, members: dict[str, bytes]) -> str:
80+
fd, path = tempfile.mkstemp(suffix=".zip")
81+
os.close(fd)
82+
with zipfile.ZipFile(path, "w") as zf:
83+
for name, data in members.items():
84+
zf.writestr(name, data)
85+
return path
86+
87+
def test_extracts_valid_zip(self) -> None:
88+
archive = self._create_zip(
89+
{"hello.txt": b"hello world", "sub/nested.txt": b"nested"}
90+
)
91+
try:
92+
with tempfile.TemporaryDirectory() as dest:
93+
with zipfile.ZipFile(archive) as zf:
94+
safe_extractall(zf, dest)
95+
self.assertTrue(os.path.isfile(os.path.join(dest, "hello.txt")))
96+
with open(os.path.join(dest, "hello.txt")) as f:
97+
self.assertEqual(f.read(), "hello world")
98+
finally:
99+
os.unlink(archive)
100+
101+
def test_blocks_traversal_zip(self) -> None:
102+
archive = self._create_zip({"../../evil.txt": b"pwned"})
103+
try:
104+
with tempfile.TemporaryDirectory() as dest:
105+
with zipfile.ZipFile(archive) as zf:
106+
with self.assertRaises(ValueError):
107+
safe_extractall(zf, dest)
108+
finally:
109+
os.unlink(archive)

0 commit comments

Comments
 (0)