From 382e7c4c68a1ddd4665560d548924ffd99587b9e Mon Sep 17 00:00:00 2001 From: Christoph Walcher Date: Sun, 31 Aug 2025 22:14:43 +0200 Subject: [PATCH 1/5] gh-57911: Sanitize symlink targets in tarfile on win32 --- Lib/tarfile.py | 8 +++++++- Lib/test/test_tarfile.py | 15 +++++++++++++++ .../2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst | 3 +++ 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 7aa4a032b63e6b..149709d0cedb97 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2718,7 +2718,13 @@ def makelink_with_filter(self, tarinfo, targetpath, if os.path.lexists(targetpath): # Avoid FileExistsError on following os.symlink. os.unlink(targetpath) - os.symlink(tarinfo.linkname, targetpath) + link_target = tarinfo.linkname + if os.name == "nt": + # gh-57911: Posix-flavoured forward-slash path separators in + # symlink targets aren't sanitized by Windows automaticly, + # resulting in corrupted links. + link_target = link_target.replace("/", os.path.sep) + os.symlink(link_target, targetpath) return else: if os.path.exists(tarinfo._link_target): diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 860413b88eb6b5..16fc5e8062c722 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -4039,6 +4039,21 @@ def test_absolute_symlink(self): tarfile.AbsoluteLinkError, "'parent' is a link to an absolute path") + @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') + @symlink_test + def test_symlink_target_sanitized_on_windows(self): + with ArchiveMaker() as arc: + arc.add('link', symlink_to="relative/test/path") + + with self.check_context(arc.open(), 'fully_trusted'): + self.expect_file('link', type=tarfile.SYMTYPE) + link_path = os.path.normpath(self.destdir / "link") + link_target = os.readlink(link_path) + if os.name == "nt": + self.assertEqual(link_target, "relative\\test\\path") + else: + self.assertEqual(link_target, "relative/test/path") + def test_absolute_hardlink(self): # Test hardlink to an absolute path # Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives diff --git a/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst b/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst new file mode 100644 index 00000000000000..1db95e1acc5364 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst @@ -0,0 +1,3 @@ +When extracting tar files on Windows the path of symlink targets will be +sanitized to use backward-slashes as path separator because Posix flavoured +path separators result in corrupted links. From 2ccc462f92b81097b5a16d8a9db7a018c56ea161 Mon Sep 17 00:00:00 2001 From: Christoph Walcher Date: Mon, 1 Sep 2025 01:02:53 +0200 Subject: [PATCH 2/5] fix test as dotdot resolves early on win32 --- Lib/test/test_tarfile.py | 84 ++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 16fc5e8062c722..7d9a89997badcc 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -2777,7 +2777,7 @@ def test_useful_error_message_when_modules_missing(self): str(excinfo.exception), ) - @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') + @os_helper.skip_unless_symlink @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod") @unittest.mock.patch('os.chmod') def test_deferred_directory_attributes_update(self, mock_chmod): @@ -3663,6 +3663,37 @@ class TestExtractionFilters(unittest.TestCase): # The destination for the extraction, within `outerdir` destdir = outerdir / 'dest' + @classmethod + def setUpClass(cls): + # Posix and Windows have different pathname resolution: + # either symlink or a '..' component resolve first. + # Let's see which we are on. + if os_helper.can_symlink(): + testpath = os.path.join(TEMPDIR, 'resolution_test') + os.mkdir(testpath) + + # testpath/current links to `.` which is all of: + # - `testpath` + # - `testpath/current` + # - `testpath/current/current` + # - etc. + os.symlink('.', os.path.join(testpath, 'current')) + + # we'll test where `testpath/current/../file` ends up + with open(os.path.join(testpath, 'current', '..', 'file'), 'w'): + pass + + if os.path.exists(os.path.join(testpath, 'file')): + # Windows collapses 'current\..' to '.' first, leaving + # 'testpath\file' + cls.dotdot_resolves_early = True + elif os.path.exists(os.path.join(testpath, '..', 'file')): + # Posix resolves 'current' to '.' first, leaving + # 'testpath/../file' + cls.dotdot_resolves_early = False + else: + raise AssertionError('Could not determine link resolution') + @contextmanager def check_context(self, tar, filter, *, check_flag=True): """Extracts `tar` to `self.destdir` and allows checking the result @@ -3809,23 +3840,21 @@ def test_parent_symlink(self): arc.add('current', symlink_to='.') # effectively points to ./../ - arc.add('parent', symlink_to='current/..') + if self.dotdot_resolves_early: + arc.add('parent', symlink_to='current/../..') + else: + arc.add('parent', symlink_to='current/..') arc.add('parent/evil') if os_helper.can_symlink(): with self.check_context(arc.open(), 'fully_trusted'): - if self.raised_exception is not None: - # Windows will refuse to create a file that's a symlink to itself - # (and tarfile doesn't swallow that exception) - self.expect_exception(FileExistsError) - # The other cases will fail with this error too. - # Skip the rest of this test. - return + self.expect_file('current', symlink_to='.') + if self.dotdot_resolves_early: + self.expect_file('parent', symlink_to='current/../..') else: - self.expect_file('current', symlink_to='.') self.expect_file('parent', symlink_to='current/..') - self.expect_file('../evil') + self.expect_file('../evil') with self.check_context(arc.open(), 'tar'): self.expect_exception( @@ -3927,35 +3956,6 @@ def test_parent_symlink2(self): # Test interplaying symlinks # Inspired by 'dirsymlink2b' in jwilk/traversal-archives - # Posix and Windows have different pathname resolution: - # either symlink or a '..' component resolve first. - # Let's see which we are on. - if os_helper.can_symlink(): - testpath = os.path.join(TEMPDIR, 'resolution_test') - os.mkdir(testpath) - - # testpath/current links to `.` which is all of: - # - `testpath` - # - `testpath/current` - # - `testpath/current/current` - # - etc. - os.symlink('.', os.path.join(testpath, 'current')) - - # we'll test where `testpath/current/../file` ends up - with open(os.path.join(testpath, 'current', '..', 'file'), 'w'): - pass - - if os.path.exists(os.path.join(testpath, 'file')): - # Windows collapses 'current\..' to '.' first, leaving - # 'testpath\file' - dotdot_resolves_early = True - elif os.path.exists(os.path.join(testpath, '..', 'file')): - # Posix resolves 'current' to '.' first, leaving - # 'testpath/../file' - dotdot_resolves_early = False - else: - raise AssertionError('Could not determine link resolution') - with ArchiveMaker() as arc: # `current` links to `.` which is both the destination directory @@ -3991,7 +3991,7 @@ def test_parent_symlink2(self): with self.check_context(arc.open(), 'data'): if os_helper.can_symlink(): - if dotdot_resolves_early: + if self.dotdot_resolves_early: # Fail when extracting a file outside destination self.expect_exception( tarfile.OutsideDestinationError, @@ -4039,8 +4039,8 @@ def test_absolute_symlink(self): tarfile.AbsoluteLinkError, "'parent' is a link to an absolute path") - @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') @symlink_test + @os_helper.skip_unless_symlink def test_symlink_target_sanitized_on_windows(self): with ArchiveMaker() as arc: arc.add('link', symlink_to="relative/test/path") From 58f5c2bf7062bcc505eef69255acb2d39ae3426a Mon Sep 17 00:00:00 2001 From: Christoph Walcher Date: Mon, 1 Sep 2025 22:23:12 +0200 Subject: [PATCH 3/5] adjust wording --- Lib/tarfile.py | 4 ++-- Lib/test/test_tarfile.py | 2 +- .../Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst | 5 ++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 149709d0cedb97..7db3a40c9b33cf 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2721,8 +2721,8 @@ def makelink_with_filter(self, tarinfo, targetpath, link_target = tarinfo.linkname if os.name == "nt": # gh-57911: Posix-flavoured forward-slash path separators in - # symlink targets aren't sanitized by Windows automaticly, - # resulting in corrupted links. + # symlink targets aren't acknowledged by Windows, resulting + # in corrupted links. link_target = link_target.replace("/", os.path.sep) os.symlink(link_target, targetpath) return diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 7d9a89997badcc..0a3819c370b142 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -4041,7 +4041,7 @@ def test_absolute_symlink(self): @symlink_test @os_helper.skip_unless_symlink - def test_symlink_target_sanitized_on_windows(self): + def test_symlink_target_seperator_rewrite_on_windows(self): with ArchiveMaker() as arc: arc.add('link', symlink_to="relative/test/path") diff --git a/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst b/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst index 1db95e1acc5364..6aa7e618fde321 100644 --- a/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst +++ b/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst @@ -1,3 +1,2 @@ -When extracting tar files on Windows the path of symlink targets will be -sanitized to use backward-slashes as path separator because Posix flavoured -path separators result in corrupted links. +When extracting tar files on Windows Posix flavoured path separators in symlink +targets will be replaced by backward-slashes to prevent corrupted links. From 942b6e36ad2c4b7a26372cdc7e7fd58d525edffb Mon Sep 17 00:00:00 2001 From: Christoph Walcher Date: Tue, 2 Sep 2025 22:42:51 +0200 Subject: [PATCH 4/5] default dotdot_resolves_early=True --- Lib/test/test_tarfile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 0a3819c370b142..e5466c3bf2a5e8 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -3693,6 +3693,8 @@ def setUpClass(cls): cls.dotdot_resolves_early = False else: raise AssertionError('Could not determine link resolution') + else: + cls.dotdot_resolves_early = True @contextmanager def check_context(self, tar, filter, *, check_flag=True): From c04488a046b6d470032b73d6d53c740306aeb987 Mon Sep 17 00:00:00 2001 From: Christoph Walcher Date: Wed, 3 Sep 2025 17:39:22 +0200 Subject: [PATCH 5/5] add whatsnew entry --- Doc/whatsnew/3.15.rst | 4 ++++ .../Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 932bb100cbee23..61203686326c39 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -463,6 +463,10 @@ tarfile :func:`~tarfile.TarFile.errorlevel` is zero. (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887` and :cve:`2025-4435`.) +* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall` + now replace slashes by backslashes in symlink targets on Windows to prevent + creation of corrupted links. + (Contributed by Christoph Walcher in :gh:`57911`.) types diff --git a/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst b/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst index 6aa7e618fde321..b0d306c8c683a8 100644 --- a/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst +++ b/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst @@ -1,2 +1,2 @@ -When extracting tar files on Windows Posix flavoured path separators in symlink -targets will be replaced by backward-slashes to prevent corrupted links. +When extracting tar files on Windows, slashes in symlink targets will be +replaced by backslashes to prevent corrupted links.