Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions Lib/test/test_zipapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,30 @@ def skip_pyc_files(path):
self.assertIn('test.py', z.namelist())
self.assertNotIn('test.pyc', z.namelist())

def test_create_archive_self_insertion(self):
    # The target lives inside the source directory here, so the
    # archive must end up holding only the two source files and
    # not a copy of itself.
    src_dir = self.tmpdir
    for name in ('__main__.py', 'test.py'):
        (src_dir / name).touch()
    archive = self.tmpdir / 'target.pyz'

    zipapp.create_archive(src_dir, archive)
    with zipfile.ZipFile(archive, 'r') as z:
        # Read the member list once and check it from every angle.
        names = z.namelist()
        self.assertEqual(len(names), 2)
        self.assertIn('__main__.py', names)
        self.assertIn('test.py', names)

def test_target_overwrites_source_file(self):
    # The target already exists as a file inside the source
    # directory, so it would be one of the files to add;
    # create_archive is expected to raise ZipAppError.
    src_dir = self.tmpdir
    (src_dir / '__main__.py').touch()
    archive = src_dir / 'target.pyz'
    archive.touch()

    self.assertRaises(
        zipapp.ZipAppError, zipapp.create_archive, src_dir, archive)

def test_create_archive_filter_exclude_dir(self):
# Test packing a directory and using a filter to exclude a
# subdirectory (ensures that the path supplied to include
Expand Down
28 changes: 26 additions & 2 deletions Lib/zipapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,38 @@ def create_archive(source, target=None, interpreter=None, main=None,
elif not hasattr(target, 'write'):
target = pathlib.Path(target)

# Create the list of files to add to the archive now, in case
# the target is being created in the source directory - we
# don't want the target being added to itself
files_to_add = sorted(source.rglob('*'))

# The target cannot be in the list of files to add. If it were, we'd
# end up overwriting the source file and writing the archive into
# itself, which is an error. We therefore check for that case and
# provide a helpful message for the user.

# Note that we only do a simple path equality check. This won't
# catch every case, but it will catch the common case where the
# source is the CWD and the target is a file in the CWD. More
# thorough checks don't provide enough value to justify the extra
# cost.

# https://github.com/python/cpython/issues/104527 tracks making
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think that zipfile should catch this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I'll remove that part of the comment. I think the same logic applies here - if adding the check can't be justified in zipfile, it's not worth adding it here either. We can stick with the simple but incomplete check.

# the zipfile module catch writing an archive to itself at a
# lower level, which could help here in cases that our check
# doesn't catch.
if target in files_to_add:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`target` can be a file-like object, in which case `target in files_to_add` compares a file-like object against the `Path` objects. Is that what we want?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe it's fine. A file-like object will simply test as not equal to all the Path objects. I guess I could add some sort of exception handling in case the user passes an object with a custom equality check that fails, but that seems like overkill to me.

I was going to add a note to that effect in the comment, but it's long enough already and I wasn't sure it was worth it. The fact that you pointed it out suggests that it would be, though.

raise ZipAppError(
f"The target archive {target} overwrites one of the source files.")

with _maybe_open(target, 'wb') as fd:
_write_file_prefix(fd, interpreter)
compression = (zipfile.ZIP_DEFLATED if compressed else
zipfile.ZIP_STORED)
with zipfile.ZipFile(fd, 'w', compression=compression) as z:
for child in sorted(source.rglob('*')):
for child in files_to_add:
arcname = child.relative_to(source)
if filter is None or filter(arcname) and child.resolve() != arcname.resolve():
if filter is None or filter(arcname):
z.write(child, arcname.as_posix())
if main_py:
z.writestr('__main__.py', main_py.encode('utf-8'))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The zipapp module now calculates the list of files to be added to the archive before creating the archive. This avoids accidentally including the target when it is being created in the source directory.
Loading