Skip to content

Commit 28c425d

Browse files
committed
gh-117829: simplify logic; patterns are standard glob patterns, as implemented by PurePath.match
1 parent 9ec1cf5 commit 28c425d

File tree

2 files changed

+34
-81
lines changed

2 files changed

+34
-81
lines changed

Lib/test/test_zipapp.py

Lines changed: 3 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,8 @@ def test_cmdline_multiple_includes_commas_and_extend(self):
504504

505505
args = [
506506
str(source),
507-
'--include', 'pkg,data/*.txt',
507+
'--include', 'pkg/**',
508+
'--include', 'data/*.txt',
508509
'--include', 'data/keep.bin',
509510
]
510511
zipapp.main(args)
@@ -515,7 +516,6 @@ def test_cmdline_multiple_includes_commas_and_extend(self):
515516
# did not include root files
516517
self.assertNotIn('__main__.py', names)
517518
# from "pkg"
518-
self.assertIn('pkg/', names)
519519
self.assertIn('pkg/x.py', names)
520520
self.assertIn('pkg/y.txt', names)
521521
# from "data/*.txt"
@@ -537,7 +537,7 @@ def test_cmdline_exclude_directory_over_included_files(self):
537537
args = [
538538
str(source),
539539
'--include', '*.py',
540-
'--exclude', 'foo',
540+
'--exclude', 'foo/**',
541541
]
542542
zipapp.main(args)
543543

@@ -552,42 +552,6 @@ def test_cmdline_exclude_directory_over_included_files(self):
552552
# bar/c.py remains
553553
self.assertIn('bar/c.py', names)
554554

555-
def test_cmdline_normalization_and_dir_implies_subtree(self):
556-
source = self.tmpdir / 'norm'
557-
source.mkdir()
558-
self._make_tree(source, [
559-
'__main__.py',
560-
'a/b/c.txt',
561-
'a/d/e.py',
562-
'x/y/z.py',
563-
])
564-
565-
# Use Windows-style backslashes and a leading './'
566-
# 'a\\b' should imply both 'a/b' and 'a/b/**'
567-
args = [
568-
str(source),
569-
'--include', r'.\a\b',
570-
'--include', r'a\d', # also directory → subtree
571-
'--exclude', '**/*.py', # exclude all *.py after include
572-
]
573-
zipapp.main(args)
574-
575-
target = source.with_suffix('.pyz')
576-
with zipfile.ZipFile(target, 'r') as z:
577-
names = set(z.namelist())
578-
# did not include root files
579-
self.assertNotIn('__main__.py', names)
580-
# from a/b subtree, c.txt should be present
581-
self.assertIn('a/b/', names)
582-
self.assertIn('a/b/c.txt', names)
583-
# from a/d subtree, e.py would match include but then be excluded by **/*.py
584-
self.assertIn('a/d/', names)
585-
self.assertNotIn('a/d/e.py', names)
586-
# x/y/z.py not included at all (not in includes)
587-
self.assertNotIn('x/', names)
588-
self.assertNotIn('x/y/', names)
589-
self.assertNotIn('x/y/z.py', names)
590-
591555

592556
if __name__ == "__main__":
593557
unittest.main()

Lib/zipapp.py

Lines changed: 31 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -181,60 +181,43 @@ def get_interpreter(archive):
181181

182182
def _normalize_patterns(values: Iterable[str] | None) -> list[str]:
183183
"""
184-
Split comma-separated items, strip whitespace, drop empties.
185-
If a token has no glob metacharacters, treat it as a directory prefix:
186-
expand 'foo' into ['foo', 'foo/**'] (after normalizing slashes).
184+
Return patterns exactly as provided by the CLI (no comma splitting).
185+
Each item is stripped of surrounding whitespace; empty items are dropped.
187186
"""
188187
if not values:
189188
return []
190-
191-
def has_glob(s: str) -> bool:
192-
return any(ch in s for ch in "*?[]")
193-
194189
out: list[str] = []
195190
for v in values:
196-
for raw in (p.strip() for p in v.split(',')):
197-
if not raw:
198-
continue
199-
# normalize user input to POSIX-like form (match against rel.as_posix())
200-
tok = raw.replace('\\', '/').lstrip('./').rstrip('/')
201-
if not tok:
202-
continue
203-
if has_glob(tok):
204-
out.append(tok)
205-
else:
206-
# directory name implies subtree
207-
out.append(tok)
208-
out.append(f"{tok}/**")
191+
v = v.strip()
192+
if v:
193+
out.append(v)
209194
return out
210195

211196
def _make_glob_filter(
212197
includes: Iterable[str] | None,
213-
excludes: Iterable[str] | None
198+
excludes: Iterable[str] | None,
214199
) -> Callable[[pathlib.Path], bool]:
215200
"""
216-
Build a filter(relative_path: Path) -> bool applying include first, then exclude.
217-
- Path argument is relative to source_root
218-
- Patterns are matched against POSIX-style relative paths
219-
- If includes is empty, defaults to ["**"] (include all)
201+
Build a filter(relative_path: Path) -> bool applying includes first, then excludes.
202+
203+
Semantics:
204+
- Patterns are standard glob patterns as implemented by PurePath.match.
205+
- If 'includes' is empty, all files/dirs are initially eligible.
206+
- If any exclude pattern matches, the path is rejected.
207+
- Matching respects the current platform's path flavor (separators, case).
220208
"""
221-
inc = _normalize_patterns(includes)
222-
exc = _normalize_patterns(excludes)
223-
if not inc:
224-
inc = ["**"]
209+
inc = _normalize_patterns(values=includes)
210+
exc = _normalize_patterns(values=excludes)
225211

226-
def matches_any(patterns: list[str], rel: pathlib.Path) -> bool:
227-
posix = rel.as_posix()
228-
# pathlib.Path.match uses glob semantics with ** (recursive)
229-
return any(rel.match(pat) or pathlib.PurePosixPath(posix).match(pat)
230-
for pat in patterns)
212+
if not inc and not exc:
213+
return None
231214

232215
def _filter(rel: pathlib.Path) -> bool:
233-
# Always work on files and directories; we'll add both. If a directory
234-
# is excluded, its children still get visited by rglob('*') but will fail here.
235-
if not matches_any(inc, rel):
216+
# If includes were provided, at least one must match.
217+
if inc and not any(rel.match(pat) for pat in inc):
236218
return False
237-
if exc and matches_any(exc, rel):
219+
# Any exclude match removes the path.
220+
if exc and any(rel.match(pat) for pat in exc):
238221
return False
239222
return True
240223

@@ -268,10 +251,13 @@ def main(args=None):
268251
help="Source directory (or existing archive).")
269252
parser.add_argument('--include', action='extend', nargs='+', default=None,
270253
help=("Glob pattern(s) of files/dirs to include (relative to SOURCE). "
271-
"Repeat or use commas. Defaults to '**' (everything)."))
254+
"Repeat the flag for multiple patterns. "
255+
"To include a directory and its contents, use 'foo/**'."))
272256
parser.add_argument('--exclude', action='extend', nargs='+', default=None,
273257
help=("Glob pattern(s) of files/dirs to exclude (relative to SOURCE). "
274-
"Repeat or use commas. Applied after --include."))
258+
"Repeat the flag for multiple patterns. "
259+
"To exclude a directory and its contents, use 'foo/**'. "
260+
"Applied after --include."))
275261

276262
args = parser.parse_args(args)
277263

@@ -293,8 +279,11 @@ def main(args=None):
293279
# build a filter from include and exclude flags
294280
filter_fn = None
295281
src_path = pathlib.Path(args.source)
296-
if src_path.exists() and src_path.is_dir():
297-
filter_fn = _make_glob_filter(args.include, args.exclude)
282+
if src_path.exists() and src_path.is_dir() and (args.include or args.exclude):
283+
filter_fn = _make_glob_filter(
284+
includes=args.include,
285+
excludes=args.exclude
286+
)
298287

299288
create_archive(args.source, args.output,
300289
interpreter=args.python, main=args.main,

0 commit comments

Comments
 (0)