Skip to content
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions Lib/fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ def _translate(pat, star, question_mark):
res = []
add = res.append
star_indices = []
inside_range = False
add_negative_lookahead = False
question_mark_char = re.sub(r'\[|\]|\^', '', question_mark)

i, n = 0, len(pat)
while i < n:
Expand Down Expand Up @@ -135,6 +138,9 @@ def _translate(pat, star, question_mark):
if chunks[k-1][-1] > chunks[k][0]:
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
del chunks[k]
if len(chunks)>1:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if len(chunks)>1:
if len(chunks) > 1:

if question_mark_char:
inside_range = chunks[0][-1] <= question_mark_char <= chunks[-1][0]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
Expand All @@ -145,14 +151,24 @@ def _translate(pat, star, question_mark):
add('(?!)')
elif stuff == '!':
# Negated empty range: match any character.
add('.')
add(question_mark)
else:
negative_lookahead=''
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
negative_lookahead=''
negative_lookahead = ''

if question_mark != '.' and inside_range:
add_negative_lookahead = True
negative_lookahead = negative_lookahead + question_mark_char
# Escape set operations (&&, ~~ and ||).
stuff = _re_setops_sub(r'\\\1', stuff)
if stuff[0] == '!':
if question_mark_char not in stuff and question_mark != '.':
add_negative_lookahead = True
negative_lookahead = negative_lookahead + question_mark_char
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
elif stuff[0] in ('^', '[', question_mark_char):
stuff = '\\' + stuff
if add_negative_lookahead:
add(f'(?![{negative_lookahead}])')
add_negative_lookahead = False
add(f'[{stuff}]')
else:
add(_re_escape(c))
Expand Down
1 change: 0 additions & 1 deletion Lib/glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,6 @@ def escape(pathname):
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
_no_recurse_symlinks = object()


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please revert

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please revert.

def translate(pat, *, recursive=False, include_hidden=False, seps=None):
"""Translate a pathname with shell wildcards to a regular expression.

Expand Down
9 changes: 8 additions & 1 deletion Lib/test/test_glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,14 @@ def fn(pat):
return glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\'])
self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More generally, can you update test_translate_matching and include the examples from https://man7.org/linux/man-pages/man7/glob.7.html so that we have a compliant implementation?

self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z')

self.assertEqual(fn('foo[!a]bar'), r'(?s:foo(?![/\\])[^a]bar)\Z')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need new tests for fnmatch.translate.

self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo(?![/\\])[%-0]bar)\Z')
self.assertEqual(fn('foo[%-0][1-9]bar'), r'(?s:foo(?![/\\])[%-0][1-9]bar)\Z')
self.assertEqual(fn('foo[0-%]bar'), r'(?s:foo(?!)bar)\Z')
self.assertEqual(fn('foo[^-'), r'(?s:foo\[\^\-)\Z')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need also a test case with multiple ranges and incomplete ones, e.g., [0-%][0-%[0-%]. And possibly with an additional tail after the last range.

self.assertEqual(fn('foo[/-/]bar'), r'(?s:foo\[[/\\]\-[/\\]\]bar)\Z')
self.assertEqual(fn('foo[%-/]bar'), r'(?s:foo\[%\-[/\\]\]bar)\Z')
self.assertEqual(fn('foo[/]bar'), r'(?s:foo\[[/\\]\]bar)\Z')

if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Glob.translate negative-lookaheads path separators regex ranges that ecompass path seperator. For ranges which include path separator literals, the range is escaped.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This requires a better indication. In addition, a versionchanged:: next should be added for both glob.translate() and fnmatch.translate(). Note that the meaning of / in fnmatch.translate() is different from glob.translate() because / is not special at all.

Suggested change
Glob.translate negative-lookaheads path separators regex ranges that ecompass path seperator. For ranges which include path separator literals, the range is escaped.
:func:`glob.translate` now correctly handles ranges implicitly containing path
separators (for instance, ``[0-%]`` contains ``/``). In addition, ranges including
path separator literals are now correctly escaped, as specified by POSIX specifications.

This suggestion is not perfect, so we will likely come back to it later. However, the docs for the translate() functions need to be updated.

Loading