From be37b54d1e9593528e0d98f597661f9665f6238b Mon Sep 17 00:00:00 2001 From: Dmitri Date: Sat, 8 Mar 2025 14:37:53 -0800 Subject: [PATCH 01/12] added testcase for globbing with a ranged seperator --- Lib/test/test_glob.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index da73769c16e9af..ff14570588a0e3 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -514,6 +514,10 @@ def fn(pat): self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z') + r1 = re.compile(fn('a[%-0]c')) + self.assertEqual(bool(r1.match("a/c")), False) + + if __name__ == "__main__": unittest.main() From 69905660db1e2e65d0c7172f112d32ea5329e404 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sat, 8 Mar 2025 23:26:51 +0000 Subject: [PATCH 02/12] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst diff --git a/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst new file mode 100644 index 00000000000000..2c8906b016ddc7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst @@ -0,0 +1 @@ +Glob.translate no longer matches path separators in ranges From cea1f5e4846f1f54fad81c6a990ee0ed553bc55b Mon Sep 17 00:00:00 2001 From: Dmitri Date: Mon, 10 Mar 2025 09:55:39 -0700 Subject: [PATCH 03/12] WIP - need to refine glob testcases. --- Lib/glob.py | 54 +++++++++++++++++++ Lib/test/test_glob.py | 4 +- ...-03-08-23-26-50.gh-issue-130942.jxRMK_.rst | 2 +- 3 files changed, 56 insertions(+), 4 deletions(-) diff --git a/Lib/glob.py b/Lib/glob.py index d1a6dddeeb1610..21ad4ca17ce5b2 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -263,6 +263,54 @@ def escape(pathname): _dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0) _no_recurse_symlinks = object() +def escape_pathname_range_including_seps(pat, seps): + """Escape ranges containing seperators in a path + """ + pat = list(pat) + ordinal_seps=set(map(ord, seps)) + + insideRange = False + ds=[] + + buf='' + idx1=0 + idx2=0 + rangeIncludesSep=False + + for path_idx, path_ch in enumerate(pat): + if path_idx > 0: + if path_ch == '[' and pat[path_idx-1] != '\\': + insideRange = True + idx1=path_idx + continue + if path_ch == ']' and pat[path_idx-1] != '\\': + insideRange = False + idx2=path_idx+1 + + if insideRange: + buf+=path_ch + if path_ch == '-': + glob_range = list(range(ord(pat[path_idx-1]), ord(pat[path_idx+1]))) + if ordinal_seps.intersection(glob_range): + rangeIncludesSep = True + + elif len(buf)>0: + ds.append([idx1, idx2, rangeIncludesSep]) + + buf='' + idx1=1 + idx2=2 + rangeIncludesSep=False + + for ds_idx, ds_elem in enumerate(ds): + idx1=ds_elem[0] + idx2=ds_elem[1] + rangeIncludesSep=ds_elem[2] + if rangeIncludesSep: + pat.insert(idx1, '\\') + pat.insert(idx2, '\\') + + return ''.join(pat) def translate(pat, *, recursive=False, include_hidden=False, seps=None): """Translate a pathname with shell wildcards to a regular expression. @@ -282,6 +330,8 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): seps = (os.path.sep, os.path.altsep) else: seps = os.path.sep + + escaped_seps = ''.join(map(re.escape, seps)) any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps not_sep = f'[^{escaped_seps}]' @@ -312,10 +362,14 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): if part: if not include_hidden and part[0] in '*?': results.append(r'(?!\.)') + results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)[0]) + if idx < last_part_idx: results.append(any_sep) + res = ''.join(results) + res=escape_pathname_range_including_seps(res, seps=seps) return fr'(?s:{res})\Z' diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index ff14570588a0e3..5ec609800c521a 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -514,9 +514,7 @@ def fn(pat): self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z') - r1 = re.compile(fn('a[%-0]c')) - self.assertEqual(bool(r1.match("a/c")), False) - + self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo\[%-0\]bar)\Z') if __name__ == "__main__": diff --git a/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst index 2c8906b016ddc7..1d41b314109f85 100644 --- a/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst +++ b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst @@ -1 +1 @@ -Glob.translate no longer matches path separators in ranges +Glob.translate escapes regex ranges that ecompass path seperator. From dd1b15553bc85be29aec991b4d0ecf65bfe7c4f8 Mon Sep 17 00:00:00 2001 From: Dmitri Date: Mon, 10 Mar 2025 10:47:44 -0700 Subject: [PATCH 04/12] Escape regex ranges including seperators in glob.translate. --- Lib/test/test_glob.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index 5ec609800c521a..e6ed04f1400761 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -515,6 +515,7 @@ def fn(pat): self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z') self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo\[%-0\]bar)\Z') + self.assertEqual(fn('foo[U-d]bar'), r'(?s:foo\[U-d\]bar)\Z') if __name__ == "__main__": From 9f461a53ffc84c9050323a47a512e947c093367b Mon Sep 17 00:00:00 2001 From: Dmitri Date: Mon, 10 Mar 2025 11:23:42 -0700 Subject: [PATCH 05/12] Typo function name in glob.py --- Lib/glob.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/glob.py b/Lib/glob.py index 65542c58b4a13e..25c7a609caa892 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -263,7 +263,7 @@ def escape(pathname): _dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0) _no_recurse_symlinks = object() -def escape_pathname_range_including_seps(pat, seps): +def escape_regex_range_including_seps(pat, seps): """Escape ranges containing seperators in a path """ pat = list(pat) @@ -369,7 +369,7 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): results.append(any_sep) res = ''.join(results) - res=escape_pathname_range_including_seps(res, seps=seps) + res=escape_regex_range_including_seps(res, seps=seps) return fr'(?s:{res})\Z' From c7f6d878154edebcde78271af0dcb5af52857003 Mon Sep 17 00:00:00 2001 From: Dmitri Date: Wed, 12 Mar 2025 00:05:04 -0700 Subject: [PATCH 06/12] Lookahead to ignore path separators in ranges which span path separators in fnmatch._translate --- Lib/fnmatch.py | 16 +++++++++++++- Lib/glob.py | 50 ------------------------------------------- Lib/test/test_glob.py | 3 +-- 3 files changed, 16 insertions(+), 53 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 865baea23467ea..0ebb1373ffce02 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -87,6 +87,7 @@ def _translate(pat, star, question_mark): res = [] add = res.append star_indices = [] + inside_range=False i, n = 0, len(pat) while i < n: @@ -135,18 +136,31 @@ def _translate(pat, star, question_mark): if chunks[k-1][-1] > chunks[k][0]: chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:] del chunks[k] + + if len(chunks)>1: + char_range=set(range(ord(chunks[0][-1]), ord(chunks[-1][0]))) + + question_mark_char=question_mark.replace('\\', '').replace('[', '').replace(']', '').replace('^', '') + question_mark_char=set(map(ord, question_mark_char)) + + if question_mark_char.intersection(char_range): + inside_range=True + # Escape backslashes and hyphens for set difference (--). # Hyphens that create ranges shouldn't be escaped. stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') for s in chunks) + i = j+1 if not stuff: # Empty range: never match. add('(?!)') elif stuff == '!': # Negated empty range: match any character. - add('.') + add(question_mark) else: + if question_mark != '.' and inside_range: + add(f'(?={question_mark})') # Escape set operations (&&, ~~ and ||). stuff = _re_setops_sub(r'\\\1', stuff) if stuff[0] == '!': diff --git a/Lib/glob.py b/Lib/glob.py index 25c7a609caa892..9bb764940223d0 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -263,55 +263,6 @@ def escape(pathname): _dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0) _no_recurse_symlinks = object() -def escape_regex_range_including_seps(pat, seps): - """Escape ranges containing seperators in a path - """ - pat = list(pat) - ordinal_seps=set(map(ord, seps)) - - insideRange = False - ds=[] - - buf='' - idx1=0 - idx2=0 - rangeIncludesSep=False - - for path_idx, path_ch in enumerate(pat): - if path_idx > 0: - if path_ch == '[' and pat[path_idx-1] != '\\': - insideRange = True - idx1=path_idx - continue - if path_ch == ']' and pat[path_idx-1] != '\\': - insideRange = False - idx2=path_idx+1 - - if insideRange: - buf+=path_ch - if path_ch == '-': - glob_range = list(range(ord(pat[path_idx-1]), ord(pat[path_idx+1]))) - if ordinal_seps.intersection(glob_range): - rangeIncludesSep = True - - elif len(buf)>0: - ds.append([idx1, idx2, rangeIncludesSep]) - - buf='' - idx1=1 - idx2=2 - rangeIncludesSep=False - - for ds_idx, ds_elem in enumerate(ds): - idx1=ds_elem[0] - idx2=ds_elem[1] - rangeIncludesSep=ds_elem[2] - if rangeIncludesSep: - pat.insert(idx1, '\\') - pat.insert(idx2, '\\') - - return ''.join(pat) - def translate(pat, *, recursive=False, include_hidden=False, seps=None): """Translate a pathname with shell wildcards to a regular expression. @@ -369,7 +320,6 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): results.append(any_sep) res = ''.join(results) - res=escape_regex_range_including_seps(res, seps=seps) return fr'(?s:{res})\Z' diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index e6ed04f1400761..fbf9bc6e9010d7 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -514,8 +514,7 @@ def fn(pat): self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z') - self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo\[%-0\]bar)\Z') - self.assertEqual(fn('foo[U-d]bar'), r'(?s:foo\[U-d\]bar)\Z') + self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo(?=[^/\\])[%-0]bar)\Z') if __name__ == "__main__": From d5748b8da3858b93150ebcc267d6d553b7637365 Mon Sep 17 00:00:00 2001 From: Dmitri Date: Wed, 12 Mar 2025 00:23:03 -0700 Subject: [PATCH 07/12] Added empty negative lookahead in front of ranges which encompass path separator in fnmatch._translate(). --- Lib/fnmatch.py | 2 +- Lib/test/test_glob.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 0ebb1373ffce02..7ba8c5119a9733 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -160,7 +160,7 @@ def _translate(pat, star, question_mark): add(question_mark) else: if question_mark != '.' and inside_range: - add(f'(?={question_mark})') + add(f'(?!)') # Escape set operations (&&, ~~ and ||). stuff = _re_setops_sub(r'\\\1', stuff) if stuff[0] == '!': diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index fbf9bc6e9010d7..93feeb057f26b0 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -514,7 +514,7 @@ def fn(pat): self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z') - self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo(?=[^/\\])[%-0]bar)\Z') + self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo(?!)[%-0]bar)\Z') if __name__ == "__main__": From 95b4ccf07df9fc6a8bcba16e1486339ba9c50f47 Mon Sep 17 00:00:00 2001 From: Dmitri Date: Thu, 13 Mar 2025 11:34:48 -0700 Subject: [PATCH 08/12] Revert "Added empty negative lookahead in front of ranges which encompass path separator in fnmatch._translate()." --- Lib/fnmatch.py | 2 +- Lib/test/test_glob.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 7ba8c5119a9733..0ebb1373ffce02 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -160,7 +160,7 @@ def _translate(pat, star, question_mark): add(question_mark) else: if question_mark != '.' and inside_range: - add(f'(?!)') + add(f'(?={question_mark})') # Escape set operations (&&, ~~ and ||). stuff = _re_setops_sub(r'\\\1', stuff) if stuff[0] == '!': diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index 93feeb057f26b0..fbf9bc6e9010d7 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -514,7 +514,7 @@ def fn(pat): self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z') - self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo(?!)[%-0]bar)\Z') + self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo(?=[^/\\])[%-0]bar)\Z') if __name__ == "__main__": From cdfcf47686916ca6ee8147dc7e517ee90d5806c8 Mon Sep 17 00:00:00 2001 From: Dmitri Date: Mon, 17 Mar 2025 00:06:32 -0700 Subject: [PATCH 09/12] Refine testcases and and escape ranges including path separator literals. --- Lib/fnmatch.py | 28 +++++++++++++++------------- Lib/glob.py | 5 ----- Lib/test/test_glob.py | 11 ++++++++--- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 0ebb1373ffce02..80362d4cf67b60 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -87,7 +87,9 @@ def _translate(pat, star, question_mark): res = [] add = res.append star_indices = [] - inside_range=False + inside_range = False + add_negative_lookahead = False + question_mark_char = re.sub(r'\[|\]|\^', '', question_mark) i, n = 0, len(pat) while i < n: @@ -136,21 +138,13 @@ def _translate(pat, star, question_mark): if chunks[k-1][-1] > chunks[k][0]: chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:] del chunks[k] - if len(chunks)>1: - char_range=set(range(ord(chunks[0][-1]), ord(chunks[-1][0]))) - - question_mark_char=question_mark.replace('\\', '').replace('[', '').replace(']', '').replace('^', '') - question_mark_char=set(map(ord, question_mark_char)) - - if question_mark_char.intersection(char_range): - inside_range=True - + if question_mark_char: + inside_range = chunks[0][-1] <= question_mark_char <= chunks[-1][0] # Escape backslashes and hyphens for set difference (--). # Hyphens that create ranges shouldn't be escaped. stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') for s in chunks) - i = j+1 if not stuff: # Empty range: never match. @@ -159,14 +153,22 @@ def _translate(pat, star, question_mark): # Negated empty range: match any character. add(question_mark) else: + negative_lookahead='' if question_mark != '.' and inside_range: - add(f'(?={question_mark})') + add_negative_lookahead = True + negative_lookahead = negative_lookahead + question_mark_char # Escape set operations (&&, ~~ and ||). stuff = _re_setops_sub(r'\\\1', stuff) if stuff[0] == '!': + if question_mark_char not in stuff and question_mark != '.': + add_negative_lookahead = True + negative_lookahead = negative_lookahead + question_mark_char stuff = '^' + stuff[1:] - elif stuff[0] in ('^', '['): + elif stuff[0] in ('^', '[', question_mark_char): stuff = '\\' + stuff + if add_negative_lookahead: + add(f'(?![{negative_lookahead}])') + add_negative_lookahead = False add(f'[{stuff}]') else: add(_re_escape(c)) diff --git a/Lib/glob.py b/Lib/glob.py index 9bb764940223d0..873614fce3a6a7 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -281,8 +281,6 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): seps = (os.path.sep, os.path.altsep) else: seps = os.path.sep - - escaped_seps = ''.join(map(re.escape, seps)) any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps not_sep = f'[^{escaped_seps}]' @@ -313,12 +311,9 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): if part: if not include_hidden and part[0] in '*?': results.append(r'(?!\.)') - results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)[0]) - if idx < last_part_idx: results.append(any_sep) - res = ''.join(results) return fr'(?s:{res})\Z' diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index fbf9bc6e9010d7..48cfc197ff8138 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -513,9 +513,14 @@ def fn(pat): return glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\']) self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z') - - self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo(?=[^/\\])[%-0]bar)\Z') - + self.assertEqual(fn('foo[!a]bar'), r'(?s:foo(?![/\\])[^a]bar)\Z') + self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo(?![/\\])[%-0]bar)\Z') + self.assertEqual(fn('foo[%-0][1-9]bar'), r'(?s:foo(?![/\\])[%-0][1-9]bar)\Z') + self.assertEqual(fn('foo[0-%]bar'), r'(?s:foo(?!)bar)\Z') + self.assertEqual(fn('foo[^-'), r'(?s:foo\[\^\-)\Z') + self.assertEqual(fn('foo[/-/]bar'), r'(?s:foo\[[/\\]\-[/\\]\]bar)\Z') + self.assertEqual(fn('foo[%-/]bar'), r'(?s:foo\[%\-[/\\]\]bar)\Z') + self.assertEqual(fn('foo[/]bar'), r'(?s:foo\[[/\\]\]bar)\Z') if __name__ == "__main__": unittest.main() From 3929b06dee57712be0c80f1d78797ddb267394b4 Mon Sep 17 00:00:00 2001 From: Dmitri Date: Mon, 17 Mar 2025 00:09:31 -0700 Subject: [PATCH 10/12] fix blurb. --- .../next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst index 1d41b314109f85..80ae96e2aa2122 100644 --- a/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst +++ b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst @@ -1 +1 @@ -Glob.translate escapes regex ranges that ecompass path seperator. +Glob.translate negative-lookaheads path separators regex ranges that ecompass path seperator. For ranges which include path separator literals, the range is escaped. From e5abc80267550cb7275c44dafe7db344fff89277 Mon Sep 17 00:00:00 2001 From: Dmitri Date: Tue, 18 Mar 2025 22:33:55 -0700 Subject: [PATCH 11/12] Refine fnmatch translate and glob translate testcases. --- Lib/fnmatch.py | 14 ++++---------- Lib/glob.py | 1 - Lib/test/test_fnmatch.py | 7 +++++++ Lib/test/test_glob.py | 7 ++++++- .../2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst | 13 ++++++++++++- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 80362d4cf67b60..5d38891c7da632 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -88,7 +88,6 @@ def _translate(pat, star, question_mark): add = res.append star_indices = [] inside_range = False - add_negative_lookahead = False question_mark_char = re.sub(r'\[|\]|\^', '', question_mark) i, n = 0, len(pat) @@ -138,7 +137,7 @@ def _translate(pat, star, question_mark): if chunks[k-1][-1] > chunks[k][0]: chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:] del chunks[k] - if len(chunks)>1: + if len(chunks) > 1: if question_mark_char: inside_range = chunks[0][-1] <= question_mark_char <= chunks[-1][0] # Escape backslashes and hyphens for set difference (--). @@ -155,20 +154,15 @@ def _translate(pat, star, question_mark): else: negative_lookahead='' if question_mark != '.' and inside_range: - add_negative_lookahead = True - negative_lookahead = negative_lookahead + question_mark_char + add(f'(?![{question_mark_char}])') # Escape set operations (&&, ~~ and ||). stuff = _re_setops_sub(r'\\\1', stuff) if stuff[0] == '!': if question_mark_char not in stuff and question_mark != '.': - add_negative_lookahead = True - negative_lookahead = negative_lookahead + question_mark_char + stuff = f'^{question_mark_char}' + '^' + stuff[1:] stuff = '^' + stuff[1:] - elif stuff[0] in ('^', '[', question_mark_char): + elif stuff[0] in ('^', '['): stuff = '\\' + stuff - if add_negative_lookahead: - add(f'(?![{negative_lookahead}])') - add_negative_lookahead = False add(f'[{stuff}]') else: add(_re_escape(c)) diff --git a/Lib/glob.py b/Lib/glob.py index 873614fce3a6a7..f2a19167a82550 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -262,7 +262,6 @@ def escape(pathname): _special_parts = ('', '.', '..') _dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0) _no_recurse_symlinks = object() - def translate(pat, *, recursive=False, include_hidden=False, seps=None): """Translate a pathname with shell wildcards to a regular expression. diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 9f360e1dc10f47..0066878b0b1c5f 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -236,6 +236,13 @@ def test_translate(self): self.assertEqual(translate('A*********'), r'(?s:A.*)\Z') self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') + self.assertEqual(translate('foo[%-0]bar'), r'(?s:foo[%-0]bar)\Z') + self.assertEqual(translate('foo[%-0][%-0[%-0]bar'), r'(?s:foo[%-0][%-0[%-0]bar)\Z') + self.assertEqual(translate('foo[/-/]bar'), r'(?s:foo[/-/]bar)\Z') + self.assertEqual(translate('foo[%-0][1-9]bar'), r'(?s:foo[%-0][1-9]bar)\Z') + self.assertEqual(translate('foo[%-/]bar'), r'(?s:foo[%-/]bar)\Z') + self.assertEqual(translate('foo?'), r'(?s:foo.)\Z') + self.assertEqual(translate('foo.'), r'(?s:foo\.)\Z') # fancy translation to prevent exponential-time match failure t = translate('**a*a****a') self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z') diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index 48cfc197ff8138..7651ad80a6150d 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -456,6 +456,8 @@ def test_translate_matching(self): self.assertIsNone(match(os.path.join('foo', '.bar'))) self.assertIsNotNone(match(os.path.join('foo', 'bar.txt'))) self.assertIsNone(match(os.path.join('foo', '.bar.txt'))) + match = re.compile(glob.translate('foo[%-0]bar', recursive=True)).match + self.assertIsNone(match(os.path.join('foo', 'bar'))) def test_translate(self): def fn(pat): @@ -513,7 +515,7 @@ def fn(pat): return glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\']) self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z') - self.assertEqual(fn('foo[!a]bar'), r'(?s:foo(?![/\\])[^a]bar)\Z') + self.assertEqual(fn('foo[!a]bar'), r'(?s:foo[^/\\^a]bar)\Z') self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo(?![/\\])[%-0]bar)\Z') self.assertEqual(fn('foo[%-0][1-9]bar'), r'(?s:foo(?![/\\])[%-0][1-9]bar)\Z') self.assertEqual(fn('foo[0-%]bar'), r'(?s:foo(?!)bar)\Z') @@ -521,6 +523,9 @@ def fn(pat): self.assertEqual(fn('foo[/-/]bar'), r'(?s:foo\[[/\\]\-[/\\]\]bar)\Z') self.assertEqual(fn('foo[%-/]bar'), r'(?s:foo\[%\-[/\\]\]bar)\Z') self.assertEqual(fn('foo[/]bar'), r'(?s:foo\[[/\\]\]bar)\Z') + self.assertEqual(fn('foo[%-0][0-%[%-0]bar'), r'(?s:foo(?![/\\])[%-0](?![/\\])[\[%-0]bar)\Z') + self.assertEqual(fn('foo?'), r'(?s:foo[^/\\])\Z') + self.assertEqual(fn('foo.'), r'(?s:foo\.)\Z') if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst index 80ae96e2aa2122..a92f8c615fe74e 100644 --- a/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst +++ b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst @@ -1 +1,12 @@ -Glob.translate negative-lookaheads path separators regex ranges that ecompass path seperator. For ranges which include path separator literals, the range is escaped. +.. versionchanged:: next + :func:`glob.translate` now correctly handles ranges implicitly containing path + separators (for instance, ``[%-0]`` contains ``/``) by adding either a negative + lookahead (``(?!/)``) or by not including the path separator (``^/``). In addition, + ranges including path separator literals are now correctly escaped, as specified by + POSIX specifications. + +.. versionchanged:: next + :func:`fnmatch.translate` does not treat path separator characters as having any + special meaning at all, so it still matches ranges implicitly containing path + separators (for instance, ``[%-0]`` contains ``/``) and ranges explicitly + containing path separators (for instance, ``[/-/]`` contains ``/``). From 93c3092178d9964c3efb64f6d7308bc01a3cc9b8 Mon Sep 17 00:00:00 2001 From: Dmitri Date: Thu, 20 Mar 2025 10:08:05 -0700 Subject: [PATCH 12/12] Add some more matching tests for glob tests. --- Lib/test/test_glob.py | 6 ++++++ .../Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index 7651ad80a6150d..0b52e7a0d18630 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -458,6 +458,12 @@ def test_translate_matching(self): self.assertIsNone(match(os.path.join('foo', '.bar.txt'))) match = re.compile(glob.translate('foo[%-0]bar', recursive=True)).match self.assertIsNone(match(os.path.join('foo', 'bar'))) + match = re.compile(glob.translate('foo?bar', recursive=True)).match + self.assertIsNone(match('foo/bar')) + match = re.compile(glob.translate('foo.', recursive=True)).match + self.assertIsNone(match('foo/')) + match = re.compile(glob.translate('foo*', recursive=True)).match + self.assertIsNone(match('foo/')) def test_translate(self): def fn(pat): diff --git a/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst index a92f8c615fe74e..735e4e70382724 100644 --- a/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst +++ b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst @@ -4,7 +4,6 @@ lookahead (``(?!/)``) or by not including the path separator (``^/``). In addition, ranges including path separator literals are now correctly escaped, as specified by POSIX specifications. - .. versionchanged:: next :func:`fnmatch.translate` does not treat path separator characters as having any special meaning at all, so it still matches ranges implicitly containing path