Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Lib/re/_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
# Character set contains non-BMP character codes.
# For range, all BMP characters in the range are already
# processed.
- if fixup:
+ if fixes:
hascased = True
# For now, IN_UNI_IGNORE+LITERAL and
# IN_UNI_IGNORE+RANGE_UNI_IGNORE work for all non-BMP
Expand Down
9 changes: 9 additions & 0 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -2626,6 +2626,15 @@ def test_character_set_none(self):
self.assertIsNone(re.search(p, s))
self.assertIsNone(re.search('(?s:.)' + p, s))

def test_ascii_character_range_non_bmp(self):
# gh-126505
# should match in Unicode mode
Comment on lines +2630 to +2631
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- # gh-126505
- # should match in Unicode mode
+ # GH-126505: should match in Unicode mode

self.assertEqual(re.compile("[\ua7aa-\uffff]", re.IGNORECASE).match("\u0266").span(), (0, 1))
# should not match in ASCII mode
self.assertIsNone(re.compile("[\ua7aa-\uffff]", re.ASCII | re.IGNORECASE).match("\u0266"))
# should not match in ASCII mode, even if upper bound is outside of BMP
self.assertIsNone(re.compile("[\ua7aa-\U00010000]", re.ASCII | re.IGNORECASE).match("\u0266"))


def get_debug_out(pat):
with captured_stdout() as out:
Expand Down
Loading