diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py
index 29109f8812ee7b..63b73cb93b9a65 100644
--- a/Lib/re/_compiler.py
+++ b/Lib/re/_compiler.py
@@ -298,7 +298,7 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
                 # Character set contains non-BMP character codes.
                 # For range, all BMP characters in the range are already
                 # proceeded.
-                if fixup:
+                if fixes:
                     hascased = True
                     # For now, IN_UNI_IGNORE+LITERAL and
                     # IN_UNI_IGNORE+RANGE_UNI_IGNORE work for all non-BMP
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index ff95f54026e172..8ee5882827e1f8 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -2626,6 +2626,15 @@ def test_character_set_none(self):
         self.assertIsNone(re.search(p, s))
         self.assertIsNone(re.search('(?s:.)' + p, s))
 
+    def test_ascii_character_range_non_bmp(self):
+        # gh-126505: case-insensitive character ranges must honour re.ASCII
+        # Unicode mode: "\u0266" is expected to match the range ignoring case
+        self.assertEqual(re.compile("[\ua7aa-\uffff]", re.IGNORECASE).match("\u0266").span(), (0, 1))
+        # ASCII mode: the same BMP-only range must not match "\u0266"
+        self.assertIsNone(re.compile("[\ua7aa-\uffff]", re.ASCII | re.IGNORECASE).match("\u0266"))
+        # ASCII mode: still no match when the upper bound is outside the BMP
+        self.assertIsNone(re.compile("[\ua7aa-\U00010000]", re.ASCII | re.IGNORECASE).match("\u0266"))
+
 
 def get_debug_out(pat):
     with captured_stdout() as out: