python · jirkamarsik · Nov 7, 2024 · ZeroIntensity · Nov 7, 2024
diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py
@@ -298,7 +298,7 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
                 # Character set contains non-BMP character codes.
                 # For range, all BMP characters in the range are already
                 # proceeded.
-                if fixup:
+                if fixes:
                     hascased = True
                     # For now, IN_UNI_IGNORE+LITERAL and
                     # IN_UNI_IGNORE+RANGE_UNI_IGNORE work for all non-BMP

diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
@@ -2626,6 +2626,14 @@ def test_character_set_none(self):
                 self.assertIsNone(re.search(p, s))
                 self.assertIsNone(re.search('(?s:.)' + p, s))
 
+    def test_ascii_character_range_non_bmp(self):
+        # GH-126505: should match in Unicode mode
+        self.assertEqual(re.compile("[\ua7aa-\uffff]", re.IGNORECASE).match("\u0266").span(), (0, 1))
+        # should not match in ASCII mode
+        self.assertIsNone(re.compile("[\ua7aa-\uffff]", re.ASCII | re.IGNORECASE).match("\u0266"))
+        # should not match in ASCII mode, even if upper bound is outside of BMP
+        self.assertIsNone(re.compile("[\ua7aa-\U00010000]", re.ASCII | re.IGNORECASE).match("\u0266"))
+
 
 def get_debug_out(pat):
     with captured_stdout() as out: