Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Lib/test/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -1485,6 +1485,14 @@ def test_syntaxerror_latin1(self):
readline = self.get_readline(lines)
self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)

def test_nonascii_coding(self):
    """detect_encoding() must honor a coding cookie even when the same
    line contains non-ASCII bytes after the declaration (gh-63161).

    Before the fix, the strict UTF-8 decode of the first line raised
    SyntaxError for such files even though the cookie itself is pure ASCII.
    """
    lines = (
        # Cookie line carrying a euro sign encoded in the declared charset;
        # the bytes after the cookie are not valid UTF-8.
        '#coding=iso8859-15 €'.encode('iso8859-15'),
    )
    readline = self.get_readline(lines)
    found, consumed_lines = tokenize.detect_encoding(readline)
    self.assertEqual(found, "iso8859-15")
    # The cookie is on the first line, so exactly that line is consumed.
    self.assertEqual(consumed_lines, list(lines))

def test_utf8_normalization(self):
# See get_normal_name() in Parser/tokenizer/helpers.c.
Expand Down
24 changes: 14 additions & 10 deletions Lib/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,20 +386,24 @@ def read_or_stop():
return b''

def find_cookie(line):
try:
# Decode as UTF-8. Either the line is an encoding declaration,
# in which case it should be pure ASCII, or it must be UTF-8
# per default encoding.
line_string = line.decode('utf-8')
except UnicodeDecodeError:
msg = "invalid or missing encoding declaration"
if filename is not None:
msg = '{} for {!r}'.format(msg, filename)
raise SyntaxError(msg)
# gh-63161: Use surrogateescape error handler to escape potential
# non-ASCII characters after the coding declaration.
line_string = line.decode('utf-8', 'surrogateescape')

match = cookie_re.match(line_string)
if not match:
try:
# Decode as UTF-8. Either the line is an encoding declaration,
# in which case it should be pure ASCII, or it must be UTF-8
# per default encoding.
line.decode('utf-8')
except UnicodeDecodeError:
msg = "invalid or missing encoding declaration"
if filename is not None:
msg = '{} for {!r}'.format(msg, filename)
raise SyntaxError(msg)
return None

encoding = _get_normal_name(match.group(1))
try:
codec = lookup(encoding)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix :func:`tokenize.detect_encoding` when the coding declaration line
contains non-ASCII characters after the cookie. Patch by Victor Stinner.
Loading