2 files changed: +22 -10 lines changed
Original file line number | Diff line number | Diff line change
@@ -1485,6 +1485,14 @@ def test_syntaxerror_latin1(self):
14851485 readline = self .get_readline (lines )
14861486 self .assertRaises (SyntaxError , tokenize .detect_encoding , readline )
14871487
1488+ def test_nonascii_coding (self ):
1489+ # gh-63161: test non-ASCII coding
1490+ lines = (
1491+ '#coding=iso8859-15 €' .encode ('iso8859-15' ),
1492+ )
1493+ readline = self .get_readline (lines )
1494+ found , consumed_lines = tokenize .detect_encoding (readline )
1495+ self .assertEqual (found , "iso8859-15" )
14881496
14891497 def test_utf8_normalization (self ):
14901498 # See get_normal_name() in Parser/tokenizer/helpers.c.
Original file line number | Diff line number | Diff line change
@@ -386,20 +386,24 @@ def read_or_stop():
386386 return b''
387387
388388 def find_cookie (line ):
389- try :
390- # Decode as UTF-8. Either the line is an encoding declaration,
391- # in which case it should be pure ASCII, or it must be UTF-8
392- # per default encoding.
393- line_string = line .decode ('utf-8' )
394- except UnicodeDecodeError :
395- msg = "invalid or missing encoding declaration"
396- if filename is not None :
397- msg = '{} for {!r}' .format (msg , filename )
398- raise SyntaxError (msg )
389+ # gh-63161: Use surrogateescape error handler to escape potential
390+ # non-ASCII characters after the coding declaration.
391+ line_string = line .decode ('utf-8' , 'surrogateescape' )
399392
400393 match = cookie_re .match (line_string )
401394 if not match :
395+ try :
396+ # Decode as UTF-8. Either the line is an encoding declaration,
397+ # in which case it should be pure ASCII, or it must be UTF-8
398+ # per default encoding.
399+ line .decode ('utf-8' )
400+ except UnicodeDecodeError :
401+ msg = "invalid or missing encoding declaration"
402+ if filename is not None :
403+ msg = '{} for {!r}' .format (msg , filename )
404+ raise SyntaxError (msg )
402405 return None
406+
403407 encoding = _get_normal_name (match .group (1 ))
404408 try :
405409 codec = lookup (encoding )
You can’t perform that action at this time.
0 commit comments