@@ -1495,6 +1495,61 @@ def test_cookie_second_line_noncommented_first_line(self):
14951495 expected = [b"print('\xc2 \xa3 ')\n " ]
14961496 self .assertEqual (consumed_lines , expected )
14971497
def test_first_non_utf8_coding_line(self):
    # The coding cookie sits on the first line, which also carries the
    # byte 0xA4.  That byte is not decodable as UTF-8 on its own, so the
    # line is only valid under the encoding it declares.  The declared
    # encoding must be reported and only that first line consumed.
    #
    # Each fixture element is exactly one physical line: nothing may
    # follow its terminating newline.
    lines = (
        b'#coding:iso-8859-15 \xa4\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    encoding, consumed_lines = tokenize.detect_encoding(readline)
    self.assertEqual(encoding, 'iso-8859-15')
    self.assertEqual(consumed_lines, list(lines[:1]))
1506+
def test_first_utf8_coding_line_error(self):
    # b'\xc3\xa4' is a well-formed two-byte UTF-8 sequence but is not
    # ASCII, so the first line contradicts the "ascii" cookie it
    # declares.  Detection is expected to fail with SyntaxError.
    #
    # The two bytes must stay adjacent: separated, they would no longer
    # be valid UTF-8 and the test would exercise a different case.
    lines = (
        b'#coding:ascii \xc3\xa4\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    with self.assertRaises(SyntaxError):
        tokenize.detect_encoding(readline)
1514+
def test_second_non_utf8_coding_line(self):
    # Same scenario as a non-UTF-8 byte on the cookie line, but the
    # cookie is on the second line, after a plain ASCII shebang.  The
    # declared encoding must be reported and both lines consumed.
    #
    # Each fixture element is exactly one physical line: nothing may
    # follow its terminating newline.
    lines = (
        b'#!/usr/bin/python\n',
        b'#coding:iso-8859-15 \xa4\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    encoding, consumed_lines = tokenize.detect_encoding(readline)
    self.assertEqual(encoding, 'iso-8859-15')
    self.assertEqual(consumed_lines, list(lines[:2]))
1524+
def test_second_utf8_coding_line_error(self):
    # The cookie is on the second line and declares "ascii", yet that
    # same line contains b'\xc3\xa4' (well-formed UTF-8, not ASCII).
    # Detection is expected to fail with SyntaxError.
    #
    # The two bytes must stay adjacent: separated, they would no longer
    # be valid UTF-8 and the test would exercise a different case.
    lines = (
        b'#!/usr/bin/python\n',
        b'#coding:ascii \xc3\xa4\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    with self.assertRaises(SyntaxError):
        tokenize.detect_encoding(readline)
1533+
def test_non_utf8_shebang(self):
    # The shebang path on the first line contains the byte 0xA4, which
    # is not decodable as UTF-8 on its own; the cookie on the second
    # line declares iso-8859-15.  The declared encoding must be
    # reported and both lines consumed.
    #
    # The byte belongs inside the path component (no surrounding
    # whitespace), and nothing may follow a line's terminating newline.
    lines = (
        b'#!/home/\xa4/bin/python\n',
        b'#coding:iso-8859-15\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    encoding, consumed_lines = tokenize.detect_encoding(readline)
    self.assertEqual(encoding, 'iso-8859-15')
    self.assertEqual(consumed_lines, list(lines[:2]))
1543+
def test_utf8_shebang_error(self):
    # The shebang path contains b'\xc3\xa4' (well-formed UTF-8, not
    # ASCII) while the cookie on the second line declares "ascii".
    # Detection is expected to fail with SyntaxError.
    #
    # The two bytes must stay adjacent: separated, they would no longer
    # be valid UTF-8 and the test would exercise a different case.
    lines = (
        b'#!/home/\xc3\xa4/bin/python\n',
        b'#coding:ascii\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    with self.assertRaises(SyntaxError):
        tokenize.detect_encoding(readline)
1552+
14981553 def test_cookie_second_line_empty_first_line (self ):
14991554 lines = (
15001555 b'\n ' ,
@@ -1548,6 +1603,28 @@ def test_double_coding_utf8(self):
15481603 self .assertEqual (encoding , 'utf-8' )
15491604 self .assertEqual (consumed_lines , list (lines [:1 ]))
15501605
def test_nul_in_first_coding_line(self):
    # A NUL byte sits on the first line, immediately after the coding
    # cookie.  Detection is expected to fail with a SyntaxError whose
    # message matches the null-bytes diagnostic below.
    #
    # The NUL is the last byte before the newline, and nothing may
    # follow a line's terminating newline.
    lines = (
        b'#coding:iso8859-15\x00\n',
        b'\n',
        b'\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    with self.assertRaisesRegex(SyntaxError,
                                "source code cannot contain null bytes"):
        tokenize.detect_encoding(readline)
1616+
def test_nul_in_second_coding_line(self):
    # A NUL byte sits on the second line, immediately after the coding
    # cookie; the first line is a plain ASCII shebang.  Detection is
    # expected to fail with a SyntaxError whose message matches the
    # null-bytes diagnostic below.
    #
    # The NUL is the last byte before the newline, and nothing may
    # follow a line's terminating newline.
    lines = (
        b'#!/usr/bin/python\n',
        b'#coding:iso8859-15\x00\n',
        b'\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    with self.assertRaisesRegex(SyntaxError,
                                "source code cannot contain null bytes"):
        tokenize.detect_encoding(readline)
1627+
15511628 def test_latin1_normalization (self ):
15521629 # See get_normal_name() in Parser/tokenizer/helpers.c.
15531630 encodings = ("latin-1" , "iso-8859-1" , "iso-latin-1" , "latin-1-unix" ,
0 commit comments