@@ -1478,6 +1478,61 @@ def test_cookie_second_line_noncommented_first_line(self):
1478
1478
expected = [b"print('\xc2 \xa3 ')\n " ]
1479
1479
self .assertEqual (consumed_lines , expected )
1480
1480
1481
def test_first_non_utf8_coding_line(self):
    """Honour a coding cookie on a first line that is not valid UTF-8.

    The first line declares iso-8859-15 and also carries a lone 0xA4
    byte, which cannot be decoded as UTF-8.  detect_encoding() is
    expected to report 'iso-8859-15' and to have consumed only that
    first line.
    """
    # Every line ends exactly at its b'\n', the way a readline()
    # callable hands lines over; nothing may follow the terminator.
    lines = (
        b'#coding:iso-8859-15 \xa4\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    encoding, consumed_lines = tokenize.detect_encoding(readline)
    self.assertEqual(encoding, 'iso-8859-15')
    self.assertEqual(consumed_lines, list(lines[:1]))
1489
+
1490
def test_first_utf8_coding_line_error(self):
    """Reject a first-line ascii cookie followed by non-ASCII UTF-8 text.

    The first line is valid UTF-8 but contains a two-byte character,
    while its cookie declares ascii.  detect_encoding() is expected to
    raise SyntaxError.
    """
    # 0xC3 0xA4 must stay adjacent: together they form one UTF-8
    # character.  Separating them would make the line invalid UTF-8 and
    # turn this into the "non-UTF-8 line" scenario instead.
    lines = (
        b'#coding:ascii \xc3\xa4\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    with self.assertRaises(SyntaxError):
        tokenize.detect_encoding(readline)
1497
+
1498
def test_second_non_utf8_coding_line(self):
    """Honour a coding cookie on a second line that is not valid UTF-8.

    A plain shebang comes first; the second line declares iso-8859-15
    and carries a lone 0xA4 byte, which cannot be decoded as UTF-8.
    detect_encoding() is expected to report 'iso-8859-15' and to have
    consumed the first two lines.
    """
    # Every line ends exactly at its b'\n', the way a readline()
    # callable hands lines over; nothing may follow the terminator.
    lines = (
        b'#!/usr/bin/python\n',
        b'#coding:iso-8859-15 \xa4\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    encoding, consumed_lines = tokenize.detect_encoding(readline)
    self.assertEqual(encoding, 'iso-8859-15')
    self.assertEqual(consumed_lines, list(lines[:2]))
1507
+
1508
def test_second_utf8_coding_line_error(self):
    """Reject a second-line ascii cookie followed by non-ASCII UTF-8 text.

    A plain shebang comes first; the second line is valid UTF-8 but
    contains a two-byte character, while its cookie declares ascii.
    detect_encoding() is expected to raise SyntaxError.
    """
    # 0xC3 0xA4 must stay adjacent: together they form one UTF-8
    # character.  Separating them would make the line invalid UTF-8 and
    # turn this into the "non-UTF-8 line" scenario instead.
    lines = (
        b'#!/usr/bin/python\n',
        b'#coding:ascii \xc3\xa4\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    with self.assertRaises(SyntaxError):
        tokenize.detect_encoding(readline)
1516
+
1517
def test_non_utf8_shebang(self):
    """Accept a non-UTF-8 shebang when the next line declares the codec.

    The shebang path contains a lone 0xA4 byte, which cannot be decoded
    as UTF-8; the second line declares iso-8859-15.  detect_encoding()
    is expected to report 'iso-8859-15' and to have consumed the first
    two lines.
    """
    # The 0xA4 byte is part of the directory name, so it sits directly
    # against the following '/' with no padding.
    lines = (
        b'#!/home/\xa4/bin/python\n',
        b'#coding:iso-8859-15\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    encoding, consumed_lines = tokenize.detect_encoding(readline)
    self.assertEqual(encoding, 'iso-8859-15')
    self.assertEqual(consumed_lines, list(lines[:2]))
1526
+
1527
def test_utf8_shebang_error(self):
    """Reject a non-ASCII UTF-8 shebang when the next line declares ascii.

    The shebang path is valid UTF-8 but contains a two-byte character;
    the second line declares ascii.  detect_encoding() is expected to
    raise SyntaxError.
    """
    # 0xC3 0xA4 must stay adjacent: together they form one UTF-8
    # character.  Separating them would make the shebang invalid UTF-8
    # and turn this into the "non-UTF-8 shebang" scenario instead.
    lines = (
        b'#!/home/\xc3\xa4/bin/python\n',
        b'#coding:ascii\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    with self.assertRaises(SyntaxError):
        tokenize.detect_encoding(readline)
1535
+
1481
1536
def test_cookie_second_line_empty_first_line (self ):
1482
1537
lines = (
1483
1538
b'\n ' ,
@@ -1531,6 +1586,28 @@ def test_double_coding_utf8(self):
1531
1586
self .assertEqual (encoding , 'utf-8' )
1532
1587
self .assertEqual (consumed_lines , list (lines [:1 ]))
1533
1588
1589
def test_nul_in_first_coding_line(self):
    """Reject a NUL byte inside a first-line coding declaration.

    The first line declares iso8859-15 and has a NUL byte immediately
    before its newline.  detect_encoding() is expected to raise
    SyntaxError with a message matching
    "source code cannot contain null bytes".
    """
    # The NUL byte sits directly between the codec name and the line
    # terminator; each line ends exactly at its b'\n'.
    lines = (
        b'#coding:iso8859-15\x00\n',
        b'\n',
        b'\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    with self.assertRaisesRegex(SyntaxError,
                                "source code cannot contain null bytes"):
        tokenize.detect_encoding(readline)
1599
+
1600
def test_nul_in_second_coding_line(self):
    """Reject a NUL byte inside a second-line coding declaration.

    A plain shebang comes first; the second line declares iso8859-15
    and has a NUL byte immediately before its newline.
    detect_encoding() is expected to raise SyntaxError with a message
    matching "source code cannot contain null bytes".
    """
    # The NUL byte sits directly between the codec name and the line
    # terminator; each line ends exactly at its b'\n'.
    lines = (
        b'#!/usr/bin/python\n',
        b'#coding:iso8859-15\x00\n',
        b'\n',
        b'print(something)\n',
    )
    readline = self.get_readline(lines)
    with self.assertRaisesRegex(SyntaxError,
                                "source code cannot contain null bytes"):
        tokenize.detect_encoding(readline)
1610
+
1534
1611
def test_latin1_normalization (self ):
1535
1612
# See get_normal_name() in Parser/tokenizer/helpers.c.
1536
1613
encodings = ("latin-1" , "iso-8859-1" , "iso-latin-1" , "latin-1-unix" ,
0 commit comments