11# -*- coding: utf-8 -*-
22
33import unittest
4- from test .support import script_helper , captured_stdout , requires_subprocess , requires_resource
4+ from test import support
5+ from test .support import script_helper
56from test .support .os_helper import TESTFN , unlink , rmtree
67from test .support .import_helper import unload
78import importlib
@@ -64,7 +65,7 @@ def test_issue7820(self):
6465 # two bytes in common with the UTF-8 BOM
6566 self .assertRaises (SyntaxError , eval , b'\xef \xbb \x20 ' )
6667
67- @requires_subprocess ()
68+ @support . requires_subprocess ()
6869 def test_20731 (self ):
6970 sub = subprocess .Popen ([sys .executable ,
7071 os .path .join (os .path .dirname (__file__ ),
@@ -267,6 +268,17 @@ def test_second_non_utf8_coding_line(self):
267268 b'print(ascii("\xc3 \xa4 "))\n ' )
268269 self .check_script_output (src , br"'\xc3\u20ac'" )
269270
def test_first_utf8_coding_line_error(self):
    """Non-ASCII bytes on a first-line ``ascii`` coding declaration are an error.

    The first line declares ``ascii`` and is followed, on the same line,
    by the two bytes ``\\xc3\\xa4``.  The script is expected to be rejected
    with an "'ascii' codec can't decode byte" message, optionally prefixed
    by "(unicode error) ".
    """
    script = b''.join((
        b'#coding:ascii \xc3\xa4\n',
        b'raise RuntimeError\n',
    ))
    pattern = br"(\(unicode error\) )?'ascii' codec can't decode byte"
    self.check_script_error(script, pattern)
275+
def test_second_utf8_coding_line_error(self):
    """Non-ASCII bytes on a second-line ``ascii`` coding declaration are an error.

    Same scenario as the first-line variant, except that a shebang line
    comes first and the coding declaration sits on line 2.
    """
    script = b''.join((
        b'#!/usr/bin/python\n',
        b'#coding:ascii \xc3\xa4\n',
        b'raise RuntimeError\n',
    ))
    pattern = br"(\(unicode error\) )?'ascii' codec can't decode byte"
    self.check_script_error(script, pattern)
281+
def test_utf8_bom(self):
    """A script starting with the UTF-8 BOM has its literals decoded as UTF-8.

    The bytes ``\\xc3\\xa4`` inside the string literal are expected to be
    printed by ``ascii()`` as the single character ``'\\xe4'``.
    """
    bom = b'\xef\xbb\xbf'
    script = bom + b'print(ascii("\xc3\xa4"))\n'
    self.check_script_output(script, br"'\xe4'")
@@ -282,10 +294,80 @@ def test_utf8_bom_and_utf8_coding_line(self):
282294 b'print(ascii("\xc3 \xa4 "))\n ' )
283295 self .check_script_output (src , br"'\xe4'" )
284296
285- def test_utf8_non_utf8_comment_line_error (self ):
def test_utf8_bom_and_non_utf8_first_coding_line(self):
    """A UTF-8 BOM followed by a non-UTF-8 coding declaration on line 1 fails.

    The expected message is "encoding problem: iso-8859-15 with BOM" and
    the error must be attributed to line 1.
    """
    script = b''.join((
        b'\xef\xbb\xbf#coding:iso-8859-15\n',
        b'raise RuntimeError\n',
    ))
    pattern = br"encoding problem: iso-8859-15 with BOM"
    self.check_script_error(script, pattern, lineno=1)
303+
def test_utf8_bom_and_non_utf8_second_coding_line(self):
    """A UTF-8 BOM plus a non-UTF-8 coding declaration on line 2 fails.

    The BOM precedes an ordinary comment on line 1; the conflicting
    ``iso-8859-15`` declaration is on line 2, which is where the error is
    expected to be reported.
    """
    script = b''.join((
        b'\xef\xbb\xbf#first\n',
        b'#coding:iso-8859-15\n',
        b'raise RuntimeError\n',
    ))
    pattern = br"encoding problem: iso-8859-15 with BOM"
    self.check_script_error(script, pattern, lineno=2)
311+
def test_non_utf8_shebang(self):
    """A non-UTF-8 byte in the shebang is accepted when line 2 declares a codec.

    The shebang contains the lone byte ``\\xa4``; line 2 declares
    ``iso-8859-15``.  The script must run, and the literal bytes
    ``\\xc3\\xa4`` are expected to print as ``'\\xc3\\u20ac'``.
    """
    script = b''.join((
        b'#!/home/\xa4/bin/python\n',
        b'#coding:iso-8859-15\n',
        b'print(ascii("\xc3\xa4"))\n',
    ))
    self.check_script_output(script, br"'\xc3\u20ac'")
317+
def test_utf8_shebang_error(self):
    """Non-ASCII bytes in the shebang conflict with an ``ascii`` declaration.

    The shebang holds the bytes ``\\xc3\\xa4`` while line 2 declares
    ``ascii``; an "'ascii' codec can't decode byte" error (optionally
    prefixed by "(unicode error) ") is expected.
    """
    script = b''.join((
        b'#!/home/\xc3\xa4/bin/python\n',
        b'#coding:ascii\n',
        b'raise RuntimeError\n',
    ))
    pattern = br"(\(unicode error\) )?'ascii' codec can't decode byte"
    self.check_script_error(script, pattern)
323+
def test_non_utf8_shebang_error(self):
    """A non-UTF-8 byte in the shebang with no coding declaration fails.

    The expected message names line 1 ("Non-UTF-8 code starting with ...
    on line 1") and the error must be attributed to line 1.
    """
    script = b''.join((
        b'#!/home/\xa4/bin/python\n',
        b'raise RuntimeError\n',
    ))
    pattern = br"Non-UTF-8 code starting with .* on line 1"
    self.check_script_error(script, pattern, lineno=1)
329+
def test_non_utf8_second_line_error(self):
    """A non-UTF-8 byte in a comment on line 2 is reported against line 2.

    No coding declaration is present anywhere in the script.
    """
    script = b''.join((
        b'#first\n',
        b'#second\xa4\n',
        b'raise RuntimeError\n',
    ))
    pattern = br"Non-UTF-8 code starting with .* on line 2"
    self.check_script_error(script, pattern, lineno=2)
337+
def test_non_utf8_third_line_error(self):
    """A non-UTF-8 byte in a comment on line 3 is reported against line 3.

    The first two lines are plain ASCII comments with no coding
    declaration.
    """
    script = b''.join((
        b'#first\n',
        b'#second\n',
        b'#third\xa4\n',
        b'raise RuntimeError\n',
    ))
    pattern = br"Non-UTF-8 code starting with .* on line 3"
    self.check_script_error(script, pattern, lineno=3)
346+
def test_utf8_bom_non_utf8_third_line_error(self):
    """With a UTF-8 BOM, a non-UTF-8 byte on line 3 is reported on line 3.

    Either of two messages is accepted: the "Non-UTF-8 code starting
    with ... on line 3" form or the "'utf-8' codec can't decode byte"
    form.
    """
    script = b''.join((
        b'\xef\xbb\xbf#first\n',
        b'#second\n',
        b'#third\xa4\n',
        b'raise RuntimeError\n',
    ))
    # Two alternatives joined by "|"; the literals are concatenated as-is.
    pattern = (br"Non-UTF-8 code starting with .* on line 3|"
               br"'utf-8' codec can't decode byte")
    self.check_script_error(script, pattern, lineno=3)
356+
def test_utf_8_non_utf8_third_line_error(self):
    """With ``#coding: utf-8``, a non-UTF-8 byte on line 3 is reported on line 3.

    Either of two messages is accepted: the "Non-UTF-8 code starting
    with ... on line 3" form or the "'utf-8' codec can't decode byte"
    form.
    """
    script = b''.join((
        b'#coding: utf-8\n',
        b'#second\n',
        b'#third\xa4\n',
        b'raise RuntimeError\n',
    ))
    # Two alternatives joined by "|"; the literals are concatenated as-is.
    pattern = (br"Non-UTF-8 code starting with .* on line 3|"
               br"'utf-8' codec can't decode byte")
    self.check_script_error(script, pattern, lineno=3)
366+
367+ def test_utf8_non_utf8_third_line_error (self ):
286368 src = (b'#coding: utf8\n '
287- b'#\n '
288- b'#\xa4 \n '
369+ b'#second \n '
370+ b'#third \xa4 \n '
289371 b'raise RuntimeError\n ' )
290372 self .check_script_error (src ,
291373 br"'utf-8' codec can't decode byte|"
@@ -326,7 +408,7 @@ def test_nul_in_second_coding_line(self):
326408class UTF8ValidatorTest (unittest .TestCase ):
327409 @unittest .skipIf (not sys .platform .startswith ("linux" ),
328410 "Too slow to run on non-Linux platforms" )
329- @requires_resource ('cpu' )
411+ @support . requires_resource ('cpu' )
330412 def test_invalid_utf8 (self ):
331413 # This is a port of test_utf8_decode_invalid_sequences in
332414 # test_unicode.py to exercise the separate utf8 validator in
@@ -392,19 +474,29 @@ def check(content):
392474 check (b'\xF4 ' + cb + b'\xBF \xBF ' )
393475
394476
@support.force_not_colorized_test_class
class BytesSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):
    """Run the shared source-encoding checks by handing bytes directly to exec()."""

    def check_script_output(self, src, expected):
        """Execute *src* in-process and compare its stdout with *expected*.

        The captured text is encoded as latin1 and stripped of trailing
        whitespace before the comparison, so *expected* is a bytes object.
        """
        with support.captured_stdout() as captured:
            exec(src)
        printed = captured.getvalue().encode('latin1').rstrip()
        self.assertEqual(printed, expected)

    def check_script_error(self, src, expected, lineno=...):
        """Assert that executing *src* raises a matching SyntaxError.

        *expected* is a bytes regex; it is decoded and searched for in
        ``str()`` of the exception.  When *lineno* is given (the default
        ``...`` means "do not check"), the exception's ``lineno`` and
        ``text`` attributes are verified as well.
        """
        with self.assertRaises(SyntaxError) as caught:
            exec(src)
        error = caught.exception
        self.assertRegex(str(error), expected.decode())
        if lineno is ...:
            return

        self.assertEqual(error.lineno, lineno)
        # Rebuild the offending source line the way it is expected to be
        # reported: undecodable bytes replaced, and a BOM on the very first
        # line dropped.
        source_line = src.splitlines()[lineno - 1].decode(errors='replace')
        if lineno == 1:
            source_line = source_line.removeprefix('\ufeff')
        self.assertEqual(source_line, error.text)
406497
407498
499+ @support .force_not_colorized_test_class
408500class FileSourceEncodingTest (AbstractSourceEncodingTest , unittest .TestCase ):
409501
410502 def check_script_output (self , src , expected ):
@@ -415,13 +507,23 @@ def check_script_output(self, src, expected):
415507 res = script_helper .assert_python_ok (fn )
416508 self .assertEqual (res .out .rstrip (), expected )
417509
def check_script_error(self, src, expected, lineno=...):
    """Assert that running *src* as a script file fails with a SyntaxError.

    *src* is written to ``test.py`` in a temporary directory and run via
    ``script_helper.assert_python_failure``.  The last line of stderr must
    match ``b'SyntaxError: '`` followed by the bytes regex *expected*.
    When *lineno* is given (the default ``...`` means "do not check"),
    stderr must also contain ``", line <lineno>"`` at the end of a line,
    and the text of the offending source line.
    """
    with tempfile.TemporaryDirectory() as workdir:
        script_path = os.path.join(workdir, 'test.py')
        with open(script_path, 'wb') as script_file:
            script_file.write(src)
        result = script_helper.assert_python_failure(script_path)

    stderr = result.err.rstrip()
    final_line = stderr.splitlines()[-1]
    self.assertRegex(final_line, b'SyntaxError: ' + expected)
    if lineno is ...:
        return

    # Normalise platform line endings so the location marker can be
    # matched together with the newline that follows it.
    location = f', line {lineno}\n'.encode()
    normalized = stderr.replace(os.linesep.encode(), b'\n')
    self.assertIn(location, normalized)

    # Rebuild the source line as it is expected to appear in stderr:
    # undecodable bytes replaced, a BOM on the first line dropped, then
    # re-encoded with this process's stderr settings.
    source_line = src.splitlines()[lineno - 1].decode(errors='replace')
    if lineno == 1:
        source_line = source_line.removeprefix('\ufeff')
    echoed = source_line.encode(sys.stderr.encoding, sys.stderr.errors)
    self.assertIn(echoed, stderr)
526+
425527
426528
427529if __name__ == "__main__" :
0 commit comments