1
1
# -*- coding: utf-8 -*-
2
2
3
3
import unittest
4
- from test .support import script_helper , captured_stdout , requires_subprocess , requires_resource
4
+ from test import support
5
+ from test .support import script_helper
5
6
from test .support .os_helper import TESTFN , unlink , rmtree
6
7
from test .support .import_helper import unload
7
8
import importlib
@@ -64,7 +65,7 @@ def test_issue7820(self):
64
65
# two bytes in common with the UTF-8 BOM
65
66
self .assertRaises (SyntaxError , eval , b'\xef \xbb \x20 ' )
66
67
67
- @requires_subprocess ()
68
+ @support . requires_subprocess ()
68
69
def test_20731 (self ):
69
70
sub = subprocess .Popen ([sys .executable ,
70
71
os .path .join (os .path .dirname (__file__ ),
@@ -267,6 +268,17 @@ def test_second_non_utf8_coding_line(self):
267
268
b'print(ascii("\xc3 \xa4 "))\n ' )
268
269
self .check_script_output (src , br"'\xc3\u20ac'" )
269
270
271
+ def test_first_utf8_coding_line_error (self ):
272
+ src = (b'#coding:ascii \xc3 \xa4 \n '
273
+ b'raise RuntimeError\n ' )
274
+ self .check_script_error (src , br"(\(unicode error\) )?'ascii' codec can't decode byte" )
275
+
276
+ def test_second_utf8_coding_line_error (self ):
277
+ src = (b'#!/usr/bin/python\n '
278
+ b'#coding:ascii \xc3 \xa4 \n '
279
+ b'raise RuntimeError\n ' )
280
+ self .check_script_error (src , br"(\(unicode error\) )?'ascii' codec can't decode byte" )
281
+
270
282
def test_utf8_bom (self ):
271
283
src = (b'\xef \xbb \xbf print(ascii("\xc3 \xa4 "))\n ' )
272
284
self .check_script_output (src , br"'\xe4'" )
@@ -282,10 +294,80 @@ def test_utf8_bom_and_utf8_coding_line(self):
282
294
b'print(ascii("\xc3 \xa4 "))\n ' )
283
295
self .check_script_output (src , br"'\xe4'" )
284
296
285
- def test_utf8_non_utf8_comment_line_error (self ):
297
+ def test_utf8_bom_and_non_utf8_first_coding_line (self ):
298
+ src = (b'\xef \xbb \xbf #coding:iso-8859-15\n '
299
+ b'raise RuntimeError\n ' )
300
+ self .check_script_error (src ,
301
+ br"encoding problem: iso-8859-15 with BOM" ,
302
+ lineno = 1 )
303
+
304
+ def test_utf8_bom_and_non_utf8_second_coding_line (self ):
305
+ src = (b'\xef \xbb \xbf #first\n '
306
+ b'#coding:iso-8859-15\n '
307
+ b'raise RuntimeError\n ' )
308
+ self .check_script_error (src ,
309
+ br"encoding problem: iso-8859-15 with BOM" ,
310
+ lineno = 2 )
311
+
312
+ def test_non_utf8_shebang (self ):
313
+ src = (b'#!/home/\xa4 /bin/python\n '
314
+ b'#coding:iso-8859-15\n '
315
+ b'print(ascii("\xc3 \xa4 "))\n ' )
316
+ self .check_script_output (src , br"'\xc3\u20ac'" )
317
+
318
+ def test_utf8_shebang_error (self ):
319
+ src = (b'#!/home/\xc3 \xa4 /bin/python\n '
320
+ b'#coding:ascii\n '
321
+ b'raise RuntimeError\n ' )
322
+ self .check_script_error (src , br"(\(unicode error\) )?'ascii' codec can't decode byte" )
323
+
324
+ def test_non_utf8_shebang_error (self ):
325
+ src = (b'#!/home/\xa4 /bin/python\n '
326
+ b'raise RuntimeError\n ' )
327
+ self .check_script_error (src , br"Non-UTF-8 code starting with .* on line 1" ,
328
+ lineno = 1 )
329
+
330
+ def test_non_utf8_second_line_error (self ):
331
+ src = (b'#first\n '
332
+ b'#second\xa4 \n '
333
+ b'raise RuntimeError\n ' )
334
+ self .check_script_error (src ,
335
+ br"Non-UTF-8 code starting with .* on line 2" ,
336
+ lineno = 2 )
337
+
338
+ def test_non_utf8_third_line_error (self ):
339
+ src = (b'#first\n '
340
+ b'#second\n '
341
+ b'#third\xa4 \n '
342
+ b'raise RuntimeError\n ' )
343
+ self .check_script_error (src ,
344
+ br"Non-UTF-8 code starting with .* on line 3" ,
345
+ lineno = 3 )
346
+
347
+ def test_utf8_bom_non_utf8_third_line_error (self ):
348
+ src = (b'\xef \xbb \xbf #first\n '
349
+ b'#second\n '
350
+ b'#third\xa4 \n '
351
+ b'raise RuntimeError\n ' )
352
+ self .check_script_error (src ,
353
+ br"Non-UTF-8 code starting with .* on line 3|"
354
+ br"'utf-8' codec can't decode byte" ,
355
+ lineno = 3 )
356
+
357
+ def test_utf_8_non_utf8_third_line_error (self ):
358
+ src = (b'#coding: utf-8\n '
359
+ b'#second\n '
360
+ b'#third\xa4 \n '
361
+ b'raise RuntimeError\n ' )
362
+ self .check_script_error (src ,
363
+ br"Non-UTF-8 code starting with .* on line 3|"
364
+ br"'utf-8' codec can't decode byte" ,
365
+ lineno = 3 )
366
+
367
+ def test_utf8_non_utf8_third_line_error (self ):
286
368
src = (b'#coding: utf8\n '
287
- b'#\n '
288
- b'#\xa4 \n '
369
+ b'#second \n '
370
+ b'#third \xa4 \n '
289
371
b'raise RuntimeError\n ' )
290
372
self .check_script_error (src ,
291
373
br"'utf-8' codec can't decode byte|"
@@ -326,7 +408,7 @@ def test_nul_in_second_coding_line(self):
326
408
class UTF8ValidatorTest (unittest .TestCase ):
327
409
@unittest .skipIf (not sys .platform .startswith ("linux" ),
328
410
"Too slow to run on non-Linux platforms" )
329
- @requires_resource ('cpu' )
411
+ @support . requires_resource ('cpu' )
330
412
def test_invalid_utf8 (self ):
331
413
# This is a port of test_utf8_decode_invalid_sequences in
332
414
# test_unicode.py to exercise the separate utf8 validator in
@@ -392,19 +474,29 @@ def check(content):
392
474
check (b'\xF4 ' + cb + b'\xBF \xBF ' )
393
475
394
476
477
+ @support .force_not_colorized_test_class
395
478
class BytesSourceEncodingTest (AbstractSourceEncodingTest , unittest .TestCase ):
396
479
397
480
def check_script_output (self , src , expected ):
398
- with captured_stdout () as stdout :
481
+ with support . captured_stdout () as stdout :
399
482
exec (src )
400
483
out = stdout .getvalue ().encode ('latin1' )
401
484
self .assertEqual (out .rstrip (), expected )
402
485
403
- def check_script_error (self , src , expected ):
404
- with self .assertRaisesRegex (SyntaxError , expected . decode () ) as cm :
486
+ def check_script_error (self , src , expected , lineno = ... ):
487
+ with self .assertRaises (SyntaxError ) as cm :
405
488
exec (src )
489
+ exc = cm .exception
490
+ self .assertRegex (str (exc ), expected .decode ())
491
+ if lineno is not ...:
492
+ self .assertEqual (exc .lineno , lineno )
493
+ line = src .splitlines ()[lineno - 1 ].decode (errors = 'replace' )
494
+ if lineno == 1 :
495
+ line = line .removeprefix ('\ufeff ' )
496
+ self .assertEqual (line , exc .text )
406
497
407
498
499
+ @support .force_not_colorized_test_class
408
500
class FileSourceEncodingTest (AbstractSourceEncodingTest , unittest .TestCase ):
409
501
410
502
def check_script_output (self , src , expected ):
@@ -415,13 +507,22 @@ def check_script_output(self, src, expected):
415
507
res = script_helper .assert_python_ok (fn )
416
508
self .assertEqual (res .out .rstrip (), expected )
417
509
418
- def check_script_error (self , src , expected ):
510
+ def check_script_error (self , src , expected , lineno = ... ):
419
511
with tempfile .TemporaryDirectory () as tmpd :
420
512
fn = os .path .join (tmpd , 'test.py' )
421
513
with open (fn , 'wb' ) as fp :
422
514
fp .write (src )
423
515
res = script_helper .assert_python_failure (fn )
424
- self .assertRegex (res .err .rstrip ().splitlines ()[- 1 ], b'SyntaxError.*?' + expected )
516
+ err = res .err .rstrip ()
517
+ self .assertRegex (err .splitlines ()[- 1 ], b'SyntaxError: ' + expected )
518
+ if lineno is not ...:
519
+ self .assertIn (f', line { lineno } \n ' .encode (),
520
+ err .replace (os .linesep .encode (), b'\n ' ))
521
+ line = src .splitlines ()[lineno - 1 ].decode (errors = 'replace' )
522
+ if lineno == 1 :
523
+ line = line .removeprefix ('\ufeff ' )
524
+ self .assertIn (line .encode (), err )
525
+
425
526
426
527
427
528
if __name__ == "__main__" :
0 commit comments