@@ -173,6 +173,8 @@ def test_tokenizer_fstring_warning_in_first_line(self):
173
173
os .unlink (TESTFN )
174
174
175
175
176
# Padding length (8192) used by the test_long_* cases to build very long
# source lines.
BUFSIZ = 2 ** 13
177
+
176
178
class AbstractSourceEncodingTest :
177
179
178
180
def test_default_coding (self ):
@@ -185,14 +187,20 @@ def test_first_coding_line(self):
185
187
self .check_script_output (src , br"'\xc3\u20ac'" )
186
188
187
189
def test_second_coding_line (self ):
188
- src = (b'#\n '
190
+ src = (b'#!/usr/bin/python\n '
191
+ b'#coding:iso8859-15\n '
192
+ b'print(ascii("\xc3 \xa4 "))\n ' )
193
+ self .check_script_output (src , br"'\xc3\u20ac'" )
194
+
195
+ def test_second_coding_line_empty_first_line (self ):
196
+ src = (b'\n '
189
197
b'#coding:iso8859-15\n '
190
198
b'print(ascii("\xc3 \xa4 "))\n ' )
191
199
self .check_script_output (src , br"'\xc3\u20ac'" )
192
200
193
201
def test_third_coding_line (self ):
194
202
# Only first two lines are tested for a magic comment.
195
- src = (b'#\n '
203
+ src = (b'#!/usr/bin/python \n '
196
204
b'#\n '
197
205
b'#coding:iso8859-15\n '
198
206
b'print(ascii("\xc3 \xa4 "))\n ' )
@@ -210,13 +218,52 @@ def test_double_coding_same_line(self):
210
218
b'print(ascii("\xc3 \xa4 "))\n ' )
211
219
self .check_script_output (src , br"'\xc3\u20ac'" )
212
220
221
+ def test_double_coding_utf8 (self ):
222
+ src = (b'#coding:utf-8\n '
223
+ b'#coding:latin1\n '
224
+ b'print(ascii("\xc3 \xa4 "))\n ' )
225
+ self .check_script_output (src , br"'\xe4'" )
226
+
227
def test_long_first_coding_line(self):
    # The cookie on the first line is preceded by BUFSIZ spaces, so the
    # "coding:" text starts well past BUFSIZ bytes into the line.
    src = (b'#' + b' ' * BUFSIZ + b'coding:iso8859-15\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
231
+
232
def test_long_second_coding_line(self):
    # Shebang on line one; on line two the cookie is pushed BUFSIZ
    # spaces to the right of the '#'.
    src = (b'#!/usr/bin/python\n'
           b'#' + b' ' * BUFSIZ + b'coding:iso8859-15\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
237
+
238
def test_long_coding_line(self):
    # The cookie comes first and is followed by BUFSIZ trailing spaces
    # before the newline.
    src = (b'#coding:iso-8859-15' + b' ' * BUFSIZ + b'\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
242
+
243
def test_long_coding_name(self):
    # The encoding name itself is over-long: 'iso-8859-1-' followed by
    # BUFSIZ 'x' characters.  The expected output is the byte-for-byte
    # (Latin-1 style) reading of \xc3\xa4.
    src = (b'#coding:iso-8859-1-' + b'x' * BUFSIZ + b'\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\xa4'")
247
+
248
def test_long_first_utf8_line(self):
    # A single comment line holding BUFSIZ // 2 two-byte UTF-8 sequences;
    # the script has no statements, so no output is expected.
    src = b'#' + b'\xc3\xa4' * (BUFSIZ // 2) + b'\n'
    self.check_script_output(src, b'')
    # Same payload shifted by one byte (note the extra space after '#');
    # presumably this varies where the multi-byte sequences fall
    # relative to a buffer boundary -- TODO confirm.
    src = b'# ' + b'\xc3\xa4' * (BUFSIZ // 2) + b'\n'
    self.check_script_output(src, b'')
253
+
254
def test_long_second_utf8_line(self):
    # As the first-line variant, but the long UTF-8 comment is on the
    # second line, after an empty first line.  No output is expected.
    src = b'\n#' + b'\xc3\xa4' * (BUFSIZ // 2) + b'\n'
    self.check_script_output(src, b'')
    # Same payload shifted by one byte via the space after '#'.
    src = b'\n# ' + b'\xc3\xa4' * (BUFSIZ // 2) + b'\n'
    self.check_script_output(src, b'')
259
+
213
260
def test_first_non_utf8_coding_line (self ):
214
261
src = (b'#coding:iso-8859-15 \xa4 \n '
215
262
b'print(ascii("\xc3 \xa4 "))\n ' )
216
263
self .check_script_output (src , br"'\xc3\u20ac'" )
217
264
218
265
def test_second_non_utf8_coding_line (self ):
219
- src = (b'\n '
266
+ src = (b'#!/usr/bin/python \n '
220
267
b'#coding:iso-8859-15 \xa4 \n '
221
268
b'print(ascii("\xc3 \xa4 "))\n ' )
222
269
self .check_script_output (src , br"'\xc3\u20ac'" )
@@ -225,27 +272,56 @@ def test_utf8_bom(self):
225
272
src = (b'\xef \xbb \xbf print(ascii("\xc3 \xa4 "))\n ' )
226
273
self .check_script_output (src , br"'\xe4'" )
227
274
275
+ def test_utf8_bom_utf8_comments (self ):
276
+ src = (b'\xef \xbb \xbf #\xc3 \xa4 \n '
277
+ b'#\xc3 \xa4 \n '
278
+ b'print(ascii("\xc3 \xa4 "))\n ' )
279
+ self .check_script_output (src , br"'\xe4'" )
280
+
228
281
def test_utf8_bom_and_utf8_coding_line (self ):
229
282
src = (b'\xef \xbb \xbf #coding:utf-8\n '
230
283
b'print(ascii("\xc3 \xa4 "))\n ' )
231
284
self .check_script_output (src , br"'\xe4'" )
232
285
286
+ def test_utf8_non_utf8_comment_line_error (self ):
287
+ src = (b'#coding: utf8\n '
288
+ b'#\n '
289
+ b'#\xa4 \n '
290
+ b'raise RuntimeError\n ' )
291
+ self .check_script_error (src ,
292
+ br"'utf-8' codec can't decode byte|"
293
+ br"encoding problem: utf8" )
294
+
233
295
def test_crlf (self ):
234
296
src = (b'print(ascii("""\r \n """))\n ' )
235
- out = self .check_script_output (src , br"'\n'" )
297
+ self .check_script_output (src , br"'\n'" )
236
298
237
299
def test_crcrlf (self ):
238
300
src = (b'print(ascii("""\r \r \n """))\n ' )
239
- out = self .check_script_output (src , br"'\n\n'" )
301
+ self .check_script_output (src , br"'\n\n'" )
240
302
241
303
def test_crcrcrlf (self ):
242
304
src = (b'print(ascii("""\r \r \r \n """))\n ' )
243
- out = self .check_script_output (src , br"'\n\n\n'" )
305
+ self .check_script_output (src , br"'\n\n\n'" )
244
306
245
307
def test_crcrcrlf2 (self ):
246
308
src = (b'#coding:iso-8859-1\n '
247
309
b'print(ascii("""\r \r \r \n """))\n ' )
248
- out = self .check_script_output (src , br"'\n\n\n'" )
310
+ self .check_script_output (src , br"'\n\n\n'" )
311
+
312
+ def test_nul_in_first_coding_line (self ):
313
+ src = (b'#coding:iso8859-15\x00 \n '
314
+ b'\n '
315
+ b'\n '
316
+ b'raise RuntimeError\n ' )
317
+ self .check_script_error (src , br"source code (string )?cannot contain null bytes" )
318
+
319
+ def test_nul_in_second_coding_line (self ):
320
+ src = (b'#!/usr/bin/python\n '
321
+ b'#coding:iso8859-15\x00 \n '
322
+ b'\n '
323
+ b'raise RuntimeError\n ' )
324
+ self .check_script_error (src , br"source code (string )?cannot contain null bytes" )
249
325
250
326
251
327
class UTF8ValidatorTest (unittest .TestCase ):
@@ -325,6 +401,10 @@ def check_script_output(self, src, expected):
325
401
out = stdout .getvalue ().encode ('latin1' )
326
402
self .assertEqual (out .rstrip (), expected )
327
403
404
def check_script_error(self, src, expected):
    # Execute *src* (bytes) in-process and require a SyntaxError whose
    # message matches *expected*.  *expected* is a bytes regex; it is
    # decoded to str because the exception message is text.
    pattern = expected.decode()
    with self.assertRaisesRegex(SyntaxError, pattern):
        exec(src)
407
+
328
408
329
409
class FileSourceEncodingTest (AbstractSourceEncodingTest , unittest .TestCase ):
330
410
@@ -336,6 +416,14 @@ def check_script_output(self, src, expected):
336
416
res = script_helper .assert_python_ok (fn )
337
417
self .assertEqual (res .out .rstrip (), expected )
338
418
419
def check_script_error(self, src, expected):
    # Write *src* to 'test.py' in a temporary directory and run that file
    # through script_helper.assert_python_failure (presumably a child
    # interpreter that must exit with an error -- confirm against
    # test.support.script_helper).
    with tempfile.TemporaryDirectory() as tmpd:
        fn = os.path.join(tmpd, 'test.py')
        with open(fn, 'wb') as fp:
            fp.write(src)
        # Must run while the temporary directory still exists.
        res = script_helper.assert_python_failure(fn)
    # Only the last line of the captured stderr is matched; it has to
    # mention SyntaxError followed by the *expected* bytes regex.
    last_line = res.err.rstrip().splitlines()[-1]
    self.assertRegex(last_line, b'SyntaxError.*?' + expected)
426
+
339
427
340
428
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
0 commit comments