@@ -172,6 +172,8 @@ def test_tokenizer_fstring_warning_in_first_line(self):
172
172
os .unlink (TESTFN )
173
173
174
174
175
# Padding length used by the test_long_* cases to build very long source
# lines (8192 bytes).
# NOTE(review): presumably chosen to be at least as large as the tokenizer's
# read buffer so those lines straddle a buffer boundary -- confirm.
BUFSIZ = 2 ** 13
176
+
175
177
class AbstractSourceEncodingTest :
176
178
177
179
def test_default_coding (self ):
@@ -184,14 +186,20 @@ def test_first_coding_line(self):
184
186
self .check_script_output (src , br"'\xc3\u20ac'" )
185
187
186
188
def test_second_coding_line(self):
    # The coding declaration sits on the second line, after a shebang line.
    # Bytes C3 A4 read as ISO-8859-15 are U+00C3 U+20AC, which is what the
    # expected output spells out.
    src = (b'#!/usr/bin/python\n'
           b'#coding:iso8859-15\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
193
+
194
def test_second_coding_line_empty_first_line(self):
    # Same as a second-line declaration, but the first line is completely
    # empty rather than a comment.
    src = (b'\n'
           b'#coding:iso8859-15\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
191
199
192
200
def test_third_coding_line (self ):
193
201
# Only first two lines are tested for a magic comment.
194
- src = (b'#\n '
202
+ src = (b'#!/usr/bin/python \n '
195
203
b'#\n '
196
204
b'#coding:iso8859-15\n '
197
205
b'print(ascii("\xc3 \xa4 "))\n ' )
@@ -209,13 +217,52 @@ def test_double_coding_same_line(self):
209
217
b'print(ascii("\xc3 \xa4 "))\n ' )
210
218
self .check_script_output (src , br"'\xc3\u20ac'" )
211
219
220
def test_double_coding_utf8(self):
    # Both of the first two lines carry a declaration.  The expected output
    # ('\xe4') is what decoding C3 A4 as UTF-8 -- the first declaration --
    # produces, so the second (latin1) one must not take effect.
    src = (b'#coding:utf-8\n'
           b'#coding:latin1\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xe4'")
225
+
226
def test_long_first_coding_line(self):
    # The declaration is on the first line but preceded by BUFSIZ spaces,
    # so it only appears far into a very long line.
    src = (b'#' + b' ' * BUFSIZ + b'coding:iso8859-15\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
230
+
231
def test_long_second_coding_line(self):
    # As for the long first line, but the padded declaration is on the
    # second line, after a shebang line.
    src = (b'#!/usr/bin/python\n'
           b'#' + b' ' * BUFSIZ + b'coding:iso8859-15\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
236
+
237
def test_long_coding_line(self):
    # The declaration comes first and is followed by BUFSIZ spaces of
    # trailing padding on the same line.
    src = (b'#coding:iso-8859-15' + b' ' * BUFSIZ + b'\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
241
+
242
def test_long_coding_name(self):
    # The encoding name itself is over-long: 'iso-8859-1-' followed by
    # BUFSIZ 'x' characters.  The expected output maps each source byte to
    # the code point of the same value, i.e. what Latin-1 decoding gives.
    # NOTE(review): presumably the name is recognised by its 'iso-8859-1-'
    # prefix -- confirm against the tokenizer's name normalisation.
    src = (b'#coding:iso-8859-1-' + b'x' * BUFSIZ + b'\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\xa4'")
246
+
247
def test_long_first_utf8_line(self):
    # The script is a single long comment made of two-byte UTF-8 sequences
    # (C3 A4); running it must succeed and print nothing.
    src = b'#' + b'\xc3\xa4' * (BUFSIZ // 2) + b'\n'
    self.check_script_output(src, b'')
    # Same again with one extra leading space, which shifts every two-byte
    # sequence by one byte position in the line.
    src = b'# ' + b'\xc3\xa4' * (BUFSIZ // 2) + b'\n'
    self.check_script_output(src, b'')
252
+
253
def test_long_second_utf8_line(self):
    # An empty first line followed by a long comment made of two-byte UTF-8
    # sequences (C3 A4); running it must succeed and print nothing.
    src = b'\n#' + b'\xc3\xa4' * (BUFSIZ // 2) + b'\n'
    self.check_script_output(src, b'')
    # Same again with one extra space after '#', which shifts every
    # two-byte sequence by one byte position in the line.
    src = b'\n# ' + b'\xc3\xa4' * (BUFSIZ // 2) + b'\n'
    self.check_script_output(src, b'')
258
+
212
259
def test_first_non_utf8_coding_line(self):
    # The first-line declaration is followed, on the same line, by the
    # byte A4, which is not valid UTF-8 on its own.  The declared
    # ISO-8859-15 encoding must still be applied to the script.
    src = (b'#coding:iso-8859-15 \xa4\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
216
263
217
264
def test_second_non_utf8_coding_line(self):
    # As for the first-line case, but the declaration (with a trailing A4
    # byte that is not valid UTF-8) is on the second line, after a shebang.
    src = (b'#!/usr/bin/python\n'
           b'#coding:iso-8859-15 \xa4\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
@@ -224,27 +271,56 @@ def test_utf8_bom(self):
224
271
src = (b'\xef \xbb \xbf print(ascii("\xc3 \xa4 "))\n ' )
225
272
self .check_script_output (src , br"'\xe4'" )
226
273
274
def test_utf8_bom_utf8_comments(self):
    # The source starts with a UTF-8 BOM (EF BB BF) and its first two lines
    # are comments containing non-ASCII UTF-8 text but no coding
    # declaration; the string literal must be decoded as UTF-8.
    src = (b'\xef\xbb\xbf#\xc3\xa4\n'
           b'#\xc3\xa4\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xe4'")
279
+
227
280
def test_utf8_bom_and_utf8_coding_line(self):
    # A UTF-8 BOM (EF BB BF) immediately followed by an explicit utf-8
    # declaration on the same line; the two agree, so the literal is
    # decoded as UTF-8.
    src = (b'\xef\xbb\xbf#coding:utf-8\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xe4'")
231
284
285
def test_utf8_non_utf8_comment_line_error(self):
    # The source declares utf8 but its third line is a comment holding the
    # byte A4, which is not valid UTF-8.  A syntax error matching either of
    # the two accepted messages is expected; the final 'raise RuntimeError'
    # makes the script fail in a different way if no such error is reported.
    src = (b'#coding: utf8\n'
           b'#\n'
           b'#\xa4\n'
           b'raise RuntimeError\n')
    self.check_script_error(src,
            br"'utf-8' codec can't decode byte|"
            br"encoding problem: utf8")
293
+
232
294
def test_crlf(self):
    # A CR LF pair inside a triple-quoted string must come out as a single
    # '\n' in the string's value.
    src = (b'print(ascii("""\r\n"""))\n')
    self.check_script_output(src, br"'\n'")
235
297
236
298
def test_crcrlf(self):
    # CR CR LF inside a triple-quoted string must come out as two '\n'
    # characters in the string's value.
    src = (b'print(ascii("""\r\r\n"""))\n')
    self.check_script_output(src, br"'\n\n'")
239
301
240
302
def test_crcrcrlf(self):
    # CR CR CR LF inside a triple-quoted string must come out as three
    # '\n' characters in the string's value.
    src = (b'print(ascii("""\r\r\r\n"""))\n')
    self.check_script_output(src, br"'\n\n\n'")
243
305
244
306
def test_crcrcrlf2(self):
    # Same CR CR CR LF input as test_crcrcrlf, but with an explicit
    # iso-8859-1 declaration on the first line; the result must not change.
    src = (b'#coding:iso-8859-1\n'
           b'print(ascii("""\r\r\r\n"""))\n')
    self.check_script_output(src, br"'\n\n\n'")
310
+
311
def test_nul_in_first_coding_line(self):
    # A NUL byte directly follows the declaration on the first line.  The
    # source must be rejected with the "cannot contain null bytes" error;
    # the final 'raise RuntimeError' makes the script fail in a different
    # way if it is not.
    src = (b'#coding:iso8859-15\x00\n'
           b'\n'
           b'\n'
           b'raise RuntimeError\n')
    self.check_script_error(src, br"source code (string )?cannot contain null bytes")
317
+
318
def test_nul_in_second_coding_line(self):
    # As for the first-line case, but the declaration with the trailing NUL
    # byte is on the second line, after a shebang line.
    src = (b'#!/usr/bin/python\n'
           b'#coding:iso8859-15\x00\n'
           b'\n'
           b'raise RuntimeError\n')
    self.check_script_error(src, br"source code (string )?cannot contain null bytes")
248
324
249
325
250
326
class UTF8ValidatorTest (unittest .TestCase ):
@@ -324,6 +400,10 @@ def check_script_output(self, src, expected):
324
400
out = stdout .getvalue ().encode ('latin1' )
325
401
self .assertEqual (out .rstrip (), expected )
326
402
403
def check_script_error(self, src, expected):
    # Execute the source bytes *src* in-process and require a SyntaxError
    # whose message matches *expected*, a bytes regular expression.  The
    # pattern is decoded to str because the exception message is text.
    with self.assertRaisesRegex(SyntaxError, expected.decode()):
        exec(src)
406
+
327
407
328
408
class FileSourceEncodingTest (AbstractSourceEncodingTest , unittest .TestCase ):
329
409
@@ -335,6 +415,14 @@ def check_script_output(self, src, expected):
335
415
res = script_helper .assert_python_ok (fn )
336
416
self .assertEqual (res .out .rstrip (), expected )
337
417
418
def check_script_error(self, src, expected):
    # Write the source bytes *src* to a temporary script, run it with a
    # fresh interpreter that is required to fail, and check that the last
    # line of its stderr reports a SyntaxError matching *expected* (a bytes
    # regular expression).
    with tempfile.TemporaryDirectory() as tmpd:
        fn = os.path.join(tmpd, 'test.py')
        with open(fn, 'wb') as fp:
            fp.write(src)
        res = script_helper.assert_python_failure(fn)
    err_lines = res.err.rstrip().splitlines()
    # Report an empty stderr as a test failure rather than an IndexError.
    self.assertTrue(err_lines, 'script failed without writing to stderr')
    # Group the caller's pattern: without the group, a top-level '|' in
    # *expected* would let its later alternatives match on their own,
    # without the 'SyntaxError' prefix.
    self.assertRegex(err_lines[-1], b'SyntaxError.*?(?:' + expected + b')')
425
+
338
426
339
427
# Run the test cases in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
0 commit comments