@@ -3014,11 +3014,12 @@ class ArchiveMaker:
3014
3014
with t.open() as tar:
3015
3015
... # `tar` is now a TarFile with 'filename' in it!
3016
3016
"""
3017
def __init__(self, **kwargs):
    """Set up an empty in-memory archive.

    Any keyword arguments are remembered in ``self.tar_kwargs`` and
    handed to ``tarfile.TarFile`` when the context manager is entered,
    so callers can choose e.g. ``encoding`` or ``errors`` for the
    archive being written.
    """
    # The archive is assembled entirely in memory.
    self.bio = io.BytesIO()
    # Store a copy of the options for later use by __enter__.
    self.tar_kwargs = dict(kwargs)
3019
3020
3020
3021
def __enter__(self):
    """Open a writable ``TarFile`` on ``self.bio`` and return ``self``.

    The keyword arguments held in ``self.tar_kwargs`` are forwarded to
    the ``tarfile.TarFile`` constructor, and the resulting writer is
    stored as ``self.tar_w``.
    """
    self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio,
                                 **self.tar_kwargs)
    return self
3023
3024
3024
3025
def __exit__ (self , * exc ):
@@ -3425,7 +3426,10 @@ def test_tar_filter(self):
3425
3426
# that in the test archive.)
3426
3427
with tarfile .TarFile .open (tarname ) as tar :
3427
3428
for tarinfo in tar .getmembers ():
3428
- filtered = tarfile .tar_filter (tarinfo , '' )
3429
+ try :
3430
+ filtered = tarfile .tar_filter (tarinfo , '' )
3431
+ except UnicodeEncodeError :
3432
+ continue
3429
3433
self .assertIs (filtered .name , tarinfo .name )
3430
3434
self .assertIs (filtered .type , tarinfo .type )
3431
3435
@@ -3436,13 +3440,50 @@ def test_data_filter(self):
3436
3440
for tarinfo in tar .getmembers ():
3437
3441
try :
3438
3442
filtered = tarfile .data_filter (tarinfo , '' )
3439
- except tarfile .FilterError :
3443
+ except ( tarfile .FilterError , UnicodeEncodeError ) :
3440
3444
continue
3441
3445
self .assertIs (filtered .name , tarinfo .name )
3442
3446
self .assertIs (filtered .type , tarinfo .type )
3443
3447
3444
- def test_default_filter_warns_not (self ):
3445
- """Ensure the default filter does not warn (like in 3.12)"""
3448
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
def test_filter_unencodable(self):
    # Both the 'tar' and 'data' filters must raise, rather than pass
    # through, a member name that cannot be used as a filesystem path.

    # Sanity check using a valid path.
    tarinfo = tarfile.TarInfo(os_helper.TESTFN)
    filtered = tarfile.tar_filter(tarinfo, '')
    self.assertIs(filtered.name, tarinfo.name)
    filtered = tarfile.data_filter(tarinfo, '')
    self.assertIs(filtered.name, tarinfo.name)

    # A name with an embedded NUL is rejected with ValueError.
    tarinfo = tarfile.TarInfo('test\x00')
    self.assertRaises(ValueError, tarfile.tar_filter, tarinfo, '')
    self.assertRaises(ValueError, tarfile.data_filter, tarinfo, '')
    # A lone surrogate cannot be encoded and surfaces as
    # UnicodeEncodeError.
    tarinfo = tarfile.TarInfo('\ud800')
    self.assertRaises(UnicodeEncodeError, tarfile.tar_filter, tarinfo, '')
    self.assertRaises(UnicodeEncodeError, tarfile.data_filter, tarinfo, '')
3463
+
3464
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
def test_extract_unencodable(self):
    # Create a member with name \xed\xa0\x80 which is UTF-8 encoded
    # lone surrogate \ud800.
    with ArchiveMaker(encoding='ascii', errors='surrogateescape') as arc:
        # surrogateescape turns each \udcXX back into the raw byte \xXX,
        # so the three code points must be adjacent with nothing between.
        arc.add('\udced\udca0\udc80')
    with os_helper.temp_cwd():
        # errorlevel=1: the encoding failure must propagate to the
        # caller and nothing may be written to the working directory.
        with arc.open(encoding='utf-8', errors='surrogatepass',
                      errorlevel=1) as tar:
            self.assertEqual(tar.getnames(), ['\ud800'])
            with self.assertRaises(UnicodeEncodeError):
                tar.extractall()
        self.assertEqual(os.listdir(), [])

        # errorlevel=0 with debug=1: the failure is reported on stderr
        # instead of being raised; still nothing is extracted.
        with arc.open(encoding='utf-8', errors='surrogatepass',
                      errorlevel=0, debug=1) as tar:
            with support.captured_stderr() as stderr:
                tar.extractall()
        self.assertEqual(os.listdir(), [])
        # The trailing space is part of the expected text: the exception
        # name is expected to be followed by further message text.
        self.assertIn('tarfile: UnicodeEncodeError ', stderr.getvalue())
3484
+
3485
+ def test_default_filter_warns (self ):
3486
+ """Ensure the default filter warns"""
3446
3487
with ArchiveMaker () as arc :
3447
3488
arc .add ('foo' )
3448
3489
# Replicate warnings_helper.check_no_warnings
0 commit comments