@@ -3490,11 +3490,12 @@ class ArchiveMaker:
34903490 with t.open() as tar:
34913491 ... # `tar` is now a TarFile with 'filename' in it!
34923492 """
def __init__(self, **kwargs):
    """Create an empty in-memory archive builder.

    Any keyword arguments are remembered in ``self.tar_kwargs`` and are
    forwarded to the ``tarfile.TarFile`` constructor when the maker is
    entered as a context manager (for example ``encoding=`` / ``errors=``).
    """
    # The archive is assembled in memory; nothing touches the file system.
    self.bio = io.BytesIO()
    # Store a copy so later mutation of the attribute never aliases a
    # mapping owned by the caller.
    self.tar_kwargs = dict(kwargs)
34953496
def __enter__(self):
    """Open a write-mode ``TarFile`` over ``self.bio`` and return ``self``.

    The keyword arguments stored in ``self.tar_kwargs`` are passed through
    to the ``tarfile.TarFile`` constructor, so the caller controls options
    such as the header encoding of the archive being built.
    """
    self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio,
                                 **self.tar_kwargs)
    return self
34993500
35003501 def __exit__ (self , * exc ):
@@ -4073,7 +4074,10 @@ def test_tar_filter(self):
40734074 # that in the test archive.)
40744075 with tarfile .TarFile .open (tarname ) as tar :
40754076 for tarinfo in tar .getmembers ():
4076- filtered = tarfile .tar_filter (tarinfo , '' )
4077+ try :
4078+ filtered = tarfile .tar_filter (tarinfo , '' )
4079+ except UnicodeEncodeError :
4080+ continue
40774081 self .assertIs (filtered .name , tarinfo .name )
40784082 self .assertIs (filtered .type , tarinfo .type )
40794083
@@ -4084,11 +4088,48 @@ def test_data_filter(self):
40844088 for tarinfo in tar .getmembers ():
40854089 try :
40864090 filtered = tarfile .data_filter (tarinfo , '' )
4087- except tarfile .FilterError :
4091+ except ( tarfile .FilterError , UnicodeEncodeError ) :
40884092 continue
40894093 self .assertIs (filtered .name , tarinfo .name )
40904094 self .assertIs (filtered .type , tarinfo .type )
40914095
4096+ @unittest .skipIf (sys .platform == 'win32' , 'requires native bytes paths' )
4097+ def test_filter_unencodable (self ):
4098+ # Sanity check using a valid path.
4099+ tarinfo = tarfile .TarInfo (os_helper .TESTFN )
4100+ filtered = tarfile .tar_filter (tarinfo , '' )
4101+ self .assertIs (filtered .name , tarinfo .name )
4102+ filtered = tarfile .data_filter (tarinfo , '' )
4103+ self .assertIs (filtered .name , tarinfo .name )
4104+
4105+ tarinfo = tarfile .TarInfo ('test\x00 ' )
4106+ self .assertRaises (ValueError , tarfile .tar_filter , tarinfo , '' )
4107+ self .assertRaises (ValueError , tarfile .data_filter , tarinfo , '' )
4108+ tarinfo = tarfile .TarInfo ('\ud800 ' )
4109+ self .assertRaises (UnicodeEncodeError , tarfile .tar_filter , tarinfo , '' )
4110+ self .assertRaises (UnicodeEncodeError , tarfile .data_filter , tarinfo , '' )
4111+
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
def test_extract_unencodable(self):
    """Check extraction of a member whose name cannot be encoded for the OS.

    With ``errorlevel=1`` the UnicodeEncodeError must propagate; with
    ``errorlevel=0`` and ``debug=1`` it must only be reported on stderr.
    In both cases nothing may be created in the working directory.
    """
    # Create a member with name \xed\xa0\x80 which is UTF-8 encoded
    # lone surrogate \ud800.
    with ArchiveMaker(encoding='ascii', errors='surrogateescape') as arc:
        arc.add('\udced\udca0\udc80')
    with os_helper.temp_cwd():
        # Reading the raw bytes back with surrogatepass yields the lone
        # surrogate as the member name.
        with arc.open(encoding='utf-8', errors='surrogatepass',
                      errorlevel=1) as tar:
            self.assertEqual(tar.getnames(), ['\ud800'])
            with self.assertRaises(UnicodeEncodeError):
                tar.extractall()
            self.assertEqual(os.listdir(), [])

        with arc.open(encoding='utf-8', errors='surrogatepass',
                      errorlevel=0, debug=1) as tar:
            with support.captured_stderr() as stderr:
                tar.extractall()
            self.assertEqual(os.listdir(), [])
            self.assertIn('tarfile: UnicodeEncodeError ', stderr.getvalue())
4132+
40924133 def test_change_default_filter_on_instance (self ):
40934134 tar = tarfile .TarFile (tarname , 'r' )
40944135 def strict_filter (tarinfo , path ):
0 commit comments