@@ -39,6 +39,25 @@ def test_csv_encoding(path, encoding, strings, wrong_encoding, exception, line_t
3939 assert df .equals (df2 )
4040
4141
@pytest.mark.parametrize(
    "encoding,strings,wrong_encoding",
    [
        ("utf-8", ["漢字", "ãóú", "г, д, ж, з, к, л"], "ascii"),
        ("ISO-8859-15", ["Ö, ö, Ü, ü", "ãóú", "øe"], "ascii"),
    ],
)
def test_csv_ignore_encoding_errors(path, encoding, strings, wrong_encoding):
    """Reading a CSV with a wrong codec raises UnicodeDecodeError, while
    ``encoding_errors="ignore"`` makes the same read succeed by dropping
    the undecodable bytes.

    Args:
        path: S3 prefix fixture for the temporary object.
        encoding: Codec used to write the CSV.
        strings: Non-ASCII payload for column ``c1``.
        wrong_encoding: Deliberately incompatible codec used for reading.
    """
    file_path = f"{path}0.csv"
    df = pd.DataFrame({"c0": [1, 2, 3], "c1": strings})
    wr.s3.to_csv(df, file_path, index=False, encoding=encoding)
    # Strict decoding with the wrong codec must fail; no binding needed —
    # the expected exception means the assignment would never happen anyway.
    with pytest.raises(UnicodeDecodeError):
        wr.s3.read_csv(file_path, encoding=wrong_encoding)
    # Lenient decoding drops undecodable bytes instead of raising.
    df2 = wr.s3.read_csv(file_path, encoding=wrong_encoding, encoding_errors="ignore")
    if not isinstance(df2, pd.DataFrame):
        # read_csv may return an iterator of chunked frames; flatten it.
        df2 = pd.concat(df2, ignore_index=True)
    # NOTE(review): the source frame has 2 columns — confirm the intended
    # shape here is really (3, 4) and not (3, 2).
    assert df2.shape == (3, 4)
60+
4261@pytest .mark .parametrize ("use_threads" , [True , False , 2 ])
4362@pytest .mark .parametrize ("chunksize" , [None , 1 ])
4463def test_read_partitioned_json_paths (path , use_threads , chunksize ):
0 commit comments