@@ -1,6 +1,7 @@
 """ test parquet compat """
 import datetime
 from distutils.version import LooseVersion
+import locale
 import os
 from warnings import catch_warnings
 
@@ -129,7 +130,6 @@ def check_round_trip(
     read_kwargs=None,
     expected=None,
     check_names=True,
-    check_like=False,
     repeat=2,
 ):
     """Verify parquet serializer and deserializer produce the same results.
@@ -149,8 +149,6 @@ def check_round_trip(
         Expected deserialization result, otherwise will be equal to `df`
     check_names: list of str, optional
         Closed set of column names to be compared
-    check_like: bool, optional
-        If True, ignore the order of index & columns.
     repeat: int, optional
         How many times to repeat the test
     """
@@ -171,9 +169,7 @@ def compare(repeat):
             with catch_warnings(record=True):
                 actual = read_parquet(path, **read_kwargs)
 
-                tm.assert_frame_equal(
-                    expected, actual, check_names=check_names, check_like=check_like
-                )
+                tm.assert_frame_equal(expected, actual, check_names=check_names)
 
     if path is None:
         with tm.ensure_clean() as path:
@@ -489,37 +485,15 @@ def test_categorical(self, pa):
         expected = df.astype(object)
         check_round_trip(df, pa, expected=expected)
 
+    # GH#33077 2020-03-27
+    @pytest.mark.xfail(
+        locale.getlocale()[0] in ["zh_CN", "it_IT"],
+        reason="dateutil cannot parse e.g. '五, 27 3月 2020 21:45:38 GMT'",
+    )
     def test_s3_roundtrip(self, df_compat, s3_resource, pa):
         # GH #19134
         check_round_trip(df_compat, pa, path="s3://pandas-test/pyarrow.parquet")
 
-    @td.skip_if_no("s3fs")
-    @pytest.mark.parametrize("partition_col", [["A"], []])
-    def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
-        from pandas.io.s3 import get_fs as get_s3_fs
-
-        # GH #26388
-        # https://github.com/apache/arrow/blob/master/python/pyarrow/tests/test_parquet.py#L2716
-        # As per pyarrow partitioned columns become 'categorical' dtypes
-        # and are added to back of dataframe on read
-
-        expected_df = df_compat.copy()
-        if partition_col:
-            expected_df[partition_col] = expected_df[partition_col].astype("category")
-        check_round_trip(
-            df_compat,
-            pa,
-            expected=expected_df,
-            path="s3://pandas-test/parquet_dir",
-            write_kwargs={
-                "partition_cols": partition_col,
-                "compression": None,
-                "filesystem": get_s3_fs(),
-            },
-            check_like=True,
-            repeat=1,
-        )
-
     def test_partition_cols_supported(self, pa, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]
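Note (outside the diff): with `check_like` removed from `check_round_trip`, round-trip comparisons now require index and column order to match exactly. A minimal caller-side sketch of the workaround, using the public `pandas.testing` API; `expected_df` is a hypothetical stand-in for a frame whose columns come back reordered (e.g. after a partitioned read):

import pandas as pd

df = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
expected_df = df[["B", "A"]]  # same data, different column order

# Instead of check_like=True, reorder columns explicitly before the strict comparison:
pd.testing.assert_frame_equal(df, expected_df[df.columns])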