1919 pa_version_under11p0 ,
2020 pa_version_under13p0 ,
2121 pa_version_under15p0 ,
22+ pa_version_under19p0 ,
2223)
2324
2425import pandas as pd
@@ -261,8 +262,10 @@ def test_invalid_engine(df_compat):
261262 check_round_trip (df_compat , "foo" , "bar" )
262263
263264
264- def test_options_py (df_compat , pa ):
265+ def test_options_py (df_compat , pa , using_infer_string ):
265266 # use the set option
267+ if using_infer_string and not pa_version_under19p0 :
268+ df_compat .columns = df_compat .columns .astype ("str" )
266269
267270 with pd .option_context ("io.parquet.engine" , "pyarrow" ):
268271 check_round_trip (df_compat )
@@ -798,18 +801,21 @@ def test_unsupported_float16_cleanup(self, pa, path_type):
798801
799802 def test_categorical (self , pa ):
800803 # supported in >= 0.7.0
801- df = pd .DataFrame ()
802- df ["a" ] = pd .Categorical (list ("abcdef" ))
803-
804- # test for null, out-of-order values, and unobserved category
805- df ["b" ] = pd .Categorical (
806- ["bar" , "foo" , "foo" , "bar" , None , "bar" ],
807- dtype = pd .CategoricalDtype (["foo" , "bar" , "baz" ]),
808- )
809-
810- # test for ordered flag
811- df ["c" ] = pd .Categorical (
812- ["a" , "b" , "c" , "a" , "c" , "b" ], categories = ["b" , "c" , "d" ], ordered = True
804+ df = pd .DataFrame (
805+ {
806+ "a" : pd .Categorical (list ("abcdef" )),
807+ # test for null, out-of-order values, and unobserved category
808+ "b" : pd .Categorical (
809+ ["bar" , "foo" , "foo" , "bar" , None , "bar" ],
810+ dtype = pd .CategoricalDtype (["foo" , "bar" , "baz" ]),
811+ ),
812+ # test for ordered flag
813+ "c" : pd .Categorical (
814+ ["a" , "b" , "c" , "a" , "c" , "b" ],
815+ categories = ["b" , "c" , "d" ],
816+ ordered = True ,
817+ ),
818+ }
813819 )
814820
815821 check_round_trip (df , pa )
@@ -878,11 +884,13 @@ def test_s3_roundtrip_for_dir(
878884 repeat = 1 ,
879885 )
880886
881- def test_read_file_like_obj_support (self , df_compat ):
887+ def test_read_file_like_obj_support (self , df_compat , using_infer_string ):
882888 pytest .importorskip ("pyarrow" )
883889 buffer = BytesIO ()
884890 df_compat .to_parquet (buffer )
885891 df_from_buf = read_parquet (buffer )
892+ if using_infer_string and not pa_version_under19p0 :
893+ df_compat .columns = df_compat .columns .astype ("str" )
886894 tm .assert_frame_equal (df_compat , df_from_buf )
887895
888896 def test_expand_user (self , df_compat , monkeypatch ):
@@ -949,7 +957,7 @@ def test_additional_extension_arrays(self, pa, using_infer_string):
949957 "c" : pd .Series (["a" , None , "c" ], dtype = "string" ),
950958 }
951959 )
952- if using_infer_string :
960+ if using_infer_string and pa_version_under19p0 :
953961 check_round_trip (df , pa , expected = df .astype ({"c" : "str" }))
954962 else :
955963 check_round_trip (df , pa )
@@ -963,7 +971,10 @@ def test_pyarrow_backed_string_array(self, pa, string_storage, using_infer_strin
963971 df = pd .DataFrame ({"a" : pd .Series (["a" , None , "c" ], dtype = "string[pyarrow]" )})
964972 with pd .option_context ("string_storage" , string_storage ):
965973 if using_infer_string :
966- expected = df .astype ("str" )
974+ if pa_version_under19p0 :
975+ expected = df .astype ("str" )
976+ else :
977+ expected = df .astype (f"string[{ string_storage } ]" )
967978 expected .columns = expected .columns .astype ("str" )
968979 else :
969980 expected = df .astype (f"string[{ string_storage } ]" )
@@ -1128,17 +1139,24 @@ def test_df_attrs_persistence(self, tmp_path, pa):
11281139 new_df = read_parquet (path , engine = pa )
11291140 assert new_df .attrs == df .attrs
11301141
1131- def test_string_inference (self , tmp_path , pa ):
1142+ def test_string_inference (self , tmp_path , pa , using_infer_string ):
11321143 # GH#54431
11331144 path = tmp_path / "test_string_inference.p"
11341145 df = pd .DataFrame (data = {"a" : ["x" , "y" ]}, index = ["a" , "b" ])
1135- df .to_parquet (path , engine = "pyarrow" )
1146+ df .to_parquet (path , engine = pa )
11361147 with pd .option_context ("future.infer_string" , True ):
1137- result = read_parquet (path , engine = "pyarrow" )
1148+ result = read_parquet (path , engine = pa )
1149+ dtype = pd .StringDtype (na_value = np .nan )
11381150 expected = pd .DataFrame (
11391151 data = {"a" : ["x" , "y" ]},
1140- dtype = pd .StringDtype (na_value = np .nan ),
1141- index = pd .Index (["a" , "b" ], dtype = pd .StringDtype (na_value = np .nan )),
1152+ dtype = dtype ,
1153+ index = pd .Index (["a" , "b" ], dtype = dtype ),
1154+ columns = pd .Index (
1155+ ["a" ],
1156+ dtype = object
1157+ if pa_version_under19p0 and not using_infer_string
1158+ else dtype ,
1159+ ),
11421160 )
11431161 tm .assert_frame_equal (result , expected )
11441162
@@ -1151,7 +1169,10 @@ def test_roundtrip_decimal(self, tmp_path, pa):
11511169 df = pd .DataFrame ({"a" : [Decimal ("123.00" )]}, dtype = "string[pyarrow]" )
11521170 df .to_parquet (path , schema = pa .schema ([("a" , pa .decimal128 (5 ))]))
11531171 result = read_parquet (path )
1154- expected = pd .DataFrame ({"a" : ["123" ]}, dtype = "string[python]" )
1172+ if pa_version_under19p0 :
1173+ expected = pd .DataFrame ({"a" : ["123" ]}, dtype = "string[python]" )
1174+ else :
1175+ expected = pd .DataFrame ({"a" : [Decimal ("123.00" )]}, dtype = "object" )
11551176 tm .assert_frame_equal (result , expected )
11561177
11571178 def test_infer_string_large_string_type (self , tmp_path , pa ):
0 commit comments