5252 pytest .mark .filterwarnings (
5353 "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
5454 ),
55- pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False ),
5655]
5756
5857
6160 params = [
6261 pytest .param (
6362 "fastparquet" ,
64- marks = pytest .mark .skipif (
65- not _HAVE_FASTPARQUET ,
66- reason = "fastparquet is not installed" ,
67- ),
63+ marks = [
64+ pytest .mark .skipif (
65+ not _HAVE_FASTPARQUET ,
66+ reason = "fastparquet is not installed" ,
67+ ),
68+ pytest .mark .xfail (
69+ using_string_dtype (),
70+ reason = "TODO(infer_string) fastparquet" ,
71+ strict = False ,
72+ ),
73+ ],
6874 ),
6975 pytest .param (
7076 "pyarrow" ,
@@ -86,15 +92,22 @@ def pa():
8692
8793
8894@pytest .fixture
89- def fp ():
95+ def fp (request ):
9096 if not _HAVE_FASTPARQUET :
9197 pytest .skip ("fastparquet is not installed" )
98+ if using_string_dtype ():
99+ request .applymarker (
100+ pytest .mark .xfail (reason = "TODO(infer_string) fastparquet" , strict = False )
101+ )
92102 return "fastparquet"
93103
94104
95105@pytest .fixture
96106def df_compat ():
97- return pd .DataFrame ({"A" : [1 , 2 , 3 ], "B" : "foo" })
107+ # TODO(infer_string) should this give str columns?
108+ return pd .DataFrame (
109+ {"A" : [1 , 2 , 3 ], "B" : "foo" }, columns = pd .Index (["A" , "B" ], dtype = object )
110+ )
98111
99112
100113@pytest .fixture
@@ -366,16 +379,6 @@ def check_external_error_on_write(self, df, engine, exc):
366379 with tm .external_error_raised (exc ):
367380 to_parquet (df , path , engine , compression = None )
368381
369- @pytest .mark .network
370- @pytest .mark .single_cpu
371- def test_parquet_read_from_url (self , httpserver , datapath , df_compat , engine ):
372- if engine != "auto" :
373- pytest .importorskip (engine )
374- with open (datapath ("io" , "data" , "parquet" , "simple.parquet" ), mode = "rb" ) as f :
375- httpserver .serve_content (content = f .read ())
376- df = read_parquet (httpserver .url )
377- tm .assert_frame_equal (df , df_compat )
378-
379382
380383class TestBasic (Base ):
381384 def test_error (self , engine ):
@@ -673,6 +676,16 @@ def test_read_empty_array(self, pa, dtype):
673676 df , pa , read_kwargs = {"dtype_backend" : "numpy_nullable" }, expected = expected
674677 )
675678
679+ @pytest .mark .network
680+ @pytest .mark .single_cpu
681+ def test_parquet_read_from_url (self , httpserver , datapath , df_compat , engine ):
682+ if engine != "auto" :
683+ pytest .importorskip (engine )
684+ with open (datapath ("io" , "data" , "parquet" , "simple.parquet" ), mode = "rb" ) as f :
685+ httpserver .serve_content (content = f .read ())
686+ df = read_parquet (httpserver .url , engine = engine )
687+ tm .assert_frame_equal (df , df_compat )
688+
676689
677690class TestParquetPyArrow (Base ):
678691 @pytest .mark .xfail (reason = "datetime_with_nat unit doesn't round-trip" )
@@ -906,7 +919,7 @@ def test_write_with_schema(self, pa):
906919 out_df = df .astype (bool )
907920 check_round_trip (df , pa , write_kwargs = {"schema" : schema }, expected = out_df )
908921
909- def test_additional_extension_arrays (self , pa ):
922+ def test_additional_extension_arrays (self , pa , using_infer_string ):
910923 # test additional ExtensionArrays that are supported through the
911924 # __arrow_array__ protocol
912925 pytest .importorskip ("pyarrow" )
@@ -917,17 +930,25 @@ def test_additional_extension_arrays(self, pa):
917930 "c" : pd .Series (["a" , None , "c" ], dtype = "string" ),
918931 }
919932 )
920- check_round_trip (df , pa )
933+ if using_infer_string :
934+ check_round_trip (df , pa , expected = df .astype ({"c" : "str" }))
935+ else :
936+ check_round_trip (df , pa )
921937
922938 df = pd .DataFrame ({"a" : pd .Series ([1 , 2 , 3 , None ], dtype = "Int64" )})
923939 check_round_trip (df , pa )
924940
925- def test_pyarrow_backed_string_array (self , pa , string_storage ):
941+ def test_pyarrow_backed_string_array (self , pa , string_storage , using_infer_string ):
926942 # test ArrowStringArray supported through the __arrow_array__ protocol
927943 pytest .importorskip ("pyarrow" )
928944 df = pd .DataFrame ({"a" : pd .Series (["a" , None , "c" ], dtype = "string[pyarrow]" )})
929945 with pd .option_context ("string_storage" , string_storage ):
930- check_round_trip (df , pa , expected = df .astype (f"string[{ string_storage } ]" ))
946+ if using_infer_string :
947+ expected = df .astype ("str" )
948+ expected .columns = expected .columns .astype ("str" )
949+ else :
950+ expected = df .astype (f"string[{ string_storage } ]" )
951+ check_round_trip (df , pa , expected = expected )
931952
932953 def test_additional_extension_types (self , pa ):
933954 # test additional ExtensionArrays that are supported through the
0 commit comments