55import numpy as np
66import pytest
77
8- from pandas ._config import using_string_dtype
9-
108from pandas ._libs .tslibs import Timestamp
119from pandas .compat import PY312
1210
2523 ensure_clean_store ,
2624)
2725
28- pytestmark = [
29- pytest .mark .single_cpu ,
30- pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False ),
31- ]
26+ pytestmark = [pytest .mark .single_cpu ]
3227
3328tables = pytest .importorskip ("tables" )
3429
@@ -40,7 +35,7 @@ def test_append(setup_path):
4035 # tables.NaturalNameWarning):
4136 df = DataFrame (
4237 np .random .default_rng (2 ).standard_normal ((20 , 4 )),
43- columns = Index (list ("ABCD" ), dtype = object ),
38+ columns = Index (list ("ABCD" )),
4439 index = date_range ("2000-01-01" , periods = 20 , freq = "B" ),
4540 )
4641 _maybe_remove (store , "df1" )
@@ -203,7 +198,7 @@ def test_append_some_nans(setup_path):
203198 tm .assert_frame_equal (store ["df3" ], df3 , check_index_type = True )
204199
205200
206- def test_append_all_nans (setup_path ):
201+ def test_append_all_nans (setup_path , using_infer_string ):
207202 with ensure_clean_store (setup_path ) as store :
208203 df = DataFrame (
209204 {
@@ -255,7 +250,13 @@ def test_append_all_nans(setup_path):
255250 _maybe_remove (store , "df" )
256251 store .append ("df" , df [:10 ], dropna = True )
257252 store .append ("df" , df [10 :], dropna = True )
258- tm .assert_frame_equal (store ["df" ], df , check_index_type = True )
253+ result = store ["df" ]
254+ expected = df
255+ if using_infer_string :
256+ # TODO: Test is incorrect when not using_infer_string.
257+ # Should take the last 4 rows unconditionally.
258+ expected = expected [16 :]
259+ tm .assert_frame_equal (result , expected , check_index_type = True )
259260
260261 _maybe_remove (store , "df2" )
261262 store .append ("df2" , df [:10 ], dropna = False )
@@ -294,7 +295,7 @@ def test_append_frame_column_oriented(setup_path, request):
294295 # column oriented
295296 df = DataFrame (
296297 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
297- columns = Index (list ("ABCD" ), dtype = object ),
298+ columns = Index (list ("ABCD" )),
298299 index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
299300 )
300301 df .index = df .index ._with_freq (None ) # freq doesn't round-trip
@@ -426,7 +427,7 @@ def check_col(key, name, size):
426427 {
427428 "A" : [0.0 , 1.0 , 2.0 , 3.0 , 4.0 ],
428429 "B" : [0.0 , 1.0 , 0.0 , 1.0 , 0.0 ],
429- "C" : Index (["foo1" , "foo2" , "foo3" , "foo4" , "foo5" ], dtype = object ),
430+ "C" : Index (["foo1" , "foo2" , "foo3" , "foo4" , "foo5" ]),
430431 "D" : date_range ("20130101" , periods = 5 ),
431432 }
432433 ).set_index ("C" )
@@ -453,7 +454,7 @@ def check_col(key, name, size):
453454 _maybe_remove (store , "df" )
454455 df = DataFrame (
455456 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
456- columns = Index (list ("ABCD" ), dtype = object ),
457+ columns = Index (list ("ABCD" )),
457458 index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
458459 )
459460 df ["string" ] = "foo"
@@ -517,7 +518,7 @@ def test_append_with_data_columns(setup_path):
517518 with ensure_clean_store (setup_path ) as store :
518519 df = DataFrame (
519520 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
520- columns = Index (list ("ABCD" ), dtype = object ),
521+ columns = Index (list ("ABCD" )),
521522 index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
522523 )
523524 df .iloc [0 , df .columns .get_loc ("B" )] = 1.0
@@ -693,8 +694,8 @@ def test_append_misc(setup_path):
693694 with ensure_clean_store (setup_path ) as store :
694695 df = DataFrame (
695696 1.1 * np .arange (120 ).reshape ((30 , 4 )),
696- columns = Index (list ("ABCD" ), dtype = object ),
697- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
697+ columns = Index (list ("ABCD" )),
698+ index = Index ([f"i-{ i } " for i in range (30 )]),
698699 )
699700 store .append ("df" , df , chunksize = 1 )
700701 result = store .select ("df" )
@@ -710,8 +711,8 @@ def test_append_misc_chunksize(setup_path, chunksize):
710711 # more chunksize in append tests
711712 df = DataFrame (
712713 1.1 * np .arange (120 ).reshape ((30 , 4 )),
713- columns = Index (list ("ABCD" ), dtype = object ),
714- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
714+ columns = Index (list ("ABCD" )),
715+ index = Index ([f"i-{ i } " for i in range (30 )]),
715716 )
716717 df ["string" ] = "foo"
717718 df ["float322" ] = 1.0
@@ -747,15 +748,15 @@ def test_append_misc_empty_frame(setup_path):
747748 tm .assert_frame_equal (store .select ("df2" ), df )
748749
749750
750- def test_append_raise (setup_path ):
751+ def test_append_raise (setup_path , using_infer_string ):
751752 with ensure_clean_store (setup_path ) as store :
752753 # test append with invalid input to get good error messages
753754
754755 # list in column
755756 df = DataFrame (
756757 1.1 * np .arange (120 ).reshape ((30 , 4 )),
757- columns = Index (list ("ABCD" ), dtype = object ),
758- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
758+ columns = Index (list ("ABCD" )),
759+ index = Index ([f"i-{ i } " for i in range (30 )]),
759760 )
760761 df ["invalid" ] = [["a" ]] * len (df )
761762 assert df .dtypes ["invalid" ] == np .object_
@@ -775,8 +776,8 @@ def test_append_raise(setup_path):
775776 # datetime with embedded nans as object
776777 df = DataFrame (
777778 1.1 * np .arange (120 ).reshape ((30 , 4 )),
778- columns = Index (list ("ABCD" ), dtype = object ),
779- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
779+ columns = Index (list ("ABCD" )),
780+ index = Index ([f"i-{ i } " for i in range (30 )]),
780781 )
781782 s = Series (datetime .datetime (2001 , 1 , 2 ), index = df .index )
782783 s = s .astype (object )
@@ -803,8 +804,8 @@ def test_append_raise(setup_path):
803804 # appending an incompatible table
804805 df = DataFrame (
805806 1.1 * np .arange (120 ).reshape ((30 , 4 )),
806- columns = Index (list ("ABCD" ), dtype = object ),
807- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
807+ columns = Index (list ("ABCD" )),
808+ index = Index ([f"i-{ i } " for i in range (30 )]),
808809 )
809810 store .append ("df" , df )
810811
@@ -822,10 +823,11 @@ def test_append_raise(setup_path):
822823 df ["foo" ] = Timestamp ("20130101" )
823824 store .append ("df" , df )
824825 df ["foo" ] = "bar"
826+ shape = "(30,)" if using_infer_string else "(1, 30)"
825827 msg = re .escape (
826828 "invalid combination of [values_axes] on appending data "
827829 "[name->values_block_1,cname->values_block_1,"
828- "dtype->bytes24,kind->string,shape->(1, 30) ] "
830+ f "dtype->bytes24,kind->string,shape->{ shape } ] "
829831 "vs current table "
830832 "[name->values_block_1,cname->values_block_1,"
831833 "dtype->datetime64[s],kind->datetime64[s],shape->None]"
@@ -884,7 +886,7 @@ def test_append_with_timedelta(setup_path):
884886def test_append_to_multiple (setup_path ):
885887 df1 = DataFrame (
886888 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
887- columns = Index (list ("ABCD" ), dtype = object ),
889+ columns = Index (list ("ABCD" )),
888890 index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
889891 )
890892 df2 = df1 .copy ().rename (columns = "{}_2" .format )
@@ -921,12 +923,12 @@ def test_append_to_multiple(setup_path):
921923def test_append_to_multiple_dropna (setup_path ):
922924 df1 = DataFrame (
923925 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
924- columns = Index (list ("ABCD" ), dtype = object ),
926+ columns = Index (list ("ABCD" )),
925927 index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
926928 )
927929 df2 = DataFrame (
928930 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
929- columns = Index (list ("ABCD" ), dtype = object ),
931+ columns = Index (list ("ABCD" )),
930932 index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
931933 ).rename (columns = "{}_2" .format )
932934 df1 .iloc [1 , df1 .columns .get_indexer (["A" , "B" ])] = np .nan
@@ -946,7 +948,7 @@ def test_append_to_multiple_dropna(setup_path):
946948def test_append_to_multiple_dropna_false (setup_path ):
947949 df1 = DataFrame (
948950 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
949- columns = Index (list ("ABCD" ), dtype = object ),
951+ columns = Index (list ("ABCD" )),
950952 index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
951953 )
952954 df2 = df1 .copy ().rename (columns = "{}_2" .format )
0 commit comments