5
5
import numpy as np
6
6
import pytest
7
7
8
- from pandas ._config import using_string_dtype
9
-
10
8
from pandas ._libs .tslibs import Timestamp
11
9
from pandas .compat import PY312
12
10
27
25
28
26
pytestmark = [
29
27
pytest .mark .single_cpu ,
30
- pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False ),
31
28
]
32
29
33
30
tables = pytest .importorskip ("tables" )
@@ -40,7 +37,7 @@ def test_append(setup_path):
40
37
# tables.NaturalNameWarning):
41
38
df = DataFrame (
42
39
np .random .default_rng (2 ).standard_normal ((20 , 4 )),
43
- columns = Index (list ("ABCD" ), dtype = object ),
40
+ columns = Index (list ("ABCD" )),
44
41
index = date_range ("2000-01-01" , periods = 20 , freq = "B" ),
45
42
)
46
43
_maybe_remove (store , "df1" )
@@ -203,7 +200,7 @@ def test_append_some_nans(setup_path):
203
200
tm .assert_frame_equal (store ["df3" ], df3 , check_index_type = True )
204
201
205
202
206
- def test_append_all_nans (setup_path ):
203
+ def test_append_all_nans (setup_path , using_infer_string ):
207
204
with ensure_clean_store (setup_path ) as store :
208
205
df = DataFrame (
209
206
{
@@ -255,7 +252,13 @@ def test_append_all_nans(setup_path):
255
252
_maybe_remove (store , "df" )
256
253
store .append ("df" , df [:10 ], dropna = True )
257
254
store .append ("df" , df [10 :], dropna = True )
258
- tm .assert_frame_equal (store ["df" ], df , check_index_type = True )
255
+ result = store ["df" ]
256
+ expected = df
257
+ if using_infer_string :
258
+ # TODO: Test is incorrect when not using_infer_string.
259
+ # Should take the last 4 rows unconditionally.
260
+ expected = expected [16 :]
261
+ tm .assert_frame_equal (result , expected , check_index_type = True )
259
262
260
263
_maybe_remove (store , "df2" )
261
264
store .append ("df2" , df [:10 ], dropna = False )
@@ -294,7 +297,7 @@ def test_append_frame_column_oriented(setup_path, request):
294
297
# column oriented
295
298
df = DataFrame (
296
299
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
297
- columns = Index (list ("ABCD" ), dtype = object ),
300
+ columns = Index (list ("ABCD" )),
298
301
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
299
302
)
300
303
df .index = df .index ._with_freq (None ) # freq doesn't round-trip
@@ -426,7 +429,7 @@ def check_col(key, name, size):
426
429
{
427
430
"A" : [0.0 , 1.0 , 2.0 , 3.0 , 4.0 ],
428
431
"B" : [0.0 , 1.0 , 0.0 , 1.0 , 0.0 ],
429
- "C" : Index (["foo1" , "foo2" , "foo3" , "foo4" , "foo5" ], dtype = object ),
432
+ "C" : Index (["foo1" , "foo2" , "foo3" , "foo4" , "foo5" ]),
430
433
"D" : date_range ("20130101" , periods = 5 ),
431
434
}
432
435
).set_index ("C" )
@@ -453,7 +456,7 @@ def check_col(key, name, size):
453
456
_maybe_remove (store , "df" )
454
457
df = DataFrame (
455
458
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
456
- columns = Index (list ("ABCD" ), dtype = object ),
459
+ columns = Index (list ("ABCD" )),
457
460
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
458
461
)
459
462
df ["string" ] = "foo"
@@ -517,7 +520,7 @@ def test_append_with_data_columns(setup_path):
517
520
with ensure_clean_store (setup_path ) as store :
518
521
df = DataFrame (
519
522
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
520
- columns = Index (list ("ABCD" ), dtype = object ),
523
+ columns = Index (list ("ABCD" )),
521
524
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
522
525
)
523
526
df .iloc [0 , df .columns .get_loc ("B" )] = 1.0
@@ -693,8 +696,12 @@ def test_append_misc(setup_path):
693
696
with ensure_clean_store (setup_path ) as store :
694
697
df = DataFrame (
695
698
1.1 * np .arange (120 ).reshape ((30 , 4 )),
696
- columns = Index (list ("ABCD" ), dtype = object ),
697
- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
699
+ columns = Index (
700
+ list ("ABCD" ),
701
+ ),
702
+ index = Index (
703
+ [f"i-{ i } " for i in range (30 )],
704
+ ),
698
705
)
699
706
store .append ("df" , df , chunksize = 1 )
700
707
result = store .select ("df" )
@@ -710,8 +717,12 @@ def test_append_misc_chunksize(setup_path, chunksize):
710
717
# more chunksize in append tests
711
718
df = DataFrame (
712
719
1.1 * np .arange (120 ).reshape ((30 , 4 )),
713
- columns = Index (list ("ABCD" ), dtype = object ),
714
- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
720
+ columns = Index (
721
+ list ("ABCD" ),
722
+ ),
723
+ index = Index (
724
+ [f"i-{ i } " for i in range (30 )],
725
+ ),
715
726
)
716
727
df ["string" ] = "foo"
717
728
df ["float322" ] = 1.0
@@ -747,15 +758,19 @@ def test_append_misc_empty_frame(setup_path):
747
758
tm .assert_frame_equal (store .select ("df2" ), df )
748
759
749
760
750
- def test_append_raise (setup_path ):
761
+ def test_append_raise (setup_path , using_infer_string ):
751
762
with ensure_clean_store (setup_path ) as store :
752
763
# test append with invalid input to get good error messages
753
764
754
765
# list in column
755
766
df = DataFrame (
756
767
1.1 * np .arange (120 ).reshape ((30 , 4 )),
757
- columns = Index (list ("ABCD" ), dtype = object ),
758
- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
768
+ columns = Index (
769
+ list ("ABCD" ),
770
+ ),
771
+ index = Index (
772
+ [f"i-{ i } " for i in range (30 )],
773
+ ),
759
774
)
760
775
df ["invalid" ] = [["a" ]] * len (df )
761
776
assert df .dtypes ["invalid" ] == np .object_
@@ -775,8 +790,12 @@ def test_append_raise(setup_path):
775
790
# datetime with embedded nans as object
776
791
df = DataFrame (
777
792
1.1 * np .arange (120 ).reshape ((30 , 4 )),
778
- columns = Index (list ("ABCD" ), dtype = object ),
779
- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
793
+ columns = Index (
794
+ list ("ABCD" ),
795
+ ),
796
+ index = Index (
797
+ [f"i-{ i } " for i in range (30 )],
798
+ ),
780
799
)
781
800
s = Series (datetime .datetime (2001 , 1 , 2 ), index = df .index )
782
801
s = s .astype (object )
@@ -803,8 +822,12 @@ def test_append_raise(setup_path):
803
822
# appending an incompatible table
804
823
df = DataFrame (
805
824
1.1 * np .arange (120 ).reshape ((30 , 4 )),
806
- columns = Index (list ("ABCD" ), dtype = object ),
807
- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
825
+ columns = Index (
826
+ list ("ABCD" ),
827
+ ),
828
+ index = Index (
829
+ [f"i-{ i } " for i in range (30 )],
830
+ ),
808
831
)
809
832
store .append ("df" , df )
810
833
@@ -822,10 +845,11 @@ def test_append_raise(setup_path):
822
845
df ["foo" ] = Timestamp ("20130101" )
823
846
store .append ("df" , df )
824
847
df ["foo" ] = "bar"
848
+ shape = "(30,)" if using_infer_string else "(1, 30)"
825
849
msg = re .escape (
826
850
"invalid combination of [values_axes] on appending data "
827
851
"[name->values_block_1,cname->values_block_1,"
828
- "dtype->bytes24,kind->string,shape->(1, 30) ] "
852
+ f "dtype->bytes24,kind->string,shape->{ shape } ] "
829
853
"vs current table "
830
854
"[name->values_block_1,cname->values_block_1,"
831
855
"dtype->datetime64[s],kind->datetime64[s],shape->None]"
@@ -884,7 +908,9 @@ def test_append_with_timedelta(setup_path):
884
908
def test_append_to_multiple (setup_path ):
885
909
df1 = DataFrame (
886
910
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
887
- columns = Index (list ("ABCD" ), dtype = object ),
911
+ columns = Index (
912
+ list ("ABCD" ),
913
+ ),
888
914
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
889
915
)
890
916
df2 = df1 .copy ().rename (columns = "{}_2" .format )
@@ -921,12 +947,16 @@ def test_append_to_multiple(setup_path):
921
947
def test_append_to_multiple_dropna (setup_path ):
922
948
df1 = DataFrame (
923
949
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
924
- columns = Index (list ("ABCD" ), dtype = object ),
950
+ columns = Index (
951
+ list ("ABCD" ),
952
+ ),
925
953
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
926
954
)
927
955
df2 = DataFrame (
928
956
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
929
- columns = Index (list ("ABCD" ), dtype = object ),
957
+ columns = Index (
958
+ list ("ABCD" ),
959
+ ),
930
960
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
931
961
).rename (columns = "{}_2" .format )
932
962
df1 .iloc [1 , df1 .columns .get_indexer (["A" , "B" ])] = np .nan
@@ -946,7 +976,9 @@ def test_append_to_multiple_dropna(setup_path):
946
976
def test_append_to_multiple_dropna_false (setup_path ):
947
977
df1 = DataFrame (
948
978
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
949
- columns = Index (list ("ABCD" ), dtype = object ),
979
+ columns = Index (
980
+ list ("ABCD" ),
981
+ ),
950
982
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
951
983
)
952
984
df2 = df1 .copy ().rename (columns = "{}_2" .format )
0 commit comments