4
4
import numpy as np
5
5
import pytest
6
6
7
+ from pandas ._config import using_string_dtype
8
+
7
9
from pandas ._libs .tslibs import Timestamp
8
10
from pandas .compat import is_platform_windows
9
11
@@ -66,6 +68,7 @@ def roundtrip(key, obj, **kwargs):
66
68
tm .assert_frame_equal (result , expected )
67
69
68
70
71
+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
69
72
def test_long_strings (setup_path ):
70
73
# GH6166
71
74
data = ["a" * 50 ] * 10
@@ -206,6 +209,7 @@ def test_put_integer(setup_path):
206
209
_check_roundtrip (df , tm .assert_frame_equal , setup_path )
207
210
208
211
212
+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
209
213
def test_table_values_dtypes_roundtrip (setup_path ):
210
214
with ensure_clean_store (setup_path ) as store :
211
215
df1 = DataFrame ({"a" : [1 , 2 , 3 ]}, dtype = "f8" )
@@ -375,7 +379,7 @@ def test_timeseries_preepoch(setup_path, request):
375
379
@pytest .mark .parametrize (
376
380
"compression" , [False , pytest .param (True , marks = td .skip_if_windows )]
377
381
)
378
- def test_frame (compression , setup_path ):
382
+ def test_frame (compression , setup_path , using_infer_string ):
379
383
df = DataFrame (
380
384
1.1 * np .arange (120 ).reshape ((30 , 4 )),
381
385
columns = Index (list ("ABCD" ), dtype = object ),
@@ -386,20 +390,40 @@ def test_frame(compression, setup_path):
386
390
df .iloc [0 , 0 ] = np .nan
387
391
df .iloc [5 , 3 ] = np .nan
388
392
393
+ expected = df .copy ()
394
+ if using_infer_string :
395
+ expected .index = expected .index .astype ("str" )
396
+ expected .columns = expected .columns .astype ("str" )
397
+
389
398
_check_roundtrip_table (
390
- df , tm .assert_frame_equal , path = setup_path , compression = compression
399
+ df ,
400
+ tm .assert_frame_equal ,
401
+ path = setup_path ,
402
+ compression = compression ,
403
+ expected = expected ,
391
404
)
392
405
_check_roundtrip (
393
- df , tm .assert_frame_equal , path = setup_path , compression = compression
406
+ df ,
407
+ tm .assert_frame_equal ,
408
+ path = setup_path ,
409
+ compression = compression ,
410
+ expected = expected ,
394
411
)
395
412
396
413
tdf = DataFrame (
397
414
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
398
415
columns = Index (list ("ABCD" ), dtype = object ),
399
416
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
400
417
)
418
+ expected = tdf .copy ()
419
+ if using_infer_string :
420
+ expected .columns = expected .columns .astype ("str" )
401
421
_check_roundtrip (
402
- tdf , tm .assert_frame_equal , path = setup_path , compression = compression
422
+ tdf ,
423
+ tm .assert_frame_equal ,
424
+ path = setup_path ,
425
+ compression = compression ,
426
+ expected = expected ,
403
427
)
404
428
405
429
with ensure_clean_store (setup_path ) as store :
@@ -410,7 +434,10 @@ def test_frame(compression, setup_path):
410
434
assert recons ._mgr .is_consolidated ()
411
435
412
436
# empty
413
- _check_roundtrip (df [:0 ], tm .assert_frame_equal , path = setup_path )
437
+ expected = df [:0 ]
438
+ if using_infer_string :
439
+ expected .columns = expected .columns .astype ("str" )
440
+ _check_roundtrip (df [:0 ], tm .assert_frame_equal , path = setup_path , expected = expected )
414
441
415
442
416
443
def test_empty_series_frame (setup_path ):
@@ -442,9 +469,21 @@ def test_can_serialize_dates(setup_path):
442
469
_check_roundtrip (frame , tm .assert_frame_equal , path = setup_path )
443
470
444
471
445
- def test_store_hierarchical (setup_path , multiindex_dataframe_random_data ):
472
+ def test_store_hierarchical (
473
+ setup_path , multiindex_dataframe_random_data , using_infer_string
474
+ ):
446
475
frame = multiindex_dataframe_random_data
447
476
477
+ if using_infer_string :
478
+ msg = "Saving a MultiIndex with an extension dtype is not supported."
479
+ with pytest .raises (NotImplementedError , match = msg ):
480
+ _check_roundtrip (frame , tm .assert_frame_equal , path = setup_path )
481
+ with pytest .raises (NotImplementedError , match = msg ):
482
+ _check_roundtrip (frame .T , tm .assert_frame_equal , path = setup_path )
483
+ with pytest .raises (NotImplementedError , match = msg ):
484
+ _check_roundtrip (frame ["A" ], tm .assert_series_equal , path = setup_path )
485
+ return
486
+
448
487
_check_roundtrip (frame , tm .assert_frame_equal , path = setup_path )
449
488
_check_roundtrip (frame .T , tm .assert_frame_equal , path = setup_path )
450
489
_check_roundtrip (frame ["A" ], tm .assert_series_equal , path = setup_path )
@@ -459,7 +498,7 @@ def test_store_hierarchical(setup_path, multiindex_dataframe_random_data):
459
498
@pytest .mark .parametrize (
460
499
"compression" , [False , pytest .param (True , marks = td .skip_if_windows )]
461
500
)
462
- def test_store_mixed (compression , setup_path ):
501
+ def test_store_mixed (compression , setup_path , using_infer_string ):
463
502
def _make_one ():
464
503
df = DataFrame (
465
504
1.1 * np .arange (120 ).reshape ((30 , 4 )),
@@ -477,57 +516,91 @@ def _make_one():
477
516
df1 = _make_one ()
478
517
df2 = _make_one ()
479
518
480
- _check_roundtrip (df1 , tm .assert_frame_equal , path = setup_path )
481
- _check_roundtrip (df2 , tm .assert_frame_equal , path = setup_path )
519
+ expected = df1 .copy ()
520
+ if using_infer_string :
521
+ expected .index = expected .index .astype ("str" )
522
+ expected .columns = expected .columns .astype ("str" )
523
+ _check_roundtrip (df1 , tm .assert_frame_equal , path = setup_path , expected = expected )
524
+
525
+ expected = df2 .copy ()
526
+ if using_infer_string :
527
+ expected .index = expected .index .astype ("str" )
528
+ expected .columns = expected .columns .astype ("str" )
529
+ _check_roundtrip (df2 , tm .assert_frame_equal , path = setup_path , expected = expected )
482
530
483
531
with ensure_clean_store (setup_path ) as store :
484
532
store ["obj" ] = df1
485
- tm .assert_frame_equal (store ["obj" ], df1 )
533
+ expected = df1 .copy ()
534
+ if using_infer_string :
535
+ expected .index = expected .index .astype ("str" )
536
+ expected .columns = expected .columns .astype ("str" )
537
+ tm .assert_frame_equal (store ["obj" ], expected )
538
+
486
539
store ["obj" ] = df2
487
- tm .assert_frame_equal (store ["obj" ], df2 )
540
+ expected = df2 .copy ()
541
+ if using_infer_string :
542
+ expected .index = expected .index .astype ("str" )
543
+ expected .columns = expected .columns .astype ("str" )
544
+ tm .assert_frame_equal (store ["obj" ], expected )
488
545
489
546
# check that can store Series of all of these types
547
+ expected = df1 ["obj1" ]
548
+ if using_infer_string :
549
+ expected .index = expected .index .astype ("str" )
490
550
_check_roundtrip (
491
551
df1 ["obj1" ],
492
552
tm .assert_series_equal ,
493
553
path = setup_path ,
494
554
compression = compression ,
555
+ expected = expected ,
495
556
)
557
+ expected = df1 ["bool1" ]
558
+ if using_infer_string :
559
+ expected .index = expected .index .astype ("str" )
496
560
_check_roundtrip (
497
561
df1 ["bool1" ],
498
562
tm .assert_series_equal ,
499
563
path = setup_path ,
500
564
compression = compression ,
565
+ expected = expected ,
501
566
)
567
+ expected = df1 ["int1" ]
568
+ if using_infer_string :
569
+ expected .index = expected .index .astype ("str" )
502
570
_check_roundtrip (
503
571
df1 ["int1" ],
504
572
tm .assert_series_equal ,
505
573
path = setup_path ,
506
574
compression = compression ,
575
+ expected = expected ,
507
576
)
508
577
509
578
def _check_roundtrip(obj, comparator, path, compression=False, expected=None, **kwargs):
    """
    Write ``obj`` to a fresh store, read it back and compare the result.

    Parameters
    ----------
    obj : object
        Value assigned to the store under the key ``"obj"``.
    comparator : callable
        Called as ``comparator(retrieved, expected, **kwargs)``.
    path : object
        Forwarded to ``ensure_clean_store`` as the store location.
    compression : bool, default False
        When truthy, the store is opened with ``complib="blosc"``.
    expected : object, optional
        What the retrieved value is compared against; falls back to ``obj``
        when left as ``None``.
    **kwargs
        Extra keyword arguments handed to ``comparator``.
    """
    store_options = {"complib": "blosc"} if compression else {}
    # Callers pass ``expected`` when the stored value is not expected to come
    # back identical to what was written.
    target = obj if expected is None else expected

    with ensure_clean_store(path, "w", **store_options) as store:
        store["obj"] = obj
        roundtripped = store["obj"]
        comparator(roundtripped, target, **kwargs)
519
590
520
591
def _check_roundtrip_table(
    obj, comparator, path, compression=False, expected=None, **kwargs
):
    """
    Write ``obj`` in table format to a fresh store, read it back and compare.

    Parameters
    ----------
    obj : object
        Value written with ``store.put("obj", obj, format="table")``.
    comparator : callable
        Called as ``comparator(retrieved, expected, **kwargs)``.
    path : object
        Forwarded to ``ensure_clean_store`` as the store location.
    compression : bool, default False
        When truthy, the store is opened with ``complib="blosc"``.
    expected : object, optional
        What the retrieved value is compared against; falls back to ``obj``
        when left as ``None``.
    **kwargs
        Extra keyword arguments handed to ``comparator``. Accepted so this
        helper mirrors the signature of ``_check_roundtrip``; omitting them
        keeps the previous behaviour.
    """
    options = {}
    if compression:
        options["complib"] = "blosc"
    if expected is None:
        expected = obj

    with ensure_clean_store(path, "w", **options) as store:
        store.put("obj", obj, format="table")
        retrieved = store["obj"]

        comparator(retrieved, expected, **kwargs)
531
604
532
605
533
606
def test_unicode_index (setup_path ):
@@ -540,6 +613,7 @@ def test_unicode_index(setup_path):
540
613
_check_roundtrip (s , tm .assert_series_equal , path = setup_path )
541
614
542
615
616
+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
543
617
def test_unicode_longer_encoded (setup_path ):
544
618
# GH 11234
545
619
char = "\u0394 "
@@ -565,6 +639,7 @@ def test_store_datetime_mixed(setup_path):
565
639
_check_roundtrip (df , tm .assert_frame_equal , path = setup_path )
566
640
567
641
642
+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
568
643
def test_round_trip_equals (tmp_path , setup_path ):
569
644
# GH 9330
570
645
df = DataFrame ({"B" : [1 , 2 ], "A" : ["x" , "y" ]})
0 commit comments