17
17
PossibleDataLossError ,
18
18
)
19
19
20
+ import pandas as pd
20
21
from pandas import (
21
22
DataFrame ,
22
23
HDFStore ,
35
36
from pandas .io import pytables
36
37
from pandas .io .pytables import Term
37
38
38
- pytestmark = [
39
- pytest .mark .single_cpu ,
40
- pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False ),
41
- ]
42
-
43
39
44
40
@pytest .mark .parametrize ("mode" , ["r" , "r+" , "a" , "w" ])
45
- def test_mode (setup_path , tmp_path , mode ):
41
+ def test_mode (setup_path , tmp_path , mode , using_infer_string ):
46
42
df = DataFrame (
47
43
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
48
44
columns = Index (list ("ABCD" ), dtype = object ),
@@ -91,10 +87,14 @@ def test_mode(setup_path, tmp_path, mode):
91
87
read_hdf (path , "df" , mode = mode )
92
88
else :
93
89
result = read_hdf (path , "df" , mode = mode )
90
+ if using_infer_string :
91
+ df .columns = df .columns .astype (
92
+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
93
+ )
94
94
tm .assert_frame_equal (result , df )
95
95
96
96
97
- def test_default_mode (tmp_path , setup_path ):
97
+ def test_default_mode (tmp_path , setup_path , using_infer_string ):
98
98
# read_hdf uses default mode
99
99
df = DataFrame (
100
100
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
@@ -104,6 +104,10 @@ def test_default_mode(tmp_path, setup_path):
104
104
path = tmp_path / setup_path
105
105
df .to_hdf (path , key = "df" , mode = "w" )
106
106
result = read_hdf (path , "df" )
107
+ if using_infer_string :
108
+ df .columns = df .columns .astype (
109
+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
110
+ )
107
111
tm .assert_frame_equal (result , df )
108
112
109
113
@@ -163,7 +167,7 @@ def test_reopen_handle(tmp_path, setup_path):
163
167
assert not store .is_open
164
168
165
169
166
- def test_open_args (setup_path ):
170
+ def test_open_args (setup_path , using_infer_string ):
167
171
with tm .ensure_clean (setup_path ) as path :
168
172
df = DataFrame (
169
173
1.1 * np .arange (120 ).reshape ((30 , 4 )),
@@ -178,8 +182,17 @@ def test_open_args(setup_path):
178
182
store ["df" ] = df
179
183
store .append ("df2" , df )
180
184
181
- tm .assert_frame_equal (store ["df" ], df )
182
- tm .assert_frame_equal (store ["df2" ], df )
185
+ expected = df .copy ()
186
+ if using_infer_string :
187
+ expected .index = expected .index .astype (
188
+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
189
+ )
190
+ expected .columns = expected .columns .astype (
191
+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
192
+ )
193
+
194
+ tm .assert_frame_equal (store ["df" ], expected )
195
+ tm .assert_frame_equal (store ["df2" ], expected )
183
196
184
197
store .close ()
185
198
@@ -194,7 +207,7 @@ def test_flush(setup_path):
194
207
store .flush (fsync = True )
195
208
196
209
197
- def test_complibs_default_settings (tmp_path , setup_path ):
210
+ def test_complibs_default_settings (tmp_path , setup_path , using_infer_string ):
198
211
# GH15943
199
212
df = DataFrame (
200
213
1.1 * np .arange (120 ).reshape ((30 , 4 )),
@@ -207,7 +220,15 @@ def test_complibs_default_settings(tmp_path, setup_path):
207
220
tmpfile = tmp_path / setup_path
208
221
df .to_hdf (tmpfile , key = "df" , complevel = 9 )
209
222
result = read_hdf (tmpfile , "df" )
210
- tm .assert_frame_equal (result , df )
223
+ expected = df .copy ()
224
+ if using_infer_string :
225
+ expected .index = expected .index .astype (
226
+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
227
+ )
228
+ expected .columns = expected .columns .astype (
229
+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
230
+ )
231
+ tm .assert_frame_equal (result , expected )
211
232
212
233
with tables .open_file (tmpfile , mode = "r" ) as h5file :
213
234
for node in h5file .walk_nodes (where = "/df" , classname = "Leaf" ):
@@ -218,7 +239,15 @@ def test_complibs_default_settings(tmp_path, setup_path):
218
239
tmpfile = tmp_path / setup_path
219
240
df .to_hdf (tmpfile , key = "df" , complib = "zlib" )
220
241
result = read_hdf (tmpfile , "df" )
221
- tm .assert_frame_equal (result , df )
242
+ expected = df .copy ()
243
+ if using_infer_string :
244
+ expected .index = expected .index .astype (
245
+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
246
+ )
247
+ expected .columns = expected .columns .astype (
248
+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
249
+ )
250
+ tm .assert_frame_equal (result , expected )
222
251
223
252
with tables .open_file (tmpfile , mode = "r" ) as h5file :
224
253
for node in h5file .walk_nodes (where = "/df" , classname = "Leaf" ):
@@ -229,7 +258,15 @@ def test_complibs_default_settings(tmp_path, setup_path):
229
258
tmpfile = tmp_path / setup_path
230
259
df .to_hdf (tmpfile , key = "df" )
231
260
result = read_hdf (tmpfile , "df" )
232
- tm .assert_frame_equal (result , df )
261
+ expected = df .copy ()
262
+ if using_infer_string :
263
+ expected .index = expected .index .astype (
264
+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
265
+ )
266
+ expected .columns = expected .columns .astype (
267
+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
268
+ )
269
+ tm .assert_frame_equal (result , expected )
233
270
234
271
with tables .open_file (tmpfile , mode = "r" ) as h5file :
235
272
for node in h5file .walk_nodes (where = "/df" , classname = "Leaf" ):
@@ -308,6 +345,7 @@ def test_complibs(tmp_path, lvl, lib, request):
308
345
assert node .filters .complib == lib
309
346
310
347
348
+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
311
349
@pytest .mark .skipif (
312
350
not is_platform_little_endian (), reason = "reason platform is not little endian"
313
351
)
@@ -325,6 +363,7 @@ def test_encoding(setup_path):
325
363
tm .assert_frame_equal (result , expected )
326
364
327
365
366
+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
328
367
@pytest .mark .parametrize (
329
368
"val" ,
330
369
[
@@ -340,7 +379,7 @@ def test_encoding(setup_path):
340
379
],
341
380
)
342
381
@pytest .mark .parametrize ("dtype" , ["category" , object ])
343
- def test_latin_encoding (tmp_path , setup_path , dtype , val ):
382
+ def test_latin_encoding (tmp_path , setup_path , dtype , val , using_infer_string ):
344
383
enc = "latin-1"
345
384
nan_rep = ""
346
385
key = "data"
0 commit comments