1717 PossibleDataLossError ,
1818)
1919
20+ import pandas as pd
2021from pandas import (
2122 DataFrame ,
2223 HDFStore ,
3536from pandas .io import pytables
3637from pandas .io .pytables import Term
3738
38- pytestmark = [
39- pytest .mark .single_cpu ,
40- pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False ),
41- ]
42-
4339
4440@pytest .mark .parametrize ("mode" , ["r" , "r+" , "a" , "w" ])
45- def test_mode (setup_path , tmp_path , mode ):
41+ def test_mode (setup_path , tmp_path , mode , using_infer_string ):
4642 df = DataFrame (
4743 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
4844 columns = Index (list ("ABCD" ), dtype = object ),
@@ -91,10 +87,14 @@ def test_mode(setup_path, tmp_path, mode):
9187 read_hdf (path , "df" , mode = mode )
9288 else :
9389 result = read_hdf (path , "df" , mode = mode )
90+ if using_infer_string :
91+ df .columns = df .columns .astype (
92+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
93+ )
9494 tm .assert_frame_equal (result , df )
9595
9696
97- def test_default_mode (tmp_path , setup_path ):
97+ def test_default_mode (tmp_path , setup_path , using_infer_string ):
9898 # read_hdf uses default mode
9999 df = DataFrame (
100100 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
@@ -104,6 +104,10 @@ def test_default_mode(tmp_path, setup_path):
104104 path = tmp_path / setup_path
105105 df .to_hdf (path , key = "df" , mode = "w" )
106106 result = read_hdf (path , "df" )
107+ if using_infer_string :
108+ df .columns = df .columns .astype (
109+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
110+ )
107111 tm .assert_frame_equal (result , df )
108112
109113
@@ -163,7 +167,7 @@ def test_reopen_handle(tmp_path, setup_path):
163167 assert not store .is_open
164168
165169
166- def test_open_args (setup_path ):
170+ def test_open_args (setup_path , using_infer_string ):
167171 with tm .ensure_clean (setup_path ) as path :
168172 df = DataFrame (
169173 1.1 * np .arange (120 ).reshape ((30 , 4 )),
@@ -178,8 +182,17 @@ def test_open_args(setup_path):
178182 store ["df" ] = df
179183 store .append ("df2" , df )
180184
181- tm .assert_frame_equal (store ["df" ], df )
182- tm .assert_frame_equal (store ["df2" ], df )
185+ expected = df .copy ()
186+ if using_infer_string :
187+ expected .index = expected .index .astype (
188+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
189+ )
190+ expected .columns = expected .columns .astype (
191+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
192+ )
193+
194+ tm .assert_frame_equal (store ["df" ], expected )
195+ tm .assert_frame_equal (store ["df2" ], expected )
183196
184197 store .close ()
185198
@@ -194,7 +207,7 @@ def test_flush(setup_path):
194207 store .flush (fsync = True )
195208
196209
197- def test_complibs_default_settings (tmp_path , setup_path ):
210+ def test_complibs_default_settings (tmp_path , setup_path , using_infer_string ):
198211 # GH15943
199212 df = DataFrame (
200213 1.1 * np .arange (120 ).reshape ((30 , 4 )),
@@ -207,7 +220,15 @@ def test_complibs_default_settings(tmp_path, setup_path):
207220 tmpfile = tmp_path / setup_path
208221 df .to_hdf (tmpfile , key = "df" , complevel = 9 )
209222 result = read_hdf (tmpfile , "df" )
210- tm .assert_frame_equal (result , df )
223+ expected = df .copy ()
224+ if using_infer_string :
225+ expected .index = expected .index .astype (
226+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
227+ )
228+ expected .columns = expected .columns .astype (
229+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
230+ )
231+ tm .assert_frame_equal (result , expected )
211232
212233 with tables .open_file (tmpfile , mode = "r" ) as h5file :
213234 for node in h5file .walk_nodes (where = "/df" , classname = "Leaf" ):
@@ -218,7 +239,15 @@ def test_complibs_default_settings(tmp_path, setup_path):
218239 tmpfile = tmp_path / setup_path
219240 df .to_hdf (tmpfile , key = "df" , complib = "zlib" )
220241 result = read_hdf (tmpfile , "df" )
221- tm .assert_frame_equal (result , df )
242+ expected = df .copy ()
243+ if using_infer_string :
244+ expected .index = expected .index .astype (
245+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
246+ )
247+ expected .columns = expected .columns .astype (
248+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
249+ )
250+ tm .assert_frame_equal (result , expected )
222251
223252 with tables .open_file (tmpfile , mode = "r" ) as h5file :
224253 for node in h5file .walk_nodes (where = "/df" , classname = "Leaf" ):
@@ -229,7 +258,15 @@ def test_complibs_default_settings(tmp_path, setup_path):
229258 tmpfile = tmp_path / setup_path
230259 df .to_hdf (tmpfile , key = "df" )
231260 result = read_hdf (tmpfile , "df" )
232- tm .assert_frame_equal (result , df )
261+ expected = df .copy ()
262+ if using_infer_string :
263+ expected .index = expected .index .astype (
264+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
265+ )
266+ expected .columns = expected .columns .astype (
267+ pd .StringDtype (storage = "pyarrow" , na_value = np .nan )
268+ )
269+ tm .assert_frame_equal (result , expected )
233270
234271 with tables .open_file (tmpfile , mode = "r" ) as h5file :
235272 for node in h5file .walk_nodes (where = "/df" , classname = "Leaf" ):
@@ -308,6 +345,7 @@ def test_complibs(tmp_path, lvl, lib, request):
308345 assert node .filters .complib == lib
309346
310347
348+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
311349@pytest .mark .skipif (
312350 not is_platform_little_endian (), reason = "reason platform is not little endian"
313351)
@@ -325,6 +363,7 @@ def test_encoding(setup_path):
325363 tm .assert_frame_equal (result , expected )
326364
327365
366+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
328367@pytest .mark .parametrize (
329368 "val" ,
330369 [
@@ -340,7 +379,7 @@ def test_encoding(setup_path):
340379 ],
341380)
342381@pytest .mark .parametrize ("dtype" , ["category" , object ])
343- def test_latin_encoding (tmp_path , setup_path , dtype , val ):
382+ def test_latin_encoding (tmp_path , setup_path , dtype , val , using_infer_string ):
344383 enc = "latin-1"
345384 nan_rep = ""
346385 key = "data"
0 commit comments