Skip to content

Commit 335ead5

Browse files
committed
More xfails
1 parent d496125 commit 335ead5

File tree

1 file changed

+54
-15
lines changed

1 file changed

+54
-15
lines changed

pandas/tests/io/pytables/test_file_handling.py

Lines changed: 54 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
PossibleDataLossError,
1818
)
1919

20+
import pandas as pd
2021
from pandas import (
2122
DataFrame,
2223
HDFStore,
@@ -35,14 +36,9 @@
3536
from pandas.io import pytables
3637
from pandas.io.pytables import Term
3738

38-
pytestmark = [
39-
pytest.mark.single_cpu,
40-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
41-
]
42-
4339

4440
@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
45-
def test_mode(setup_path, tmp_path, mode):
41+
def test_mode(setup_path, tmp_path, mode, using_infer_string):
4642
df = DataFrame(
4743
np.random.default_rng(2).standard_normal((10, 4)),
4844
columns=Index(list("ABCD"), dtype=object),
@@ -91,10 +87,14 @@ def test_mode(setup_path, tmp_path, mode):
9187
read_hdf(path, "df", mode=mode)
9288
else:
9389
result = read_hdf(path, "df", mode=mode)
90+
if using_infer_string:
91+
df.columns = df.columns.astype(
92+
pd.StringDtype(storage="pyarrow", na_value=np.nan)
93+
)
9494
tm.assert_frame_equal(result, df)
9595

9696

97-
def test_default_mode(tmp_path, setup_path):
97+
def test_default_mode(tmp_path, setup_path, using_infer_string):
9898
# read_hdf uses default mode
9999
df = DataFrame(
100100
np.random.default_rng(2).standard_normal((10, 4)),
@@ -104,6 +104,10 @@ def test_default_mode(tmp_path, setup_path):
104104
path = tmp_path / setup_path
105105
df.to_hdf(path, key="df", mode="w")
106106
result = read_hdf(path, "df")
107+
if using_infer_string:
108+
df.columns = df.columns.astype(
109+
pd.StringDtype(storage="pyarrow", na_value=np.nan)
110+
)
107111
tm.assert_frame_equal(result, df)
108112

109113

@@ -163,7 +167,7 @@ def test_reopen_handle(tmp_path, setup_path):
163167
assert not store.is_open
164168

165169

166-
def test_open_args(setup_path):
170+
def test_open_args(setup_path, using_infer_string):
167171
with tm.ensure_clean(setup_path) as path:
168172
df = DataFrame(
169173
1.1 * np.arange(120).reshape((30, 4)),
@@ -178,8 +182,17 @@ def test_open_args(setup_path):
178182
store["df"] = df
179183
store.append("df2", df)
180184

181-
tm.assert_frame_equal(store["df"], df)
182-
tm.assert_frame_equal(store["df2"], df)
185+
expected = df.copy()
186+
if using_infer_string:
187+
expected.index = expected.index.astype(
188+
pd.StringDtype(storage="pyarrow", na_value=np.nan)
189+
)
190+
expected.columns = expected.columns.astype(
191+
pd.StringDtype(storage="pyarrow", na_value=np.nan)
192+
)
193+
194+
tm.assert_frame_equal(store["df"], expected)
195+
tm.assert_frame_equal(store["df2"], expected)
183196

184197
store.close()
185198

@@ -194,7 +207,7 @@ def test_flush(setup_path):
194207
store.flush(fsync=True)
195208

196209

197-
def test_complibs_default_settings(tmp_path, setup_path):
210+
def test_complibs_default_settings(tmp_path, setup_path, using_infer_string):
198211
# GH15943
199212
df = DataFrame(
200213
1.1 * np.arange(120).reshape((30, 4)),
@@ -207,7 +220,15 @@ def test_complibs_default_settings(tmp_path, setup_path):
207220
tmpfile = tmp_path / setup_path
208221
df.to_hdf(tmpfile, key="df", complevel=9)
209222
result = read_hdf(tmpfile, "df")
210-
tm.assert_frame_equal(result, df)
223+
expected = df.copy()
224+
if using_infer_string:
225+
expected.index = expected.index.astype(
226+
pd.StringDtype(storage="pyarrow", na_value=np.nan)
227+
)
228+
expected.columns = expected.columns.astype(
229+
pd.StringDtype(storage="pyarrow", na_value=np.nan)
230+
)
231+
tm.assert_frame_equal(result, expected)
211232

212233
with tables.open_file(tmpfile, mode="r") as h5file:
213234
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -218,7 +239,15 @@ def test_complibs_default_settings(tmp_path, setup_path):
218239
tmpfile = tmp_path / setup_path
219240
df.to_hdf(tmpfile, key="df", complib="zlib")
220241
result = read_hdf(tmpfile, "df")
221-
tm.assert_frame_equal(result, df)
242+
expected = df.copy()
243+
if using_infer_string:
244+
expected.index = expected.index.astype(
245+
pd.StringDtype(storage="pyarrow", na_value=np.nan)
246+
)
247+
expected.columns = expected.columns.astype(
248+
pd.StringDtype(storage="pyarrow", na_value=np.nan)
249+
)
250+
tm.assert_frame_equal(result, expected)
222251

223252
with tables.open_file(tmpfile, mode="r") as h5file:
224253
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -229,7 +258,15 @@ def test_complibs_default_settings(tmp_path, setup_path):
229258
tmpfile = tmp_path / setup_path
230259
df.to_hdf(tmpfile, key="df")
231260
result = read_hdf(tmpfile, "df")
232-
tm.assert_frame_equal(result, df)
261+
expected = df.copy()
262+
if using_infer_string:
263+
expected.index = expected.index.astype(
264+
pd.StringDtype(storage="pyarrow", na_value=np.nan)
265+
)
266+
expected.columns = expected.columns.astype(
267+
pd.StringDtype(storage="pyarrow", na_value=np.nan)
268+
)
269+
tm.assert_frame_equal(result, expected)
233270

234271
with tables.open_file(tmpfile, mode="r") as h5file:
235272
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -308,6 +345,7 @@ def test_complibs(tmp_path, lvl, lib, request):
308345
assert node.filters.complib == lib
309346

310347

348+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
311349
@pytest.mark.skipif(
312350
not is_platform_little_endian(), reason="reason platform is not little endian"
313351
)
@@ -325,6 +363,7 @@ def test_encoding(setup_path):
325363
tm.assert_frame_equal(result, expected)
326364

327365

366+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
328367
@pytest.mark.parametrize(
329368
"val",
330369
[
@@ -340,7 +379,7 @@ def test_encoding(setup_path):
340379
],
341380
)
342381
@pytest.mark.parametrize("dtype", ["category", object])
343-
def test_latin_encoding(tmp_path, setup_path, dtype, val):
382+
def test_latin_encoding(tmp_path, setup_path, dtype, val, using_infer_string):
344383
enc = "latin-1"
345384
nan_rep = ""
346385
key = "data"

0 commit comments

Comments (0)