Skip to content

Commit 120b0bd

Browse files
committed
TST(string dtype): Resolve xfails in pytables
1 parent 8fe2720 commit 120b0bd

File tree

13 files changed

+143
-156
lines changed

13 files changed

+143
-156
lines changed

pandas/io/pytables.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3382,6 +3382,8 @@ def read(
33823382
if (
33833383
using_string_dtype()
33843384
and isinstance(values, np.ndarray)
3385+
# TODO: Should is_string_array return True for an empty object ndarray?
3386+
and values.size != 0
33853387
and is_string_array(values, skipna=True)
33863388
):
33873389
df = df.astype(StringDtype(na_value=np.nan))
@@ -5112,6 +5114,8 @@ def _maybe_convert_for_string_atom(
51125114
errors,
51135115
columns: list[str],
51145116
):
5117+
if isinstance(bvalues.dtype, StringDtype):
5118+
bvalues = bvalues.to_numpy()
51155119
if bvalues.dtype != object:
51165120
return bvalues
51175121

pandas/tests/io/pytables/test_append.py

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
from pandas._config import using_string_dtype
9-
108
from pandas._libs.tslibs import Timestamp
119
from pandas.compat import PY312
1210

@@ -25,10 +23,7 @@
2523
ensure_clean_store,
2624
)
2725

28-
pytestmark = [
29-
pytest.mark.single_cpu,
30-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
31-
]
26+
pytestmark = [pytest.mark.single_cpu]
3227

3328
tables = pytest.importorskip("tables")
3429

@@ -40,7 +35,7 @@ def test_append(setup_path):
4035
# tables.NaturalNameWarning):
4136
df = DataFrame(
4237
np.random.default_rng(2).standard_normal((20, 4)),
43-
columns=Index(list("ABCD"), dtype=object),
38+
columns=Index(list("ABCD")),
4439
index=date_range("2000-01-01", periods=20, freq="B"),
4540
)
4641
_maybe_remove(store, "df1")
@@ -203,7 +198,7 @@ def test_append_some_nans(setup_path):
203198
tm.assert_frame_equal(store["df3"], df3, check_index_type=True)
204199

205200

206-
def test_append_all_nans(setup_path):
201+
def test_append_all_nans(setup_path, using_infer_string):
207202
with ensure_clean_store(setup_path) as store:
208203
df = DataFrame(
209204
{
@@ -255,7 +250,13 @@ def test_append_all_nans(setup_path):
255250
_maybe_remove(store, "df")
256251
store.append("df", df[:10], dropna=True)
257252
store.append("df", df[10:], dropna=True)
258-
tm.assert_frame_equal(store["df"], df, check_index_type=True)
253+
result = store["df"]
254+
expected = df
255+
if using_infer_string:
256+
# TODO: Test is incorrect when not using_infer_string.
257+
# Should take the last 4 rows unconditionally.
258+
expected = expected[16:]
259+
tm.assert_frame_equal(result, expected, check_index_type=True)
259260

260261
_maybe_remove(store, "df2")
261262
store.append("df2", df[:10], dropna=False)
@@ -294,7 +295,7 @@ def test_append_frame_column_oriented(setup_path, request):
294295
# column oriented
295296
df = DataFrame(
296297
np.random.default_rng(2).standard_normal((10, 4)),
297-
columns=Index(list("ABCD"), dtype=object),
298+
columns=Index(list("ABCD")),
298299
index=date_range("2000-01-01", periods=10, freq="B"),
299300
)
300301
df.index = df.index._with_freq(None) # freq doesn't round-trip
@@ -426,7 +427,7 @@ def check_col(key, name, size):
426427
{
427428
"A": [0.0, 1.0, 2.0, 3.0, 4.0],
428429
"B": [0.0, 1.0, 0.0, 1.0, 0.0],
429-
"C": Index(["foo1", "foo2", "foo3", "foo4", "foo5"], dtype=object),
430+
"C": Index(["foo1", "foo2", "foo3", "foo4", "foo5"]),
430431
"D": date_range("20130101", periods=5),
431432
}
432433
).set_index("C")
@@ -453,7 +454,7 @@ def check_col(key, name, size):
453454
_maybe_remove(store, "df")
454455
df = DataFrame(
455456
np.random.default_rng(2).standard_normal((10, 4)),
456-
columns=Index(list("ABCD"), dtype=object),
457+
columns=Index(list("ABCD")),
457458
index=date_range("2000-01-01", periods=10, freq="B"),
458459
)
459460
df["string"] = "foo"
@@ -517,7 +518,7 @@ def test_append_with_data_columns(setup_path):
517518
with ensure_clean_store(setup_path) as store:
518519
df = DataFrame(
519520
np.random.default_rng(2).standard_normal((10, 4)),
520-
columns=Index(list("ABCD"), dtype=object),
521+
columns=Index(list("ABCD")),
521522
index=date_range("2000-01-01", periods=10, freq="B"),
522523
)
523524
df.iloc[0, df.columns.get_loc("B")] = 1.0
@@ -693,8 +694,8 @@ def test_append_misc(setup_path):
693694
with ensure_clean_store(setup_path) as store:
694695
df = DataFrame(
695696
1.1 * np.arange(120).reshape((30, 4)),
696-
columns=Index(list("ABCD"), dtype=object),
697-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
697+
columns=Index(list("ABCD")),
698+
index=Index([f"i-{i}" for i in range(30)]),
698699
)
699700
store.append("df", df, chunksize=1)
700701
result = store.select("df")
@@ -710,8 +711,8 @@ def test_append_misc_chunksize(setup_path, chunksize):
710711
# more chunksize in append tests
711712
df = DataFrame(
712713
1.1 * np.arange(120).reshape((30, 4)),
713-
columns=Index(list("ABCD"), dtype=object),
714-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
714+
columns=Index(list("ABCD")),
715+
index=Index([f"i-{i}" for i in range(30)]),
715716
)
716717
df["string"] = "foo"
717718
df["float322"] = 1.0
@@ -747,15 +748,15 @@ def test_append_misc_empty_frame(setup_path):
747748
tm.assert_frame_equal(store.select("df2"), df)
748749

749750

750-
def test_append_raise(setup_path):
751+
def test_append_raise(setup_path, using_infer_string):
751752
with ensure_clean_store(setup_path) as store:
752753
# test append with invalid input to get good error messages
753754

754755
# list in column
755756
df = DataFrame(
756757
1.1 * np.arange(120).reshape((30, 4)),
757-
columns=Index(list("ABCD"), dtype=object),
758-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
758+
columns=Index(list("ABCD")),
759+
index=Index([f"i-{i}" for i in range(30)]),
759760
)
760761
df["invalid"] = [["a"]] * len(df)
761762
assert df.dtypes["invalid"] == np.object_
@@ -775,8 +776,8 @@ def test_append_raise(setup_path):
775776
# datetime with embedded nans as object
776777
df = DataFrame(
777778
1.1 * np.arange(120).reshape((30, 4)),
778-
columns=Index(list("ABCD"), dtype=object),
779-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
779+
columns=Index(list("ABCD")),
780+
index=Index([f"i-{i}" for i in range(30)]),
780781
)
781782
s = Series(datetime.datetime(2001, 1, 2), index=df.index)
782783
s = s.astype(object)
@@ -803,8 +804,8 @@ def test_append_raise(setup_path):
803804
# appending an incompatible table
804805
df = DataFrame(
805806
1.1 * np.arange(120).reshape((30, 4)),
806-
columns=Index(list("ABCD"), dtype=object),
807-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
807+
columns=Index(list("ABCD")),
808+
index=Index([f"i-{i}" for i in range(30)]),
808809
)
809810
store.append("df", df)
810811

@@ -822,10 +823,11 @@ def test_append_raise(setup_path):
822823
df["foo"] = Timestamp("20130101")
823824
store.append("df", df)
824825
df["foo"] = "bar"
826+
shape = "(30,)" if using_infer_string else "(1, 30)"
825827
msg = re.escape(
826828
"invalid combination of [values_axes] on appending data "
827829
"[name->values_block_1,cname->values_block_1,"
828-
"dtype->bytes24,kind->string,shape->(1, 30)] "
830+
f"dtype->bytes24,kind->string,shape->{shape}] "
829831
"vs current table "
830832
"[name->values_block_1,cname->values_block_1,"
831833
"dtype->datetime64[s],kind->datetime64[s],shape->None]"
@@ -884,7 +886,7 @@ def test_append_with_timedelta(setup_path):
884886
def test_append_to_multiple(setup_path):
885887
df1 = DataFrame(
886888
np.random.default_rng(2).standard_normal((10, 4)),
887-
columns=Index(list("ABCD"), dtype=object),
889+
columns=Index(list("ABCD")),
888890
index=date_range("2000-01-01", periods=10, freq="B"),
889891
)
890892
df2 = df1.copy().rename(columns="{}_2".format)
@@ -921,12 +923,12 @@ def test_append_to_multiple(setup_path):
921923
def test_append_to_multiple_dropna(setup_path):
922924
df1 = DataFrame(
923925
np.random.default_rng(2).standard_normal((10, 4)),
924-
columns=Index(list("ABCD"), dtype=object),
926+
columns=Index(list("ABCD")),
925927
index=date_range("2000-01-01", periods=10, freq="B"),
926928
)
927929
df2 = DataFrame(
928930
np.random.default_rng(2).standard_normal((10, 4)),
929-
columns=Index(list("ABCD"), dtype=object),
931+
columns=Index(list("ABCD")),
930932
index=date_range("2000-01-01", periods=10, freq="B"),
931933
).rename(columns="{}_2".format)
932934
df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
@@ -946,7 +948,7 @@ def test_append_to_multiple_dropna(setup_path):
946948
def test_append_to_multiple_dropna_false(setup_path):
947949
df1 = DataFrame(
948950
np.random.default_rng(2).standard_normal((10, 4)),
949-
columns=Index(list("ABCD"), dtype=object),
951+
columns=Index(list("ABCD")),
950952
index=date_range("2000-01-01", periods=10, freq="B"),
951953
)
952954
df2 = df1.copy().rename(columns="{}_2".format)

pandas/tests/io/pytables/test_categorical.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas import (
75
Categorical,
86
DataFrame,
@@ -16,10 +14,7 @@
1614
ensure_clean_store,
1715
)
1816

19-
pytestmark = [
20-
pytest.mark.single_cpu,
21-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
22-
]
17+
pytestmark = [pytest.mark.single_cpu]
2318

2419

2520
def test_categorical(setup_path):

pandas/tests/io/pytables/test_complex.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas as pd
75
from pandas import (
86
DataFrame,
@@ -13,10 +11,6 @@
1311

1412
from pandas.io.pytables import read_hdf
1513

16-
pytestmark = pytest.mark.xfail(
17-
using_string_dtype(), reason="TODO(infer_string)", strict=False
18-
)
19-
2014

2115
def test_complex_fixed(tmp_path, setup_path):
2216
df = DataFrame(

pandas/tests/io/pytables/test_errors.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
from pandas._config import using_string_dtype
9-
108
from pandas import (
119
CategoricalIndex,
1210
DataFrame,
@@ -24,10 +22,7 @@
2422
_maybe_adjust_name,
2523
)
2624

27-
pytestmark = [
28-
pytest.mark.single_cpu,
29-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
30-
]
25+
pytestmark = [pytest.mark.single_cpu]
3126

3227

3328
def test_pass_spec_to_storer(setup_path):
@@ -93,9 +88,14 @@ def test_unimplemented_dtypes_table_columns(setup_path):
9388

9489
with ensure_clean_store(setup_path) as store:
9590
# this fails because we have a date in the object block......
96-
msg = re.escape(
97-
"""Cannot serialize the column [datetime1]
98-
because its data contents are not [string] but [date] object dtype"""
91+
msg = "|".join(
92+
[
93+
re.escape(
94+
"Cannot serialize the column [datetime1] because its data contents "
95+
"are not [string] but [date] object dtype"
96+
),
97+
re.escape("[date] is not implemented as a table column"),
98+
]
9999
)
100100
with pytest.raises(TypeError, match=msg):
101101
store.append("df_unimplemented", df)

pandas/tests/io/pytables/test_file_handling.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas.compat import (
97
PY311,
108
is_ci_environment,
@@ -35,9 +33,7 @@
3533
from pandas.io import pytables
3634
from pandas.io.pytables import Term
3735

38-
pytestmark = [
39-
pytest.mark.single_cpu,
40-
]
36+
pytestmark = [pytest.mark.single_cpu]
4137

4238

4339
@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
@@ -329,7 +325,6 @@ def test_complibs(tmp_path, lvl, lib, request):
329325
assert node.filters.complib == lib
330326

331327

332-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
333328
@pytest.mark.skipif(
334329
not is_platform_little_endian(), reason="reason platform is not little endian"
335330
)
@@ -347,7 +342,6 @@ def test_encoding(setup_path):
347342
tm.assert_frame_equal(result, expected)
348343

349344

350-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
351345
@pytest.mark.parametrize(
352346
"val",
353347
[
@@ -362,7 +356,7 @@ def test_encoding(setup_path):
362356
[b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
363357
],
364358
)
365-
@pytest.mark.parametrize("dtype", ["category", object])
359+
@pytest.mark.parametrize("dtype", ["category", None])
366360
def test_latin_encoding(tmp_path, setup_path, dtype, val):
367361
enc = "latin-1"
368362
nan_rep = ""

pandas/tests/io/pytables/test_keys.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas import (
75
DataFrame,
86
HDFStore,
@@ -15,10 +13,7 @@
1513
tables,
1614
)
1715

18-
pytestmark = [
19-
pytest.mark.single_cpu,
20-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
21-
]
16+
pytestmark = [pytest.mark.single_cpu]
2217

2318

2419
def test_keys(setup_path):

pandas/tests/io/pytables/test_put.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@
2222
)
2323
from pandas.util import _test_decorators as td
2424

25-
pytestmark = [
26-
pytest.mark.single_cpu,
27-
]
25+
pytestmark = [pytest.mark.single_cpu]
2826

2927

3028
def test_format_type(tmp_path, setup_path):

0 commit comments

Comments
 (0)