Skip to content

Commit 4f78a26

Browse files
committed
TST (string dtype): resolve xfails in interchange
1 parent fae3e80 commit 4f78a26

File tree

2 files changed: 9 additions and 10 deletions

pandas/core/interchange/from_dataframe.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99

1010
import numpy as np
1111

12+
from pandas._config import using_string_dtype
13+
1214
from pandas.compat._optional import import_optional_dependency
1315

1416
import pandas as pd
@@ -147,8 +149,6 @@ def protocol_df_chunk_to_pandas(df: DataFrameXchg) -> pd.DataFrame:
147149
-------
148150
pd.DataFrame
149151
"""
150-
# We need a dict of columns here, with each column being a NumPy array (at
151-
# least for now, deal with non-NumPy dtypes later).
152152
columns: dict[str, Any] = {}
153153
buffers = [] # hold on to buffers, keeps memory alive
154154
for name in df.column_names():
@@ -347,8 +347,12 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
347347
# Add to our list of strings
348348
str_list[i] = string
349349

350-
# Convert the string list to a NumPy array
351-
return np.asarray(str_list, dtype="object"), buffers
350+
if using_string_dtype():
351+
res = pd.Series(str_list, dtype="str")
352+
else:
353+
res = np.asarray(str_list, dtype="object")
354+
355+
return res, buffers
352356

353357

354358
def parse_datetime_format_str(format_str, data) -> pd.Series | np.ndarray:

pandas/tests/interchange/test_impl.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66
import numpy as np
77
import pytest
88

9-
from pandas._config import using_string_dtype
10-
119
from pandas._libs.tslibs import iNaT
1210
from pandas.compat import (
1311
is_ci_environment,
@@ -401,7 +399,6 @@ def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None:
401399
pd.api.interchange.from_dataframe(df)
402400

403401

404-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
405402
def test_empty_string_column():
406403
# https://github.com/pandas-dev/pandas/issues/56703
407404
df = pd.DataFrame({"a": []}, dtype=str)
@@ -410,7 +407,6 @@ def test_empty_string_column():
410407
tm.assert_frame_equal(df, result)
411408

412409

413-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
414410
def test_large_string():
415411
# GH#56702
416412
pytest.importorskip("pyarrow")
@@ -427,7 +423,6 @@ def test_non_str_names():
427423
assert names == ["0"]
428424

429425

430-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
431426
def test_non_str_names_w_duplicates():
432427
# https://github.com/pandas-dev/pandas/issues/56701
433428
df = pd.DataFrame({"0": [1, 2, 3], 0: [4, 5, 6]})
@@ -438,7 +433,7 @@ def test_non_str_names_w_duplicates():
438433
"Expected a Series, got a DataFrame. This likely happened because you "
439434
"called __dataframe__ on a DataFrame which, after converting column "
440435
r"names to string, resulted in duplicated names: Index\(\['0', '0'\], "
441-
r"dtype='object'\). Please rename these columns before using the "
436+
r"dtype='(str|object)'\). Please rename these columns before using the "
442437
"interchange protocol."
443438
),
444439
):

0 commit comments

Comments
 (0)