Skip to content

Commit 888905e

Browse files
Update PyArrow conversion and arrow/parquet tests for pyarrow 19.0
1 parent 57d2489 commit 888905e

File tree

5 files changed: +58 additions, −20 deletions

pandas/compat/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
pa_version_under16p0,
3535
pa_version_under17p0,
3636
pa_version_under18p0,
37+
pa_version_under19p0,
3738
)
3839

3940
if TYPE_CHECKING:
@@ -166,4 +167,5 @@ def is_ci_environment() -> bool:
166167
"pa_version_under16p0",
167168
"pa_version_under17p0",
168169
"pa_version_under18p0",
170+
"pa_version_under19p0",
169171
]

pandas/compat/pyarrow.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
pa_version_under16p0 = _palv < Version("16.0.0")
1919
pa_version_under17p0 = _palv < Version("17.0.0")
2020
pa_version_under18p0 = _palv < Version("18.0.0")
21+
pa_version_under19p0 = _palv < Version("19.0.0")
2122
HAS_PYARROW = True
2223
except ImportError:
2324
pa_version_under10p1 = True
@@ -30,4 +31,5 @@
3031
pa_version_under16p0 = True
3132
pa_version_under17p0 = True
3233
pa_version_under18p0 = True
34+
pa_version_under19p0 = True
3335
HAS_PYARROW = False

pandas/io/_util.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010
from pandas._config import using_string_dtype
1111

1212
from pandas._libs import lib
13-
from pandas.compat import pa_version_under18p0
13+
from pandas.compat import (
14+
pa_version_under18p0,
15+
pa_version_under19p0,
16+
)
1417
from pandas.compat._optional import import_optional_dependency
1518

1619
import pandas as pd
@@ -77,7 +80,10 @@ def arrow_table_to_pandas(
7780
elif dtype_backend == "pyarrow":
7881
types_mapper = pd.ArrowDtype
7982
elif using_string_dtype():
80-
types_mapper = _arrow_string_types_mapper()
83+
if pa_version_under19p0:
84+
types_mapper = _arrow_string_types_mapper()
85+
else:
86+
types_mapper = None
8187
elif dtype_backend is lib.no_default or dtype_backend == "numpy":
8288
types_mapper = None
8389
else:

pandas/tests/arrays/string_/test_string.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010

1111
from pandas._config import using_string_dtype
1212

13-
from pandas.compat.pyarrow import pa_version_under12p0
13+
from pandas.compat.pyarrow import (
14+
pa_version_under12p0,
15+
pa_version_under19p0,
16+
)
1417

1518
from pandas.core.dtypes.common import is_dtype_equal
1619

@@ -539,7 +542,7 @@ def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
539542
assert table.field("a").type == "large_string"
540543
with pd.option_context("string_storage", string_storage):
541544
result = table.to_pandas()
542-
if dtype.na_value is np.nan and not using_string_dtype():
545+
if dtype.na_value is np.nan and not using_infer_string:
543546
assert result["a"].dtype == "object"
544547
else:
545548
assert isinstance(result["a"].dtype, pd.StringDtype)
@@ -553,6 +556,20 @@ def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
553556
assert result.loc[2, "a"] is result["a"].dtype.na_value
554557

555558

559+
def test_arrow_from_string(using_infer_string):
560+
# not roundtrip, but starting with pyarrow table without pandas metadata
561+
pa = pytest.importorskip("pyarrow")
562+
table = pa.table({"a": pa.array(["a", "b", None], type=pa.string())})
563+
564+
result = table.to_pandas()
565+
566+
if using_infer_string and not pa_version_under19p0:
567+
expected = pd.DataFrame({"a": ["a", "b", None]}, dtype="str")
568+
else:
569+
expected = pd.DataFrame({"a": ["a", "b", None]}, dtype="object")
570+
tm.assert_frame_equal(result, expected)
571+
572+
556573
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
557574
def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
558575
# GH-41040

pandas/tests/io/test_parquet.py

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
pa_version_under13p0,
1818
pa_version_under15p0,
1919
pa_version_under17p0,
20+
pa_version_under19p0,
2021
)
2122

2223
import pandas as pd
@@ -254,8 +255,10 @@ def test_invalid_engine(df_compat):
254255
check_round_trip(df_compat, "foo", "bar")
255256

256257

257-
def test_options_py(df_compat, pa):
258+
def test_options_py(df_compat, pa, using_infer_string):
258259
# use the set option
260+
if using_infer_string and not pa_version_under19p0:
261+
df_compat.columns = df_compat.columns.astype("str")
259262

260263
with pd.option_context("io.parquet.engine", "pyarrow"):
261264
check_round_trip(df_compat)
@@ -784,18 +787,21 @@ def test_unsupported_float16_cleanup(self, pa, path_type):
784787

785788
def test_categorical(self, pa):
786789
# supported in >= 0.7.0
787-
df = pd.DataFrame()
788-
df["a"] = pd.Categorical(list("abcdef"))
789-
790-
# test for null, out-of-order values, and unobserved category
791-
df["b"] = pd.Categorical(
792-
["bar", "foo", "foo", "bar", None, "bar"],
793-
dtype=pd.CategoricalDtype(["foo", "bar", "baz"]),
794-
)
795-
796-
# test for ordered flag
797-
df["c"] = pd.Categorical(
798-
["a", "b", "c", "a", "c", "b"], categories=["b", "c", "d"], ordered=True
790+
df = pd.DataFrame(
791+
{
792+
"a": pd.Categorical(list("abcdef")),
793+
# test for null, out-of-order values, and unobserved category
794+
"b": pd.Categorical(
795+
["bar", "foo", "foo", "bar", None, "bar"],
796+
dtype=pd.CategoricalDtype(["foo", "bar", "baz"]),
797+
),
798+
# test for ordered flag
799+
"c": pd.Categorical(
800+
["a", "b", "c", "a", "c", "b"],
801+
categories=["b", "c", "d"],
802+
ordered=True,
803+
),
804+
}
799805
)
800806

801807
check_round_trip(df, pa)
@@ -858,11 +864,13 @@ def test_s3_roundtrip_for_dir(
858864
repeat=1,
859865
)
860866

861-
def test_read_file_like_obj_support(self, df_compat):
867+
def test_read_file_like_obj_support(self, df_compat, using_infer_string):
862868
pytest.importorskip("pyarrow")
863869
buffer = BytesIO()
864870
df_compat.to_parquet(buffer)
865871
df_from_buf = read_parquet(buffer)
872+
if using_infer_string and not pa_version_under19p0:
873+
df_compat.columns = df_compat.columns.astype("str")
866874
tm.assert_frame_equal(df_compat, df_from_buf)
867875

868876
def test_expand_user(self, df_compat, monkeypatch):
@@ -929,7 +937,7 @@ def test_additional_extension_arrays(self, pa, using_infer_string):
929937
"c": pd.Series(["a", None, "c"], dtype="string"),
930938
}
931939
)
932-
if using_infer_string:
940+
if using_infer_string and pa_version_under19p0:
933941
check_round_trip(df, pa, expected=df.astype({"c": "str"}))
934942
else:
935943
check_round_trip(df, pa)
@@ -943,7 +951,10 @@ def test_pyarrow_backed_string_array(self, pa, string_storage, using_infer_strin
943951
df = pd.DataFrame({"a": pd.Series(["a", None, "c"], dtype="string[pyarrow]")})
944952
with pd.option_context("string_storage", string_storage):
945953
if using_infer_string:
946-
expected = df.astype("str")
954+
if pa_version_under19p0:
955+
expected = df.astype("str")
956+
else:
957+
expected = df.astype(f"string[{string_storage}]")
947958
expected.columns = expected.columns.astype("str")
948959
else:
949960
expected = df.astype(f"string[{string_storage}]")

Comments (0) — no commit comments.