Skip to content

Commit a9c466b

Browse files
disallow pyarrow_numpy as option + fix more cases of checking storage to be pyarrow_numpy
1 parent ffa7ead commit a9c466b

File tree

6 files changed

+26
-27
lines changed

6 files changed

+26
-27
lines changed

pandas/core/arrays/string_arrow.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -188,10 +188,7 @@ def _from_sequence(
188188

189189
if dtype and not (isinstance(dtype, str) and dtype == "string"):
190190
dtype = pandas_dtype(dtype)
191-
assert isinstance(dtype, StringDtype) and dtype.storage in (
192-
"pyarrow",
193-
"pyarrow_numpy",
194-
)
191+
assert isinstance(dtype, StringDtype) and dtype.storage == "pyarrow"
195192

196193
if isinstance(scalars, BaseMaskedArray):
197194
# avoid costly conversion to object dtype in ensure_string_array and

pandas/core/config_init.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -460,7 +460,7 @@ def is_terminal() -> bool:
460460
"string_storage",
461461
"python",
462462
string_storage_doc,
463-
validator=is_one_of_factory(["python", "pyarrow", "pyarrow_numpy"]),
463+
validator=is_one_of_factory(["python", "pyarrow"]),
464464
)
465465

466466

pandas/tests/arrays/string_/test_string.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -483,7 +483,7 @@ def test_arrow_array(dtype):
483483
data = pd.array(["a", "b", "c"], dtype=dtype)
484484
arr = pa.array(data)
485485
expected = pa.array(list(data), type=pa.large_string(), from_pandas=True)
486-
if dtype.storage in ("pyarrow", "pyarrow_numpy") and pa_version_under12p0:
486+
if dtype.storage == "pyarrow" and pa_version_under12p0:
487487
expected = pa.chunked_array(expected)
488488
if dtype.storage == "python":
489489
expected = pc.cast(expected, pa.string())
@@ -501,6 +501,10 @@ def test_arrow_roundtrip(dtype, string_storage2, request, using_infer_string):
501501
reason="infer_string takes precedence over string storage"
502502
)
503503
)
504+
if string_storage2 == "pyarrow_numpy":
505+
# we cannot set "pyarrow_numpy" as storage option anymore, need to
506+
# update the tests for this
507+
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
504508

505509
data = pd.array(["a", "b", None], dtype=dtype)
506510
df = pd.DataFrame({"a": data})
@@ -531,6 +535,8 @@ def test_arrow_load_from_zero_chunks(
531535
reason="infer_string takes precedence over string storage"
532536
)
533537
)
538+
if string_storage2 == "pyarrow_numpy":
539+
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
534540

535541
data = pd.array([], dtype=dtype)
536542
df = pd.DataFrame({"a": data})

pandas/tests/arrays/string_/test_string_arrow.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,8 @@ def test_eq_all_na():
2929
def test_config(string_storage, request, using_infer_string):
3030
if using_infer_string and string_storage != "pyarrow_numpy":
3131
request.applymarker(pytest.mark.xfail(reason="infer string takes precedence"))
32+
if string_storage == "pyarrow_numpy":
33+
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
3234
with pd.option_context("string_storage", string_storage):
3335
assert StringDtype().storage == string_storage
3436
result = pd.array(["a", "b"])

pandas/tests/extension/test_string.py

Lines changed: 10 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -140,28 +140,21 @@ def _get_expected_exception(
140140
self, op_name: str, obj, other
141141
) -> type[Exception] | None:
142142
if op_name in ["__divmod__", "__rdivmod__"]:
143-
if isinstance(obj, pd.Series) and cast(
144-
StringDtype, tm.get_dtype(obj)
145-
).storage in [
146-
"pyarrow",
147-
"pyarrow_numpy",
148-
]:
143+
if (
144+
isinstance(obj, pd.Series)
145+
and cast(StringDtype, tm.get_dtype(obj)).storage == "pyarrow"
146+
):
149147
# TODO: re-raise as TypeError?
150148
return NotImplementedError
151-
elif isinstance(other, pd.Series) and cast(
152-
StringDtype, tm.get_dtype(other)
153-
).storage in [
154-
"pyarrow",
155-
"pyarrow_numpy",
156-
]:
149+
elif (
150+
isinstance(other, pd.Series)
151+
and cast(StringDtype, tm.get_dtype(other)).storage == "pyarrow"
152+
):
157153
# TODO: re-raise as TypeError?
158154
return NotImplementedError
159155
return TypeError
160156
elif op_name in ["__mod__", "__rmod__", "__pow__", "__rpow__"]:
161-
if cast(StringDtype, tm.get_dtype(obj)).storage in [
162-
"pyarrow",
163-
"pyarrow_numpy",
164-
]:
157+
if cast(StringDtype, tm.get_dtype(obj)).storage == "pyarrow":
165158
return NotImplementedError
166159
return TypeError
167160
elif op_name in ["__mul__", "__rmul__"]:
@@ -175,10 +168,7 @@ def _get_expected_exception(
175168
"__sub__",
176169
"__rsub__",
177170
]:
178-
if cast(StringDtype, tm.get_dtype(obj)).storage in [
179-
"pyarrow",
180-
"pyarrow_numpy",
181-
]:
171+
if cast(StringDtype, tm.get_dtype(obj)).storage == "pyarrow":
182172
import pyarrow as pa
183173

184174
# TODO: better to re-raise as TypeError?

pandas/tests/frame/methods/test_convert_dtypes.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,17 @@ def test_convert_dtypes(
1818
# Just check that it works for DataFrame here
1919
if using_infer_string:
2020
string_storage = "pyarrow_numpy"
21+
22+
string_storage_option = string_storage
23+
if string_storage == "pyarrow_numpy":
24+
string_storage_option = "pyarrow"
2125
df = pd.DataFrame(
2226
{
2327
"a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
2428
"b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
2529
}
2630
)
27-
with pd.option_context("string_storage", string_storage):
31+
with pd.option_context("string_storage", string_storage_option):
2832
result = df.convert_dtypes(True, True, convert_integer, False)
2933
expected = pd.DataFrame(
3034
{

0 commit comments

Comments
 (0)