Skip to content

Commit e29ca8d

Browse files
update init
1 parent 10c14fb commit e29ca8d

File tree

12 files changed

+40
-49
lines changed

12 files changed

+40
-49
lines changed

pandas/_libs/lib.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2702,7 +2702,7 @@ def maybe_convert_objects(ndarray[object] objects,
27022702
if using_string_dtype() and is_string_array(objects, skipna=True):
27032703
from pandas.core.arrays.string_ import StringDtype
27042704

2705-
dtype = StringDtype(storage="pyarrow_numpy")
2705+
dtype = StringDtype(storage="pyarrow", na_value=np.nan)
27062706
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
27072707

27082708
elif convert_to_nullable_dtype and is_string_array(objects, skipna=True):

pandas/_testing/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -509,14 +509,14 @@ def shares_memory(left, right) -> bool:
509509
if (
510510
isinstance(left, ExtensionArray)
511511
and is_string_dtype(left.dtype)
512-
and left.dtype.storage in ("pyarrow", "pyarrow_numpy") # type: ignore[attr-defined]
512+
and left.dtype.storage == "pyarrow" # type: ignore[attr-defined]
513513
):
514514
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
515515
left = cast("ArrowExtensionArray", left)
516516
if (
517517
isinstance(right, ExtensionArray)
518518
and is_string_dtype(right.dtype)
519-
and right.dtype.storage in ("pyarrow", "pyarrow_numpy") # type: ignore[attr-defined]
519+
and right.dtype.storage == "pyarrow" # type: ignore[attr-defined]
520520
):
521521
right = cast("ArrowExtensionArray", right)
522522
left_pa_data = left._pa_array

pandas/core/arrays/arrow/array.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -575,10 +575,8 @@ def __getitem__(self, item: PositionalIndexer):
575575
if isinstance(item, np.ndarray):
576576
if not len(item):
577577
# Removable once we migrate StringDtype[pyarrow] to ArrowDtype[string]
578-
if self._dtype.name == "string" and self._dtype.storage in (
579-
"pyarrow",
580-
"pyarrow_numpy",
581-
):
578+
if self._dtype.name == "string" and self._dtype.storage == "pyarrow":
579+
# TODO(infer_string) should this be large_string?
582580
pa_dtype = pa.string()
583581
else:
584582
pa_dtype = self._dtype.pyarrow_dtype

pandas/core/arrays/string_.py

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from pandas._config import (
1313
get_option,
14-
using_pyarrow_string_dtype,
14+
using_string_dtype,
1515
)
1616

1717
from pandas._libs import (
@@ -84,7 +84,7 @@ class StringDtype(StorageExtensionDtype):
8484
8585
Parameters
8686
----------
87-
storage : {"python", "pyarrow", "pyarrow_numpy"}, optional
87+
storage : {"python", "pyarrow"}, optional
8888
If not given, "pyarrow" when ``using_string_dtype()`` is enabled, otherwise the value of ``pd.options.mode.string_storage``.
8989
na_value :
9090
@@ -121,35 +121,24 @@ def na_value(self) -> libmissing.NAType | float: # type: ignore[override]
121121

122122
_metadata = ("storage",)
123123

124-
def __init__(self, storage=None, na_value=None) -> None:
124+
def __init__(self, storage=None, na_value=libmissing.NA) -> None:
125125
if not (
126-
na_value is None or (isinstance(na_value, float) and np.isnan(na_value))
126+
na_value is libmissing.NA
127+
or (isinstance(na_value, float) and np.isnan(na_value))
127128
):
128-
raise ValueError(
129-
"'na_value' must be the default value or pd.NA, got {na_value}"
130-
)
129+
raise ValueError(f"'na_value' must be np.nan or pd.NA, got {na_value}")
131130

132131
# infer defaults
133-
if storage is None and na_value is None:
134-
if using_pyarrow_string_dtype():
132+
if storage is None:
133+
if using_string_dtype():
135134
storage = "pyarrow"
136-
na_value = np.nan
137135
else:
138136
storage = get_option("mode.string_storage")
139-
na_value = libmissing.NA
140-
elif storage is None:
141-
# in this case na_value is NaN
142-
storage = get_option("mode.string_storage")
143-
elif na_value is None:
144-
na_value = np.nan if using_pyarrow_string_dtype() else libmissing.NA
145-
if na_value is not libmissing.NA and storage == "python":
146-
raise NotImplementedError(
147-
"'python' mode for na_value of NaN not yet implemented"
148-
)
149137

150138
if storage == "pyarrow_numpy":
151139
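# TODO raise a deprecation warning; NOTE(review): na_value keeps its default (pd.NA) here, so "pyarrow_numpy" no longer implies NaN semantics - confirm this is intended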
152140
storage = "pyarrow"
141+
153142
if storage not in {"python", "pyarrow"}:
154143
raise ValueError(
155144
f"Storage must be 'python' or 'pyarrow'. Got {storage} instead."
@@ -199,12 +188,10 @@ def construct_from_string(cls, string) -> Self:
199188
)
200189
if string == "string":
201190
return cls()
202-
elif string == "String":
203-
return cls(na_value=np.nan)
204191
elif string == "string[python]":
205-
return cls(storage="python", na_value=np.nan)
192+
return cls(storage="python")
206193
elif string == "string[pyarrow]":
207-
return cls(storage="pyarrow", na_value=np.nan)
194+
return cls(storage="pyarrow")
208195
elif string == "string[pyarrow_numpy]":
209196
# TODO deprecate
210197
return cls(storage="pyarrow_numpy")
@@ -232,9 +219,9 @@ def construct_array_type( # type: ignore[override]
232219
if self.storage == "python":
233220
return StringArray
234221
elif self.storage == "pyarrow" and self._na_value is libmissing.NA:
235-
return ArrowStringArrayNumpySemantics
236-
else:
237222
return ArrowStringArray
223+
else:
224+
return ArrowStringArrayNumpySemantics
238225

239226
def __from_arrow__(
240227
self, array: pyarrow.Array | pyarrow.ChunkedArray
@@ -244,15 +231,16 @@ def __from_arrow__(
244231
"""
245232
if self.storage == "pyarrow":
246233
if self._na_value is libmissing.NA:
234+
from pandas.core.arrays.string_arrow import ArrowStringArray
235+
236+
return ArrowStringArray(array)
237+
else:
247238
from pandas.core.arrays.string_arrow import (
248239
ArrowStringArrayNumpySemantics,
249240
)
250241

251242
return ArrowStringArrayNumpySemantics(array)
252-
else:
253-
from pandas.core.arrays.string_arrow import ArrowStringArray
254243

255-
return ArrowStringArray(array)
256244
else:
257245
import pyarrow
258246

pandas/core/arrays/string_arrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -597,7 +597,7 @@ def _rank(
597597

598598

599599
class ArrowStringArrayNumpySemantics(ArrowStringArray):
600-
_storage = "pyarrow_numpy"
600+
_storage = "pyarrow"
601601

602602
@classmethod
603603
def _result_converter(cls, values, na=None):

pandas/core/construction.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,7 @@ def sanitize_array(
574574
if isinstance(data, str) and using_string_dtype() and original_dtype is None:
575575
from pandas.core.arrays.string_ import StringDtype
576576

577-
dtype = StringDtype("pyarrow_numpy")
577+
dtype = StringDtype("pyarrow", na_value=np.nan)
578578
data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
579579

580580
return data
@@ -608,7 +608,7 @@ def sanitize_array(
608608
elif data.dtype.kind == "U" and using_string_dtype():
609609
from pandas.core.arrays.string_ import StringDtype
610610

611-
dtype = StringDtype(storage="pyarrow_numpy")
611+
dtype = StringDtype(storage="pyarrow", na_value=np.nan)
612612
subarr = dtype.construct_array_type()._from_sequence(data, dtype=dtype)
613613

614614
if subarr is data and copy:

pandas/core/dtypes/cast.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -801,7 +801,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
801801
if using_string_dtype():
802802
from pandas.core.arrays.string_ import StringDtype
803803

804-
dtype = StringDtype(storage="pyarrow_numpy")
804+
dtype = StringDtype(storage="pyarrow", na_value=np.nan)
805805

806806
elif isinstance(val, (np.datetime64, dt.datetime)):
807807
try:

pandas/core/internals/construction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ def ndarray_to_mgr(
302302
nb = new_block_2d(values, placement=bp, refs=refs)
303303
block_values = [nb]
304304
elif dtype is None and values.dtype.kind == "U" and using_string_dtype():
305-
dtype = StringDtype(storage="pyarrow_numpy")
305+
dtype = StringDtype(storage="pyarrow", na_value=np.nan)
306306

307307
obj_columns = list(values)
308308
block_values = [

pandas/core/reshape/encoding.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import numpy as np
1212

13+
from pandas._libs import missing as libmissing
1314
from pandas._libs.sparse import IntIndex
1415

1516
from pandas.core.dtypes.common import (
@@ -256,7 +257,7 @@ def _get_dummies_1d(
256257
dtype = ArrowDtype(pa.bool_()) # type: ignore[assignment]
257258
elif (
258259
isinstance(input_dtype, StringDtype)
259-
and input_dtype.storage != "pyarrow_numpy"
260+
and input_dtype.na_value is libmissing.NA
260261
):
261262
dtype = pandas_dtype("boolean") # type: ignore[assignment]
262263
else:

pandas/core/reshape/merge.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2677,8 +2677,7 @@ def _factorize_keys(
26772677

26782678
elif isinstance(lk, ExtensionArray) and lk.dtype == rk.dtype:
26792679
if (isinstance(lk.dtype, ArrowDtype) and is_string_dtype(lk.dtype)) or (
2680-
isinstance(lk.dtype, StringDtype)
2681-
and lk.dtype.storage in ["pyarrow", "pyarrow_numpy"]
2680+
isinstance(lk.dtype, StringDtype) and lk.dtype.storage == "pyarrow"
26822681
):
26832682
import pyarrow as pa
26842683
import pyarrow.compute as pc

0 commit comments

Comments
 (0)