Skip to content

Commit d6264e6

Browse files
committed
Manage pyarrow and python storage in map dict like
1 parent fa46a96 commit d6264e6

File tree

2 files changed

+54
-34
lines changed

2 files changed

+54
-34
lines changed

pandas/_libs/lib.pyx

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2484,7 +2484,7 @@ def maybe_convert_objects(ndarray[object] objects,
24842484
Whether to convert datetime, timedelta, period, interval types.
24852485
dtype_if_all_nat : np.dtype, ExtensionDtype, or None, default None
24862486
Dtype to cast to if we have all-NaT.
2487-
storage : {"python", "pyarrow", "pyarrow_numpy"}, default "python"
2487+
storage : {None, "python", "pyarrow", "pyarrow_numpy"}, default None
24882488
Backend storage
24892489

24902490
Returns
@@ -2504,9 +2504,6 @@ def maybe_convert_objects(ndarray[object] objects,
25042504
object val
25052505
float64_t fnan = NaN
25062506

2507-
if storage is None:
2508-
storage="python"
2509-
25102507
if dtype_if_all_nat is not None:
25112508
# in practice we don't expect to ever pass dtype_if_all_nat
25122509
# without both convert_non_numeric, so disallow
@@ -2967,7 +2964,7 @@ def map_infer_mask(
29672964
input value is used.
29682965
dtype : numpy.dtype
29692966
The numpy dtype to use for the result ndarray.
2970-
storage : {"python", "pyarrow", "pyarrow_numpy"}, default "python"
2967+
storage : {None, "python", "pyarrow", "pyarrow_numpy"}, default None
29712968
Backend storage
29722969

29732970
Returns
@@ -3041,7 +3038,7 @@ def map_infer(
30413038
convert_to_nullable_dtype : bool, default False
30423039
If an array-like object containing only integer or boolean values (and NaN) is
30433040
encountered, whether to convert and return a Boolean/IntegerArray.
3044-
storage : {"python", "pyarrow", "pyarrow_numpy"}, default "python"
3041+
storage : {None, "python", "pyarrow", "pyarrow_numpy"}, default None
30453042
Backend storage
30463043

30473044
Returns

pandas/core/algorithms.py

Lines changed: 51 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1690,7 +1690,10 @@ def map_array(
16901690
else:
16911691
mapper = Series(mapper, dtype=np.float64)
16921692
else:
1693-
mapper = Series(mapper)
1693+
if arr.dtype in ("string[pyarrow]", "string[python]"):
1694+
mapper = Series(mapper, dtype=arr.dtype)
1695+
else:
1696+
mapper = Series(mapper)
16941697

16951698
if isinstance(mapper, ABCSeries):
16961699
if na_action == "ignore":
@@ -1706,33 +1709,7 @@ def map_array(
17061709
if not len(arr):
17071710
return arr.copy()
17081711

1709-
na_value = np.nan
1710-
mask = isna(arr)
1711-
storage = None
1712-
if isinstance(arr.dtype, BaseMaskedDtype):
1713-
arr = cast("BaseMaskedArray", arr)
1714-
values = arr._data
1715-
if arr._hasna:
1716-
na_value = arr.dtype.na_value
1717-
elif isinstance(arr.dtype, ExtensionDtype):
1718-
arr = cast("ExtensionArray", arr)
1719-
arr_dtype = arr.dtype.__repr__()
1720-
if "python" in arr_dtype:
1721-
storage = "python"
1722-
values = np.asarray(arr)
1723-
elif "pyarrow" in arr_dtype:
1724-
storage = "pyarrow"
1725-
if "date" in arr_dtype:
1726-
values = np.fromiter(arr._pa_array, dtype="O")
1727-
else:
1728-
values = np.asarray(arr)
1729-
else:
1730-
values = np.asarray(arr)
1731-
if arr._hasna:
1732-
na_value = arr.dtype.na_value
1733-
else:
1734-
# we must convert to python types
1735-
values = arr.astype(object, copy=False)
1712+
mask, na_value, storage, values = _build_map_infer_methods_params(arr)
17361713

17371714
if na_action is None:
17381715
return lib.map_infer(
@@ -1752,3 +1729,49 @@ def map_array(
17521729
convert_to_nullable_dtype=na_value is NA,
17531730
storage=storage,
17541731
)
1732+
1733+
1734+
def _build_map_infer_methods_params(
    arr: ArrayLike,
) -> tuple[np.ndarray, object, str, np.ndarray]:
    """
    Build the arguments passed to ``lib.map_infer`` / ``lib.map_infer_mask``.

    Parameters
    ----------
    arr : ArrayLike
        Array whose elements are about to be mapped.

    Returns
    -------
    mask : np.ndarray[bool]
        Result of ``isna(arr)``.
    na_value : object
        A value in `values` to consider missing. ``arr.dtype.na_value`` when
        `arr` is a masked or extension array that holds at least one missing
        value, ``np.nan`` in every other case.
    storage : {"python", "pyarrow"}
        Backend storage. ``"pyarrow"`` only when `arr` has a non-masked
        extension dtype whose repr contains ``"pyarrow"``; ``"python"``
        otherwise.
    values : np.ndarray
        Values to be processed by lib.map_infer and lib.map_infer_mask.
    """
    mask = isna(arr)
    na_value = np.nan
    storage = "python"
    dtype = arr.dtype

    if isinstance(dtype, BaseMaskedDtype):
        arr = cast("BaseMaskedArray", arr)
        # Use the raw data buffer; missing positions are carried by `mask`.
        values = arr._data
        if arr._hasna:
            na_value = dtype.na_value

    elif isinstance(dtype, ExtensionDtype):
        arr = cast("ExtensionArray", arr)
        # NOTE(review): the backend is detected by substring match on the
        # dtype repr, which is fragile; kept as-is to preserve behaviour.
        dtype_repr = repr(dtype)
        is_pyarrow = "pyarrow" in dtype_repr
        if is_pyarrow:
            storage = "pyarrow"

        if is_pyarrow and "date" in dtype_repr:
            # Date-like pyarrow arrays are iterated element by element into an
            # object array rather than converted with np.asarray.
            values = np.fromiter(arr._pa_array, dtype="O")
        else:
            values = np.asarray(arr)

        if arr._hasna:
            na_value = dtype.na_value

    else:
        # we must convert to python types
        values = arr.astype(object, copy=False)

    return mask, na_value, storage, values

0 commit comments

Comments
 (0)