diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 109f674fb9043..d9890fb331cfa 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -73,7 +73,6 @@ validate_indices, ) from pandas.core.nanops import check_below_min_count -from pandas.core.strings.base import BaseStringArrayMethods from pandas.io._util import _arrow_dtype_mapping from pandas.tseries.frequencies import to_offset @@ -237,7 +236,6 @@ class ArrowExtensionArray( OpsMixin, ExtensionArraySupportsAnyAll, ArrowStringArrayMixin, - BaseStringArrayMethods, ): """ Pandas ExtensionArray backed by a PyArrow ChunkedArray. diff --git a/pandas/core/strings/__init__.py b/pandas/core/strings/__init__.py index d4ce75f768c5d..3a5e98bbbdbc8 100644 --- a/pandas/core/strings/__init__.py +++ b/pandas/core/strings/__init__.py @@ -2,7 +2,6 @@ Implementation of pandas.Series.str and its interface. * strings.accessor.StringMethods : Accessor for Series.str -* strings.base.BaseStringArrayMethods: Mixin ABC for EAs to implement str methods Most methods on the StringMethods accessor follow the pattern: @@ -10,9 +9,7 @@ 2. Call that array's implementation of the string method 3. Wrap the result (in a Series, index, or DataFrame) -Pandas extension arrays implementing string methods should inherit from -pandas.core.strings.base.BaseStringArrayMethods. This is an ABC defining -the various string methods. To avoid namespace clashes and pollution, +To avoid namespace clashes and pollution, these are prefixed with `_str_`. So ``Series.str.upper()`` calls ``Series.array._str_upper()``. The interface isn't currently public to other string extension arrays. @@ -20,7 +17,6 @@ # Pandas current implementation is in ObjectStringArrayMixin. This is designed # to work on object-dtype ndarrays. # -# BaseStringArrayMethods # - ObjectStringArrayMixin # - StringArray # - NumpyExtensionArray diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py deleted file mode 100644 index eee0fcf3df5b0..0000000000000 --- a/pandas/core/strings/base.py +++ /dev/null @@ -1,274 +0,0 @@ -from __future__ import annotations - -import abc -from typing import ( - TYPE_CHECKING, - Literal, - Self, -) - -from pandas._libs import lib - -if TYPE_CHECKING: - from collections.abc import ( - Callable, - Sequence, - ) - import re - - from pandas._typing import ( - NpDtype, - Scalar, - ) - - -class BaseStringArrayMethods(abc.ABC): - """ - Base class for extension arrays implementing string methods. - - This is where our ExtensionArrays can override the implementation of - Series.str.. We don't expect this to work with - 3rd-party extension arrays. - - * User calls Series.str. - * pandas extracts the extension array from the Series - * pandas calls ``extension_array._str_(*args, **kwargs)`` - * pandas wraps the result, to return to the user. - - See :ref:`Series.str` for the docstring of each method. - """ - - def _str_getitem(self, key): - if isinstance(key, slice): - return self._str_slice(start=key.start, stop=key.stop, step=key.step) - else: - return self._str_get(key) - - @abc.abstractmethod - def _str_count(self, pat, flags: int = 0): - pass - - @abc.abstractmethod - def _str_pad( - self, - width: int, - side: Literal["left", "right", "both"] = "left", - fillchar: str = " ", - ): - pass - - @abc.abstractmethod - def _str_contains( - self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True - ): - pass - - @abc.abstractmethod - def _str_startswith(self, pat, na=None): - pass - - @abc.abstractmethod - def _str_endswith(self, pat, na=None): - pass - - @abc.abstractmethod - def _str_replace( - self, - pat: str | re.Pattern, - repl: str | Callable, - n: int = -1, - case: bool = True, - flags: int = 0, - regex: bool = True, - ): - pass - - @abc.abstractmethod - def _str_repeat(self, repeats: int | Sequence[int]): - pass - - @abc.abstractmethod - def _str_match( - self, - pat: str, - case: bool = True, - flags: int = 0, - na: Scalar | lib.NoDefault = lib.no_default, - ): - pass - - @abc.abstractmethod - def _str_fullmatch( - self, - pat: str | re.Pattern, - case: bool = True, - flags: int = 0, - na: Scalar | lib.NoDefault = lib.no_default, - ): - pass - - @abc.abstractmethod - def _str_encode(self, encoding, errors: str = "strict"): - pass - - @abc.abstractmethod - def _str_find(self, sub, start: int = 0, end=None): - pass - - @abc.abstractmethod - def _str_rfind(self, sub, start: int = 0, end=None): - pass - - @abc.abstractmethod - def _str_findall(self, pat, flags: int = 0): - pass - - @abc.abstractmethod - def _str_get(self, i): - pass - - @abc.abstractmethod - def _str_index(self, sub, start: int = 0, end=None): - pass - - @abc.abstractmethod - def _str_rindex(self, sub, start: int = 0, end=None): - pass - - @abc.abstractmethod - def _str_join(self, sep: str): - pass - - @abc.abstractmethod - def _str_partition(self, sep: str, expand): - pass - - @abc.abstractmethod - def _str_rpartition(self, sep: str, expand): - pass - - @abc.abstractmethod - def _str_len(self): - pass - - @abc.abstractmethod - def _str_slice(self, start=None, stop=None, step=None): - pass - - @abc.abstractmethod - def _str_slice_replace(self, start=None, stop=None, repl=None): - pass - - @abc.abstractmethod - def _str_translate(self, table): - pass - - @abc.abstractmethod - def _str_wrap(self, width: int, **kwargs): - pass - - @abc.abstractmethod - def _str_get_dummies(self, sep: str = "|", dtype: NpDtype | None = None): - pass - - @abc.abstractmethod - def _str_isalnum(self): - pass - - @abc.abstractmethod - def _str_isalpha(self): - pass - - @abc.abstractmethod - def _str_isascii(self): - pass - - @abc.abstractmethod - def _str_isdecimal(self): - pass - - @abc.abstractmethod - def _str_isdigit(self): - pass - - @abc.abstractmethod - def _str_islower(self): - pass - - @abc.abstractmethod - def _str_isnumeric(self): - pass - - @abc.abstractmethod - def _str_isspace(self): - pass - - @abc.abstractmethod - def _str_istitle(self): - pass - - @abc.abstractmethod - def _str_isupper(self): - pass - - @abc.abstractmethod - def _str_capitalize(self): - pass - - @abc.abstractmethod - def _str_casefold(self): - pass - - @abc.abstractmethod - def _str_title(self): - pass - - @abc.abstractmethod - def _str_swapcase(self): - pass - - @abc.abstractmethod - def _str_lower(self): - pass - - @abc.abstractmethod - def _str_upper(self): - pass - - @abc.abstractmethod - def _str_normalize(self, form): - pass - - @abc.abstractmethod - def _str_strip(self, to_strip=None): - pass - - @abc.abstractmethod - def _str_lstrip(self, to_strip=None): - pass - - @abc.abstractmethod - def _str_rstrip(self, to_strip=None): - pass - - @abc.abstractmethod - def _str_removeprefix(self, prefix: str) -> Self: - pass - - @abc.abstractmethod - def _str_removesuffix(self, suffix: str) -> Self: - pass - - @abc.abstractmethod - def _str_split( - self, pat=None, n=-1, expand: bool = False, regex: bool | None = None - ): - pass - - @abc.abstractmethod - def _str_rsplit(self, pat=None, n=-1): - pass - - @abc.abstractmethod - def _str_extract(self, pat: str, flags: int = 0, expand: bool = True): - pass diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index c1d81fc3d7223..9f6baaf691577 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -21,8 +21,6 @@ from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.missing import isna -from pandas.core.strings.base import BaseStringArrayMethods - if TYPE_CHECKING: from collections.abc import ( Callable, @@ -35,7 +33,7 @@ ) -class ObjectStringArrayMixin(BaseStringArrayMethods): +class ObjectStringArrayMixin: """ String Methods operating on object-dtype ndarrays. """ @@ -44,6 +42,12 @@ def __len__(self) -> int: # For typing, _str_map relies on the object being sized. raise NotImplementedError + def _str_getitem(self, key): + if isinstance(key, slice): + return self._str_slice(start=key.start, stop=key.stop, step=key.step) + else: + return self._str_get(key) + def _str_map( self, f,