diff --git a/ci/code_checks.sh b/ci/code_checks.sh index cebb9cda1e480..68ca06564d3a6 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -73,6 +73,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Period.freq GL08" \ -i "pandas.Period.ordinal GL08" \ -i "pandas.errors.IncompatibleFrequency SA01,SS06,EX01" \ + -i "pandas.api.extensions.ExtensionArray.value_counts EX01,RT03,SA01" \ -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \ -i "pandas.core.groupby.SeriesGroupBy.plot PR02" \ -i "pandas.core.resample.Resampler.quantile PR01,PR07" \ diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 07c297b2c15ff..84ec38e2f75d1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -99,7 +99,10 @@ npt, ) - from pandas import Index + from pandas import ( + Index, + Series, + ) _extension_array_shared_docs: dict[str, str] = {} @@ -1673,6 +1676,25 @@ def repeat(self, repeats: int | Sequence[int], axis: AxisInt | None = None) -> S ind = np.arange(len(self)).repeat(repeats) return self.take(ind) + def value_counts(self, dropna: bool = True) -> Series: + """ + Return a Series containing counts of unique values. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NA values. + + Returns + ------- + Series + """ + from pandas.core.algorithms import value_counts_internal as value_counts + + result = value_counts(self.to_numpy(copy=False), sort=False, dropna=dropna) + result.index = result.index.astype(self.dtype) + return result + # ------------------------------------------------------------------------ # Indexing methods # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 02456cfa04469..8d13e76c57e4f 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -75,7 +75,6 @@ isin, take, unique, - value_counts_internal as value_counts, ) from pandas.core.arrays import ArrowExtensionArray from pandas.core.arrays.base import ( @@ -105,7 +104,6 @@ from pandas import ( Index, - Series, ) @@ -1197,28 +1195,6 @@ def _validate_setitem_value(self, value): return value_left, value_right - def value_counts(self, dropna: bool = True) -> Series: - """ - Returns a Series containing counts of each interval. - - Parameters - ---------- - dropna : bool, default True - Don't include counts of NaN. - - Returns - ------- - counts : Series - - See Also - -------- - Series.value_counts - """ - # TODO: implement this is a non-naive way! - result = value_counts(np.asarray(self), dropna=dropna) - result.index = result.index.astype(self.dtype) - return result - # --------------------------------------------------------------------- # Rendering Methods diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index b20fc54a6cb28..7a61a252d86a6 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -1037,10 +1037,7 @@ def sum( return self._wrap_reduction_result(axis, result) def value_counts(self, dropna: bool = True) -> Series: - from pandas.core.algorithms import value_counts_internal as value_counts - - result = value_counts(self._ndarray, sort=False, dropna=dropna) - result.index = result.index.astype(self.dtype) + result = super().value_counts(dropna=dropna) if self.dtype.na_value is libmissing.NA: result = result.astype("Int64") diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 65fb6f33b0ea3..6f7733ad7693e 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -25,7 +25,6 @@ is_scalar, ) from pandas.core import arraylike -from pandas.core.algorithms import value_counts_internal as value_counts from pandas.core.arraylike import OpsMixin from pandas.core.arrays import ( ExtensionArray, @@ -291,9 +290,6 @@ def convert_values(param): return np.asarray(res, dtype=bool) - def value_counts(self, dropna: bool = True): - return value_counts(self.to_numpy(), dropna=dropna) - # We override fillna here to simulate a 3rd party EA that has done so. This # lets us test a 3rd-party EA that has not yet updated to include a "copy" # keyword in its fillna method. diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index a7fc5061a267d..39ce93d37da45 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -171,26 +171,6 @@ def test_fillna_limit_series(self, data_missing): ): super().test_fillna_limit_series(data_missing) - @pytest.mark.parametrize("dropna", [True, False]) - def test_value_counts(self, all_data, dropna): - all_data = all_data[:10] - if dropna: - other = np.array(all_data[~all_data.isna()]) - else: - other = all_data - - vcs = pd.Series(all_data).value_counts(dropna=dropna) - vcs_ex = pd.Series(other).value_counts(dropna=dropna) - - with decimal.localcontext() as ctx: - # avoid raising when comparing Decimal("NAN") < Decimal(2) - ctx.traps[decimal.InvalidOperation] = False - - result = vcs.sort_index() - expected = vcs_ex.sort_index() - - tm.assert_series_equal(result, expected) - def test_series_repr(self, data): # Overriding this base test to explicitly test that # the custom _formatter is used diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 4bc9562f1895d..5e1980c202f62 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -189,14 +189,12 @@ def test_ffill_limit_area( data_missing, limit_area, input_ilocs, expected_ilocs ) - @unhashable - def test_value_counts(self, all_data, dropna): + def test_value_counts(self, all_data, dropna, request): + if len(all_data) == 100 or dropna: + mark = pytest.mark.xfail(reason="unhashable") + request.applymarker(mark) super().test_value_counts(all_data, dropna) - @unhashable - def test_value_counts_with_normalize(self, data): - super().test_value_counts_with_normalize(data) - @unhashable def test_sort_values_frame(self): # TODO (EA.factorize): see if _values_for_factorize allows this.