diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4ab20623cc561..5fc53a418f652 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -203,6 +203,7 @@ Other enhancements - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) - :func:`DataFrame.to_excel` has a new ``autofilter`` parameter to add automatic filters to all columns (:issue:`61194`) +- :func:`qcut` now accepts the ``right`` parameter, consistent with :func:`cut` (:issue:`63053`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) - :func:`to_numeric` on big integers converts to ``object`` datatype with python integers when not coercing. (:issue:`51295`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index b13da83084e5c..fc3870f2e235f 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -294,6 +294,7 @@ def qcut( x, q, labels=None, + right: bool = True, retbins: bool = False, precision: int = 3, duplicates: str = "raise", @@ -316,6 +317,11 @@ def qcut( Used as labels for the resulting bins. Must be of the same length as the resulting bins. If False, return only integer indicators of the bins. If True, raises an error. + right : bool, default True + Indicates whether `bins` includes the rightmost edge or not. 
If + ``right == True`` (the default), then the bin edges ``[1, 2, 3, 4]`` + indicate (1,2], (2,3], (3,4]. If ``right == False``, they indicate + [1,2), [2,3), [3,4). retbins : bool, optional Whether to return the (bins, labels) or not. Can be useful if bins is given as a scalar. @@ -378,6 +384,7 @@ def qcut( x_idx, Index(bins), labels=labels, + right=right, precision=precision, include_lowest=True, duplicates=duplicates, diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index b6d45aeab8a7b..223c5612bfc55 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -44,6 +44,38 @@ def test_qcut(): tm.assert_categorical_equal(labels, ex_levels) +def test_qcut_right(): + arr = np.random.default_rng(2).standard_normal(1000) + + labels, _ = qcut(arr, 4, retbins=True, right=True) + ex_bins = np.quantile(arr, [0, 0.25, 0.5, 0.75, 1.0]) + + result = labels.categories.left.values + assert np.allclose(result, ex_bins[:-1], atol=1e-2) + + result = labels.categories.right.values + assert np.allclose(result, ex_bins[1:], atol=1e-2) + + ex_levels = cut(arr, ex_bins, include_lowest=True, right=True) + tm.assert_categorical_equal(labels, ex_levels) + + +def test_qcut_no_right(): + arr = np.random.default_rng(2).standard_normal(1000) + + labels, _ = qcut(arr, 4, retbins=True, right=False) + ex_bins = np.quantile(arr, [0, 0.25, 0.5, 0.75, 1.0]) + + lefts = labels.categories.left.values + assert np.allclose(lefts, ex_bins[:-1], atol=1e-2) + + rights = labels.categories.right.values + assert np.allclose(rights, ex_bins[1:], atol=1e-2) + + ex_levels = cut(arr, ex_bins, include_lowest=True, right=False) + tm.assert_categorical_equal(labels, ex_levels) + + def test_qcut_bounds(): arr = np.random.default_rng(2).standard_normal(1000)