From 3898b72306acec26272cbcc0793843e3d6fd03f8 Mon Sep 17 00:00:00 2001 From: economy Date: Tue, 23 May 2017 15:35:30 -0700 Subject: [PATCH 1/2] BUG: fixed wrong order of ordered labels in pd.cut() --- doc/source/whatsnew/v0.20.2.txt | 6 +----- pandas/core/reshape/tile.py | 2 +- pandas/tests/reshape/test_tile.py | 7 +++++++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 9f88d629880ed..cbdecab728aa4 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -44,13 +44,9 @@ Bug Fixes - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) - Passing an invalid engine to :func:`read_csv` now raises an informative ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`) - - - - +- Bug in ``pd.cut`` when ``labels`` are set (:issue:`16459`) - Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`) - Conversion ^^^^^^^^^^ diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 866f229bec418..d8398023a5083 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -254,7 +254,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, raise ValueError('Bin labels must be one fewer than ' 'the number of bin edges') if not is_categorical_dtype(labels): - labels = Categorical(labels, ordered=True) + labels = Categorical(labels, categories=labels, ordered=True) np.putmask(ids, na_mask, 0) result = algos.take_nd(labels, ids - 1) diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 8602b33856fea..0222ed277c364 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -211,6 +211,7 @@ def test_cut_pass_labels(self): result = cut(arr, bins, labels=labels) exp = Categorical(['Medium'] + 4 * ['Small'] + ['Medium', 'Large'], + categories=labels, ordered=True) tm.assert_categorical_equal(result, exp) @@ -219,6 +220,12 @@ def test_cut_pass_labels(self): exp = Categorical.from_codes([1] + 4 * [0] + [1, 2], labels) tm.assert_categorical_equal(result, exp) + labels = ['Good', 'Medium', 'Bad'] + result = cut(arr, 3, labels=labels) + exp = cut(arr, 3, labels=Categorical(labels, categories=labels, + ordered=True)) + tm.assert_categorical_equal(result, exp) + def test_qcut_include_lowest(self): values = np.arange(10) From 29128b35bf9805933465ab515624fdc95d946c2a Mon Sep 17 00:00:00 2001 From: economy Date: Wed, 31 May 2017 20:16:19 -0700 Subject: [PATCH 2/2] comments and whatsnew edits --- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/tests/reshape/test_tile.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index cbdecab728aa4..7906daeee73d0 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -44,7 +44,7 @@ Bug Fixes - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) - Passing an invalid engine to :func:`read_csv` now raises an informative ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`) -- Bug in ``pd.cut`` when ``labels`` are set (:issue:`16459`) +- Bug in ``pd.cut`` when ``labels`` are set, resulting in incorrect label ordering (:issue:`16459`) - Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`) Conversion diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 0222ed277c364..542af321632cf 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -220,6 +220,7 @@ def test_cut_pass_labels(self): exp = Categorical.from_codes([1] + 4 * [0] + [1, 2], labels) tm.assert_categorical_equal(result, exp) + # issue 16459 labels = ['Good', 'Medium', 'Bad'] result = cut(arr, 3, labels=labels) exp = cut(arr, 3, labels=Categorical(labels, categories=labels,