From 1d81fcc846cbec686e7dac593b1afd088a66c141 Mon Sep 17 00:00:00 2001 From: seth-p Date: Tue, 12 May 2015 16:59:52 -0400 Subject: [PATCH 1/2] BUG: drop_duplicates drops name(s). --- .gitignore | 1 + doc/source/whatsnew/v0.17.0.txt | 2 +- pandas/core/index.py | 5 ++++- pandas/tests/test_index.py | 11 +++++++++++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index e8b557d68ac39..627ccf4112ff5 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,7 @@ doc/_build dist # Egg metadata *.egg-info +.eggs # tox testing tool .tox # rope diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index cc044bc35a707..9c81787e19f22 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -65,6 +65,6 @@ Bug Fixes - Bug in ``Timestamp``'s' ``microsecond``, ``quarter``, ``dayofyear``, ``week`` and ``daysinmonth`` properties return ``np.int`` type, not built-in ``int``. (:issue:`10050`) - Bug in ``NaT`` raises ``AttributeError`` when accessing to ``daysinmonth``, ``dayofweek`` properties. (:issue:`10096`) - +- Bug in ``drop_duplicates`` dropping name(s) (:issue:`10115`) diff --git a/pandas/core/index.py b/pandas/core/index.py index 21f1fed2cd6da..5f762466ad2b4 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2580,7 +2580,10 @@ def drop(self, labels, errors='raise'): @Appender(_shared_docs['drop_duplicates'] % _index_doc_kwargs) def drop_duplicates(self, take_last=False): result = super(Index, self).drop_duplicates(take_last=take_last) - return self._constructor(result) + if self.name is not None: + return self._constructor(result, name=self.name) + else: + return self._constructor(result, names=self.names) @Appender(_shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, take_last=False): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 444aa2a0bab1e..b2632092ba038 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -4151,6 +4151,17 @@ def test_droplevel_multiple(self): expected = index[:2].droplevel(2).droplevel(0) self.assertTrue(dropped.equals(expected)) + def test_drop_duplicates_names(self): + # GH 10115 + for idx in [Index([3, 4, 5, 3]), + Index([3, 4, 5, 3], name='Num'), + MultiIndex.from_tuples([('A', 1), ('A', 2)]), + MultiIndex.from_tuples([('A', 1), ('A', 2)], names=[None, None]), + MultiIndex.from_tuples([('A', 1), ('A', 2)], names=[None, 'Num']), + MultiIndex.from_tuples([('A', 1), ('A', 2)], names=['Upper', 'Num']), + ]: + self.assertEqual(idx.drop_duplicates().names, idx.names) + def test_insert(self): # key contained in all levels new_index = self.index.insert(0, ('bar', 'two')) From aae866023dcfad2e345244416172be83fe9bb796 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 13 May 2015 06:55:31 -0400 Subject: [PATCH 2/2] simplify drop_duplicates --- pandas/core/index.py | 7 +------ pandas/tests/test_index.py | 36 ++++++++++++++++++++++++------------ 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 5f762466ad2b4..a5d4a1609647e 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2579,17 +2579,12 @@ def drop(self, labels, errors='raise'): @Appender(_shared_docs['drop_duplicates'] % _index_doc_kwargs) def drop_duplicates(self, take_last=False): - result = super(Index, self).drop_duplicates(take_last=take_last) - if self.name is not None: - return self._constructor(result, name=self.name) - else: - return self._constructor(result, names=self.names) + return super(Index, self).drop_duplicates(take_last=take_last) @Appender(_shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, take_last=False): return super(Index, self).duplicated(take_last=take_last) - def _evaluate_with_timedelta_like(self, other, op, opstr): raise TypeError("can only perform ops with timedelta like values") diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index b2632092ba038..9cff4c432592f 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -199,10 +199,19 @@ def test_duplicates(self): if not len(ind): continue + if isinstance(ind, MultiIndex): + continue idx = self._holder([ind[0]]*5) self.assertFalse(idx.is_unique) self.assertTrue(idx.has_duplicates) + # GH 10115 + # preserve names + idx.name = 'foo' + result = idx.drop_duplicates() + self.assertEqual(result.name, 'foo') + self.assert_index_equal(result, Index([ind[0]],name='foo')) + def test_sort(self): for ind in self.indices.values(): self.assertRaises(TypeError, ind.sort) @@ -1695,9 +1704,10 @@ def test_reindexing(self): def test_duplicates(self): - idx = CategoricalIndex([0, 0, 0]) + idx = CategoricalIndex([0, 0, 0],name='foo') self.assertFalse(idx.is_unique) self.assertTrue(idx.has_duplicates) + self.assertEqual(idx.name,'foo') def test_get_indexer(self): @@ -4151,17 +4161,6 @@ def test_droplevel_multiple(self): expected = index[:2].droplevel(2).droplevel(0) self.assertTrue(dropped.equals(expected)) - def test_drop_duplicates_names(self): - # GH 10115 - for idx in [Index([3, 4, 5, 3]), - Index([3, 4, 5, 3], name='Num'), - MultiIndex.from_tuples([('A', 1), ('A', 2)]), - MultiIndex.from_tuples([('A', 1), ('A', 2)], names=[None, None]), - MultiIndex.from_tuples([('A', 1), ('A', 2)], names=[None, 'Num']), - MultiIndex.from_tuples([('A', 1), ('A', 2)], names=['Upper', 'Num']), - ]: - self.assertEqual(idx.drop_duplicates().names, idx.names) - def test_insert(self): # key contained in all levels new_index = self.index.insert(0, ('bar', 'two')) @@ -4421,6 +4420,19 @@ def check(nlevels, with_nulls): self.assert_array_equal(mi.duplicated(), np.zeros(len(mi), dtype='bool')) + def test_duplicate_meta_data(self): + # GH 10115 + index = MultiIndex(levels=[[0, 1], [0, 1, 2]], + labels=[[0, 0, 0, 0, 1, 1, 1], + [0, 1, 2, 0, 0, 1, 2]]) + for idx in [index, + index.set_names([None, None]), + index.set_names([None, 'Num']), + index.set_names(['Upper','Num']), + ]: + self.assertTrue(idx.has_duplicates) + self.assertEqual(idx.drop_duplicates().names, idx.names) + def test_tolist(self): result = self.index.tolist() exp = list(self.index.values)