def split_ranges(mask):
    """Generate half-open ``(start, stop)`` ranges covering every truthy run in mask.

    Each yielded tuple delimits a maximal run of consecutive truthy entries,
    so ``mask[start:stop]`` contains no falsy value and the ranges taken
    together cover all truthy positions. Ranges come out in increasing order
    and are never empty; an empty or all-falsy mask yields nothing.

    Parameters
    ----------
    mask : iterable of bool-like
        Iterated once; each element is only tested for truthiness.

    Returns
    -------
    generator of (int, int)

    >>> list(split_ranges([1,0,0,1,0]))
    [(0, 1), (3, 4)]
    >>> list(split_ranges([]))
    []
    """
    start = None  # index where the truthy run in progress began, if any

    for pos, val in enumerate(mask):
        if val:
            if start is None:
                start = pos
        elif start is not None:
            # this position is omitted: it closes the run in progress
            yield (start, pos)
            start = None

    if start is not None:
        # the mask ended while a run was still open; ``pos`` is the last index
        yield (start, pos + 1)
def split_block_at(self, item):
    """
    Split block into zero or more blocks around columns with given label,
    for "deleting" a column without having to copy data by returning views
    on the original array.

    Parameters
    ----------
    item : label
        Looked up with ``self.items.get_loc``; every position it occupies
        is left out of the result.

    Returns
    -------
    generator of Block
        One block per contiguous run of remaining positions, built with
        ``make_block`` from slices of ``self.values`` and copies of the
        matching ``self.items`` slices. Nothing is yielded when no
        position remains.
    """
    import numpy as np

    loc = self.items.get_loc(item)

    if com._is_bool_indexer(loc):
        # duplicate labels may come back as a boolean mask of the matches;
        # keep the complement. ``~`` is used because unary minus on a
        # boolean array is rejected by current numpy.
        keep = ~np.asarray(loc, dtype=bool)
    else:
        # integer position or slice: an ndarray mask accepts scalar
        # assignment through either one, whereas assigning ``False`` to a
        # slice of a plain list raises TypeError
        keep = np.ones(len(self), dtype=bool)
        keep[loc] = False

    for start, stop in com.split_ranges(keep):
        yield make_block(self.values[start:stop],
                         self.items[start:stop].copy(),
                         self.ref_items)
def test_split_ranges():
    """Check ``com.split_ranges`` on every boolean mask of length 8.

    For each mask the yielded ranges must be ordered and non-overlapping,
    must not include any falsy position, and together with the falsy
    positions must account for every slot of the mask.
    """
    import itertools

    def check_mask(mask):
        n_false = sum(1 for flag in mask if not flag)

        covered = 0
        prev_end = 0
        for start, end in com.split_ranges(mask):
            # ranges must come out in order and must not overlap, otherwise
            # the size bookkeeping below would not prove a complete cover
            assert prev_end <= start <= end
            prev_end = end

            covered += end - start

            assert 0 not in mask[start:end]

        # make sure the total items covered by the ranges are a complete cover
        assert covered + n_false == len(mask)

    # exhaustively test all possible mask sequences of length 8;
    # itertools.product enumerates them without Python-2-only helpers
    ncols = 8
    for bits in itertools.product([False, True], repeat=ncols):
        check_mask(list(bits))

    # base cases
    check_mask([])
    check_mask([0])
    check_mask([1])


# From the pandas/tests/test_frame.py hunk: a test-case method (takes self).
def test_pop_non_unique_cols(self):
    """Popping a duplicated column label removes every column carrying it."""
    df = DataFrame({0: [0, 1], 1: [0, 1], 2: [4, 5]})
    df.columns = ["a", "b", "a"]

    res = df.pop("a")
    # both "a" columns come back together as a frame
    self.assertEqual(type(res), DataFrame)
    self.assertEqual(len(res), 2)
    # only "b" is left behind, with the rows untouched
    self.assertEqual(len(df.columns), 1)
    self.assertTrue("b" in df.columns)
    self.assertFalse("a" in df.columns)
    self.assertEqual(len(df.index), 2)
def test_split_block_at(self):
    """Splitting a block at a label yields only the surrounding pieces.

    Each case names the label removed from ``self.fblock`` and the item
    lists expected in the resulting blocks, in order. A block whose only
    item is removed must yield no blocks at all.
    """
    cases = [
        ('a', [['c', 'e']]),        # removed at the front
        ('c', [['a'], ['e']]),      # removed in the middle
        ('e', [['a', 'c']]),        # removed at the back
    ]

    for label, expected_items in cases:
        pieces = list(self.fblock.split_block_at(label))
        self.assertEqual(len(pieces), len(expected_items))
        for piece, items in zip(pieces, expected_items):
            self.assertTrue(np.array_equal(piece.items, items))

    # single-item block: nothing is left after the split
    bool_block = get_bool_ex(['f'])
    leftovers = list(bool_block.split_block_at('f'))
    self.assertEqual(len(leftovers), 0)