Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ pandas 0.11.1
GH3572_). This happens before any drawing takes place which eliminates any
spurious plots from showing up.
- Added Faq section on repr display options, to help users customize their setup.
- ``where`` operations that result in block splitting are much faster (GH3733_)

**API Changes**

Expand Down Expand Up @@ -116,6 +117,8 @@ pandas 0.11.1
- the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are
deprecated
- Implement ``__nonzero__`` for ``NDFrame`` objects (GH3691_, GH3696_)
- ``as_matrix`` with mixed signed and unsigned integer dtypes will now result in a signed
integer dtype with twice the itemsize of the unsigned lcd, capped at ``int64``, to avoid precision issues (GH3733_)

**Bug Fixes**

Expand Down Expand Up @@ -273,6 +276,7 @@ pandas 0.11.1
.. _GH3691: https://github.com/pydata/pandas/issues/3691
.. _GH3696: https://github.com/pydata/pandas/issues/3696
.. _GH3667: https://github.com/pydata/pandas/issues/3667
.. _GH3733: https://github.com/pydata/pandas/issues/3733

pandas 0.11.0
=============
Expand Down
63 changes: 37 additions & 26 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,42 +558,38 @@ def func(c,v,o):
result.fill(np.nan)
return result

def create_block(result, items, transpose=True):
# see if we can operate on the entire block, or need item-by-item
result = func(cond,values,other)
if self._can_hold_na:

if not isinstance(result, np.ndarray):
raise TypeError('Could not compare [%s] with block values'
% repr(other))

if transpose and is_transposed:
if is_transposed:
result = result.T

# try to cast if requested
if try_cast:
result = self._try_cast_result(result)

return make_block(result, items, self.ref_items)
return make_block(result, self.items, self.ref_items)

# see if we can operate on the entire block, or need item-by-item
if not self._can_hold_na:
axis = cond.ndim-1
result_blocks = []
for item in self.items:
loc = self.items.get_loc(item)
item = self.items.take([loc])
v = values.take([loc],axis=axis)
c = cond.take([loc],axis=axis)
o = other.take([loc],axis=axis) if hasattr(other,'shape') else other

result = func(c,v,o)
if len(result) == 1:
result = np.repeat(result,self.shape[1:])

result = _block_shape(result,ndim=self.ndim,shape=self.shape[1:])
result_blocks.append(create_block(result, item, transpose=False))

return result_blocks
else:
result = func(cond,values,other)
return create_block(result, self.items)
# might need to separate out blocks
axis = cond.ndim-1
cond = cond.swapaxes(axis,0)
mask = np.array([ cond[i].all() for i in enumerate(range(cond.shape[0]))],dtype=bool)

result_blocks = []
for m in [mask, ~mask]:
if m.any():
items = self.items[m]
slices = [slice(None)] * cond.ndim
slices[axis] = self.items.get_indexer(items)
r = self._try_cast_result(result[slices])
result_blocks.append(make_block(r.T, items, self.ref_items))

return result_blocks

class NumericBlock(Block):
is_numeric = True
Expand Down Expand Up @@ -2429,7 +2425,22 @@ def _lcd_dtype(l):
elif have_bool:
return np.dtype(bool)
elif have_int and not have_float and not have_complex:
return _lcd_dtype(counts[IntBlock])

# if we are mixing unsigned and signed, then return
# the next biggest int type (if we can)
lcd = _lcd_dtype(counts[IntBlock])
kinds = set([ i.dtype.kind for i in counts[IntBlock] ])
if len(kinds) == 1:
return lcd

if lcd == 'uint64' or lcd == 'int64':
return np.dtype('int64')

# return 1 bigger on the itemsize if unsigned
if lcd.kind == 'u':
return np.dtype('int%s' % (lcd.itemsize*8*2))
return lcd

elif have_dt64 and not have_float and not have_complex:
return np.dtype('M8[ns]')
elif have_complex:
Expand Down
30 changes: 23 additions & 7 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,16 @@ def test_getitem_boolean_casting(self):
expected = Series({'float64': 6, 'int32' : 1, 'int64' : 1})
assert_series_equal(result, expected)

# where dtype conversions
# GH 3733
df = DataFrame(data = np.random.randn(100, 50))
df = df.where(df > 0) # create nans
bools = df > 0
mask = isnull(df)
expected = bools.astype(float).mask(mask)
result = bools.mask(mask)
assert_frame_equal(result,expected)

def test_getitem_boolean_list(self):
df = DataFrame(np.arange(12).reshape(3, 4))

Expand Down Expand Up @@ -7568,8 +7578,10 @@ def test_where(self):

def _safe_add(df):
# only add to the numeric items
return DataFrame(dict([ (c,s+1) if issubclass(s.dtype.type, (np.integer,np.floating)) else (c,s) for c, s in df.iteritems() ]))

def is_ok(s):
return issubclass(s.dtype.type, (np.integer,np.floating)) and s.dtype != 'uint8'
return DataFrame(dict([ (c,s+1) if is_ok(s) else (c,s) for c, s in df.iteritems() ]))

def _check_get(df, cond, check_dtypes = True):
other1 = _safe_add(df)
rs = df.where(cond, other1)
Expand Down Expand Up @@ -7605,20 +7617,24 @@ def _check_get(df, cond, check_dtypes = True):
def _check_align(df, cond, other, check_dtypes = True):
rs = df.where(cond, other)
for i, k in enumerate(rs.columns):
v = rs[k]
result = rs[k]
d = df[k].values
c = cond[k].reindex(df[k].index).fillna(False).values

if np.isscalar(other):
o = other
else:
if isinstance(other,np.ndarray):
o = Series(other[:,i],index=v.index).values
o = Series(other[:,i],index=result.index).values
else:
o = other[k].values

new_values = d if c.all() else np.where(c, d, o)
assert_series_equal(v, Series(new_values,index=v.index))
expected = Series(new_values,index=result.index)

# since we can't always have the correct numpy dtype
# as numpy doesn't know how to downcast, don't check
assert_series_equal(result, expected, check_dtype=False)

# dtypes
# can't check dtype when other is an ndarray
Expand Down Expand Up @@ -9894,14 +9910,14 @@ def test_as_matrix_lcd(self):
self.assert_(values.dtype == np.float16)

values = self.mixed_int.as_matrix(['A','B','C','D'])
self.assert_(values.dtype == np.uint64)
self.assert_(values.dtype == np.int64)

values = self.mixed_int.as_matrix(['A','D'])
self.assert_(values.dtype == np.int64)

# guess all ints are cast to uints....
values = self.mixed_int.as_matrix(['A','B','C'])
self.assert_(values.dtype == np.uint64)
self.assert_(values.dtype == np.int64)

values = self.mixed_int.as_matrix(['A','C'])
self.assert_(values.dtype == np.int32)
Expand Down
15 changes: 15 additions & 0 deletions vb_suite/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,18 @@ def f(K=500):
"""

frame_xs_col = Benchmark('df.xs(50000,axis = 1)', setup)

## masking
# Shared fixture for the two masking benchmarks below: a 1000 x 500 frame
# of random normals, NaN-ed out via ``where(df > 0)``, plus the boolean
# frame and the null mask that the timed statements operate on.
setup = common_setup + """
data = np.random.randn(1000, 500)
df = DataFrame(data)
df = df.where(df > 0) # create nans
bools = df > 0
mask = isnull(df)
"""

# Times ``mask`` applied directly to the boolean frame.
mask_bools = Benchmark(
    'bools.mask(mask)',
    setup,
    start_date=datetime(2013, 1, 1)
)

# Times the same ``mask`` call after casting the boolean frame to float.
mask_floats = Benchmark(
    'bools.astype(float).mask(mask)',
    setup,
    start_date=datetime(2013, 1, 1)
)