2 changes: 2 additions & 0 deletions RELEASE.rst
@@ -196,6 +196,7 @@ pandas 0.11.1
   - ``DataFrame.to_html`` and ``DataFrame.to_latex`` now accept a path for
     their first argument (GH3702_)
   - Fix file tokenization error with \r delimiter and quoted fields (GH3453_)
+  - Groupby transform with item-by-item not upcasting correctly (GH3740_)

 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -278,6 +279,7 @@ pandas 0.11.1
 .. _GH3696: https://github.com/pydata/pandas/issues/3696
 .. _GH3667: https://github.com/pydata/pandas/issues/3667
 .. _GH3733: https://github.com/pydata/pandas/issues/3733
+.. _GH3740: https://github.com/pydata/pandas/issues/3740

 pandas 0.11.0
 =============
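For context, a minimal sketch of the behaviour the GH3740 entry describes (illustrative only, not part of the commit; the frame and column names are made up): an item-by-item transform whose per-group results are floats should now upcast an integer column instead of silently truncating it.

    import pandas as pd

    # Integer column whose group-wise standardized values are not whole numbers,
    # so the per-column (item-by-item) transform has to upcast to float64.
    df = pd.DataFrame({'A': [1, 1, 1, 2, 2, 2],
                       'C': [2, 3, 4, 10, 5, -1]})

    result = df.groupby('A').transform(lambda x: (x - x.mean()) / x.std())
    print(result['C'].dtype)  # expected to be float64 once this fix is applied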
17 changes: 14 additions & 3 deletions pandas/core/groupby.py
@@ -1532,20 +1532,31 @@ def transform(self, func, *args, **kwargs):
         transformed : Series
         """
         result = self.obj.copy()
+        if hasattr(result,'values'):
+            result = result.values
+        dtype = result.dtype

         if isinstance(func, basestring):
             wrapper = lambda x: getattr(x, func)(*args, **kwargs)
         else:
             wrapper = lambda x: func(x, *args, **kwargs)

         for name, group in self:
+
+            group = com.ensure_float(group)
             object.__setattr__(group, 'name', name)
             res = wrapper(group)
-            # result[group.index] = res
             indexer = self.obj.index.get_indexer(group.index)
-            np.put(result, indexer, res)
+            if hasattr(res,'values'):
+                res = res.values

-        return result
+            # need to do a safe put here, as the dtype may be different
+            # this needs to be an ndarray
+            result,_ = com._maybe_upcast_indexer(result, indexer, res)
+
+        # downcast if we can (and need)
+        result = _possibly_downcast_to_dtype(result, dtype)
+        return self.obj.__class__(result,index=self.obj.index,name=self.obj.name)


 class NDFrameGroupBy(GroupBy):
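Not part of the diff: a rough numpy sketch of why the loop now does a "safe put" and then downcasts at the end. The safe_put helper below is purely illustrative and is not pandas' com._maybe_upcast_indexer; it only shows the idea of upcasting the output buffer before assigning a result whose dtype no longer fits, which is what the old np.put call failed to do.

    import numpy as np

    # Old behaviour: np.put into an int64 buffer silently truncates float
    # results -- the bug reported in GH3740.
    out = np.array([2, 3, 4, 10, 5, -1])      # int64 buffer copied from the Series
    np.put(out, [0, 1, 2], [-1.2, 0.0, 1.2])  # float results for one group
    print(out[:3])                            # [-1  0  1] -- fractional parts lost

    # Gist of the fix (hypothetical helper, not the pandas internal): upcast the
    # buffer once a group's result no longer fits its dtype, then assign.
    def safe_put(out, indexer, res):
        res = np.asarray(res)
        if np.issubdtype(out.dtype, np.integer) and np.issubdtype(res.dtype, np.floating):
            out = out.astype('float64')
        out[indexer] = res
        return out

    out = np.array([2, 3, 4, 10, 5, -1])
    out = safe_put(out, np.array([0, 1, 2]), np.array([-0.5, 0.25, 1.75]))
    print(out.dtype)                          # float64
    # At the end of the loop the diff downcasts back to the original dtype when
    # every value is still integral (the role of _possibly_downcast_to_dtype).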
33 changes: 33 additions & 0 deletions pandas/tests/test_groupby.py
@@ -616,6 +616,39 @@ def f(x):
         assert_series_equal(agged, expected, check_dtype=False)
         self.assert_(issubclass(agged.dtype.type, np.dtype(dtype).type))

+    def test_groupby_transform_with_int(self):
+
+        # GH 3740, make sure that we might upcast on item-by-item transform
+
+        # floats
+        df = DataFrame(dict(A = [1,1,1,2,2,2], B = Series(1,dtype='float64'), C = Series([1,2,3,1,2,3],dtype='float64'), D = 'foo'))
+        result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std())
+        expected = DataFrame(dict(B = np.nan, C = Series([-1,0,1,-1,0,1],dtype='float64')))
+        assert_frame_equal(result,expected)
+
+        # int case
+        df = DataFrame(dict(A = [1,1,1,2,2,2], B = 1, C = [1,2,3,1,2,3], D = 'foo'))
+        result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std())
+        expected = DataFrame(dict(B = np.nan, C = [-1,0,1,-1,0,1]))
+        assert_frame_equal(result,expected)
+
+        # int that needs float conversion
+        s = Series([2,3,4,10,5,-1])
+        df = DataFrame(dict(A = [1,1,1,2,2,2], B = 1, C = s, D = 'foo'))
+        result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std())
+
+        s1 = s.iloc[0:3]
+        s1 = (s1-s1.mean())/s1.std()
+        s2 = s.iloc[3:6]
+        s2 = (s2-s2.mean())/s2.std()
+        expected = DataFrame(dict(B = np.nan, C = concat([s1,s2])))
+        assert_frame_equal(result,expected)
+
+        # int downcasting
+        result = df.groupby('A').transform(lambda x: x*2/2)
+        expected = DataFrame(dict(B = 1, C = [2,3,4,10,5,-1]))
+        assert_frame_equal(result,expected)
+
     def test_indices_concatenation_order(self):

         # GH 2808