2 changes: 2 additions & 0 deletions RELEASE.rst
@@ -196,6 +196,7 @@ pandas 0.11.1
   - ``DataFrame.to_html`` and ``DataFrame.to_latex`` now accept a path for
     their first argument (GH3702_)
   - Fix file tokenization error with \r delimiter and quoted fields (GH3453_)
+  - Groupby transform with item-by-item not upcasting correctly (GH3740_)

 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -278,6 +279,7 @@ pandas 0.11.1
 .. _GH3696: https://github.com/pydata/pandas/issues/3696
 .. _GH3667: https://github.com/pydata/pandas/issues/3667
 .. _GH3733: https://github.com/pydata/pandas/issues/3733
+.. _GH3740: https://github.com/pydata/pandas/issues/3740

 pandas 0.11.0
 =============
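For context, a minimal sketch of the behaviour the GH3740 entry describes (illustrative only, not part of the commit; the frame and column names are made up): an item-by-item transform whose per-group results are floats should now upcast an integer column instead of silently truncating it.

    import pandas as pd

    # Integer column whose group-wise standardized values are not whole numbers,
    # so the per-column (item-by-item) transform has to upcast to float64.
    df = pd.DataFrame({'A': [1, 1, 1, 2, 2, 2],
                       'C': [2, 3, 4, 10, 5, -1]})

    result = df.groupby('A').transform(lambda x: (x - x.mean()) / x.std())
    print(result['C'].dtype)  # expected to be float64 once this fix is applied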
17 changes: 14 additions & 3 deletions pandas/core/groupby.py
@@ -1532,20 +1532,31 @@ def transform(self, func, *args, **kwargs):
         transformed : Series
         """
         result = self.obj.copy()
+        if hasattr(result,'values'):
+            result = result.values
+        dtype = result.dtype

         if isinstance(func, basestring):
             wrapper = lambda x: getattr(x, func)(*args, **kwargs)
         else:
             wrapper = lambda x: func(x, *args, **kwargs)

         for name, group in self:
+
+            group = com.ensure_float(group)
             object.__setattr__(group, 'name', name)
             res = wrapper(group)
-            # result[group.index] = res
             indexer = self.obj.index.get_indexer(group.index)
-            np.put(result, indexer, res)
+            if hasattr(res,'values'):
+                res = res.values

-        return result
+            # need to do a safe put here, as the dtype may be different
+            # this needs to be an ndarray
+            result,_ = com._maybe_upcast_indexer(result, indexer, res)
+
+        # downcast if we can (and need)
+        result = _possibly_downcast_to_dtype(result, dtype)
+        return self.obj.__class__(result,index=self.obj.index,name=self.obj.name)


 class NDFrameGroupBy(GroupBy):
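Not part of the diff: a rough numpy sketch of why the loop now does a "safe put" and then downcasts at the end. The safe_put helper below is purely illustrative and is not pandas' com._maybe_upcast_indexer; it only shows the idea of upcasting the output buffer before assigning a result whose dtype no longer fits, which is what the old np.put call failed to do.

    import numpy as np

    # Old behaviour: np.put into an int64 buffer silently truncates float
    # results -- the bug reported in GH3740.
    out = np.array([2, 3, 4, 10, 5, -1])      # int64 buffer copied from the Series
    np.put(out, [0, 1, 2], [-1.2, 0.0, 1.2])  # float results for one group
    print(out[:3])                            # [-1  0  1] -- fractional parts lost

    # Gist of the fix (hypothetical helper, not the pandas internal): upcast the
    # buffer once a group's result no longer fits its dtype, then assign.
    def safe_put(out, indexer, res):
        res = np.asarray(res)
        if np.issubdtype(out.dtype, np.integer) and np.issubdtype(res.dtype, np.floating):
            out = out.astype('float64')
        out[indexer] = res
        return out

    out = np.array([2, 3, 4, 10, 5, -1])
    out = safe_put(out, np.array([0, 1, 2]), np.array([-0.5, 0.25, 1.75]))
    print(out.dtype)                          # float64
    # At the end of the loop the diff downcasts back to the original dtype when
    # every value is still integral (the role of _possibly_downcast_to_dtype).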
33 changes: 33 additions & 0 deletions pandas/tests/test_groupby.py
@@ -616,6 +616,39 @@ def f(x):
         assert_series_equal(agged, expected, check_dtype=False)
         self.assert_(issubclass(agged.dtype.type, np.dtype(dtype).type))

+    def test_groupby_transform_with_int(self):
+
+        # GH 3740, make sure that we might upcast on item-by-item transform
+
+        # floats
+        df = DataFrame(dict(A = [1,1,1,2,2,2], B = Series(1,dtype='float64'), C = Series([1,2,3,1,2,3],dtype='float64'), D = 'foo'))
+        result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std())
+        expected = DataFrame(dict(B = np.nan, C = Series([-1,0,1,-1,0,1],dtype='float64')))
+        assert_frame_equal(result,expected)
+
+        # int case
+        df = DataFrame(dict(A = [1,1,1,2,2,2], B = 1, C = [1,2,3,1,2,3], D = 'foo'))
+        result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std())
+        expected = DataFrame(dict(B = np.nan, C = [-1,0,1,-1,0,1]))
+        assert_frame_equal(result,expected)
+
+        # int that needs float conversion
+        s = Series([2,3,4,10,5,-1])
+        df = DataFrame(dict(A = [1,1,1,2,2,2], B = 1, C = s, D = 'foo'))
+        result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std())
+
+        s1 = s.iloc[0:3]
+        s1 = (s1-s1.mean())/s1.std()
+        s2 = s.iloc[3:6]
+        s2 = (s2-s2.mean())/s2.std()
+        expected = DataFrame(dict(B = np.nan, C = concat([s1,s2])))
+        assert_frame_equal(result,expected)
+
+        # int downcasting
+        result = df.groupby('A').transform(lambda x: x*2/2)
+        expected = DataFrame(dict(B = 1, C = [2,3,4,10,5,-1]))
+        assert_frame_equal(result,expected)
+
     def test_indices_concatenation_order(self):

         # GH 2808