From bed7ced0426f1f0f48144397d89154f4052af9e1 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Mon, 3 Jun 2013 09:22:45 -0400
Subject: [PATCH 1/2] BUG: (GH3740) Groupby transform with item-by-item not
 upcasting correctly

---
 RELEASE.rst                  |  2 ++
 pandas/core/groupby.py       | 12 +++++++++---
 pandas/tests/test_groupby.py |  9 +++++++++
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index c59a53c7f6c69..bbfc9fb948ef4 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -196,6 +196,7 @@ pandas 0.11.1
   - ``DataFrame.to_html`` and ``DataFrame.to_latex`` now accept a path for
     their first argument (GH3702_)
   - Fix file tokenization error with \r delimiter and quoted fields (GH3453_)
+  - Fix groupby transform with item-by-item not upcasting correctly (GH3740_)
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -278,6 +279,7 @@ pandas 0.11.1
 .. _GH3696: https://github.com/pydata/pandas/issues/3696
 .. _GH3667: https://github.com/pydata/pandas/issues/3667
 .. _GH3733: https://github.com/pydata/pandas/issues/3733
+.. _GH3740: https://github.com/pydata/pandas/issues/3740
 
 pandas 0.11.0
 =============
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index d409adfd71158..2032f23030aeb 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1532,6 +1532,8 @@ def transform(self, func, *args, **kwargs):
         transformed : Series
         """
         result = self.obj.copy()
+        if hasattr(result,'values'):
+            result = result.values
 
         if isinstance(func, basestring):
             wrapper = lambda x: getattr(x, func)(*args, **kwargs)
@@ -1541,11 +1543,15 @@ def transform(self, func, *args, **kwargs):
         for name, group in self:
             object.__setattr__(group, 'name', name)
             res = wrapper(group)
-            # result[group.index] = res
             indexer = self.obj.index.get_indexer(group.index)
-            np.put(result, indexer, res)
+            if hasattr(res,'values'):
+                res = res.values
 
-        return result
+            # need to do a safe put here, as the dtype may be different
+            # this needs to be an ndarray
+            result,_ = com._maybe_upcast_indexer(result, indexer, res)
+
+        return self.obj.__class__(result,index=self.obj.index,name=self.obj.name)
 
 
 class NDFrameGroupBy(GroupBy):
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index c56fca49cce48..852f0109058ee 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -616,6 +616,15 @@ def f(x):
             assert_series_equal(agged, expected, check_dtype=False)
             self.assert_(issubclass(agged.dtype.type, np.dtype(dtype).type))
 
+    def test_groupby_transform_with_int(self):
+
+        # GH 3740, make sure that we might upcast on item-by-item transform
+
+        df = DataFrame(dict(A = [1,1,1,2,2,2], B = 1, C = [1,2,3,1,2,3], D = 'foo'))
+        result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std())
+        expected = DataFrame(dict(B = np.nan, C = [-1,0,1,-1,0,1]))
+        assert_frame_equal(result,expected)
+
     def test_indices_concatenation_order(self):
 
         # GH 2808

From af77e0e4a107493774212387fdd2edb8562d28a9 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Mon, 3 Jun 2013 12:08:26 -0400
Subject: [PATCH 2/2] BUG: ensure float into function of series transform when
 item-by-item

     downcast result if needed
---
 pandas/core/groupby.py       |  5 +++++
 pandas/tests/test_groupby.py | 24 ++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 2032f23030aeb..64606a6e644f9 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1534,6 +1534,7 @@ def transform(self, func, *args, **kwargs):
         result = self.obj.copy()
         if hasattr(result,'values'):
             result = result.values
+        dtype = result.dtype
 
         if isinstance(func, basestring):
             wrapper = lambda x: getattr(x, func)(*args, **kwargs)
@@ -1541,6 +1542,8 @@ def transform(self, func, *args, **kwargs):
             wrapper = lambda x: func(x, *args, **kwargs)
 
         for name, group in self:
+
+            group = com.ensure_float(group)
             object.__setattr__(group, 'name', name)
             res = wrapper(group)
             indexer = self.obj.index.get_indexer(group.index)
@@ -1551,6 +1554,8 @@ def transform(self, func, *args, **kwargs):
             # this needs to be an ndarray
             result,_ = com._maybe_upcast_indexer(result, indexer, res)
 
+        # downcast back to the original dtype if we can (and need to)
+        result = _possibly_downcast_to_dtype(result, dtype)
         return self.obj.__class__(result,index=self.obj.index,name=self.obj.name)
 
 
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 852f0109058ee..cf62b16a9dd2a 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -620,11 +620,35 @@ def test_groupby_transform_with_int(self):
 
         # GH 3740, make sure that we might upcast on item-by-item transform
 
+        # floats
+        df = DataFrame(dict(A = [1,1,1,2,2,2], B = Series(1,dtype='float64'), C = Series([1,2,3,1,2,3],dtype='float64'), D = 'foo'))
+        result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std())
+        expected = DataFrame(dict(B = np.nan, C = Series([-1,0,1,-1,0,1],dtype='float64')))
+        assert_frame_equal(result,expected)
+
+        # int case
         df = DataFrame(dict(A = [1,1,1,2,2,2], B = 1, C = [1,2,3,1,2,3], D = 'foo'))
         result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std())
         expected = DataFrame(dict(B = np.nan, C = [-1,0,1,-1,0,1]))
         assert_frame_equal(result,expected)
 
+        # int that needs float conversion
+        s = Series([2,3,4,10,5,-1])
+        df = DataFrame(dict(A = [1,1,1,2,2,2], B = 1, C = s, D = 'foo'))
+        result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std())
+
+        s1 = s.iloc[0:3]
+        s1 = (s1-s1.mean())/s1.std()
+        s2 = s.iloc[3:6]
+        s2 = (s2-s2.mean())/s2.std()
+        expected = DataFrame(dict(B = np.nan, C = concat([s1,s2])))
+        assert_frame_equal(result,expected)
+
+        # int downcasting
+        result = df.groupby('A').transform(lambda x: x*2/2)
+        expected = DataFrame(dict(B = 1, C = [2,3,4,10,5,-1]))
+        assert_frame_equal(result,expected)
+
     def test_indices_concatenation_order(self):
 
         # GH 2808