From 97327895574db469eb2de518604061d746192579 Mon Sep 17 00:00:00 2001
From: Chang She <changshe@gmail.com>
Date: Tue, 18 Sep 2012 12:11:36 -0400
Subject: [PATCH 1/3] BUG: apply across non-unique indices. Still failing for
 sparse

---
 pandas/core/frame.py   | 32 +++++++++++++++++++-------------
 pandas/sparse/frame.py |  3 ++-
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bcfe645d5f14c..501b6ead37b71 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3745,21 +3745,22 @@ def _apply_standard(self, func, axis, ignore_failures=False):
             pass
 
         if axis == 0:
-            series_gen = ((c, self[c]) for c in self.columns)
+            series_gen = (self.icol(i) for i in range(len(self.columns)))
             res_index = self.columns
             res_columns = self.index
         elif axis == 1:
             res_index = self.index
             res_columns = self.columns
-            series_gen = ((i, Series(v, self.columns, name=i))
-                          for i, v in izip(self.index, self.values))
+            series_gen = (self.irow(i) for i in range(len(self.index)))
 
+        keys = []
         results = {}
         if ignore_failures:
             successes = []
-            for i, (k, v) in enumerate(series_gen):
+            for i, v in enumerate(series_gen):
                 try:
-                    results[k] = func(v)
+                    results[i] = func(v)
+                    keys.append(v.name)
                     successes.append(i)
                 except Exception:
                     pass
@@ -3768,32 +3769,37 @@ def _apply_standard(self, func, axis, ignore_failures=False):
                 res_index = res_index.take(successes)
         else:
             try:
-                for k, v in series_gen:
-                    results[k] = func(v)
+                for i, v in enumerate(series_gen):
+                    results[i] = func(v)
+                    keys.append(v.name)
             except Exception, e:
                 try:
                     if hasattr(e, 'args'):
+                        k = res_index[i]
                         e.args = e.args + ('occurred at index %s' % str(k),)
                 except NameError: # pragma: no cover
                     # no k defined yet
                     pass
                 raise
 
-        if len(results) > 0 and _is_sequence(results.values()[0]):
-            if not isinstance(results.values()[0], Series):
+        if len(results) > 0 and _is_sequence(results[0]):
+            if not isinstance(results[0], Series):
                 index = res_columns
             else:
                 index = None
 
-            result = self._constructor(data=results, index=index,
-                                       columns=res_index)
+            result = self._constructor(data=results, index=index)
+            result._set_columns(res_index)
 
             if axis == 1:
                 result = result.T
+            result = result.convert_objects()
 
-            return result.convert_objects()
+            return result
         else:
-            return Series(results, index=res_index)
+            s = Series(results)
+            s.index = res_index
+            return s
 
     def _apply_broadcast(self, func, axis):
         if axis == 0:
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
index e89df3e8ed131..c26a37852ea42 100644
--- a/pandas/sparse/frame.py
+++ b/pandas/sparse/frame.py
@@ -392,7 +392,8 @@ def as_matrix(self, columns=None):
         if len(columns) == 0:
             return np.zeros((len(self.index), 0), dtype=float)
 
-        return np.array([self[col].values for col in columns]).T
+        return np.array([self.icol(i).values
+                         for i in range(len(self.columns))]).T
 
     values = property(as_matrix)
 

From 546920e002b5d773112249a8200e5ae62b9c15b2 Mon Sep 17 00:00:00 2001
From: Chang She <changshe@gmail.com>
Date: Tue, 18 Sep 2012 15:46:55 -0400
Subject: [PATCH 2/3] BUG: apply across non-unique indices, with tests #1878

---
 pandas/core/frame.py               |  7 +++++--
 pandas/sparse/tests/test_sparse.py | 13 +++++++++++++
 pandas/tests/test_frame.py         |  9 +++++++++
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 501b6ead37b71..5a793e271fa06 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3751,7 +3751,9 @@ def _apply_standard(self, func, axis, ignore_failures=False):
         elif axis == 1:
             res_index = self.index
             res_columns = self.columns
-            series_gen = (self.irow(i) for i in range(len(self.index)))
+            series_gen = (Series(self.values[i], index=res_columns,
+                                 name=res_index[i])
+                                 for i in range(len(res_index)))
 
         keys = []
         results = {}
@@ -3789,7 +3791,8 @@ def _apply_standard(self, func, axis, ignore_failures=False):
                 index = None
 
             result = self._constructor(data=results, index=index)
-            result._set_columns(res_index)
+            result.rename(columns=dict(zip(range(len(res_index)), res_index)),
+                                       inplace=True)
 
             if axis == 1:
                 result = result.T
diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py
index fd2eace9ec033..c3df935d79792 100644
--- a/pandas/sparse/tests/test_sparse.py
+++ b/pandas/sparse/tests/test_sparse.py
@@ -1057,6 +1057,19 @@ def test_apply(self):
 
         self.assert_(self.empty.apply(np.sqrt) is self.empty)
 
+    def test_apply_nonuq(self):
+        df_orig = DataFrame([[1,2,3], [4,5,6], [7,8,9]], index=['a','a','c'])
+        df = df_orig.to_sparse()
+        rs = df.apply(lambda s: s[0], axis=1)
+        xp = Series([1., 4., 7.], ['a', 'a', 'c'])
+        assert_series_equal(rs, xp)
+
+        # transpose the dense frame first: df.T on the sparse frame breaks
+        df = df_orig.T.to_sparse()
+        rs = df.apply(lambda s: s[0], axis=0)
+        # sparse does not support non-unique columns yet; check disabled
+        #assert_series_equal(rs, xp)
+
     def test_applymap(self):
         # just test that it works
         result = self.frame.applymap(lambda x: x * 2)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 12fd35ecad02f..9f8179c5cbb3c 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -5134,6 +5134,15 @@ def test_apply(self):
         expected = Series(np.nan, index=self.frame.index)
         assert_series_equal(result, expected)
 
+    def test_apply_standard_nonunique(self):
+        df = DataFrame([[1,2,3], [4,5,6], [7,8,9]], index=['a','a','c'])
+        rs = df.apply(lambda s: s[0], axis=1)
+        xp = Series([1, 4, 7], ['a', 'a', 'c'])
+        assert_series_equal(rs, xp)
+
+        rs = df.T.apply(lambda s: s[0], axis=0)
+        assert_series_equal(rs, xp)
+
     def test_apply_broadcast(self):
         broadcasted = self.frame.apply(np.mean, broadcast=True)
         agged = self.frame.apply(np.mean)

From 2f1a6871dd904b18ca23767872eb8bf8d8cb83dd Mon Sep 17 00:00:00 2001
From: Chang She <changshe@gmail.com>
Date: Tue, 18 Sep 2012 15:54:12 -0400
Subject: [PATCH 3/3] Only call DataFrame.values once in _apply_standard

---
 pandas/core/frame.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5a793e271fa06..88dec0c583b2c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3751,7 +3751,8 @@ def _apply_standard(self, func, axis, ignore_failures=False):
         elif axis == 1:
             res_index = self.index
             res_columns = self.columns
-            series_gen = (Series(self.values[i], index=res_columns,
+            values = self.values
+            series_gen = (Series(values[i], index=res_columns,
                                  name=res_index[i])
                                  for i in range(len(res_index)))