diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bcfe645d5f14c..88dec0c583b2c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3745,21 +3745,25 @@ def _apply_standard(self, func, axis, ignore_failures=False):
                 pass
 
         if axis == 0:
-            series_gen = ((c, self[c]) for c in self.columns)
+            series_gen = (self.icol(i) for i in range(len(self.columns)))
             res_index = self.columns
             res_columns = self.index
         elif axis == 1:
             res_index = self.index
             res_columns = self.columns
-            series_gen = ((i, Series(v, self.columns, name=i))
-                          for i, v in izip(self.index, self.values))
+            values = self.values
+            series_gen = (Series(values[i], index=res_columns,
+                                 name=res_index[i])
+                          for i in range(len(res_index)))
 
+        keys = []
         results = {}
         if ignore_failures:
             successes = []
-            for i, (k, v) in enumerate(series_gen):
+            for i, v in enumerate(series_gen):
                 try:
-                    results[k] = func(v)
+                    results[i] = func(v)
+                    keys.append(v.name)
                     successes.append(i)
                 except Exception:
                     pass
@@ -3768,32 +3772,38 @@ def _apply_standard(self, func, axis, ignore_failures=False):
                 res_index = res_index.take(successes)
         else:
             try:
-                for k, v in series_gen:
-                    results[k] = func(v)
+                for i, v in enumerate(series_gen):
+                    results[i] = func(v)
+                    keys.append(v.name)
             except Exception, e:
                 try:
                     if hasattr(e, 'args'):
+                        k = res_index[i]
                         e.args = e.args + ('occurred at index %s' % str(k),)
                 except NameError:  # pragma: no cover
                     # no k defined yet
                     pass
                 raise
 
-        if len(results) > 0 and _is_sequence(results.values()[0]):
-            if not isinstance(results.values()[0], Series):
+        if len(results) > 0 and _is_sequence(results[0]):
+            if not isinstance(results[0], Series):
                 index = res_columns
             else:
                 index = None
 
-            result = self._constructor(data=results, index=index,
-                                       columns=res_index)
+            result = self._constructor(data=results, index=index)
+            result.rename(columns=dict(zip(range(len(res_index)), res_index)),
+                          inplace=True)
 
             if axis == 1:
                 result = result.T
+            result = result.convert_objects()
 
-            return result.convert_objects()
+            return result
         else:
-            return Series(results, index=res_index)
+            s = Series(results)
+            s.index = res_index
+            return s
 
     def _apply_broadcast(self, func, axis):
         if axis == 0:
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
index e89df3e8ed131..c26a37852ea42 100644
--- a/pandas/sparse/frame.py
+++ b/pandas/sparse/frame.py
@@ -392,7 +392,8 @@ def as_matrix(self, columns=None):
         if len(columns) == 0:
             return np.zeros((len(self.index), 0), dtype=float)
 
-        return np.array([self[col].values for col in columns]).T
+        return np.array([self.icol(i).values
+                         for i in range(len(self.columns))]).T
 
     values = property(as_matrix)
 
diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py
index fd2eace9ec033..c3df935d79792 100644
--- a/pandas/sparse/tests/test_sparse.py
+++ b/pandas/sparse/tests/test_sparse.py
@@ -1057,6 +1057,19 @@ def test_apply(self):
 
         self.assert_(self.empty.apply(np.sqrt) is self.empty)
 
+    def test_apply_nonuq(self):
+        df_orig = DataFrame([[1,2,3], [4,5,6], [7,8,9]], index=['a','a','c'])
+        df = df_orig.to_sparse()
+        rs = df.apply(lambda s: s[0], axis=1)
+        xp = Series([1., 4., 7.], ['a', 'a', 'c'])
+        assert_series_equal(rs, xp)
+
+        #df.T breaks
+        df = df_orig.T.to_sparse()
+        rs = df.apply(lambda s: s[0], axis=0)
+        #no non-unique columns supported in sparse yet
+        #assert_series_equal(rs, xp)
+
     def test_applymap(self):
         # just test that it works
         result = self.frame.applymap(lambda x: x * 2)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 12fd35ecad02f..9f8179c5cbb3c 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -5134,6 +5134,15 @@ def test_apply(self):
         expected = Series(np.nan, index=self.frame.index)
         assert_series_equal(result, expected)
 
+    def test_apply_standard_nonunique(self):
+        df = DataFrame([[1,2,3], [4,5,6], [7,8,9]], index=['a','a','c'])
+        rs = df.apply(lambda s: s[0], axis=1)
+        xp = Series([1, 4, 7], ['a', 'a', 'c'])
+        assert_series_equal(rs, xp)
+
+        rs = df.T.apply(lambda s: s[0], axis=0)
+        assert_series_equal(rs, xp)
+
     def test_apply_broadcast(self):
         broadcasted = self.frame.apply(np.mean, broadcast=True)
         agged = self.frame.apply(np.mean)