diff --git a/RELEASE.rst b/RELEASE.rst
index cc72cb169ad92..eafb72fbaf310 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -70,6 +70,8 @@ pandas 0.9.0
     a class of TypeErrors that was occurring in code where the dtype of a
     column would depend on the presence of data or not (e.g. a SQL query having
     results) (#1783)
+  - Setting parts of DataFrame/Panel using ix now aligns input Series/DataFrame
+    (#1630)
 
 **Bug fixes**
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 4bb11c3992129..e8c03257bf44e 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -10,6 +10,17 @@
 # "null slice"
 _NS = slice(None, None)
 
+def _is_sequence(x):
+    """Return True if ``x`` is iterable and is not a string."""
+    # Explicit check instead of ``assert``: asserts are stripped under
+    # ``python -O``, which would make strings count as sequences.
+    if isinstance(x, basestring):
+        return False
+    try:
+        iter(x)
+        return True
+    except Exception:
+        return False
 
 class IndexingError(Exception):
     pass
@@ -82,7 +93,7 @@ def _convert_tuple(self, key):
         return tuple(keyidx)
 
     def _setitem_with_indexer(self, indexer, value):
-        from pandas.core.frame import DataFrame
+        from pandas.core.frame import DataFrame, Series
 
         # also has the side effect of consolidating in-place
 
@@ -92,6 +103,9 @@ def _setitem_with_indexer(self, indexer, value):
             if not isinstance(indexer, tuple):
                 indexer = self._tuplify(indexer)
 
+            if isinstance(value, Series):
+                value = self._align_series(indexer, value)
+
             het_axis = self.obj._het_axis
             het_idx = indexer[het_axis]
 
@@ -117,13 +131,82 @@ def _setitem_with_indexer(self, indexer, value):
             if isinstance(indexer, tuple):
                 indexer = _maybe_convert_ix(*indexer)
 
+            if isinstance(value, Series):
+                value = self._align_series(indexer, value)
+
             if isinstance(value, DataFrame):
-                value = value.values
-                if not isinstance(self.obj, DataFrame):
-                    value = value.T
+                value = self._align_frame(indexer, value)
 
             self.obj.values[indexer] = value
 
+    def _align_series(self, indexer, ser):
+        """
+        Align ``ser`` with the part of ``self.obj`` selected by ``indexer``.
+
+        For a tuple indexer the first sequence or slice entry picks the
+        target labels; for a scalar indexer ``self.obj.index`` is used.
+        Returns the aligned values; raises ValueError otherwise.
+        """
+        # indexer to assign Series can be tuple or scalar
+        if isinstance(indexer, tuple):
+            for i, idx in enumerate(indexer):
+                ax = self.obj.axes[i]
+                if _is_sequence(idx) or isinstance(idx, slice):
+                    new_ix = ax[idx]
+                    if ser.index.equals(new_ix):
+                        return ser.values.copy()
+                    return ser.reindex(new_ix).values
+
+        elif np.isscalar(indexer):
+            if ser.index.equals(self.obj.index):
+                return ser.values.copy()
+            return ser.reindex(self.obj.index).values
+
+        raise ValueError('Incompatible indexer with Series')
+
+    def _align_frame(self, indexer, df):
+        """
+        Align ``df`` with the part of ``self.obj`` selected by ``indexer``.
+
+        ``df`` is transposed first when ``self.obj`` is not a DataFrame.
+        For a tuple indexer the first two sequence or slice entries give
+        the target index and columns; for a scalar indexer on a
+        non-DataFrame object, axes 1 and 2 of ``self.obj`` are used.
+        Returns the aligned values; raises ValueError otherwise.
+        """
+        from pandas import DataFrame
+        is_frame = isinstance(self.obj, DataFrame)
+        if not is_frame:
+            df = df.T
+        if isinstance(indexer, tuple):
+            idx, cols = None, None
+            for i, ix in enumerate(indexer):
+                ax = self.obj.axes[i]
+                if _is_sequence(ix) or isinstance(ix, slice):
+                    if idx is None:
+                        idx = ax[ix]
+                    elif cols is None:
+                        cols = ax[ix]
+                    else:
+                        break
+
+            if idx is not None and cols is not None:
+                if df.index.equals(idx) and df.columns.equals(cols):
+                    val = df.copy().values
+                else:
+                    val = df.reindex(idx, columns=cols).values
+                return val
+
+        elif np.isscalar(indexer) and not is_frame:
+            idx = self.obj.axes[1]
+            cols = self.obj.axes[2]
+
+            if idx.equals(df.index) and cols.equals(df.columns):
+                return df.copy().values
+            return df.reindex(idx, columns=cols).values
+
+        raise ValueError('Incompatible indexer with DataFrame')
+
     def _getitem_tuple(self, tup):
         try:
             return self._getitem_lowerdim(tup)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index d763c9d561349..4a25a84a488bd 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -686,6 +686,40 @@ def test_setitem_fancy_mixed_2d(self):
 
         assert_frame_equal(df, expected)
 
+    def test_ix_align(self):
+        b = Series(randn(10))
+        b.sort()
+        df_orig = DataFrame(randn(10, 4))
+        df = df_orig.copy()
+
+        df.ix[:, 0] = b
+        assert_series_equal(df.ix[:, 0].reindex(b.index), b)
+
+        dft = df_orig.T
+        dft.ix[0, :] = b
+        assert_series_equal(dft.ix[0, :].reindex(b.index), b)
+
+        df = df_orig.copy()
+        df.ix[:5, 0] = b
+        s = df.ix[:5, 0]
+        assert_series_equal(s, b.reindex(s.index))
+
+        dft = df_orig.T
+        dft.ix[0, :5] = b
+        s = dft.ix[0, :5]
+        assert_series_equal(s, b.reindex(s.index))
+
+        df = df_orig.copy()
+        idx = [0, 1, 3, 5]
+        df.ix[idx, 0] = b
+        s = df.ix[idx, 0]
+        assert_series_equal(s, b.reindex(s.index))
+
+        dft = df_orig.T
+        dft.ix[0, idx] = b
+        s = dft.ix[0, idx]
+        assert_series_equal(s, b.reindex(s.index))
+
     def test_getitem_setitem_non_ix_labels(self):
         df = tm.makeTimeDataFrame()
 
@@ -976,7 +1010,7 @@ def test_setitem_single_column_mixed(self):
 
     def test_setitem_frame(self):
         piece = self.frame.ix[:2, ['A', 'B']]
-        self.frame.ix[-2:, ['A', 'B']] = piece
+        self.frame.ix[-2:, ['A', 'B']] = piece.values
         assert_almost_equal(self.frame.ix[-2:, ['A', 'B']].values,
                             piece.values)
 
@@ -985,6 +1019,14 @@ def test_setitem_frame(self):
         key = (slice(-2, None), ['A', 'B'])
         self.assertRaises(ValueError, f, key, piece)
 
+    def test_setitem_frame_align(self):
+        piece = self.frame.ix[:2, ['A', 'B']]
+        piece.index = self.frame.index[-2:]
+        piece.columns = ['A', 'B']
+        self.frame.ix[-2:, ['A', 'B']] = piece
+        assert_almost_equal(self.frame.ix[-2:, ['A', 'B']].values,
+                            piece.values)
+
     def test_setitem_fancy_exceptions(self):
         pass
 
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 5ffc7c9823a47..2c21c663b330f 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -583,6 +583,44 @@ def test_ix_setitem_slice_dataframe(self):
 
         assert_frame_equal(a.ix[:, 22, [111, 333]], b)
 
+    def test_ix_align(self):
+        from pandas import Series
+        b = Series(np.random.randn(10))
+        b.sort()
+        df_orig = Panel(np.random.randn(3, 10, 2))
+        df = df_orig.copy()
+
+        df.ix[0, :, 0] = b
+        assert_series_equal(df.ix[0, :, 0].reindex(b.index), b)
+
+        df = df_orig.swapaxes(0, 1)
+        df.ix[:, 0, 0] = b
+        assert_series_equal(df.ix[:, 0, 0].reindex(b.index), b)
+
+        df = df_orig.swapaxes(1, 2)
+        df.ix[0, 0, :] = b
+        assert_series_equal(df.ix[0, 0, :].reindex(b.index), b)
+
+    def test_ix_frame_align(self):
+        from pandas import DataFrame
+        df = DataFrame(np.random.randn(2, 10))
+        df.sort_index(inplace=True)
+        p_orig = Panel(np.random.randn(3, 10, 2))
+
+        p = p_orig.copy()
+        p.ix[0, :, :] = df
+        out = p.ix[0, :, :].T.reindex(df.index, columns=df.columns)
+        assert_frame_equal(out, df)
+
+        p = p_orig.copy()
+        p.ix[0] = df
+        out = p.ix[0].T.reindex(df.index, columns=df.columns)
+        assert_frame_equal(out, df)
+
+        p = p_orig.copy()
+        p.ix[0, [0, 1, 3, 5], -2:] = df
+        out = p.ix[0, [0, 1, 3, 5], -2:]
+        assert_frame_equal(out, df.T.reindex([0, 1, 3, 5], p.minor_axis[-2:]))
 
     def _check_view(self, indexer, comp):
         cp = self.panel.copy()