Skip to content

Commit eb20cbc

Browse files
anmyachev and dchigarev authored
FIX-#4438: Fix reindex function that doesn't preserve initial index metadata (#4442)
Signed-off-by: Anatoly Myachev <[email protected]> Co-authored-by: Dmitry Chigarev <[email protected]>
1 parent 4aefe86 commit eb20cbc

File tree

3 files changed

+81
-2
lines changed

3 files changed

+81
-2
lines changed

docs/release_notes/release_notes-0.15.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Key Features and Updates
1616
* FIX-#4407: Align `insert` function with pandas in case of numpy array with several columns (#4408)
1717
* FIX-#4373: Fix invalid file path when trying `read_csv_glob` with `usecols` parameter (#4405)
1818
* FIX-#4394: Fix issue with multiindex metadata desync (#4395)
19+
* FIX-#4438: Fix `reindex` function that doesn't preserve initial index metadata (#4442)
1920
* FIX-#4425: Add parameters to groupby pct_change (#4429)
2021
* FIX-#4457: Fix `loc` in case when need reindex item (#4457)
2122
* FIX-#4414: Add missing f prefix on f-strings found at https://codereview.doctor (#4415)

modin/pandas/base.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
is_dtype_equal,
2727
is_object_dtype,
2828
)
29+
from pandas.core.indexes.api import ensure_index
2930
import pandas.core.window.rolling
3031
import pandas.core.resample
3132
import pandas.core.generic
@@ -2220,6 +2221,34 @@ def rank(
22202221
)
22212222
)
22222223

2224+
def _copy_index_metadata(self, source, destination):  # noqa: PR01, RT01, D200
    """
    Copy Index metadata from `source` to `destination` inplace.

    The ``name`` / ``names`` attributes of `source` are assigned onto
    `destination`. Nothing is copied when the two objects have a different
    number of levels (for example a MultiIndex source and a flat destination):
    pandas rejects a ``names`` assignment whose length does not match the
    number of levels of the target, so copying would raise ``ValueError``.

    Parameters
    ----------
    source : pandas.Index
        Index to read the metadata from.
    destination : pandas.Index
        Index that receives the metadata; modified in place.

    Returns
    -------
    pandas.Index
        The same `destination` object that was passed in.
    """
    # Objects that do not expose `nlevels` are treated as single-level.
    if getattr(source, "nlevels", 1) != getattr(destination, "nlevels", 1):
        return destination
    if hasattr(source, "name") and hasattr(destination, "name"):
        destination.name = source.name
    if hasattr(source, "names") and hasattr(destination, "names"):
        destination.names = source.names
    return destination
2233+
2234+
def _ensure_index(self, index_like, axis=0):  # noqa: PR01, RT01, D200
    """
    Ensure that we have an index from some index-like object.

    When the frame has a MultiIndex along `axis` and `index_like` is a
    non-empty list-like (not already a ``pandas.Index``) whose first element
    is a tuple, a ``pandas.MultiIndex`` is built from the tuples. If that
    fails with ``TypeError``, or in every other case, the conversion is
    delegated to pandas' ``ensure_index``.

    Parameters
    ----------
    index_like : list-like or pandas.Index
        Labels to convert.
    axis : int, default: 0
        Axis of this object whose index kind (multi or flat) is consulted.

    Returns
    -------
    pandas.Index
        Index (possibly a MultiIndex) built from `index_like`.
    """
    if (
        self._query_compiler.has_multiindex(axis=axis)
        and not isinstance(index_like, pandas.Index)
        and is_list_like(index_like)
        and len(index_like) > 0
        # Peek at the first element by iteration rather than `index_like[0]`:
        # subscripting a Series is a label lookup, and some list-likes are
        # not subscriptable at all.
        and isinstance(next(iter(index_like)), tuple)
    ):
        try:
            return pandas.MultiIndex.from_tuples(index_like)
        except TypeError:
            # not all tuples
            pass
    return ensure_index(index_like)
2251+
22232252
def reindex(
22242253
self,
22252254
index=None,
@@ -2243,7 +2272,9 @@ def reindex(
22432272
new_query_compiler = None
22442273
if index is not None:
22452274
if not isinstance(index, pandas.Index):
2246-
index = pandas.Index(index)
2275+
index = self._copy_index_metadata(
2276+
source=self.index, destination=self._ensure_index(index, axis=0)
2277+
)
22472278
if not index.equals(self.index):
22482279
new_query_compiler = self._query_compiler.reindex(
22492280
axis=0, labels=index, **kwargs
@@ -2253,7 +2284,9 @@ def reindex(
22532284
final_query_compiler = None
22542285
if columns is not None:
22552286
if not isinstance(columns, pandas.Index):
2256-
columns = pandas.Index(columns)
2287+
columns = self._copy_index_metadata(
2288+
source=self.columns, destination=self._ensure_index(columns, axis=1)
2289+
)
22572290
if not columns.equals(self.columns):
22582291
final_query_compiler = new_query_compiler.reindex(
22592292
axis=1, labels=columns, **kwargs

modin/pandas/test/dataframe/test_indexing.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,51 @@ def test_reindex():
768768
)
769769

770770

771+
def test_reindex_4438():
    """Check that ``reindex`` with plain-list labels matches pandas (issue #4438)."""
    index = pd.date_range(end="1/1/2018", periods=3, freq="h", name="some meta")
    new_index = [*reversed(index)]

    multi_index = pandas.MultiIndex.from_arrays(
        [("a", "b", "c"), ("a", "b", "c")], names=["first", "second"]
    )
    new_multi_index = [*reversed(multi_index)]

    def column_data():
        # Three rows, one column: labels go on the index.
        return [1, 2, 3]

    def row_data():
        # One row, three columns: labels go on the columns.
        return np.array([[1], [2], [3]]).T

    def assert_same_reindex(make_data, frame_kwargs, *labels, **reindex_kwargs):
        # Build equivalent modin and pandas frames, reindex both the same way
        # and compare the results.
        modin_result = pd.DataFrame(make_data(), **frame_kwargs).reindex(
            *labels, **reindex_kwargs
        )
        pandas_result = pandas.DataFrame(make_data(), **frame_kwargs).reindex(
            *labels, **reindex_kwargs
        )
        df_equals(modin_result, pandas_result)

    # index case
    assert_same_reindex(column_data, {"index": index}, new_index)

    # column case
    assert_same_reindex(row_data, {"columns": index}, columns=new_index)

    # multiindex case
    assert_same_reindex(column_data, {"index": multi_index}, new_multi_index)

    # multicolumn case
    assert_same_reindex(row_data, {"columns": multi_index}, columns=new_multi_index)

    # index + multiindex case
    assert_same_reindex(column_data, {"index": index}, new_multi_index)
814+
815+
771816
def test_reindex_like():
772817
df1 = pd.DataFrame(
773818
[

0 commit comments

Comments
 (0)