Skip to content

Commit 0cd9c8e

Browse files
authored
GH-33473 [Python] Fix KeyError on Pandas roundtrip with RangeIndex in MultiIndex (#39983)
### Rationale for this change Fixes a bug when round-tripping to Pandas with a specific combination of indices. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. I don't know if this counts as a "Critical Fix". Without it, `Table.from_pandas()` can return a table which cannot be converted back with `table.to_pandas()` due to a column missing from the `"pandas"` field in the table metadata. * Closes: #33473 Authored-by: Eirik B. Stavestrand <[email protected]> Signed-off-by: Will Ayd <[email protected]>
1 parent 1044022 commit 0cd9c8e

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

python/pyarrow/pandas_compat.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,14 @@ def construct_metadata(columns_to_convert, df, column_names, index_levels,
221221
# see https://github.com/apache/arrow/pull/44963#discussion_r1875771953
222222
column_field_names = [str(name) for name in column_names]
223223

224-
num_serialized_index_levels = len([descr for descr in index_descriptors
225-
if not isinstance(descr, dict)])
224+
serialized_index_levels = [
225+
(level, descriptor)
226+
for level, descriptor in zip(index_levels, index_descriptors)
227+
if not isinstance(descriptor, dict)
228+
]
229+
230+
num_serialized_index_levels = len(serialized_index_levels)
231+
226232
# Use ntypes instead of Python shorthand notation [:-len(x)] as [:-0]
227233
# behaves differently to what we want.
228234
ntypes = len(types)
@@ -240,13 +246,9 @@ def construct_metadata(columns_to_convert, df, column_names, index_levels,
240246
index_column_metadata = []
241247
if preserve_index is not False:
242248
non_str_index_names = []
243-
for level, arrow_type, descriptor in zip(index_levels, index_types,
244-
index_descriptors):
245-
if isinstance(descriptor, dict):
246-
# The index is represented in a non-serialized fashion,
247-
# e.g. RangeIndex
248-
continue
249-
249+
for (level, descriptor), arrow_type in zip(
250+
serialized_index_levels, index_types
251+
):
250252
if level.name is not None and not isinstance(level.name, str):
251253
non_str_index_names.append(level.name)
252254

python/pyarrow/tests/test_pandas.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,14 @@ def test_multiindex_doesnt_warn(self):
345345
)
346346
_check_pandas_roundtrip(df, preserve_index=True)
347347

348+
def test_multiindex_rangeindex(self):
349+
# https://github.com/apache/arrow/issues/33473
350+
multiindex = pd.MultiIndex.from_arrays(
351+
[pd.RangeIndex(0, 2), pd.Index([1, 2])]
352+
)
353+
df = pd.DataFrame(pd.Series([1, 2], name="a"), index=multiindex)
354+
_check_pandas_roundtrip(df, preserve_index=None)
355+
348356
def test_integer_index_column(self):
349357
df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')])
350358
_check_pandas_roundtrip(df, preserve_index=True)

0 commit comments

Comments
 (0)