GH-44188: [Python] Fix pandas roundtrip with bytes column names (#44171)

Piong1997 · pitrou · web-flow · commit e2ac52d661d4 · 2025-02-27T11:44:32.000+01:00
### Rationale for this change When the column dtype is `np.bytes_`, the level is mapped with the encoder, but the code then still falls through to the final branch and runs `level = level.astype(dtype)`. ### Are these changes tested? Yes * GitHub Issue: #44188 Lead-authored-by: Piong1997 <32859450+Piong1997@users.noreply.github.com> Co-authored-by: Antoine Pitrou <antoine@python.org> Signed-off-by: Antoine Pitrou <antoine@python.org>
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
@@ -1163,7 +1163,7 @@ def _reconstruct_columns_from_metadata(columns, column_indexes):
         if dtype == np.bytes_:
             level = level.map(encoder)
         # ARROW-13756: if index is timezone aware DataTimeIndex
-        if pandas_dtype == "datetimetz":
+        elif pandas_dtype == "datetimetz":
             tz = pa.lib.string_to_tzinfo(
                 column_indexes[0]['metadata']['timezone'])
             level = pd.to_datetime(level, utc=True).tz_convert(tz)
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
@@ -5255,6 +5255,13 @@ def roundtrip(df, schema=None):
               schema=schema)
 
 
+def test_bytes_column_name_to_pandas():
+    df = pd.DataFrame([[0.1, 0.2], [0.3, 0.4]], columns=[b'col1', b'col2'])
+    table = pa.Table.from_pandas(df)
+    assert table.column_names == ['col1', 'col2']
+    assert table.to_pandas().equals(df)
+
+
 @pytest.mark.processes
 def test_is_data_frame_race_condition():
     # See https://github.com/apache/arrow/issues/39313