Skip to content

Commit dcac1a3

Browse files
committed
Redo the fix for DataFrame.combine so that it works with duplicate column names.
1 parent 5e4a066 commit dcac1a3

File tree

1 file changed

+20
-5
lines changed

1 file changed

+20
-5
lines changed

pandas/core/frame.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9037,13 +9037,26 @@ def combine(
90379037
if self.empty and len(other) == other_idxlen:
90389038
return other.copy()
90399039

9040+
def rename_duplicates(columns: Index) -> Index:
    """
    Return column labels with repeated labels made unique.

    The first occurrence of a label is kept as-is; every later occurrence
    gets a ``.N`` suffix (``"a", "a", "a"`` -> ``"a", "a.1", "a.2"``).
    If a suffixed label is already in use, ``N`` is bumped until the
    label is free.

    Parameters
    ----------
    columns : Index
        Column labels, possibly containing duplicates.

    Returns
    -------
    Index
        ``columns`` itself when it is already unique, otherwise a new
        Index of the same length and name with unique labels. Renamed
        labels are strings, whatever the type of the original label.
    """
    # Nothing to rename: hand back the original object so its exact type
    # and dtype are preserved.
    if columns.is_unique:
        return columns

    # Highest suffix number handed out so far, keyed by the *original* label.
    # Keying on the renamed label would restart the count at every duplicate.
    suffix_counts = {}
    taken = set()
    unique_labels = []
    for col in columns:
        label = col
        count = suffix_counts.get(col, 0)
        while label in taken:
            count += 1
            # Formatting instead of ``col + "..."`` so that non-string labels
            # do not raise TypeError.
            label = f"{col}.{count}"
        suffix_counts[col] = count
        taken.add(label)
        unique_labels.append(label)

    # Rebinding the loop variable does not modify ``columns``; the renamed
    # labels have to be returned as a new Index.
    return Index(unique_labels, name=columns.name)
9047+
9048+
new_columns_out = self.columns.union(other_columns, sort=False)
9049+
self_columns, other_columns = (
9050+
rename_duplicates(self.columns),
9051+
rename_duplicates(other_columns),
9052+
)
90409053
# preserve column order
9041-
new_columns = self.columns.union(other_columns, sort=False)
9054+
new_columns_unique = self_columns.union(other_columns, sort=False)
90429055
do_fill = fill_value is not None
90439056
result = {}
9044-
for i, col in enumerate(new_columns):
9045-
series = this.iloc[:, i]
9046-
other_series = other.iloc[:, i]
9057+
for col in new_columns_unique:
9058+
series = this[col]
9059+
other_series = other[col]
90479060

90489061
this_dtype = series.dtype
90499062
other_dtype = other_series.dtype
@@ -9091,7 +9104,9 @@ def combine(
90919104
result[col] = arr
90929105

90939106
# convert_objects just in case
9094-
frame_result = self._constructor(result, index=new_index, columns=new_columns)
9107+
frame_result = self._constructor(
9108+
result, index=new_index, columns=new_columns_out
9109+
)
90959110
return frame_result.__finalize__(self, method="combine")
90969111

90979112
def combine_first(self, other: DataFrame) -> DataFrame:

0 commit comments

Comments
 (0)