Skip to content

Commit 6333c3b

Browse files
committed
ENH: Add sort_columns parameter to combine_first
1 parent 1d809c3 commit 6333c3b

File tree

2 files changed

+34
-1
lines changed

2 files changed

+34
-1
lines changed

pandas/core/frame.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8712,7 +8712,7 @@ def combine(
87128712
frame_result = self._constructor(result, index=new_index, columns=new_columns)
87138713
return frame_result.__finalize__(self, method="combine")
87148714

8715-
def combine_first(self, other: DataFrame) -> DataFrame:
8715+
def combine_first(self, other: DataFrame, sort_columns=True) -> DataFrame:
87168716
"""
87178717
Update null elements with value in the same location in `other`.
87188718
@@ -8728,6 +8728,10 @@ def combine_first(self, other: DataFrame) -> DataFrame:
87288728
----------
87298729
other : DataFrame
87308730
Provided DataFrame to use to fill null values.
8731+
sort_columns : bool, default True
8732+
Whether to sort the columns in the result DataFrame. If False, the
8733+
order of the columns in `self` is preserved and only the columns of
`self` are returned (columns found only in `other` are dropped).
8734+
87318735
87328736
Returns
87338737
-------
@@ -8741,13 +8745,25 @@ def combine_first(self, other: DataFrame) -> DataFrame:
87418745
87428746
Examples
87438747
--------
8748+
Default behavior (`sort_columns=True`):
8749+
87448750
>>> df1 = pd.DataFrame({"A": [None, 0], "B": [None, 4]})
87458751
>>> df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]})
87468752
>>> df1.combine_first(df2)
87478753
A B
87488754
0 1.0 3.0
87498755
1 0.0 4.0
87508756
8757+
8758+
Preserving the column order of `self` with `sort_columns=False`:
8759+
8760+
>>> df1 = pd.DataFrame({"B": [None, 4], "A": [0, None]})
8761+
>>> df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]})
8762+
>>> df1.combine_first(df2, sort_columns=False)
8763+
B A
8764+
0 3.0 0.0
8765+
1 4.0 1.0
8766+
87518767
Null values still persist if the location of that null value
87528768
does not exist in `other`
87538769
@@ -8773,6 +8789,8 @@ def combiner(x: Series, y: Series):
87738789
return y_values
87748790

87758791
return expressions.where(mask, y_values, x_values)
8792+
8793+
all_columns = self.columns.union(other.columns)
87768794

87778795
if len(other) == 0:
87788796
combined = self.reindex(
@@ -8790,6 +8808,13 @@ def combiner(x: Series, y: Series):
87908808

87918809
if dtypes:
87928810
combined = combined.astype(dtypes)
8811+
8812+
combined = combined.reindex(columns=all_columns, fill_value=None)
8813+
8814+
if not sort_columns:
8815+
combined = combined[self.columns]
8816+
8817+
87938818

87948819
return combined.__finalize__(self, method="combine_first")
87958820

pandas/tests/frame/methods/test_combine_first.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,3 +560,11 @@ def test_combine_first_empty_columns():
560560
result = left.combine_first(right)
561561
expected = DataFrame(columns=["a", "b", "c"])
562562
tm.assert_frame_equal(result, expected)
563+
564+
def test_combine_first_column_order():
    # With sort_columns=False the result must keep the column order of the
    # calling frame ("B" before "A") rather than the sorted order.
    # NOTE(review): df2 only carries column "A", which df1 already has, so
    # columns that exist only in `other` are not exercised by this test.
    df1 = DataFrame({"B": [1, 2], "A": [3, 4]})
    df2 = DataFrame({"A": [5]}, index=[1])

    result = df1.combine_first(df2, sort_columns=False)
    expected = DataFrame({"B": [1, 2], "A": [3, 4]})
    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)