Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ Bug Fixes
**Other**

- Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`)
-
- Bug in :func:`merge` when merging an empty ``DataFrame`` with an ``Int64`` column or a non-empty ``DataFrame`` with an ``Int64`` column that is all ``NaN`` (:issue:`25183`)
-

.. _whatsnew_0.242.contributors:
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
pass
elif getattr(self.block, 'is_sparse', False):
pass
elif getattr(self.block, 'is_extension', False):
pass
else:
missing_arr = np.empty(self.shape, dtype=empty_dtype)
missing_arr.fill(fill_value)
Expand Down
63 changes: 63 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,42 @@ def get_test_data(ngroups=NGROUPS, n=N):
return arr


def get_series():
    """Build the single-row sample Series used to parametrize merge tests.

    Returns
    -------
    list of Series
        One-element Series, in order: ``int64``, nullable ``Int64``, float,
        string, bool, naive timestamp and ``US/Eastern`` timestamp.
    """
    day = '2018-01-01'
    # Integer samples pin their dtype explicitly ...
    explicit = [pd.Series([1], dtype=name) for name in ('int64', 'Int64')]
    # ... while the remaining samples rely on dtype inference.
    inferred = [
        pd.Series([value])
        for value in (
            1.23,
            'foo',
            True,
            pd.Timestamp(day),
            pd.Timestamp(day, tz='US/Eastern'),
        )
    ]
    return explicit + inferred


def get_series_nan():
    """Build single-row, all-missing sample Series for the merge tests.

    Returns
    -------
    list of Series
        One-element Series holding only a missing value, in order:
        nullable ``Int64``, float, object and ``NaT``.
    """
    all_missing = [
        pd.Series([np.nan], dtype=name)
        for name in ('Int64', 'float', 'object')
    ]
    # NaT carries its own dtype, so no explicit dtype is passed here.
    all_missing.append(pd.Series([pd.NaT]))
    return all_missing


@pytest.fixture(params=get_series(), ids=lambda x: x.dtype.name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a one-line comment to each of these, and make the names a bit more verbose? It's easy to see this in the diff, but when looking at the raw column these names are hard to grok.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you talking about column names a and b or fixture names value_col and value_col2?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the fixture names

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

def value_col(request):
    """Single-row Series of one dtype; parametrized over ``get_series()``."""
    series = request.param
    return series


@pytest.fixture(params=get_series(), ids=lambda series: series.dtype.name)
def value_col2(request):
    """Second single-row Series; parametrized over ``get_series()``."""
    series = request.param
    return series


@pytest.fixture(params=get_series_nan(),
                ids=lambda series: series.dtype.name)
def value_col_nan(request):
    """Single-row all-missing Series; parametrized over ``get_series_nan()``."""
    series = request.param
    return series


class TestMerge(object):

def setup_method(self, method):
Expand Down Expand Up @@ -428,6 +464,33 @@ def check2(exp, kwarg):
check1(exp_in, kwarg)
check2(exp_out, kwarg)

def test_merge_empty_frame(self, value_col, value_col2):
# Merging a zero-row frame with its non-empty source on 'a' must yield an
# empty frame whose columns keep the dtypes of the source columns.
# GH 25183
df = pd.DataFrame({'a': value_col, 'b': value_col2},
columns=['a', 'b'])
# Zero-row slice of the same frame: same columns, no rows.
df_empty = df[:0]
# Expected: empty columns in the order b_x, a, b_y, typed from df.dtypes.
exp = pd.DataFrame({
'b_x': pd.Series(dtype=df.dtypes['b']),
'a': pd.Series(dtype=df.dtypes['a']),
'b_y': pd.Series(dtype=df.dtypes['b']),
}, columns=['b_x', 'a', 'b_y'])
act = df_empty.merge(df, on='a')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you use result and expected here rather than abbrevs

assert_frame_equal(act, exp)

def test_merge_all_na_column(self, value_col, value_col_nan):
# Merging two identical frames on 'a', where column 'b' holds only a
# missing value, must return 'b' unchanged as both b_x and b_y.
# GH 25183
df_left = pd.DataFrame(
{'a': value_col, 'b': value_col_nan}, columns=['a', 'b'])
df_right = pd.DataFrame(
{'a': value_col, 'b': value_col_nan}, columns=['a', 'b'])
# Expected: the key column followed by the suffixed all-missing columns.
exp = pd.DataFrame({
'a': value_col,
'b_x': value_col_nan,
'b_y': value_col_nan,
}, columns=['a', 'b_x', 'b_y'])
act = df_left.merge(df_right, on='a')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

assert_frame_equal(act, exp)

def test_merge_nosort(self):
# #2098, anything to do?

Expand Down