diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index af377dd7a32f2..bdb156e12fa16 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -626,6 +626,7 @@ potentially differently-indexed :class:`DataFrame` into a single result :class:`DataFrame`. .. ipython:: python + :okwarning: left = pd.DataFrame( {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]}, index=["K0", "K1", "K2"] @@ -640,12 +641,14 @@ potentially differently-indexed :class:`DataFrame` into a single result .. ipython:: python :suppress: + :okwarning: @savefig merging_join.png p.plot([left, right], result, labels=["left", "right"], vertical=False); plt.close("all"); .. ipython:: python + :okwarning: result = left.join(right, how="outer") result @@ -658,6 +661,7 @@ potentially differently-indexed :class:`DataFrame` into a single result plt.close("all"); .. ipython:: python + :okwarning: result = left.join(right, how="inner") result @@ -674,6 +678,7 @@ or multiple column names that the passed :class:`DataFrame` is to be aligned. .. ipython:: python + :okwarning: left = pd.DataFrame( { @@ -714,6 +719,7 @@ aligned. To join on multiple keys, the passed :class:`DataFrame` must have a :class:`MultiIndex`: .. ipython:: python + :okwarning: left = pd.DataFrame( { @@ -747,6 +753,7 @@ which uses only the keys found in the calling :class:`DataFrame`. Other join types can be specified with ``how``. .. ipython:: python + :okwarning: result = left.join(right, on=["key1", "key2"], how="inner") result @@ -767,6 +774,7 @@ You can join a :class:`DataFrame` with a :class:`Index` to a :class:`DataFrame` The ``name`` of the :class:`Index` will match the level name of the :class:`MultiIndex`. .. ipython:: python + :okwarning: left = pd.DataFrame( {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]}, @@ -802,6 +810,7 @@ The :class:`MultiIndex` of the input argument must be completely used in the join and is a subset of the indices in the left argument. .. 
ipython:: python + :okwarning: leftindex = pd.MultiIndex.from_product( [list("abc"), list("xy"), [1, 2]], names=["abc", "xy", "num"] @@ -911,6 +920,7 @@ A list or tuple of :class:`DataFrame` can also be passed to :meth:`~DataFrame.jo to join them together on their indexes. .. ipython:: python + :okwarning: right2 = pd.DataFrame({"v": [7, 8, 9]}, index=["K1", "K1", "K2"]) result = left.join([right, right2]) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 0f40f5bfa5fc9..6445d19c780af 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -251,6 +251,7 @@ See the :ref:`Merge, join, and concatenate ` documentation section. .. ipython:: python + :okwarning: index_left = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'), ('K1', 'X2')], diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d519400834ee1..f62c2f9527ec6 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -623,6 +623,7 @@ Other Deprecations - Deprecated strings ``w``, ``d``, ``MIN``, ``MS``, ``US`` and ``NS`` denoting units in :class:`Timedelta` in favour of ``W``, ``D``, ``min``, ``ms``, ``us`` and ``ns`` (:issue:`59051`) - Deprecated the ``arg`` parameter of ``Series.map``; pass the added ``func`` argument instead. (:issue:`61260`) - Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`) +- Deprecated the empty-string (``""``) default value of the ``lsuffix`` and ``rsuffix`` parameters of :meth:`DataFrame.join`; the default will change to ``None`` in a future version. (:issue:`61294`) .. --------------------------------------------------------------------------- .. 
_whatsnew_300.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e86f7069f9137..09a09aed32464 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10057,7 +10057,15 @@ def explode( if not all(counts0 == self[c].apply(mylen)): raise ValueError("columns must have matching element counts") result = DataFrame({c: df[c].explode() for c in columns}) - result = df.drop(columns, axis=1).join(result) + with warnings.catch_warnings(): + # The default behavior of empty string suffixes ('') in join operations + # will change in a future version. Currently, integer columns with empty + # suffixes are treated differently from string columns, leading to + # inconsistent behavior. In the future, None will be the default and + # all column types will be handled consistently. We catch and ignore + # this warning for now since the current behavior is still supported. + warnings.filterwarnings("ignore", category=DeprecationWarning) + result = df.drop(columns, axis=1).join(result) if ignore_index: result.index = default_index(len(result)) else: @@ -10940,8 +10948,8 @@ def join( other: DataFrame | Series | Iterable[DataFrame | Series], on: IndexLabel | None = None, how: MergeHow = "left", - lsuffix: str = "", - rsuffix: str = "", + lsuffix: str | lib.NoDefault = lib.no_default, + rsuffix: str | lib.NoDefault = lib.no_default, sort: bool = False, validate: JoinValidate | None = None, ) -> DataFrame: @@ -10983,8 +10991,24 @@ def join( index. lsuffix : str, default '' Suffix to use from left frame's overlapping columns. + + .. note:: + The default value of empty string ("") is deprecated and will be + changed to None in a future version. When suffixes are specified, + any non-string columns will be converted to strings before applying + the suffix. + + .. deprecated:: 3.0.0 rsuffix : str, default '' Suffix to use from right frame's overlapping columns. + + .. 
note:: + The default value of empty string ("") is deprecated and will be + changed to None in a future version. When suffixes are specified, + any non-string columns will be converted to strings before applying + the suffix. + + .. deprecated:: 3.0.0 sort : bool, default False Order result DataFrame lexicographically by the join key. If False, the order of the join key depends on the join type (how keyword). @@ -11108,6 +11132,17 @@ def join( from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import merge + if lsuffix is lib.no_default or rsuffix is lib.no_default: + warnings.warn( + "The default value of empty string ('') for suffix " + "parameters is deprecated and will be changed to None " + "in a future version.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + lsuffix = "" if lsuffix is lib.no_default else lsuffix + rsuffix = "" if rsuffix is lib.no_default else rsuffix + if isinstance(other, Series): if other.name is None: raise ValueError("Other Series must have a name") diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 20b4cd2185bb4..57140886aba9c 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -2,6 +2,7 @@ import re from typing import TYPE_CHECKING +import warnings import numpy as np @@ -666,6 +667,14 @@ def melt_stub(df, stub: str, i, j, value_vars, sep: str): new = df[id_vars] if len(i) == 1: - return new.set_index(i).join(melted) + with warnings.catch_warnings(): + # The default behavior of empty string suffixes ('') in join operations + # will change in a future version. Currently, integer columns with empty + # suffixes are treated differently from string columns, leading to + # inconsistent behavior. In the future, None will be the default and + # all column types will be handled consistently. We catch and ignore + # this warning for now since the current behavior is still supported. 
+ warnings.filterwarnings("ignore", category=DeprecationWarning) + return new.set_index(i).join(melted) else: return new.merge(melted.reset_index(), on=i).set_index(i + [j]) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 8c26744f171c3..deb3150663ff7 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -453,11 +453,13 @@ class MergeError(ValueError): ... {"a": ["a", "b", "c", "d"], "c": ["meow", "bark", "chirp", "nay"]}, ... index=range(4), ... ).set_index("a") - >>> left.join( - ... right, - ... on="a", - ... validate="one_to_one", - ... ) + >>> with warnings.catch_warnings(): + ... warnings.simplefilter("ignore", DeprecationWarning) + ... left.join( + ... right, + ... on="a", + ... validate="one_to_one", + ... ) Traceback (most recent call last): MergeError: Merge keys are not unique in left dataset; not a one-to-one merge """ diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py index d23263835c615..e56c017ecfead 100644 --- a/pandas/tests/copy_view/test_functions.py +++ b/pandas/tests/copy_view/test_functions.py @@ -243,6 +243,10 @@ def test_merge_copy_keyword(): assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) +@pytest.mark.filterwarnings( + r"ignore:The default value of empty string \(''\) for suffix parameters " + "is deprecated:DeprecationWarning" +) @pytest.mark.parametrize("dtype", [object, "str"]) def test_join_on_key(dtype): df_index = Index(["a", "b", "c"], name="key", dtype=dtype) @@ -271,6 +275,10 @@ def test_join_on_key(dtype): tm.assert_frame_equal(df2, df2_orig) +@pytest.mark.filterwarnings( + r"ignore:The default value of empty string \(''\) for suffix parameters " + "is deprecated:DeprecationWarning" +) def test_join_multiple_dataframes_on_key(): df_index = Index(["a", "b", "c"], name="key", dtype=object) diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index aaa9485cab580..f1e9950380086 100644 
--- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -5,6 +5,7 @@ import numpy as np import pytest +from pandas._libs import lib from pandas.errors import MergeError import pandas as pd @@ -18,6 +19,11 @@ import pandas._testing as tm from pandas.core.reshape.concat import concat +pytestmark = pytest.mark.filterwarnings( + r"ignore:The default value of empty string \(''\) for suffix " + "parameters is deprecated:DeprecationWarning" +) + @pytest.fixture def left_no_dup(): @@ -414,7 +420,45 @@ def test_suppress_future_warning_with_sort_kw(sort): expected = expected.reindex(index=["c", "a", "b"]) with tm.assert_produces_warning(None): - result = a.join([b, c], how="outer", sort=sort_kw) + result = a.join([b, c], how="outer", sort=sort_kw, lsuffix="", rsuffix="") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "lsuffix,rsuffix,warning,is_rsuffix_test", + [ + (lib.no_default, "_test", DeprecationWarning, True), + ("", "_test", None, True), + ("_test", lib.no_default, DeprecationWarning, False), + ("_test", "", None, False), + ], +) +def test_join_suffix_deprecation_combined(lsuffix, rsuffix, warning, is_rsuffix_test): + df1 = DataFrame({0: [1, 2, 3]}) + df2 = DataFrame(index=[1, 2, 3], data={0: [4, 5, 6]}) + msg = ( + "The default value of empty string ('') for suffix parameters is deprecated " + "and will be changed to None in a future version." 
+ ) + + with tm.assert_produces_warning(warning, match=re.escape(msg) if warning else None): + kwargs = {} + if lsuffix is not lib.no_default: + kwargs["lsuffix"] = lsuffix + if rsuffix is not lib.no_default: + kwargs["rsuffix"] = rsuffix + result = df1.join(df2, on=0, **kwargs) + + if is_rsuffix_test: + expected = DataFrame( + index=[0, 1, 2], + data={"key_0": [1, 2, 3], "0": [1, 2, 3], "0_test": [4, 5, 6]}, + ) + else: + expected = DataFrame( + index=[0, 1, 2], + data={"key_0": [1, 2, 3], "0_test": [1, 2, 3], "0": [4, 5, 6]}, + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 2be6bba475af7..6f2640663a6d7 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -208,6 +208,10 @@ def test_join_midx_string(): tm.assert_index_equal(result, expected) +@pytest.mark.filterwarnings( + r"ignore:The default value of empty string \(''\) for suffix parameters " + "is deprecated:DeprecationWarning" +) def test_join_multi_with_nan(): # GH29252 df1 = DataFrame( diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 65bfea0b9beea..8ae7e96c5d163 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -20,6 +20,11 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + r"ignore:The default value of empty string \(''\) for suffix parameters " + "is deprecated:DeprecationWarning" +) + def get_test_data(ngroups=8, n=50): unique_groups = list(range(ngroups)) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index a8e29ef03acc2..685a82a42cbe8 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -35,6 +35,11 @@ merge, ) +pytestmark = pytest.mark.filterwarnings( + r"ignore:The default value of empty string \(''\) for suffix parameters " + "is 
deprecated:DeprecationWarning" +) + def get_test_data(ngroups=8, n=50): unique_groups = list(range(ngroups)) diff --git a/pandas/tests/reshape/merge/test_merge_cross.py b/pandas/tests/reshape/merge/test_merge_cross.py index 6ab80cf0e0823..c73e3597a5c85 100644 --- a/pandas/tests/reshape/merge/test_merge_cross.py +++ b/pandas/tests/reshape/merge/test_merge_cross.py @@ -88,6 +88,10 @@ def test_merge_cross_null_values(nulls_fixture): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings( + r"ignore:The default value of empty string \(''\) for suffix parameters " + "is deprecated:DeprecationWarning" +) def test_join_cross_error_reporting(): # GH#5401 left = DataFrame({"a": [1, 3]}) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 7ae2fffa04205..3b80229a78750 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -17,6 +17,11 @@ from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import merge +pytestmark = pytest.mark.filterwarnings( + r"ignore:The default value of empty string \(''\) for suffix parameters " + "is deprecated:DeprecationWarning" +) + @pytest.fixture def left(): diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py index 9c08f47c0d678..1da74831a0fe5 100644 --- a/pandas/tests/series/methods/test_explode.py +++ b/pandas/tests/series/methods/test_explode.py @@ -82,6 +82,10 @@ def test_non_object_dtype(data): tm.assert_series_equal(result, ser) +@pytest.mark.filterwarnings( + r"ignore:The default value of empty string \(''\) for suffix parameters " + "is deprecated:DeprecationWarning" +) def test_typical_usecase(): df = pd.DataFrame( [{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}],