pandas-dev
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
Lines changed: 2 additions & 59 deletions
diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
Lines changed: 3 additions & 1 deletion
diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
Lines changed: 3 additions & 3 deletions
diff --git a/pandas/conftest.py b/pandas/conftest.py
Lines changed: 3 additions & 0 deletions
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
Lines changed: 1 addition & 6 deletions
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
Lines changed: 5 additions & 5 deletions
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
Lines changed: 4 additions & 3 deletions
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
Lines changed: 30 additions & 29 deletions
@@ -65,84 +65,38 @@ fi
 ### DOCSTRINGS ###
 if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 
-    MSG='Validate docstrings (EX01, EX02, EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
-    $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX02,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
+    MSG='Validate docstrings (EX01, EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
+    $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Partially validate docstrings (EX03)' ;  echo $MSG
     $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX03 --ignore_functions \
-        pandas.Series.dt.day_name \
-        pandas.Series.str.len \
-        pandas.Series.cat.set_categories \
-        pandas.Series.plot.bar \
-        pandas.Series.plot.hist \
         pandas.Series.plot.line \
         pandas.Series.to_sql \
-        pandas.Series.to_latex \
-        pandas.errors.CategoricalConversionWarning \
-        pandas.errors.ChainedAssignmentError \
-        pandas.errors.ClosedFileError \
         pandas.errors.DatabaseError \
         pandas.errors.IndexingError \
         pandas.errors.InvalidColumnName \
-        pandas.errors.NumExprClobberingError \
-        pandas.errors.PossibleDataLossError \
-        pandas.errors.PossiblePrecisionLoss \
-        pandas.errors.SettingWithCopyError \
         pandas.errors.SettingWithCopyWarning \
         pandas.errors.SpecificationError \
         pandas.errors.UndefinedVariableError \
-        pandas.errors.ValueLabelTypeMismatch \
         pandas.Timestamp.ceil \
         pandas.Timestamp.floor \
         pandas.Timestamp.round \
-        pandas.read_pickle \
-        pandas.ExcelWriter \
         pandas.read_json \
         pandas.io.json.build_table_schema \
-        pandas.DataFrame.to_latex \
         pandas.io.formats.style.Styler.to_latex \
         pandas.read_parquet \
         pandas.DataFrame.to_sql \
         pandas.read_stata \
-        pandas.core.resample.Resampler.pipe \
-        pandas.core.resample.Resampler.fillna \
-        pandas.core.resample.Resampler.interpolate \
         pandas.plotting.scatter_matrix \
-        pandas.pivot \
-        pandas.merge_asof \
-        pandas.wide_to_long \
-        pandas.Index.rename \
         pandas.Index.droplevel \
-        pandas.Index.isin \
-        pandas.CategoricalIndex.set_categories \
         pandas.MultiIndex.names \
         pandas.MultiIndex.droplevel \
-        pandas.IndexSlice \
-        pandas.DatetimeIndex.month_name \
-        pandas.DatetimeIndex.day_name \
-        pandas.core.window.rolling.Rolling.corr \
         pandas.Grouper \
-        pandas.core.groupby.SeriesGroupBy.apply \
-        pandas.core.groupby.DataFrameGroupBy.apply \
-        pandas.core.groupby.SeriesGroupBy.transform \
-        pandas.core.groupby.SeriesGroupBy.pipe \
-        pandas.core.groupby.DataFrameGroupBy.pipe \
-        pandas.core.groupby.DataFrameGroupBy.describe \
-        pandas.core.groupby.DataFrameGroupBy.idxmax \
-        pandas.core.groupby.DataFrameGroupBy.idxmin \
-        pandas.core.groupby.DataFrameGroupBy.value_counts \
-        pandas.core.groupby.SeriesGroupBy.describe \
-        pandas.core.groupby.DataFrameGroupBy.boxplot \
-        pandas.core.groupby.DataFrameGroupBy.hist \
         pandas.io.formats.style.Styler.map \
         pandas.io.formats.style.Styler.apply_index \
         pandas.io.formats.style.Styler.map_index \
         pandas.io.formats.style.Styler.format \
-        pandas.io.formats.style.Styler.format_index \
-        pandas.io.formats.style.Styler.relabel_index \
-        pandas.io.formats.style.Styler.hide \
-        pandas.io.formats.style.Styler.set_td_classes \
         pandas.io.formats.style.Styler.set_tooltips \
         pandas.io.formats.style.Styler.set_uuid \
         pandas.io.formats.style.Styler.pipe \
@@ -152,20 +106,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.io.formats.style.Styler.text_gradient \
         pandas.DataFrame.values \
         pandas.DataFrame.groupby \
-        pandas.DataFrame.skew \
-        pandas.DataFrame.var \
-        pandas.DataFrame.idxmax \
-        pandas.DataFrame.idxmin \
-        pandas.DataFrame.last \
-        pandas.DataFrame.pivot \
         pandas.DataFrame.sort_values \
-        pandas.DataFrame.tz_convert \
-        pandas.DataFrame.tz_localize \
-        pandas.DataFrame.plot.bar \
         pandas.DataFrame.plot.hexbin \
-        pandas.DataFrame.plot.hist \
         pandas.DataFrame.plot.line \
-        pandas.DataFrame.hist \
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
 fi
 
@@ -101,6 +101,8 @@ Deprecations
 
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
+- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
+- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
 - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
 -
 
@@ -119,7 +121,7 @@ Categorical
 
 Datetimelike
 ^^^^^^^^^^^^
--
+- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
 -
 
 Timedelta
 
@@ -4860,15 +4860,15 @@ cpdef to_offset(freq, bint is_period=False):
 
             tups = zip(split[0::4], split[1::4], split[2::4])
             for n, (sep, stride, name) in enumerate(tups):
-                if is_period is False and name in c_OFFSET_DEPR_FREQSTR:
+                if is_period is False and name.upper() in c_OFFSET_DEPR_FREQSTR:
                     warnings.warn(
                         f"\'{name}\' is deprecated and will be removed "
                         f"in a future version, please use "
-                        f"\'{c_OFFSET_DEPR_FREQSTR.get(name)}\' instead.",
+                        f"\'{c_OFFSET_DEPR_FREQSTR.get(name.upper())}\' instead.",
                         FutureWarning,
                         stacklevel=find_stack_level(),
                     )
-                    name = c_OFFSET_DEPR_FREQSTR[name]
+                    name = c_OFFSET_DEPR_FREQSTR[name.upper()]
                 if is_period is True and name in c_REVERSE_OFFSET_DEPR_FREQSTR:
                     if name.startswith("Y"):
                         raise ValueError(
 
@@ -1973,4 +1973,7 @@ def warsaw(request) -> str:
 
 @pytest.fixture
 def arrow_string_storage():
+    """
+    Fixture that lists the possible PyArrow-backed values for the StringDtype storage field.
+    """
     return ("pyarrow", "pyarrow_numpy")
@@ -1082,7 +1082,7 @@ def set_categories(
         For :class:`pandas.Series`:
 
         >>> raw_cat = pd.Categorical(['a', 'b', 'c', 'A'],
-        ...                           categories=['a', 'b', 'c'], ordered=True)
+        ...                          categories=['a', 'b', 'c'], ordered=True)
         >>> ser = pd.Series(raw_cat)
         >>> ser
         0   a
 
@@ -1365,7 +1365,7 @@ def day_name(self, locale=None) -> npt.NDArray[np.object_]:
         >>> idx
         DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
                       dtype='datetime64[ns]', freq='D')
-        >>> idx.day_name(locale='pt_BR.utf8') # doctest: +SKIP
+        >>> idx.day_name(locale='pt_BR.utf8')  # doctest: +SKIP
         Index(['Segunda', 'Terça', 'Quarta'], dtype='object')
         """
         values = self._local_timestamps()
@@ -2775,11 +2775,6 @@ def _generate_range(
         # variable has type "Optional[Timestamp]")
         start = offset.rollforward(start)  # type: ignore[assignment]
 
-    elif end and not offset.is_on_offset(end):
-        # Incompatible types in assignment (expression has type "datetime",
-        # variable has type "Optional[Timestamp]")
-        end = offset.rollback(end)  # type: ignore[assignment]
-
     # Unsupported operand types for < ("Timestamp" and "None")
     if periods is None and end < start and offset.n >= 0:  # type: ignore[operator]
         end = None
 
@@ -9223,11 +9223,11 @@ def groupby(
         You could also assign a list of column names or a list of index names.
 
         >>> df = pd.DataFrame({
-        ...        "lev1": [1, 1, 1, 2, 2, 2],
-        ...        "lev2": [1, 1, 2, 1, 1, 2],
-        ...        "lev3": [1, 2, 1, 2, 1, 2],
-        ...        "lev4": [1, 2, 3, 4, 5, 6],
-        ...        "values": [0, 1, 2, 3, 4, 5]})
+        ...                   "lev1": [1, 1, 1, 2, 2, 2],
+        ...                   "lev2": [1, 1, 2, 1, 1, 2],
+        ...                   "lev3": [1, 2, 1, 2, 1, 2],
+        ...                   "lev4": [1, 2, 3, 4, 5, 6],
+        ...                   "values": [0, 1, 2, 3, 4, 5]})
         >>> df
             lev1 lev2 lev3 lev4 values
         0   1    1    1    1    0
 
@@ -3544,7 +3544,7 @@ def to_latex(
         >>> print(df.to_latex(index=False,
         ...                   formatters={"name": str.upper},
         ...                   float_format="{:.1f}".format,
-        ... ))  # doctest: +SKIP
+        ...                   ))  # doctest: +SKIP
         \begin{tabular}{lrr}
         \toprule
         name & age & height \\
 
@@ -444,14 +444,15 @@ class providing the base-class of operations.
     a `(callable, data_keyword)` tuple where `data_keyword` is a
     string indicating the keyword of `callable` that expects the
     %(klass)s object.
-args : iterable, optional
+*args : iterable, optional
        Positional arguments passed into `func`.
-kwargs : dict, optional
+**kwargs : dict, optional
          A dictionary of keyword arguments passed into `func`.
 
 Returns
 -------
-the return type of `func`.
+%(klass)s
+    The original object with the function `func` applied.
 
 See Also
 --------
 
@@ -1953,7 +1953,7 @@ def rename(self, name, inplace: bool = False) -> Self | None:
 
         >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
         ...                                   [2018, 2019]],
-        ...                                   names=['kind', 'year'])
+        ...                                  names=['kind', 'year'])
         >>> idx
         MultiIndex([('python', 2018),
                     ('python', 2019),
@@ -4809,11 +4809,18 @@ def _join_non_unique(
         left_idx, right_idx = get_join_indexers_non_unique(
             self._values, other._values, how=how, sort=sort
         )
-        mask = left_idx == -1
 
-        join_idx = self.take(left_idx)
-        right = other.take(right_idx)
-        join_index = join_idx.putmask(mask, right)
+        if how == "right":
+            join_index = other.take(right_idx)
+        else:
+            join_index = self.take(left_idx)
+
+        if how == "outer":
+            mask = left_idx == -1
+            if mask.any():
+                right = other.take(right_idx)
+                join_index = join_index.putmask(mask, right)
+
         if isinstance(join_index, ABCMultiIndex) and how == "outer":
             # test_join_index_levels
             join_index = join_index._sort_levels_monotonic()
@@ -4989,35 +4996,29 @@ def _join_monotonic(
         ridx: npt.NDArray[np.intp] | None
         lidx: npt.NDArray[np.intp] | None
 
-        if self.is_unique and other.is_unique:
-            # We can perform much better than the general case
-            if how == "left":
+        if how == "left":
+            if other.is_unique:
+                # We can perform much better than the general case
                 join_index = self
                 lidx = None
                 ridx = self._left_indexer_unique(other)
-            elif how == "right":
+            else:
+                join_array, lidx, ridx = self._left_indexer(other)
+                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
+        elif how == "right":
+            if self.is_unique:
+                # We can perform much better than the general case
                 join_index = other
                 lidx = other._left_indexer_unique(self)
                 ridx = None
-            elif how == "inner":
-                join_array, lidx, ridx = self._inner_indexer(other)
-                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
-            elif how == "outer":
-                join_array, lidx, ridx = self._outer_indexer(other)
-                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
-        else:
-            if how == "left":
-                join_array, lidx, ridx = self._left_indexer(other)
-            elif how == "right":
+            else:
                 join_array, ridx, lidx = other._left_indexer(self)
-            elif how == "inner":
-                join_array, lidx, ridx = self._inner_indexer(other)
-            elif how == "outer":
-                join_array, lidx, ridx = self._outer_indexer(other)
-
-            assert lidx is not None
-            assert ridx is not None
-
+                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
+        elif how == "inner":
+            join_array, lidx, ridx = self._inner_indexer(other)
+            join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
+        elif how == "outer":
+            join_array, lidx, ridx = self._outer_indexer(other)
             join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
 
         lidx = None if lidx is None else ensure_platform_int(lidx)
@@ -6574,7 +6575,7 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
 
         Examples
         --------
-        >>> idx = pd.Index([1,2,3])
+        >>> idx = pd.Index([1, 2, 3])
         >>> idx
         Index([1, 2, 3], dtype='int64')
 
@@ -6583,7 +6584,7 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
         >>> idx.isin([1, 4])
         array([ True, False, False])
 
-        >>> midx = pd.MultiIndex.from_arrays([[1,2,3],
+        >>> midx = pd.MultiIndex.from_arrays([[1, 2, 3],
         ...                                  ['red', 'blue', 'green']],
         ...                                  names=('number', 'color'))
         >>> midx
Original file line number	Diff line number	Diff line change
`@@ -101,6 +101,8 @@ Deprecations`
`101`	`101`
`102`	`102`	`Performance improvements`
`103`	`103`	`~~~~~~~~~~~~~~~~~~~~~~~~`
	`104`	+- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
	`105`	+- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
`104`	`106`	- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
`105`	`107`	`-`
`106`	`108`
`@@ -119,7 +121,7 @@ Categorical`
`119`	`121`
`120`	`122`	`Datetimelike`
`121`	`123`	`^^^^^^^^^^^^`
`122`		`--`
	`124`	+- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
`123`	`125`	`-`
`124`	`126`
`125`	`127`	`Timedelta`