pandas-dev
diff --git a/‎.github/workflows/comment-commands.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/comment-commands.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/deprecation-tracking-bot.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/deprecation-tracking-bot.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/unit-tests.yml
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/unit-tests.yml
Lines changed: 3 additions & 3 deletions
diff --git a/‎asv_bench/benchmarks/groupby.py
Lines changed: 45 additions & 0 deletions b/‎asv_bench/benchmarks/groupby.py
Lines changed: 45 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/indexing.py
Lines changed: 4 additions & 0 deletions b/‎asv_bench/benchmarks/indexing.py
Lines changed: 4 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/io/csv.py
Lines changed: 11 additions & 0 deletions b/‎asv_bench/benchmarks/io/csv.py
Lines changed: 11 additions & 0 deletions
diff --git a/‎doc/source/user_guide/copy_on_write.rst
Lines changed: 10 additions & 0 deletions b/‎doc/source/user_guide/copy_on_write.rst
Lines changed: 10 additions & 0 deletions
diff --git a/‎doc/source/user_guide/indexing.rst
Lines changed: 48 additions & 0 deletions b/‎doc/source/user_guide/indexing.rst
Lines changed: 48 additions & 0 deletions
diff --git a/‎doc/source/user_guide/timeseries.rst
Lines changed: 5 additions & 5 deletions b/‎doc/source/user_guide/timeseries.rst
Lines changed: 5 additions & 5 deletions
diff --git a/‎doc/source/whatsnew/v0.19.0.rst
Lines changed: 10 additions & 6 deletions b/‎doc/source/whatsnew/v0.19.0.rst
Lines changed: 10 additions & 6 deletions
@@ -77,7 +77,7 @@ jobs:
           echo 'EOF' >> $GITHUB_ENV
           echo "REGEX=$REGEX" >> $GITHUB_ENV
 
-      - uses: actions/github-script@v6
+      - uses: actions/github-script@v7
         env:
           BENCH_OUTPUT: ${{env.BENCH_OUTPUT}}
           REGEX: ${{env.REGEX}}
 
@@ -21,7 +21,7 @@ jobs:
     env:
       DEPRECATION_TRACKER_ISSUE: 50578
     steps:
-    - uses: actions/github-script@v6
+    - uses: actions/github-script@v7
       id: update-deprecation-issue
       with:
         script: |
 
@@ -23,7 +23,7 @@ defaults:
 jobs:
   ubuntu:
     runs-on: ubuntu-22.04
-    timeout-minutes: 180
+    timeout-minutes: 90
     strategy:
       matrix:
         env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
@@ -177,7 +177,7 @@ jobs:
       if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}}
 
   macos-windows:
-    timeout-minutes: 180
+    timeout-minutes: 90
     strategy:
       matrix:
         os: [macos-latest, windows-latest]
@@ -322,7 +322,7 @@ jobs:
       matrix:
         os: [ubuntu-22.04, macOS-latest, windows-latest]
 
-    timeout-minutes: 180
+    timeout-minutes: 90
 
     concurrency:
       #https://github.community/t/concurrecy-not-work-for-push/183068/7
 
@@ -802,6 +802,51 @@ def time_groupby_extra_cat_nosort(self, observed):
         self.df_extra_cat.groupby("a", observed=observed, sort=False)["b"].count()
 
 
+class MultipleCategories:
+    def setup(self):
+        N = 10**3
+        arr = np.random.random(N)
+        data = {
+            "a1": Categorical(np.random.randint(10000, size=N)),
+            "a2": Categorical(np.random.randint(10000, size=N)),
+            "b": arr,
+        }
+        self.df = DataFrame(data)
+        data = {
+            "a1": Categorical(np.random.randint(10000, size=N), ordered=True),
+            "a2": Categorical(np.random.randint(10000, size=N), ordered=True),
+            "b": arr,
+        }
+        self.df_ordered = DataFrame(data)
+        data = {
+            "a1": Categorical(np.random.randint(100, size=N), categories=np.arange(N)),
+            "a2": Categorical(np.random.randint(100, size=N), categories=np.arange(N)),
+            "b": arr,
+        }
+        self.df_extra_cat = DataFrame(data)
+
+    def time_groupby_sort(self):
+        self.df.groupby(["a1", "a2"], observed=False)["b"].count()
+
+    def time_groupby_nosort(self):
+        self.df.groupby(["a1", "a2"], observed=False, sort=False)["b"].count()
+
+    def time_groupby_ordered_sort(self):
+        self.df_ordered.groupby(["a1", "a2"], observed=False)["b"].count()
+
+    def time_groupby_ordered_nosort(self):
+        self.df_ordered.groupby(["a1", "a2"], observed=False, sort=False)["b"].count()
+
+    def time_groupby_extra_cat_sort(self):
+        self.df_extra_cat.groupby(["a1", "a2"], observed=False)["b"].count()
+
+    def time_groupby_extra_cat_nosort(self):
+        self.df_extra_cat.groupby(["a1", "a2"], observed=False, sort=False)["b"].count()
+
+    def time_groupby_transform(self):
+        self.df_extra_cat.groupby(["a1", "a2"], observed=False)["b"].cumsum()
+
+
 class Datelike:
     # GH 14338
     params = ["period_range", "date_range", "date_range_tz"]
 
@@ -306,6 +306,10 @@ def time_loc_null_slice_plus_slice(self, unique_levels):
         target = (self.tgt_null_slice, self.tgt_slice)
         self.df.loc[target, :]
 
+    def time_loc_multiindex(self, unique_levels):
+        target = self.df.index[::10]
+        self.df.loc[target]
+
     def time_xs_level_0(self, unique_levels):
         target = self.tgt_scalar
         self.df.xs(target, level=0)
 
@@ -621,4 +621,15 @@ def time_read_csv_index_col(self):
         )
 
 
+class ReadCSVCParserLowMemory:
+    # GH 16798
+    def setup(self):
+        self.csv = StringIO(
+            "strings\n" + "\n".join(["x" * (1 << 20) for _ in range(2100)])
+        )
+
+    def peakmem_over_2gb_input(self):
+        read_csv(self.csv, engine="c", low_memory=False)
+
+
 from ..pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -6,6 +6,12 @@
 Copy-on-Write (CoW)
 *******************
 
+.. note::
+
+    Copy-on-Write will become the default in pandas 3.0. We recommend
+    :ref:`turning it on now <copy_on_write_enabling>`
+    to benefit from all improvements.
+
 Copy-on-Write was first introduced in version 1.5.0. Starting from version 2.0 most of the
 optimizations that become possible through CoW are implemented and supported. All possible
 optimizations are supported starting from pandas 2.1.
@@ -123,6 +129,8 @@ CoW triggers a copy when ``df`` is changed to avoid mutating ``view`` as well:
     df
     view
 
+.. _copy_on_write_chained_assignment:
+
 Chained Assignment
 ------------------
 
@@ -238,6 +246,8 @@ and :meth:`DataFrame.rename`.
 These methods return views when Copy-on-Write is enabled, which provides a significant
 performance improvement compared to the regular execution.
 
+.. _copy_on_write_enabling:
+
 How to enable CoW
 -----------------
 
 
@@ -1727,6 +1727,22 @@ You can assign a custom index to the ``index`` attribute:
 Returning a view versus a copy
 ------------------------------
 
+.. warning::
+
+    :ref:`Copy-on-Write <copy_on_write>`
+    will become the new default in pandas 3.0. This means that chained indexing will
+    never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary
+    anymore.
+    See :ref:`this section <copy_on_write_chained_assignment>`
+    for more context.
+    We recommend turning Copy-on-Write on to leverage the improvements with
+
+    .. code-block:: python
+
+        pd.options.mode.copy_on_write = True
+
+    even before pandas 3.0 is available.
+
 When setting values in a pandas object, care must be taken to avoid what is called
 ``chained indexing``. Here is an example.
 
@@ -1765,6 +1781,22 @@ faster, and allows one to index *both* axes if so desired.
 Why does assignment fail when using chained indexing?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+.. warning::
+
+    :ref:`Copy-on-Write <copy_on_write>`
+    will become the new default in pandas 3.0. This means that chained indexing will
+    never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary
+    anymore.
+    See :ref:`this section <copy_on_write_chained_assignment>`
+    for more context.
+    We recommend turning Copy-on-Write on to leverage the improvements with
+
+    .. code-block:: python
+
+        pd.options.mode.copy_on_write = True
+
+    even before pandas 3.0 is available.
+
 The problem in the previous section is just a performance issue. What's up with
 the ``SettingWithCopy`` warning? We don't **usually** throw warnings around when
 you do something that might cost a few extra milliseconds!
@@ -1821,6 +1853,22 @@ Yikes!
 Evaluation order matters
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
+.. warning::
+
+    :ref:`Copy-on-Write <copy_on_write>`
+    will become the new default in pandas 3.0. This means that chained indexing will
+    never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary
+    anymore.
+    See :ref:`this section <copy_on_write_chained_assignment>`
+    for more context.
+    We recommend turning Copy-on-Write on to leverage the improvements with
+
+    .. code-block:: python
+
+        pd.options.mode.copy_on_write = True
+
+    even before pandas 3.0 is available.
+
 When you use chained indexing, the order and type of the indexing operation
 partially determine whether the result is a slice into the original object, or
 a copy of the slice.
 
@@ -882,11 +882,11 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ
     :class:`~pandas.tseries.offsets.BMonthBegin` or :class:`~pandas.tseries.offsets.BusinessMonthBegin`, ``'BMS'``, "business month begin"
     :class:`~pandas.tseries.offsets.CBMonthEnd` or :class:`~pandas.tseries.offsets.CustomBusinessMonthEnd`, ``'CBME'``, "custom business month end"
     :class:`~pandas.tseries.offsets.CBMonthBegin` or :class:`~pandas.tseries.offsets.CustomBusinessMonthBegin`, ``'CBMS'``, "custom business month begin"
-    :class:`~pandas.tseries.offsets.SemiMonthEnd`, ``'SM'``, "15th (or other day_of_month) and calendar month end"
+    :class:`~pandas.tseries.offsets.SemiMonthEnd`, ``'SME'``, "15th (or other day_of_month) and calendar month end"
     :class:`~pandas.tseries.offsets.SemiMonthBegin`, ``'SMS'``, "15th (or other day_of_month) and calendar month begin"
     :class:`~pandas.tseries.offsets.QuarterEnd`, ``'QE'``, "calendar quarter end"
     :class:`~pandas.tseries.offsets.QuarterBegin`, ``'QS'``, "calendar quarter begin"
-    :class:`~pandas.tseries.offsets.BQuarterEnd`, ``'BQ``, "business quarter end"
+    :class:`~pandas.tseries.offsets.BQuarterEnd`, ``'BQE'``, "business quarter end"
     :class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin"
     :class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter"
     :class:`~pandas.tseries.offsets.YearEnd`, ``'YE'``, "calendar year end"
@@ -1241,15 +1241,15 @@ frequencies. We will refer to these aliases as *offset aliases*.
     "D", "calendar day frequency"
     "W", "weekly frequency"
     "ME", "month end frequency"
-    "SM", "semi-month end frequency (15th and end of month)"
+    "SME", "semi-month end frequency (15th and end of month)"
     "BME", "business month end frequency"
     "CBME", "custom business month end frequency"
     "MS", "month start frequency"
     "SMS", "semi-month start frequency (1st and 15th)"
     "BMS", "business month start frequency"
     "CBMS", "custom business month start frequency"
     "QE", "quarter end frequency"
-    "BQ", "business quarter end frequency"
+    "BQE", "business quarter end frequency"
     "QS", "quarter start frequency"
     "BQS", "business quarter start frequency"
     "YE", "year end frequency"
@@ -1686,7 +1686,7 @@ the end of the interval.
 .. warning::
 
     The default values for ``label`` and ``closed`` is '**left**' for all
-    frequency offsets except for 'ME', 'YE', 'QE', 'BME', 'BY', 'BQ', and 'W'
+    frequency offsets except for 'ME', 'YE', 'QE', 'BME', 'BY', 'BQE', and 'W'
     which all have a default of 'right'.
 
     This might unintendedly lead to looking ahead, where the value for a later
 
@@ -329,11 +329,13 @@ These provide date offsets anchored (by default) to the 15th and end of month, a
 
 **SemiMonthEnd**:
 
-.. ipython:: python
+.. code-block:: python
 
-   pd.Timestamp("2016-01-01") + SemiMonthEnd()
+   In [46]: pd.Timestamp("2016-01-01") + SemiMonthEnd()
+   Out[46]: Timestamp('2016-01-15 00:00:00')
 
-   pd.date_range("2015-01-01", freq="SM", periods=4)
+   In [47]: pd.date_range("2015-01-01", freq="SM", periods=4)
+   Out[47]: DatetimeIndex(['2015-01-15', '2015-01-31', '2015-02-15', '2015-02-28'], dtype='datetime64[ns]', freq='SM-15')
 
 **SemiMonthBegin**:
 
@@ -345,11 +347,13 @@ These provide date offsets anchored (by default) to the 15th and end of month, a
 
 Using the anchoring suffix, you can also specify the day of month to use instead of the 15th.
 
-.. ipython:: python
+.. code-block:: python
 
-   pd.date_range("2015-01-01", freq="SMS-16", periods=4)
+   In [50]: pd.date_range("2015-01-01", freq="SMS-16", periods=4)
+   Out[50]: DatetimeIndex(['2015-01-01', '2015-01-16', '2015-02-01', '2015-02-16'], dtype='datetime64[ns]', freq='SMS-16')
 
-   pd.date_range("2015-01-01", freq="SM-14", periods=4)
+   In [51]: pd.date_range("2015-01-01", freq="SM-14", periods=4)
+   Out[51]: DatetimeIndex(['2015-01-14', '2015-01-31', '2015-02-14', '2015-02-28'], dtype='datetime64[ns]', freq='SM-14')
 
 .. _whatsnew_0190.enhancements.index: