Skip to content

Commit 868850f

Browse files
authored
Merge branch 'main' into 55837
2 parents bc87192 + 639bd66 commit 868850f

File tree

273 files changed

+4641
-4006
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

273 files changed

+4641
-4006
lines changed

.github/workflows/comment-commands.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ jobs:
7777
echo 'EOF' >> $GITHUB_ENV
7878
echo "REGEX=$REGEX" >> $GITHUB_ENV
7979
80-
- uses: actions/github-script@v6
80+
- uses: actions/github-script@v7
8181
env:
8282
BENCH_OUTPUT: ${{env.BENCH_OUTPUT}}
8383
REGEX: ${{env.REGEX}}

.github/workflows/deprecation-tracking-bot.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
env:
2222
DEPRECATION_TRACKER_ISSUE: 50578
2323
steps:
24-
- uses: actions/github-script@v6
24+
- uses: actions/github-script@v7
2525
id: update-deprecation-issue
2626
with:
2727
script: |

.github/workflows/unit-tests.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ defaults:
2323
jobs:
2424
ubuntu:
2525
runs-on: ubuntu-22.04
26-
timeout-minutes: 180
26+
timeout-minutes: 90
2727
strategy:
2828
matrix:
2929
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
@@ -177,7 +177,7 @@ jobs:
177177
if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}}
178178

179179
macos-windows:
180-
timeout-minutes: 180
180+
timeout-minutes: 90
181181
strategy:
182182
matrix:
183183
os: [macos-latest, windows-latest]
@@ -322,7 +322,7 @@ jobs:
322322
matrix:
323323
os: [ubuntu-22.04, macOS-latest, windows-latest]
324324

325-
timeout-minutes: 180
325+
timeout-minutes: 90
326326

327327
concurrency:
328328
#https://github.community/t/concurrecy-not-work-for-push/183068/7

asv_bench/benchmarks/groupby.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -802,6 +802,51 @@ def time_groupby_extra_cat_nosort(self, observed):
802802
self.df_extra_cat.groupby("a", observed=observed, sort=False)["b"].count()
803803

804804

805+
class MultipleCategories:
806+
def setup(self):
807+
N = 10**3
808+
arr = np.random.random(N)
809+
data = {
810+
"a1": Categorical(np.random.randint(10000, size=N)),
811+
"a2": Categorical(np.random.randint(10000, size=N)),
812+
"b": arr,
813+
}
814+
self.df = DataFrame(data)
815+
data = {
816+
"a1": Categorical(np.random.randint(10000, size=N), ordered=True),
817+
"a2": Categorical(np.random.randint(10000, size=N), ordered=True),
818+
"b": arr,
819+
}
820+
self.df_ordered = DataFrame(data)
821+
data = {
822+
"a1": Categorical(np.random.randint(100, size=N), categories=np.arange(N)),
823+
"a2": Categorical(np.random.randint(100, size=N), categories=np.arange(N)),
824+
"b": arr,
825+
}
826+
self.df_extra_cat = DataFrame(data)
827+
828+
def time_groupby_sort(self):
829+
self.df.groupby(["a1", "a2"], observed=False)["b"].count()
830+
831+
def time_groupby_nosort(self):
832+
self.df.groupby(["a1", "a2"], observed=False, sort=False)["b"].count()
833+
834+
def time_groupby_ordered_sort(self):
835+
self.df_ordered.groupby(["a1", "a2"], observed=False)["b"].count()
836+
837+
def time_groupby_ordered_nosort(self):
838+
self.df_ordered.groupby(["a1", "a2"], observed=False, sort=False)["b"].count()
839+
840+
def time_groupby_extra_cat_sort(self):
841+
self.df_extra_cat.groupby(["a1", "a2"], observed=False)["b"].count()
842+
843+
def time_groupby_extra_cat_nosort(self):
844+
self.df_extra_cat.groupby(["a1", "a2"], observed=False, sort=False)["b"].count()
845+
846+
def time_groupby_transform(self):
847+
self.df_extra_cat.groupby(["a1", "a2"], observed=False)["b"].cumsum()
848+
849+
805850
class Datelike:
806851
# GH 14338
807852
params = ["period_range", "date_range", "date_range_tz"]

asv_bench/benchmarks/indexing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,10 @@ def time_loc_null_slice_plus_slice(self, unique_levels):
306306
target = (self.tgt_null_slice, self.tgt_slice)
307307
self.df.loc[target, :]
308308

309+
def time_loc_multiindex(self, unique_levels):
310+
target = self.df.index[::10]
311+
self.df.loc[target]
312+
309313
def time_xs_level_0(self, unique_levels):
310314
target = self.tgt_scalar
311315
self.df.xs(target, level=0)

asv_bench/benchmarks/io/csv.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,4 +621,15 @@ def time_read_csv_index_col(self):
621621
)
622622

623623

624+
class ReadCSVCParserLowMemory:
625+
# GH 16798
626+
def setup(self):
627+
self.csv = StringIO(
628+
"strings\n" + "\n".join(["x" * (1 << 20) for _ in range(2100)])
629+
)
630+
631+
def peakmem_over_2gb_input(self):
632+
read_csv(self.csv, engine="c", low_memory=False)
633+
634+
624635
from ..pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/user_guide/copy_on_write.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@
66
Copy-on-Write (CoW)
77
*******************
88

9+
.. note::
10+
11+
Copy-on-Write will become the default in pandas 3.0. We recommend
12+
:ref:`turning it on now <copy_on_write_enabling>`
13+
to benefit from all improvements.
14+
915
Copy-on-Write was first introduced in version 1.5.0. Starting from version 2.0 most of the
1016
optimizations that become possible through CoW are implemented and supported. All possible
1117
optimizations are supported starting from pandas 2.1.
@@ -123,6 +129,8 @@ CoW triggers a copy when ``df`` is changed to avoid mutating ``view`` as well:
123129
df
124130
view
125131
132+
.. _copy_on_write_chained_assignment:
133+
126134
Chained Assignment
127135
------------------
128136

@@ -238,6 +246,8 @@ and :meth:`DataFrame.rename`.
238246
These methods return views when Copy-on-Write is enabled, which provides a significant
239247
performance improvement compared to the regular execution.
240248

249+
.. _copy_on_write_enabling:
250+
241251
How to enable CoW
242252
-----------------
243253

doc/source/user_guide/indexing.rst

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1727,6 +1727,22 @@ You can assign a custom index to the ``index`` attribute:
17271727
Returning a view versus a copy
17281728
------------------------------
17291729

1730+
.. warning::
1731+
1732+
:ref:`Copy-on-Write <copy_on_write>`
1733+
will become the new default in pandas 3.0. This means that chained indexing will
1734+
never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary
1735+
anymore.
1736+
See :ref:`this section <copy_on_write_chained_assignment>`
1737+
for more context.
1738+
We recommend turning Copy-on-Write on to leverage the improvements with
1739+
1740+
.. code-block:: python
1741+

1742+
    pd.options.mode.copy_on_write = True
1743+
1744+
even before pandas 3.0 is available.
1745+
17301746
When setting values in a pandas object, care must be taken to avoid what is called
17311747
``chained indexing``. Here is an example.
17321748

@@ -1765,6 +1781,22 @@ faster, and allows one to index *both* axes if so desired.
17651781
Why does assignment fail when using chained indexing?
17661782
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
17671783

1784+
.. warning::
1785+
1786+
:ref:`Copy-on-Write <copy_on_write>`
1787+
will become the new default in pandas 3.0. This means that chained indexing will
1788+
never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary
1789+
anymore.
1790+
See :ref:`this section <copy_on_write_chained_assignment>`
1791+
for more context.
1792+
We recommend turning Copy-on-Write on to leverage the improvements with
1793+
1794+
.. code-block:: python
1795+

1796+
    pd.options.mode.copy_on_write = True
1797+
1798+
even before pandas 3.0 is available.
1799+
17681800
The problem in the previous section is just a performance issue. What's up with
17691801
the ``SettingWithCopy`` warning? We don't **usually** throw warnings around when
17701802
you do something that might cost a few extra milliseconds!
@@ -1821,6 +1853,22 @@ Yikes!
18211853
Evaluation order matters
18221854
~~~~~~~~~~~~~~~~~~~~~~~~
18231855

1856+
.. warning::
1857+
1858+
:ref:`Copy-on-Write <copy_on_write>`
1859+
will become the new default in pandas 3.0. This means that chained indexing will
1860+
never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary
1861+
anymore.
1862+
See :ref:`this section <copy_on_write_chained_assignment>`
1863+
for more context.
1864+
We recommend turning Copy-on-Write on to leverage the improvements with
1865+
1866+
.. code-block:: python
1867+

1868+
    pd.options.mode.copy_on_write = True
1869+
1870+
even before pandas 3.0 is available.
1871+
18241872
When you use chained indexing, the order and type of the indexing operation
18251873
partially determine whether the result is a slice into the original object, or
18261874
a copy of the slice.

doc/source/user_guide/timeseries.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -882,11 +882,11 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ
882882
:class:`~pandas.tseries.offsets.BMonthBegin` or :class:`~pandas.tseries.offsets.BusinessMonthBegin`, ``'BMS'``, "business month begin"
883883
:class:`~pandas.tseries.offsets.CBMonthEnd` or :class:`~pandas.tseries.offsets.CustomBusinessMonthEnd`, ``'CBME'``, "custom business month end"
884884
:class:`~pandas.tseries.offsets.CBMonthBegin` or :class:`~pandas.tseries.offsets.CustomBusinessMonthBegin`, ``'CBMS'``, "custom business month begin"
885-
:class:`~pandas.tseries.offsets.SemiMonthEnd`, ``'SM'``, "15th (or other day_of_month) and calendar month end"
885+
:class:`~pandas.tseries.offsets.SemiMonthEnd`, ``'SME'``, "15th (or other day_of_month) and calendar month end"
886886
:class:`~pandas.tseries.offsets.SemiMonthBegin`, ``'SMS'``, "15th (or other day_of_month) and calendar month begin"
887887
:class:`~pandas.tseries.offsets.QuarterEnd`, ``'QE'``, "calendar quarter end"
888888
:class:`~pandas.tseries.offsets.QuarterBegin`, ``'QS'``, "calendar quarter begin"
889-
:class:`~pandas.tseries.offsets.BQuarterEnd`, ``'BQ``, "business quarter end"
889+
:class:`~pandas.tseries.offsets.BQuarterEnd`, ``'BQE'``, "business quarter end"
890890
:class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin"
891891
:class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter"
892892
:class:`~pandas.tseries.offsets.YearEnd`, ``'YE'``, "calendar year end"
@@ -1241,15 +1241,15 @@ frequencies. We will refer to these aliases as *offset aliases*.
12411241
"D", "calendar day frequency"
12421242
"W", "weekly frequency"
12431243
"ME", "month end frequency"
1244-
"SM", "semi-month end frequency (15th and end of month)"
1244+
"SME", "semi-month end frequency (15th and end of month)"
12451245
"BME", "business month end frequency"
12461246
"CBME", "custom business month end frequency"
12471247
"MS", "month start frequency"
12481248
"SMS", "semi-month start frequency (1st and 15th)"
12491249
"BMS", "business month start frequency"
12501250
"CBMS", "custom business month start frequency"
12511251
"QE", "quarter end frequency"
1252-
"BQ", "business quarter end frequency"
1252+
"BQE", "business quarter end frequency"
12531253
"QS", "quarter start frequency"
12541254
"BQS", "business quarter start frequency"
12551255
"YE", "year end frequency"
@@ -1686,7 +1686,7 @@ the end of the interval.
16861686
.. warning::
16871687

16881688
The default values for ``label`` and ``closed`` are '**left**' for all
1689-
frequency offsets except for 'ME', 'YE', 'QE', 'BME', 'BY', 'BQ', and 'W'
1689+
frequency offsets except for 'ME', 'YE', 'QE', 'BME', 'BY', 'BQE', and 'W'
16901690
which all have a default of 'right'.
16911691

16921692
This might unintendedly lead to looking ahead, where the value for a later

doc/source/whatsnew/v0.19.0.rst

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -329,11 +329,13 @@ These provide date offsets anchored (by default) to the 15th and end of month, a
329329
330330
**SemiMonthEnd**:
331331

332-
.. ipython:: python
332+
.. code-block:: python
333333
334-
pd.Timestamp("2016-01-01") + SemiMonthEnd()
334+
In [46]: pd.Timestamp("2016-01-01") + SemiMonthEnd()
335+
Out[46]: Timestamp('2016-01-15 00:00:00')
335336
336-
pd.date_range("2015-01-01", freq="SM", periods=4)
337+
In [47]: pd.date_range("2015-01-01", freq="SM", periods=4)
338+
Out[47]: DatetimeIndex(['2015-01-15', '2015-01-31', '2015-02-15', '2015-02-28'], dtype='datetime64[ns]', freq='SM-15')
337339
338340
**SemiMonthBegin**:
339341

@@ -345,11 +347,13 @@ These provide date offsets anchored (by default) to the 15th and end of month, a
345347
346348
Using the anchoring suffix, you can also specify the day of month to use instead of the 15th.
347349

348-
.. ipython:: python
350+
.. code-block:: python
349351
350-
pd.date_range("2015-01-01", freq="SMS-16", periods=4)
352+
In [50]: pd.date_range("2015-01-01", freq="SMS-16", periods=4)
353+
Out[50]: DatetimeIndex(['2015-01-01', '2015-01-16', '2015-02-01', '2015-02-16'], dtype='datetime64[ns]', freq='SMS-16')
351354
352-
pd.date_range("2015-01-01", freq="SM-14", periods=4)
355+
In [51]: pd.date_range("2015-01-01", freq="SM-14", periods=4)
356+
Out[51]: DatetimeIndex(['2015-01-14', '2015-01-31', '2015-02-14', '2015-02-28'], dtype='datetime64[ns]', freq='SM-14')
353357
354358
.. _whatsnew_0190.enhancements.index:
355359

0 commit comments

Comments
 (0)