Skip to content

Commit 87c803b

Browse files
authored
Merge branch 'main' into kurtosis
2 parents 4fc5ca2 + b0192c7 commit 87c803b

File tree

124 files changed

+1079
-632
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

124 files changed

+1079
-632
lines changed

.circleci/config.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ jobs:
3434
fi
3535
python -m pip install --no-build-isolation -ve . -Csetup-args="--werror"
3636
PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH
37-
sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
3837
ci/run_tests.sh
3938
test-linux-musl:
4039
docker:

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.7.2
22+
rev: v0.8.1
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -47,7 +47,7 @@ repos:
4747
types_or: [python, rst, markdown, cython, c]
4848
additional_dependencies: [tomli]
4949
- repo: https://github.com/MarcoGorelli/cython-lint
50-
rev: v0.16.2
50+
rev: v0.16.6
5151
hooks:
5252
- id: cython-lint
5353
- id: double-quote-cython-strings
@@ -95,7 +95,7 @@ repos:
9595
- id: sphinx-lint
9696
args: ["--enable", "all", "--disable", "line-too-long"]
9797
- repo: https://github.com/pre-commit/mirrors-clang-format
98-
rev: v19.1.3
98+
rev: v19.1.4
9999
hooks:
100100
- id: clang-format
101101
files: ^pandas/_libs/src|^pandas/_libs/include

asv_bench/benchmarks/groupby.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -511,8 +511,7 @@ def setup(self, dtype, method, application, ncols, engine):
511511
# grouping on multiple columns
512512
# and we lack kernels for a bunch of methods
513513
if (
514-
engine == "numba"
515-
and method in _numba_unsupported_methods
514+
(engine == "numba" and method in _numba_unsupported_methods)
516515
or ncols > 1
517516
or application == "transformation"
518517
or dtype == "datetime"

ci/code_checks.sh

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7373
-i "pandas.Period.freq GL08" \
7474
-i "pandas.Period.ordinal GL08" \
7575
-i "pandas.RangeIndex.from_range PR01,SA01" \
76-
-i "pandas.Series.dt.unit GL08" \
77-
-i "pandas.Series.pad PR01,SA01" \
7876
-i "pandas.Timedelta.max PR02" \
7977
-i "pandas.Timedelta.min PR02" \
8078
-i "pandas.Timedelta.resolution PR02" \
@@ -83,40 +81,21 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8381
-i "pandas.Timestamp.resolution PR02" \
8482
-i "pandas.Timestamp.tzinfo GL08" \
8583
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
86-
-i "pandas.arrays.IntegerArray SA01" \
87-
-i "pandas.arrays.IntervalArray.length SA01" \
8884
-i "pandas.arrays.NumpyExtensionArray SA01" \
8985
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
90-
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
91-
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
92-
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
93-
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
9486
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
95-
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
96-
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
97-
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
9887
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
99-
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
100-
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
101-
-i "pandas.core.resample.Resampler.indices SA01" \
10288
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
10389
-i "pandas.core.resample.Resampler.mean SA01" \
10490
-i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \
10591
-i "pandas.core.resample.Resampler.prod SA01" \
10692
-i "pandas.core.resample.Resampler.quantile PR01,PR07" \
107-
-i "pandas.core.resample.Resampler.sem SA01" \
10893
-i "pandas.core.resample.Resampler.std SA01" \
10994
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
11095
-i "pandas.core.resample.Resampler.var SA01" \
111-
-i "pandas.errors.IntCastingNaNError SA01" \
112-
-i "pandas.errors.NullFrequencyError SA01" \
113-
-i "pandas.errors.NumbaUtilError SA01" \
114-
-i "pandas.errors.PerformanceWarning SA01" \
11596
-i "pandas.errors.UndefinedVariableError PR01,SA01" \
11697
-i "pandas.errors.ValueLabelTypeMismatch SA01" \
117-
-i "pandas.infer_freq SA01" \
11898
-i "pandas.io.json.build_table_schema PR07,RT03,SA01" \
119-
-i "pandas.io.stata.StataWriter.write_file SA01" \
12099
-i "pandas.plotting.andrews_curves RT03,SA01" \
121100
-i "pandas.plotting.scatter_matrix PR07,SA01" \
122101
-i "pandas.tseries.offsets.BDay PR02,SA01" \

doc/source/getting_started/install.rst

Lines changed: 81 additions & 81 deletions
Large diffs are not rendered by default.

doc/source/reference/frame.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ Reindexing / selection / label manipulation
185185
DataFrame.duplicated
186186
DataFrame.equals
187187
DataFrame.filter
188-
DataFrame.head
189188
DataFrame.idxmax
190189
DataFrame.idxmin
191190
DataFrame.reindex
@@ -196,7 +195,6 @@ Reindexing / selection / label manipulation
196195
DataFrame.sample
197196
DataFrame.set_axis
198197
DataFrame.set_index
199-
DataFrame.tail
200198
DataFrame.take
201199
DataFrame.truncate
202200

doc/source/user_guide/dsintro.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ This case is handled identically to a dict of arrays.
326326

327327
.. ipython:: python
328328
329-
data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "a10")])
329+
data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "S10")])
330330
data[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]
331331
332332
pd.DataFrame(data)

doc/source/whatsnew/v3.0.0.rst

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ Other enhancements
5757
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
5858
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
5959
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`SeriesGroupBy.apply`, :meth:`DataFrame.apply` now support ``kurt`` (:issue:`40139`)
60+
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
6061
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
6162
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
6263
- :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
@@ -627,6 +628,7 @@ Datetimelike
627628
- Bug in :meth:`DatetimeIndex.union` and :meth:`DatetimeIndex.intersection` when ``unit`` was non-nanosecond (:issue:`59036`)
628629
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`)
629630
- Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`)
631+
- Bug in :meth:`to_datetime` on a float32 :class:`DataFrame` with year, month, day, etc. columns leading to precision issues and incorrect results. (:issue:`60506`)
630632
- Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`)
631633
- Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`)
632634
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
@@ -668,7 +670,8 @@ Indexing
668670
^^^^^^^^
669671
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
670672
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
671-
-
673+
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
674+
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
672675

673676
Missing
674677
^^^^^^^
@@ -701,6 +704,7 @@ I/O
701704
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
702705
- Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list of non-string values. (:issue:`59303`)
703706
- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
707+
- Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)
704708
- Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
705709
- Bug in :meth:`read_json` where extreme value integers in string format were incorrectly parsed as a different integer number (:issue:`20608`)
706710
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
@@ -732,11 +736,13 @@ Groupby/resample/rolling
732736
- Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
733737
- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
734738
- Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
739+
- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`)
735740
- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
736741
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
737742
- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
738743
- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
739744
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
745+
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
740746
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)
741747
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
742748

@@ -762,6 +768,7 @@ ExtensionArray
762768
- Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`)
763769
- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)
764770
- Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`)
771+
- Bug in constructing pandas data structures when passing into ``dtype`` a string of the type followed by ``[pyarrow]`` while PyArrow is not installed would raise ``NameError`` rather than ``ImportError`` (:issue:`57928`)
765772
- Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`)
766773

767774
Styler
@@ -794,6 +801,8 @@ Other
794801
- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
795802
- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
796803
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
804+
- Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`)
805+
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
797806

798807
.. ***DO NOT USE THIS SECTION***
799808

environment.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ dependencies:
3535
- hypothesis>=6.84.0
3636
- gcsfs>=2022.11.0
3737
- ipython
38+
- pickleshare # Needed for IPython Sphinx directive in the docs GH#60429
3839
- jinja2>=3.1.2
3940
- lxml>=4.9.2
4041
- matplotlib>=3.6.3
@@ -87,7 +88,7 @@ dependencies:
8788
- google-auth
8889
- natsort # DataFrame.sort_values doctest
8990
- numpydoc
90-
- pydata-sphinx-theme=0.14
91+
- pydata-sphinx-theme=0.16
9192
- pytest-cython # doctest
9293
- sphinx
9394
- sphinx-design

pandas/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@
235235
# Pandas is not (yet) a py.typed library: the public API is determined
236236
# based on the documentation.
237237
__all__ = [
238+
"NA",
238239
"ArrowDtype",
239240
"BooleanDtype",
240241
"Categorical",
@@ -253,15 +254,14 @@
253254
"HDFStore",
254255
"Index",
255256
"IndexSlice",
257+
"Int8Dtype",
256258
"Int16Dtype",
257259
"Int32Dtype",
258260
"Int64Dtype",
259-
"Int8Dtype",
260261
"Interval",
261262
"IntervalDtype",
262263
"IntervalIndex",
263264
"MultiIndex",
264-
"NA",
265265
"NaT",
266266
"NamedAgg",
267267
"Period",
@@ -274,10 +274,10 @@
274274
"Timedelta",
275275
"TimedeltaIndex",
276276
"Timestamp",
277+
"UInt8Dtype",
277278
"UInt16Dtype",
278279
"UInt32Dtype",
279280
"UInt64Dtype",
280-
"UInt8Dtype",
281281
"api",
282282
"array",
283283
"arrays",
@@ -290,8 +290,8 @@
290290
"errors",
291291
"eval",
292292
"factorize",
293-
"get_dummies",
294293
"from_dummies",
294+
"get_dummies",
295295
"get_option",
296296
"infer_freq",
297297
"interval_range",

0 commit comments

Comments
 (0)