Skip to content

Commit a9f000c

Browse files
authored
Merge branch 'main' into BUG-56994/pyarrow-assignment-unexpected-dtypes
2 parents ad1db19 + d4ae654 commit a9f000c

File tree

151 files changed

+1421
-886
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

151 files changed

+1421
-886
lines changed

.gitattributes

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,5 @@ pandas/tests/io/parser/data export-ignore
8585

8686
# Include cibw script in sdist since it's needed for building wheels
8787
scripts/cibw_before_build.sh -export-ignore
88-
scripts/cibw_before_test.sh -export-ignore
88+
scripts/cibw_before_build_windows.sh -export-ignore
89+
scripts/cibw_before_test_windows.sh -export-ignore

.github/workflows/unit-tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,8 @@ jobs:
387387
- name: Build Environment
388388
run: |
389389
python --version
390-
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
391-
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy cython
390+
python -m pip install --upgrade pip setuptools wheel numpy meson[ninja]==1.2.1 meson-python==0.13.1
391+
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
392392
python -m pip install versioneer[toml]
393393
python -m pip install python-dateutil pytz tzdata hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
394394
python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror"

.github/workflows/wheels.yml

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,6 @@ jobs:
111111
- buildplat: [ubuntu-22.04, pyodide_wasm32]
112112
python: ["cp312", "3.12"]
113113
cibw_build_frontend: 'build'
114-
# TODO: Build free-threaded wheels for Windows
115-
exclude:
116-
- buildplat: [windows-2022, win_amd64]
117-
python: ["cp313t", "3.13"]
118114

119115
env:
120116
IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
@@ -181,20 +177,6 @@ jobs:
181177
shell: bash -el {0}
182178
run: for whl in $(ls wheelhouse); do wheel unpack wheelhouse/$whl -d /tmp; done
183179

184-
# Testing on windowsservercore instead of GHA runner to fail on missing DLLs
185-
- name: Test Windows Wheels
186-
if: ${{ matrix.buildplat[1] == 'win_amd64' }}
187-
shell: pwsh
188-
run: |
189-
$TST_CMD = @"
190-
python -m pip install hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0;
191-
python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
192-
python -c `'import pandas as pd; pd.test(extra_args=[`\"--no-strict-data-files`\", `\"-m not clipboard and not single_cpu and not slow and not network and not db`\"])`';
193-
"@
194-
# add rc to the end of the image name if the Python version is unreleased
195-
docker pull python:${{ matrix.python[1] == '3.13' && '3.13-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
196-
docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.13' && '3.13-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
197-
198180
- uses: actions/upload-artifact@v4
199181
with:
200182
name: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

MANIFEST.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,5 @@ graft pandas/_libs/include
6565

6666
# Include cibw script in sdist since it's needed for building wheels
6767
include scripts/cibw_before_build.sh
68+
include scripts/cibw_before_build_windows.sh
69+
include scripts/cibw_before_test_windows.sh

ci/code_checks.sh

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -73,45 +73,32 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7373
-i "pandas.Period.freq GL08" \
7474
-i "pandas.Period.ordinal GL08" \
7575
-i "pandas.RangeIndex.from_range PR01,SA01" \
76-
-i "pandas.Series.dt.freq GL08" \
7776
-i "pandas.Series.dt.unit GL08" \
7877
-i "pandas.Series.pad PR01,SA01" \
7978
-i "pandas.Timedelta.max PR02" \
8079
-i "pandas.Timedelta.min PR02" \
8180
-i "pandas.Timedelta.resolution PR02" \
8281
-i "pandas.Timestamp.max PR02" \
8382
-i "pandas.Timestamp.min PR02" \
84-
-i "pandas.Timestamp.nanosecond GL08" \
8583
-i "pandas.Timestamp.resolution PR02" \
8684
-i "pandas.Timestamp.tzinfo GL08" \
87-
-i "pandas.Timestamp.year GL08" \
8885
-i "pandas.api.types.is_re_compilable PR07,SA01" \
89-
-i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
9086
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
9187
-i "pandas.arrays.IntegerArray SA01" \
9288
-i "pandas.arrays.IntervalArray.length SA01" \
93-
-i "pandas.arrays.IntervalArray.right SA01" \
9489
-i "pandas.arrays.NumpyExtensionArray SA01" \
95-
-i "pandas.arrays.SparseArray PR07,SA01" \
9690
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
9791
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
9892
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
99-
-i "pandas.core.groupby.DataFrameGroupBy.groups SA01" \
10093
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
101-
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
10294
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
10395
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
10496
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
10597
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
106-
-i "pandas.core.groupby.SeriesGroupBy.groups SA01" \
10798
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
108-
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
109-
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
110-
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
11199
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
112100
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
113101
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
114-
-i "pandas.core.resample.Resampler.groups SA01" \
115102
-i "pandas.core.resample.Resampler.indices SA01" \
116103
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
117104
-i "pandas.core.resample.Resampler.mean SA01" \
@@ -122,9 +109,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
122109
-i "pandas.core.resample.Resampler.std SA01" \
123110
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
124111
-i "pandas.core.resample.Resampler.var SA01" \
125-
-i "pandas.errors.AttributeConflictWarning SA01" \
126-
-i "pandas.errors.ChainedAssignmentError SA01" \
127-
-i "pandas.errors.DataError SA01" \
128112
-i "pandas.errors.DuplicateLabelError SA01" \
129113
-i "pandas.errors.IntCastingNaNError SA01" \
130114
-i "pandas.errors.InvalidIndexError SA01" \

doc/source/development/contributing_docstring.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -940,7 +940,7 @@ Finally, docstrings can also be appended to with the ``doc`` decorator.
940940

941941
In this example, we'll create a parent docstring normally (this is like
942942
``pandas.core.generic.NDFrame``). Then we'll have two children (like
943-
``pandas.core.series.Series`` and ``pandas.DataFrame``). We'll
943+
``pandas.Series`` and ``pandas.DataFrame``). We'll
944944
substitute the class names in this docstring.
945945

946946
.. code-block:: python
@@ -995,5 +995,5 @@ mapping function names to docstrings. Wherever possible, we prefer using
995995
``doc``, since the docstring-writing processes is slightly closer to normal.
996996

997997
See ``pandas.core.generic.NDFrame.fillna`` for an example template, and
998-
``pandas.core.series.Series.fillna`` and ``pandas.core.generic.frame.fillna``
998+
``pandas.Series.fillna`` and ``pandas.core.generic.frame.fillna``
999999
for the filled versions.

doc/source/whatsnew/v2.3.0.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,10 @@ Conversion
106106
Strings
107107
^^^^^^^
108108
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
109+
- Bug in :meth:`Series.replace` with :class:`StringDtype` when replacing with a non-string value was not upcasting to ``object`` dtype (:issue:`60282`)
109110
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
110111
- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
111112
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
112-
-
113113

114114
Interval
115115
^^^^^^^^
@@ -118,7 +118,7 @@ Interval
118118

119119
Indexing
120120
^^^^^^^^
121-
-
121+
- Fixed bug in :meth:`Index.get_indexer` round-tripping through string dtype when ``infer_string`` is enabled (:issue:`55834`)
122122
-
123123

124124
Missing

doc/source/whatsnew/v3.0.0.rst

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ Other enhancements
5454
- :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
5555
- :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
5656
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
57+
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
5758
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
5859
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
5960
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
@@ -481,7 +482,7 @@ Other Removals
481482
- Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`)
482483
- Enforced deprecation of :meth:`offsets.Tick.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`)
483484
- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`)
484-
- Enforced deprecation of ``core.internals`` members ``Block``, ``ExtensionBlock``, and ``DatetimeTZBlock`` (:issue:`58467`)
485+
- Enforced deprecation of ``core.internals`` member ``DatetimeTZBlock`` (:issue:`58467`)
485486
- Enforced deprecation of ``date_parser`` in :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_excel` in favour of ``date_format`` (:issue:`50601`)
486487
- Enforced deprecation of ``keep_date_col`` keyword in :func:`read_csv` (:issue:`55569`)
487488
- Enforced deprecation of ``quantile`` keyword in :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile`, renamed to ``q`` instead. (:issue:`52550`)
@@ -626,6 +627,7 @@ Datetimelike
626627
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`)
627628
- Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`)
628629
- Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`)
630+
- Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`)
629631
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
630632

631633
Timedelta
@@ -703,6 +705,8 @@ I/O
703705
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
704706
- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
705707
- Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
708+
- Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`)
709+
- Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`)
706710

707711
Period
708712
^^^^^^
@@ -761,7 +765,7 @@ ExtensionArray
761765

762766
Styler
763767
^^^^^^
764-
-
768+
- Bug in :meth:`Styler.to_latex` where styling column headers when combined with a hidden index or hidden index-levels is fixed.
765769

766770
Other
767771
^^^^^
@@ -785,6 +789,7 @@ Other
785789
- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
786790
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
787791
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
792+
- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
788793
- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
789794
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
790795

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ dependencies:
7777

7878
# code checks
7979
- flake8=7.1.0 # run in subprocess over docstring examples
80-
- mypy=1.11.2 # pre-commit uses locally installed mypy
80+
- mypy=1.13.0 # pre-commit uses locally installed mypy
8181
- tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
8282
- pre-commit>=4.0.1
8383

pandas/_libs/src/vendored/numpy/datetime/np_datetime.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -660,11 +660,12 @@ void pandas_datetime_to_datetimestruct(npy_datetime dt, NPY_DATETIMEUNIT base,
660660
perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000;
661661

662662
set_datetimestruct_days(extract_unit(&dt, perday), out);
663-
out->hour = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60);
664-
out->min = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 60);
665-
out->sec = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000);
666-
out->us = (npy_int32)extract_unit(&dt, 1000LL);
667-
out->ps = (npy_int32)(dt * 1000);
663+
out->hour =
664+
(npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 60 * 60);
665+
out->min = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 60);
666+
out->sec = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000);
667+
out->us = (npy_int32)extract_unit(&dt, 1000LL * 1000);
668+
out->ps = (npy_int32)(dt);
668669
break;
669670

670671
case NPY_FR_fs:

0 commit comments

Comments
 (0)