Skip to content

Commit 8f8645a

Browse files
authored
Merge branch 'main' into add-type-hint-squeeze
2 parents 5dfacd6 + 98f7e4d commit 8f8645a

File tree

107 files changed

+1012
-623
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+1012
-623
lines changed

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ jobs:
152152
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
153153

154154
- name: Build wheels
155-
uses: pypa/cibuildwheel@v2.21.3
155+
uses: pypa/cibuildwheel@v2.22.0
156156
with:
157157
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
158158
env:

ci/code_checks.sh

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7373
-i "pandas.Period.freq GL08" \
7474
-i "pandas.Period.ordinal GL08" \
7575
-i "pandas.RangeIndex.from_range PR01,SA01" \
76-
-i "pandas.Series.dt.freq GL08" \
7776
-i "pandas.Series.dt.unit GL08" \
7877
-i "pandas.Series.pad PR01,SA01" \
7978
-i "pandas.Timedelta.max PR02" \
@@ -83,8 +82,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8382
-i "pandas.Timestamp.min PR02" \
8483
-i "pandas.Timestamp.resolution PR02" \
8584
-i "pandas.Timestamp.tzinfo GL08" \
86-
-i "pandas.api.types.is_re_compilable PR07,SA01" \
87-
-i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
8885
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
8986
-i "pandas.arrays.IntegerArray SA01" \
9087
-i "pandas.arrays.IntervalArray.length SA01" \
@@ -93,15 +90,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9390
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
9491
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
9592
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
96-
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
9793
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
9894
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
9995
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
10096
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
10197
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
102-
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
103-
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
104-
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
10598
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
10699
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
107100
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
@@ -115,19 +108,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
115108
-i "pandas.core.resample.Resampler.std SA01" \
116109
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
117110
-i "pandas.core.resample.Resampler.var SA01" \
118-
-i "pandas.errors.AttributeConflictWarning SA01" \
119-
-i "pandas.errors.ChainedAssignmentError SA01" \
120-
-i "pandas.errors.DuplicateLabelError SA01" \
121111
-i "pandas.errors.IntCastingNaNError SA01" \
122-
-i "pandas.errors.InvalidIndexError SA01" \
123112
-i "pandas.errors.NullFrequencyError SA01" \
124-
-i "pandas.errors.NumExprClobberingError SA01" \
125113
-i "pandas.errors.NumbaUtilError SA01" \
126-
-i "pandas.errors.OutOfBoundsTimedelta SA01" \
127114
-i "pandas.errors.PerformanceWarning SA01" \
128-
-i "pandas.errors.PossibleDataLossError SA01" \
129115
-i "pandas.errors.UndefinedVariableError PR01,SA01" \
130-
-i "pandas.errors.UnsortedIndexError SA01" \
131116
-i "pandas.errors.ValueLabelTypeMismatch SA01" \
132117
-i "pandas.infer_freq SA01" \
133118
-i "pandas.io.json.build_table_schema PR07,RT03,SA01" \

doc/source/conf.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,6 @@
242242
"external_links": [],
243243
"footer_start": ["pandas_footer", "sphinx-version"],
244244
"github_url": "https://github.com/pandas-dev/pandas",
245-
"twitter_url": "https://twitter.com/pandas_dev",
246245
"analytics": {
247246
"plausible_analytics_domain": "pandas.pydata.org",
248247
"plausible_analytics_url": "https://views.scientific-python.org/js/script.js",
@@ -258,6 +257,11 @@
258257
# patch version doesn't compare as equal (e.g. 2.2.1 != 2.2.0 but it should be)
259258
"show_version_warning_banner": False,
260259
"icon_links": [
260+
{
261+
"name": "X",
262+
"url": "https://x.com/pandas_dev",
263+
"icon": "fa-brands fa-square-x-twitter",
264+
},
261265
{
262266
"name": "Mastodon",
263267
"url": "https://fosstodon.org/@pandas_dev",

doc/source/user_guide/reshaping.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ The missing value can be filled with a specific value with the ``fill_value`` ar
321321
.. image:: ../_static/reshaping_melt.png
322322

323323
The top-level :func:`~pandas.melt` function and the corresponding :meth:`DataFrame.melt`
324-
are useful to massage a :class:`DataFrame` into a format where one or more columns
324+
are useful to reshape a :class:`DataFrame` into a format where one or more columns
325325
are *identifier variables*, while all other columns, considered *measured
326326
variables*, are "unpivoted" to the row axis, leaving just two non-identifier
327327
columns, "variable" and "value". The names of those columns can be customized

doc/source/user_guide/window.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -567,9 +567,9 @@ One must have :math:`0 < \alpha \leq 1`, and while it is possible to pass
567567
568568
\alpha =
569569
\begin{cases}
570-
\frac{2}{s + 1}, & \text{for span}\ s \geq 1\\
571-
\frac{1}{1 + c}, & \text{for center of mass}\ c \geq 0\\
572-
1 - \exp^{\frac{\log 0.5}{h}}, & \text{for half-life}\ h > 0
570+
\frac{2}{s + 1}, & \text{for span}\ s \geq 1\\
571+
\frac{1}{1 + c}, & \text{for center of mass}\ c \geq 0\\
572+
1 - e^{\frac{\log 0.5}{h}}, & \text{for half-life}\ h > 0
573573
\end{cases}
574574
575575
One must specify precisely one of **span**, **center of mass**, **half-life**

doc/source/whatsnew/v2.3.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ Interval
118118

119119
Indexing
120120
^^^^^^^^
121-
-
121+
- Fixed bug in :meth:`Index.get_indexer` round-tripping through string dtype when ``infer_string`` is enabled (:issue:`55834`)
122122
-
123123

124124
Missing

doc/source/whatsnew/v3.0.0.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ Other enhancements
5454
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5555
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
5656
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
57+
- :func:`read_parquet` accepts ``to_pandas_kwargs``, which are forwarded to :meth:`pyarrow.Table.to_pandas`. This enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as Python dictionaries (:issue:`56842`)
5758
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
5859
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
5960
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
@@ -626,6 +627,7 @@ Datetimelike
626627
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`)
627628
- Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`)
628629
- Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`)
630+
- Bug in :meth:`to_datetime` incorrectly converting ``arg`` when it is a ``np.datetime64`` object with a unit of ``ps``. (:issue:`60341`)
629631
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
630632

631633
Timedelta
@@ -688,6 +690,7 @@ I/O
688690
- Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`)
689691
- Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
690692
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
693+
- Bug in :meth:`DataFrame.to_excel` where a period level of a :class:`MultiIndex` index was not written as a date (:issue:`60099`)
691694
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`)
692695
- Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`)
693696
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
@@ -702,6 +705,8 @@ I/O
702705
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
703706
- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
704707
- Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
708+
- Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`)
709+
- Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`)
705710

706711
Period
707712
^^^^^^
@@ -760,7 +765,7 @@ ExtensionArray
760765

761766
Styler
762767
^^^^^^
763-
-
768+
- Fixed bug in :meth:`Styler.to_latex` when styling column headers in combination with a hidden index or hidden index levels.
764769

765770
Other
766771
^^^^^
@@ -784,6 +789,7 @@ Other
784789
- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
785790
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
786791
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
792+
- Bug in :meth:`Series.to_string` when series contains complex floats with exponents (:issue:`60405`)
787793
- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
788794
- Bug in the DataFrame Interchange Protocol implementation returning incorrect results for the dtype associated with data buffers of string and datetime columns (:issue:`54781`)
789795
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)

pandas/_libs/index.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ class MaskedUInt16Engine(MaskedIndexEngine): ...
7272
class MaskedUInt8Engine(MaskedIndexEngine): ...
7373
class MaskedBoolEngine(MaskedUInt8Engine): ...
7474

75+
# Type stub for the ObjectEngine subclass StringObjectEngine: its constructor
# takes the values to index plus an ``na_value`` argument.
class StringObjectEngine(ObjectEngine):
76+
def __init__(self, values: object, na_value) -> None: ...
77+
7578
class BaseMultiIndexCodesEngine:
7679
levels: list[np.ndarray]
7780
offsets: np.ndarray # np.ndarray[..., ndim=1]

pandas/_libs/index.pyx

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,31 @@ cdef class StringEngine(IndexEngine):
557557
raise KeyError(val)
558558
return str(val)
559559

560+
# ObjectEngine subclass for string keys. Lookup keys are validated by
# _check_type: strings pass through, missing values are mapped onto the
# configured ``na_value``, and every other key raises KeyError.
cdef class StringObjectEngine(ObjectEngine):
561+
562+
cdef:
563+
# Sentinel returned by _check_type for any key recognised as missing.
object na_value
564+
# True when ``na_value`` is the C_NA singleton; selects the null test
# used by _checknull.
bint uses_na
565+
566+
# Build the parent engine from ``values`` and remember which missing-value
# sentinel this engine uses.
def __init__(self, ndarray values, na_value):
567+
super().__init__(values)
568+
self.na_value = na_value
569+
# Identity check, evaluated once here so _checknull only reads a flag.
self.uses_na = na_value is C_NA
570+
571+
# Return whether ``val`` counts as missing for this engine.
cdef bint _checknull(self, object val):
572+
if self.uses_na:
573+
# With C_NA as the sentinel, only C_NA itself is treated as missing.
return val is C_NA
574+
else:
575+
# Otherwise defer to util.is_nan for the missing-value test.
return util.is_nan(val)
576+
577+
# Validate a lookup key: return str keys unchanged, return ``na_value`` for
# missing keys, and raise KeyError(val) for anything else.
cdef _check_type(self, object val):
578+
if isinstance(val, str):
579+
return val
580+
elif self._checknull(val):
581+
# Every key accepted as missing is returned as the single stored sentinel.
return self.na_value
582+
else:
583+
raise KeyError(val)
584+
560585

561586
cdef class DatetimeEngine(Int64Engine):
562587

pandas/_libs/src/vendored/numpy/datetime/np_datetime.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -660,11 +660,12 @@ void pandas_datetime_to_datetimestruct(npy_datetime dt, NPY_DATETIMEUNIT base,
660660
perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000;
661661

662662
set_datetimestruct_days(extract_unit(&dt, perday), out);
663-
out->hour = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60);
664-
out->min = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 60);
665-
out->sec = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000);
666-
out->us = (npy_int32)extract_unit(&dt, 1000LL);
667-
out->ps = (npy_int32)(dt * 1000);
663+
out->hour =
664+
(npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 60 * 60);
665+
out->min = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 60);
666+
out->sec = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000);
667+
out->us = (npy_int32)extract_unit(&dt, 1000LL * 1000);
668+
out->ps = (npy_int32)(dt);
668669
break;
669670

670671
case NPY_FR_fs:

0 commit comments

Comments
 (0)