Skip to content

Commit ec9fd4f

Browse files
committed
Merge branch 'main' of https://github.com/sanggon6107/pandas into fix-loc-dtype
2 parents 1f529ce + 44c8f20 commit ec9fd4f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+817
-89
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,5 @@ COPY requirements-dev.txt /tmp
1313
RUN python -m pip install -r /tmp/requirements-dev.txt
1414
RUN git config --global --add safe.directory /home/pandas
1515

16-
ENV SHELL "/bin/bash"
16+
ENV SHELL="/bin/bash"
1717
CMD ["/bin/bash"]

asv_bench/benchmarks/rolling.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,19 @@ class Methods:
1010
["DataFrame", "Series"],
1111
[("rolling", {"window": 10}), ("rolling", {"window": 1000}), ("expanding", {})],
1212
["int", "float"],
13-
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum", "sem"],
13+
[
14+
"median",
15+
"mean",
16+
"max",
17+
"min",
18+
"std",
19+
"count",
20+
"skew",
21+
"kurt",
22+
"sum",
23+
"sem",
24+
"nunique",
25+
],
1426
)
1527
param_names = ["constructor", "window_kwargs", "dtype", "method"]
1628

ci/code_checks.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,16 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8383
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
8484
-i "pandas.core.resample.Resampler.quantile PR01,PR07" \
8585
-i "pandas.tseries.offsets.BDay PR02,SA01" \
86+
-i "pandas.tseries.offsets.BHalfYearBegin.is_on_offset GL08" \
87+
-i "pandas.tseries.offsets.BHalfYearBegin.n GL08" \
88+
-i "pandas.tseries.offsets.BHalfYearBegin.normalize GL08" \
89+
-i "pandas.tseries.offsets.BHalfYearBegin.rule_code GL08" \
90+
-i "pandas.tseries.offsets.BHalfYearBegin.startingMonth GL08" \
91+
-i "pandas.tseries.offsets.BHalfYearEnd.is_on_offset GL08" \
92+
-i "pandas.tseries.offsets.BHalfYearEnd.n GL08" \
93+
-i "pandas.tseries.offsets.BHalfYearEnd.normalize GL08" \
94+
-i "pandas.tseries.offsets.BHalfYearEnd.rule_code GL08" \
95+
-i "pandas.tseries.offsets.BHalfYearEnd.startingMonth GL08" \
8696
-i "pandas.tseries.offsets.BQuarterBegin.is_on_offset GL08" \
8797
-i "pandas.tseries.offsets.BQuarterBegin.n GL08" \
8898
-i "pandas.tseries.offsets.BQuarterBegin.normalize GL08" \
@@ -185,6 +195,16 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
185195
-i "pandas.tseries.offsets.FY5253Quarter.variation GL08" \
186196
-i "pandas.tseries.offsets.FY5253Quarter.weekday GL08" \
187197
-i "pandas.tseries.offsets.FY5253Quarter.year_has_extra_week GL08" \
198+
-i "pandas.tseries.offsets.HalfYearBegin.is_on_offset GL08" \
199+
-i "pandas.tseries.offsets.HalfYearBegin.n GL08" \
200+
-i "pandas.tseries.offsets.HalfYearBegin.normalize GL08" \
201+
-i "pandas.tseries.offsets.HalfYearBegin.rule_code GL08" \
202+
-i "pandas.tseries.offsets.HalfYearBegin.startingMonth GL08" \
203+
-i "pandas.tseries.offsets.HalfYearEnd.is_on_offset GL08" \
204+
-i "pandas.tseries.offsets.HalfYearEnd.n GL08" \
205+
-i "pandas.tseries.offsets.HalfYearEnd.normalize GL08" \
206+
-i "pandas.tseries.offsets.HalfYearEnd.rule_code GL08" \
207+
-i "pandas.tseries.offsets.HalfYearEnd.startingMonth GL08" \
188208
-i "pandas.tseries.offsets.Hour.is_on_offset GL08" \
189209
-i "pandas.tseries.offsets.Hour.n GL08" \
190210
-i "pandas.tseries.offsets.Hour.normalize GL08" \

doc/source/getting_started/overview.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,4 @@ License
174174
-------
175175

176176
.. literalinclude:: ../../../LICENSE
177+
:language: none

doc/source/reference/offset_frequency.rst

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,146 @@ Methods
776776
QuarterBegin.is_year_start
777777
QuarterBegin.is_year_end
778778

779+
BHalfYearEnd
780+
------------
781+
.. autosummary::
782+
:toctree: api/
783+
784+
BHalfYearEnd
785+
786+
Properties
787+
~~~~~~~~~~
788+
.. autosummary::
789+
:toctree: api/
790+
791+
BHalfYearEnd.freqstr
792+
BHalfYearEnd.kwds
793+
BHalfYearEnd.name
794+
BHalfYearEnd.nanos
795+
BHalfYearEnd.normalize
796+
BHalfYearEnd.rule_code
797+
BHalfYearEnd.n
798+
BHalfYearEnd.startingMonth
799+
800+
Methods
801+
~~~~~~~
802+
.. autosummary::
803+
:toctree: api/
804+
805+
BHalfYearEnd.copy
806+
BHalfYearEnd.is_on_offset
807+
BHalfYearEnd.is_month_start
808+
BHalfYearEnd.is_month_end
809+
BHalfYearEnd.is_quarter_start
810+
BHalfYearEnd.is_quarter_end
811+
BHalfYearEnd.is_year_start
812+
BHalfYearEnd.is_year_end
813+
814+
BHalfYearBegin
815+
--------------
816+
.. autosummary::
817+
:toctree: api/
818+
819+
BHalfYearBegin
820+
821+
Properties
822+
~~~~~~~~~~
823+
.. autosummary::
824+
:toctree: api/
825+
826+
BHalfYearBegin.freqstr
827+
BHalfYearBegin.kwds
828+
BHalfYearBegin.name
829+
BHalfYearBegin.nanos
830+
BHalfYearBegin.normalize
831+
BHalfYearBegin.rule_code
832+
BHalfYearBegin.n
833+
BHalfYearBegin.startingMonth
834+
835+
Methods
836+
~~~~~~~
837+
.. autosummary::
838+
:toctree: api/
839+
840+
BHalfYearBegin.copy
841+
BHalfYearBegin.is_on_offset
842+
BHalfYearBegin.is_month_start
843+
BHalfYearBegin.is_month_end
844+
BHalfYearBegin.is_quarter_start
845+
BHalfYearBegin.is_quarter_end
846+
BHalfYearBegin.is_year_start
847+
BHalfYearBegin.is_year_end
848+
849+
HalfYearEnd
850+
-----------
851+
.. autosummary::
852+
:toctree: api/
853+
854+
HalfYearEnd
855+
856+
Properties
857+
~~~~~~~~~~
858+
.. autosummary::
859+
:toctree: api/
860+
861+
HalfYearEnd.freqstr
862+
HalfYearEnd.kwds
863+
HalfYearEnd.name
864+
HalfYearEnd.nanos
865+
HalfYearEnd.normalize
866+
HalfYearEnd.rule_code
867+
HalfYearEnd.n
868+
HalfYearEnd.startingMonth
869+
870+
Methods
871+
~~~~~~~
872+
.. autosummary::
873+
:toctree: api/
874+
875+
HalfYearEnd.copy
876+
HalfYearEnd.is_on_offset
877+
HalfYearEnd.is_month_start
878+
HalfYearEnd.is_month_end
879+
HalfYearEnd.is_quarter_start
880+
HalfYearEnd.is_quarter_end
881+
HalfYearEnd.is_year_start
882+
HalfYearEnd.is_year_end
883+
884+
HalfYearBegin
885+
-------------
886+
.. autosummary::
887+
:toctree: api/
888+
889+
HalfYearBegin
890+
891+
Properties
892+
~~~~~~~~~~
893+
.. autosummary::
894+
:toctree: api/
895+
896+
HalfYearBegin.freqstr
897+
HalfYearBegin.kwds
898+
HalfYearBegin.name
899+
HalfYearBegin.nanos
900+
HalfYearBegin.normalize
901+
HalfYearBegin.rule_code
902+
HalfYearBegin.n
903+
HalfYearBegin.startingMonth
904+
905+
Methods
906+
~~~~~~~
907+
.. autosummary::
908+
:toctree: api/
909+
910+
HalfYearBegin.copy
911+
HalfYearBegin.is_on_offset
912+
HalfYearBegin.is_month_start
913+
HalfYearBegin.is_month_end
914+
HalfYearBegin.is_quarter_start
915+
HalfYearBegin.is_quarter_end
916+
HalfYearBegin.is_year_start
917+
HalfYearBegin.is_year_end
918+
779919
BYearEnd
780920
--------
781921
.. autosummary::

doc/source/reference/window.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Rolling window functions
4242
Rolling.quantile
4343
Rolling.sem
4444
Rolling.rank
45+
Rolling.nunique
4546

4647
.. _api.functions_window:
4748

@@ -86,6 +87,7 @@ Expanding window functions
8687
Expanding.quantile
8788
Expanding.sem
8889
Expanding.rank
90+
Expanding.nunique
8991

9092
.. _api.functions_ewm:
9193

doc/source/user_guide/io.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
1818
:widths: 30, 100, 60, 60
1919

2020
text,`CSV <https://en.wikipedia.org/wiki/Comma-separated_values>`__, :ref:`read_csv<io.read_csv_table>`, :ref:`to_csv<io.store_in_csv>`
21-
text,Fixed-Width Text File, :ref:`read_fwf<io.fwf_reader>` , NA
21+
text,Fixed-Width Text File, :ref:`read_fwf<io.fwf_reader>`, NA
2222
text,`JSON <https://www.json.org/>`__, :ref:`read_json<io.json_reader>`, :ref:`to_json<io.json_writer>`
2323
text,`HTML <https://en.wikipedia.org/wiki/HTML>`__, :ref:`read_html<io.read_html>`, :ref:`to_html<io.html>`
24-
text,`LaTeX <https://en.wikipedia.org/wiki/LaTeX>`__, :ref:`Styler.to_latex<io.latex>` , NA
24+
text,`LaTeX <https://en.wikipedia.org/wiki/LaTeX>`__, NA, :ref:`Styler.to_latex<io.latex>`
2525
text,`XML <https://www.w3.org/standards/xml/core>`__, :ref:`read_xml<io.read_xml>`, :ref:`to_xml<io.xml>`
2626
text, Local clipboard, :ref:`read_clipboard<io.clipboard>`, :ref:`to_clipboard<io.clipboard>`
2727
binary,`MS Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__ , :ref:`read_excel<io.excel_reader>`, :ref:`to_excel<io.excel_writer>`

doc/source/user_guide/timeseries.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -891,6 +891,10 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ
891891
:class:`~pandas.tseries.offsets.BQuarterEnd`, ``'BQE'``, "business quarter end"
892892
:class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin"
893893
:class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter"
894+
:class:`~pandas.tseries.offsets.HalfYearEnd`, ``'HYE'``, "calendar half year end"
895+
:class:`~pandas.tseries.offsets.HalfYearBegin`, ``'HYS'``, "calendar half year begin"
896+
:class:`~pandas.tseries.offsets.BHalfYearEnd`, ``'BHYE'``, "business half year end"
897+
:class:`~pandas.tseries.offsets.BHalfYearBegin`, ``'BHYS'``, "business half year begin"
894898
:class:`~pandas.tseries.offsets.YearEnd`, ``'YE'``, "calendar year end"
895899
:class:`~pandas.tseries.offsets.YearBegin`, ``'YS'`` or ``'BYS'``,"calendar year begin"
896900
:class:`~pandas.tseries.offsets.BYearEnd`, ``'BYE'``, "business year end"

doc/source/whatsnew/v2.3.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Other enhancements
3737
updated to work correctly with NumPy >= 2 (:issue:`57739`)
3838
- :meth:`Series.str.decode` result now has ``StringDtype`` when ``future.infer_string`` is True (:issue:`60709`)
3939
- :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with ``StringDtype`` (:issue:`60663`)
40+
- Improved ``repr`` of :class:`.NumpyExtensionArray` to account for NEP51 (:issue:`61085`)
4041
- The :meth:`Series.str.decode` has gained the argument ``dtype`` to control the dtype of the result (:issue:`60940`)
4142
- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns (:issue:`60633`)
4243
- The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
@@ -118,6 +119,7 @@ Conversion
118119

119120
Strings
120121
^^^^^^^
122+
- Bug in :meth:`DataFrame.sum` with ``axis=1``, :meth:`.DataFrameGroupBy.sum` or :meth:`.SeriesGroupBy.sum` with ``skipna=True``, and :meth:`.Resampler.sum` on :class:`StringDtype` with all NA values resulted in ``0`` and is now the empty string ``""`` (:issue:`60229`)
121123
- Bug in :meth:`Series.__pos__` and :meth:`DataFrame.__pos__` did not raise for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`60710`)
122124
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
123125
- Bug in :meth:`Series.replace` with :class:`StringDtype` when replacing with a non-string value was not upcasting to ``object`` dtype (:issue:`60282`)

doc/source/whatsnew/v3.0.0.rst

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ Other enhancements
6161
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
6262
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
6363
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
64+
- :class:`ArrowDtype` now supports ``pyarrow.JsonType`` (:issue:`60958`)
6465
- :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``median``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`)
66+
- :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`)
6567
- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
6668
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
6769
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
@@ -694,8 +696,10 @@ Interval
694696
Indexing
695697
^^^^^^^^
696698
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
699+
- Bug in :meth:`DataFrame.__getitem__` when slicing a :class:`DataFrame` with many rows raised an ``OverflowError`` (:issue:`59531`)
697700
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
698701
- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
702+
- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
699703
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
700704
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
701705

@@ -712,12 +716,13 @@ MultiIndex
712716
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
713717
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
714718
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
715-
-
719+
- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`)
716720

717721
I/O
718722
^^^
719723
- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
720724
- Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.columns` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
725+
- Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`)
721726
- Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`)
722727
- Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`)
723728
- Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
@@ -778,6 +783,7 @@ Reshaping
778783
^^^^^^^^^
779784
- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
780785
- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
786+
- Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`)
781787
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
782788
- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
783789
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
@@ -786,6 +792,7 @@ Reshaping
786792
- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
787793
- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
788794
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
795+
- Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)
789796

790797
Sparse
791798
^^^^^^

0 commit comments

Comments
 (0)