Skip to content

Commit 221525b

Browse files
authored
Merge branch 'main' into gha-arm64-ci
2 parents bc8cc69 + a4e8149 commit 221525b

File tree

18 files changed

+504
-4
lines changed

18 files changed

+504
-4
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7979
-i "pandas.Timestamp.min PR02" \
8080
-i "pandas.Timestamp.resolution PR02" \
8181
-i "pandas.Timestamp.tzinfo GL08" \
82-
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
8382
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
8483
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
8584
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \

doc/source/reference/groupby.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ Function application
104104
DataFrameGroupBy.shift
105105
DataFrameGroupBy.size
106106
DataFrameGroupBy.skew
107+
DataFrameGroupBy.kurt
107108
DataFrameGroupBy.std
108109
DataFrameGroupBy.sum
109110
DataFrameGroupBy.var
@@ -159,6 +160,7 @@ Function application
159160
SeriesGroupBy.shift
160161
SeriesGroupBy.size
161162
SeriesGroupBy.skew
163+
SeriesGroupBy.kurt
162164
SeriesGroupBy.std
163165
SeriesGroupBy.sum
164166
SeriesGroupBy.var

doc/source/reference/window.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ Rolling window functions
3030
Rolling.std
3131
Rolling.min
3232
Rolling.max
33+
Rolling.first
34+
Rolling.last
3335
Rolling.corr
3436
Rolling.cov
3537
Rolling.skew
@@ -72,6 +74,8 @@ Expanding window functions
7274
Expanding.std
7375
Expanding.min
7476
Expanding.max
77+
Expanding.first
78+
Expanding.last
7579
Expanding.corr
7680
Expanding.cov
7781
Expanding.skew

doc/source/whatsnew/v2.3.0.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,6 @@ Other
175175
^^^^^
176176
- Fixed usage of ``inspect`` when the optional dependencies ``pyarrow`` or ``jinja2``
177177
are not installed (:issue:`60196`)
178-
-
179178

180179
.. ---------------------------------------------------------------------------
181180
.. _whatsnew_230.contributors:

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ Other enhancements
5757
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5858
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
5959
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
60+
- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
6061
- :func:`read_parquet` accepts ``to_pandas_kwargs``, which are forwarded to :meth:`pyarrow.Table.to_pandas`; this enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as Python dictionaries (:issue:`56842`)
6162
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
6263
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now support positional arguments passed as kwargs (:issue:`58995`)
@@ -811,6 +812,7 @@ Other
811812
- Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`)
812813
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
813814
- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
815+
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
814816

815817
.. ***DO NOT USE THIS SECTION***
816818

pandas/_libs/window/aggregations.pyi

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,18 @@ def roll_min(
6060
end: np.ndarray, # np.ndarray[np.int64]
6161
minp: int, # int64_t
6262
) -> np.ndarray: ... # np.ndarray[float]
63+
def roll_first(
64+
values: np.ndarray, # np.ndarray[np.float64]
65+
start: np.ndarray, # np.ndarray[np.int64]
66+
end: np.ndarray, # np.ndarray[np.int64]
67+
minp: int, # int64_t
68+
) -> np.ndarray: ... # np.ndarray[float]
69+
def roll_last(
70+
values: np.ndarray, # np.ndarray[np.float64]
71+
start: np.ndarray, # np.ndarray[np.int64]
72+
end: np.ndarray, # np.ndarray[np.int64]
73+
minp: int, # int64_t
74+
) -> np.ndarray: ... # np.ndarray[float]
6375
def roll_quantile(
6476
values: np.ndarray, # const float64_t[:]
6577
start: np.ndarray, # np.ndarray[np.int64]

pandas/_libs/window/aggregations.pyx

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1133,6 +1133,89 @@ cdef _roll_min_max(ndarray[float64_t] values,
11331133

11341134
return output
11351135

1136+
# ----------------------------------------------------------------------
1137+
# Rolling first, last
1138+
1139+
1140+
def roll_first(const float64_t[:] values, ndarray[int64_t] start,
1141+
ndarray[int64_t] end, int64_t minp) -> np.ndarray:
1142+
return _roll_first_last(values, start, end, minp, is_first=1)
1143+
1144+
1145+
def roll_last(const float64_t[:] values, ndarray[int64_t] start,
1146+
ndarray[int64_t] end, int64_t minp) -> np.ndarray:
1147+
return _roll_first_last(values, start, end, minp, is_first=0)
1148+
1149+
1150+
cdef _roll_first_last(const float64_t[:] values, ndarray[int64_t] start,
1151+
ndarray[int64_t] end, int64_t minp, bint is_first):
1152+
cdef:
1153+
Py_ssize_t i, j, fl_idx
1154+
bint is_monotonic_increasing_bounds
1155+
int64_t nobs = 0, N = len(start), s, e
1156+
float64_t val, res
1157+
ndarray[float64_t] output
1158+
1159+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
1160+
start, end
1161+
)
1162+
1163+
output = np.empty(N, dtype=np.float64)
1164+
1165+
if (end - start).max() == 0:
1166+
output[:] = NaN
1167+
return output
1168+
1169+
with nogil:
1170+
for i in range(0, N):
1171+
s = start[i]
1172+
e = end[i]
1173+
1174+
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
1175+
fl_idx = -1
1176+
nobs = 0
1177+
for j in range(s, e):
1178+
val = values[j]
1179+
if val == val:
1180+
if not is_first or fl_idx < s:
1181+
fl_idx = j
1182+
nobs += 1
1183+
else:
1184+
# handle deletes
1185+
for j in range(start[i - 1], s):
1186+
val = values[j]
1187+
if val == val:
1188+
nobs -= 1
1189+
1190+
# update fl_idx if out of range, if first
1191+
if is_first and fl_idx < s:
1192+
fl_idx = -1
1193+
for j in range(s, end[i - 1]):
1194+
val = values[j]
1195+
if val == val:
1196+
fl_idx = j
1197+
break
1198+
1199+
# handle adds
1200+
for j in range(end[i - 1], e):
1201+
val = values[j]
1202+
if val == val:
1203+
if not is_first or fl_idx < s:
1204+
fl_idx = j
1205+
nobs += 1
1206+
1207+
if nobs >= minp and fl_idx >= s:
1208+
res = values[fl_idx]
1209+
else:
1210+
res = NaN
1211+
1212+
output[i] = res
1213+
1214+
if not is_monotonic_increasing_bounds:
1215+
nobs = 0
1216+
1217+
return output
1218+
11361219

11371220
cdef enum InterpolationType:
11381221
LINEAR,

pandas/core/arrays/arrow/array.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ class ArrowExtensionArray(
258258
Parameters
259259
----------
260260
values : pyarrow.Array or pyarrow.ChunkedArray
261+
The input data to initialize the ArrowExtensionArray.
261262
262263
Attributes
263264
----------
@@ -271,6 +272,12 @@ class ArrowExtensionArray(
271272
-------
272273
ArrowExtensionArray
273274
275+
See Also
276+
--------
277+
array : Create a Pandas array with a specified dtype.
278+
DataFrame.to_feather : Write a DataFrame to the binary Feather format.
279+
read_feather : Load a feather-format object from the file path.
280+
274281
Notes
275282
-----
276283
Most methods are implemented using `pyarrow compute functions. <https://arrow.apache.org/docs/python/api/compute.html>`__

pandas/core/frame.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2317,7 +2317,10 @@ def maybe_reorder(
23172317
columns = columns.drop(exclude)
23182318

23192319
mgr = arrays_to_mgr(arrays, columns, result_index)
2320-
return cls._from_mgr(mgr, axes=mgr.axes)
2320+
df = DataFrame._from_mgr(mgr, axes=mgr.axes)
2321+
if cls is not DataFrame:
2322+
return cls(df, copy=False)
2323+
return df
23212324

23222325
def to_records(
23232326
self, index: bool = True, column_dtypes=None, index_dtypes=None

pandas/core/window/expanding.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,78 @@ def skew(self, numeric_only: bool = False):
723723
def kurt(self, numeric_only: bool = False):
724724
return super().kurt(numeric_only=numeric_only)
725725

726+
@doc(
727+
template_header,
728+
create_section_header("Parameters"),
729+
kwargs_numeric_only,
730+
create_section_header("Returns"),
731+
template_returns,
732+
create_section_header("See Also"),
733+
dedent(
734+
"""
735+
GroupBy.first : Similar method for GroupBy objects.
736+
Expanding.last : Method to get the last element in each window.\n
737+
"""
738+
).replace("\n", "", 1),
739+
create_section_header("Examples"),
740+
dedent(
741+
"""
742+
The example below will show an expanding calculation that requires at least
743+
three observations (``min_periods=3``) before a result is produced.
744+
745+
>>> s = pd.Series(range(5))
746+
>>> s.expanding(3).first()
747+
0 NaN
748+
1 NaN
749+
2 0.0
750+
3 0.0
751+
4 0.0
752+
dtype: float64
753+
"""
754+
).replace("\n", "", 1),
755+
window_method="expanding",
756+
aggregation_description="First (left-most) element of the window",
757+
agg_method="first",
758+
)
759+
def first(self, numeric_only: bool = False):
760+
return super().first(numeric_only=numeric_only)
761+
762+
@doc(
763+
template_header,
764+
create_section_header("Parameters"),
765+
kwargs_numeric_only,
766+
create_section_header("Returns"),
767+
template_returns,
768+
create_section_header("See Also"),
769+
dedent(
770+
"""
771+
GroupBy.last : Similar method for GroupBy objects.
772+
Expanding.first : Method to get the first element in each window.\n
773+
"""
774+
).replace("\n", "", 1),
775+
create_section_header("Examples"),
776+
dedent(
777+
"""
778+
The example below will show an expanding calculation that requires at least
779+
three observations (``min_periods=3``) before a result is produced.
780+
781+
>>> s = pd.Series(range(5))
782+
>>> s.expanding(3).last()
783+
0 NaN
784+
1 NaN
785+
2 2.0
786+
3 3.0
787+
4 4.0
788+
dtype: float64
789+
"""
790+
).replace("\n", "", 1),
791+
window_method="expanding",
792+
aggregation_description="Last (right-most) element of the window",
793+
agg_method="last",
794+
)
795+
def last(self, numeric_only: bool = False):
796+
return super().last(numeric_only=numeric_only)
797+
726798
@doc(
727799
template_header,
728800
create_section_header("Parameters"),

0 commit comments

Comments
 (0)