Skip to content

Commit dc5fa49

Browse files
committed
Merge branch 'main' into fix_docstring_pandas_styler
2 parents 175dd36 + bb4ab4f commit dc5fa49

File tree

7 files changed

+147
-52
lines changed

7 files changed

+147
-52
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
142142
-i "pandas.Series.sparse.sp_values SA01" \
143143
-i "pandas.Series.sparse.to_coo PR07,RT03,SA01" \
144144
-i "pandas.Series.std PR01,RT03,SA01" \
145-
-i "pandas.Series.str.wrap RT03,SA01" \
146-
-i "pandas.Series.str.zfill RT03" \
147145
-i "pandas.Timedelta.asm8 SA01" \
148146
-i "pandas.Timedelta.ceil SA01" \
149147
-i "pandas.Timedelta.components SA01" \
@@ -175,14 +173,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
175173
-i "pandas.Timestamp.tzinfo GL08" \
176174
-i "pandas.Timestamp.value GL08" \
177175
-i "pandas.Timestamp.year GL08" \
178-
-i "pandas.api.extensions.ExtensionArray.duplicated RT03,SA01" \
179-
-i "pandas.api.extensions.ExtensionArray.fillna SA01" \
180-
-i "pandas.api.extensions.ExtensionArray.insert PR07,RT03,SA01" \
181176
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
182-
-i "pandas.api.extensions.ExtensionArray.isin PR07,RT03,SA01" \
183-
-i "pandas.api.extensions.ExtensionArray.tolist RT03,SA01" \
184-
-i "pandas.api.extensions.ExtensionArray.unique RT03,SA01" \
185-
-i "pandas.api.extensions.ExtensionArray.view SA01" \
186177
-i "pandas.api.interchange.from_dataframe RT03,SA01" \
187178
-i "pandas.api.types.is_bool PR01,SA01" \
188179
-i "pandas.api.types.is_categorical_dtype SA01" \
@@ -235,7 +226,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
235226
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
236227
-i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
237228
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
238-
-i "pandas.core.groupby.DataFrameGroupBy.prod SA01" \
239229
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
240230
-i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
241231
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
@@ -252,7 +242,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
252242
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
253243
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
254244
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
255-
-i "pandas.core.groupby.SeriesGroupBy.prod SA01" \
256245
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
257246
-i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
258247
-i "pandas.core.resample.Resampler.__iter__ RT03,SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ Other enhancements
4343
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
4444
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
4545
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
46+
- :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`)
4647
- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`)
4748
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
4849
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)

pandas/core/arrays/base.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1137,6 +1137,13 @@ def fillna(
11371137
ExtensionArray
11381138
With NA/NaN filled.
11391139
1140+
See Also
1141+
--------
1142+
api.extensions.ExtensionArray.dropna : Return ExtensionArray without
1143+
NA values.
1144+
api.extensions.ExtensionArray.isna : A 1-D array indicating if
1145+
each value is missing.
1146+
11401147
Examples
11411148
--------
11421149
>>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
@@ -1220,6 +1227,15 @@ def duplicated(
12201227
Returns
12211228
-------
12221229
ndarray[bool]
1230+
With True at indices where elements are duplicated and False otherwise.
1231+
1232+
See Also
1233+
--------
1234+
DataFrame.duplicated : Return boolean Series denoting
1235+
duplicate rows.
1236+
Series.duplicated : Indicate duplicate Series values.
1237+
api.extensions.ExtensionArray.unique : Compute the ExtensionArray
1238+
of unique values.
12231239
12241240
Examples
12251241
--------
@@ -1303,6 +1319,13 @@ def unique(self) -> Self:
13031319
Returns
13041320
-------
13051321
pandas.api.extensions.ExtensionArray
1322+
With unique values from the input array.
1323+
1324+
See Also
1325+
--------
1326+
Index.unique: Return unique values in the index.
1327+
Series.unique: Return unique values of Series object.
1328+
unique: Return unique values based on a hash table.
13061329
13071330
Examples
13081331
--------
@@ -1436,10 +1459,18 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
14361459
Parameters
14371460
----------
14381461
values : np.ndarray or ExtensionArray
1462+
Values to compare every element in the array against.
14391463
14401464
Returns
14411465
-------
14421466
np.ndarray[bool]
1467+
With True at indices where the value is in `values`.
1468+
1469+
See Also
1470+
--------
1471+
DataFrame.isin: Whether each element in the DataFrame is contained in values.
1472+
Index.isin: Return a boolean array where the index values are in values.
1473+
Series.isin: Whether elements in Series are contained in values.
14431474
14441475
Examples
14451476
--------
@@ -1743,6 +1774,12 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
17431774
ExtensionArray or np.ndarray
17441775
A view on the :class:`ExtensionArray`'s data.
17451776
1777+
See Also
1778+
--------
1779+
api.extensions.ExtensionArray.ravel: Return a flattened view on input array.
1780+
Index.view: Equivalent function for Index.
1781+
ndarray.view: New view of array with the same data.
1782+
17461783
Examples
17471784
--------
17481785
This gives view on the underlying data of an ``ExtensionArray`` and is not a
@@ -2201,6 +2238,12 @@ def tolist(self) -> list:
22012238
Returns
22022239
-------
22032240
list
2241+
Python list of values in array.
2242+
2243+
See Also
2244+
--------
2245+
Index.to_list: Return a list of the values in the Index.
2246+
Series.to_list: Return a list of the values in the Series.
22042247
22052248
Examples
22062249
--------
@@ -2223,11 +2266,18 @@ def insert(self, loc: int, item) -> Self:
22232266
Parameters
22242267
----------
22252268
loc : int
2269+
Index where the `item` needs to be inserted.
22262270
item : scalar-like
2271+
Value to be inserted.
22272272
22282273
Returns
22292274
-------
2230-
same type as self
2275+
ExtensionArray
2276+
With `item` inserted at `loc`.
2277+
2278+
See Also
2279+
--------
2280+
Index.insert: Make new Index inserting new item at location.
22312281
22322282
Notes
22332283
-----

pandas/core/groupby/groupby.py

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -164,32 +164,6 @@ class providing the base-class of operations.
164164
to each row or column of a DataFrame.
165165
"""
166166

167-
_groupby_agg_method_template = """
168-
Compute {fname} of group values.
169-
170-
Parameters
171-
----------
172-
numeric_only : bool, default {no}
173-
Include only float, int, boolean columns.
174-
175-
.. versionchanged:: 2.0.0
176-
177-
numeric_only no longer accepts ``None``.
178-
179-
min_count : int, default {mc}
180-
The required number of valid values to perform the operation. If fewer
181-
than ``min_count`` non-NA values are present the result will be NA.
182-
183-
Returns
184-
-------
185-
Series or DataFrame
186-
Computed {fname} of values within each group.
187-
188-
Examples
189-
--------
190-
{example}
191-
"""
192-
193167
_groupby_agg_method_engine_template = """
194168
Compute {fname} of group values.
195169
@@ -3029,16 +3003,38 @@ def sum(
30293003
return result
30303004

30313005
@final
3032-
@doc(
3033-
_groupby_agg_method_template,
3034-
fname="prod",
3035-
no=False,
3036-
mc=0,
3037-
example=dedent(
3038-
"""\
3006+
def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT:
3007+
"""
3008+
Compute prod of group values.
3009+
3010+
Parameters
3011+
----------
3012+
numeric_only : bool, default False
3013+
Include only float, int, boolean columns.
3014+
3015+
.. versionchanged:: 2.0.0
3016+
3017+
numeric_only no longer accepts ``None``.
3018+
3019+
min_count : int, default 0
3020+
The required number of valid values to perform the operation. If fewer
3021+
than ``min_count`` non-NA values are present the result will be NA.
3022+
3023+
Returns
3024+
-------
3025+
Series or DataFrame
3026+
Computed prod of values within each group.
3027+
3028+
See Also
3029+
--------
3030+
Series.prod : Return the product of the values over the requested axis.
3031+
DataFrame.prod : Return the product of the values over the requested axis.
3032+
3033+
Examples
3034+
--------
30393035
For SeriesGroupBy:
30403036
3041-
>>> lst = ['a', 'a', 'b', 'b']
3037+
>>> lst = ["a", "a", "b", "b"]
30423038
>>> ser = pd.Series([1, 2, 3, 4], index=lst)
30433039
>>> ser
30443040
a 1
@@ -3054,8 +3050,11 @@ def sum(
30543050
For DataFrameGroupBy:
30553051
30563052
>>> data = [[1, 8, 2], [1, 2, 5], [2, 5, 8], [2, 6, 9]]
3057-
>>> df = pd.DataFrame(data, columns=["a", "b", "c"],
3058-
... index=["tiger", "leopard", "cheetah", "lion"])
3053+
>>> df = pd.DataFrame(
3054+
... data,
3055+
... columns=["a", "b", "c"],
3056+
... index=["tiger", "leopard", "cheetah", "lion"],
3057+
... )
30593058
>>> df
30603059
a b c
30613060
tiger 1 8 2
@@ -3066,10 +3065,8 @@ def sum(
30663065
b c
30673066
a
30683067
1 16 10
3069-
2 30 72"""
3070-
),
3071-
)
3072-
def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT:
3068+
2 30 72
3069+
"""
30733070
return self._agg_general(
30743071
numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod
30753072
)

pandas/core/series.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from pandas._libs.lib import is_range_indexer
3535
from pandas.compat import PYPY
3636
from pandas.compat._constants import REF_COUNT
37+
from pandas.compat._optional import import_optional_dependency
3738
from pandas.compat.numpy import function as nv
3839
from pandas.errors import (
3940
ChainedAssignmentError,
@@ -558,6 +559,32 @@ def _init_dict(
558559

559560
# ----------------------------------------------------------------------
560561

562+
def __arrow_c_stream__(self, requested_schema=None):
563+
"""
564+
Export the pandas Series as an Arrow C stream PyCapsule.
565+
566+
This relies on pyarrow to convert the pandas Series to the Arrow
567+
format (and follows the default behaviour of ``pyarrow.Array.from_pandas``
568+
in its handling of the index, i.e. to ignore it).
569+
This conversion is not necessarily zero-copy.
570+
571+
Parameters
572+
----------
573+
requested_schema : PyCapsule, default None
574+
The schema to which the Series should be cast, passed as a
575+
PyCapsule containing a C ArrowSchema representation of the
576+
requested schema.
577+
578+
Returns
579+
-------
580+
PyCapsule
581+
"""
582+
pa = import_optional_dependency("pyarrow", min_version="16.0.0")
583+
ca = pa.chunked_array([pa.Array.from_pandas(self, type=requested_schema)])
584+
return ca.__arrow_c_stream__(requested_schema)
585+
586+
# ----------------------------------------------------------------------
587+
561588
@property
562589
def _constructor(self) -> type[Series]:
563590
return Series

pandas/core/strings/accessor.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1853,6 +1853,7 @@ def zfill(self, width: int):
18531853
Returns
18541854
-------
18551855
Series/Index of objects.
1856+
A Series or Index where the strings are prepended with '0' characters.
18561857
18571858
See Also
18581859
--------
@@ -2385,6 +2386,13 @@ def wrap(
23852386
Returns
23862387
-------
23872388
Series or Index
2389+
A Series or Index where the strings are wrapped at the specified line width.
2390+
2391+
See Also
2392+
--------
2393+
Series.str.strip : Remove leading and trailing characters in Series/Index.
2394+
Series.str.lstrip : Remove leading characters in Series/Index.
2395+
Series.str.rstrip : Remove trailing characters in Series/Index.
23882396
23892397
Notes
23902398
-----
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import ctypes
2+
3+
import pytest
4+
5+
import pandas as pd
6+
7+
pa = pytest.importorskip("pyarrow", minversion="16.0")
8+
9+
10+
def test_series_arrow_interface():
11+
s = pd.Series([1, 4, 2])
12+
13+
capsule = s.__arrow_c_stream__()
14+
assert (
15+
ctypes.pythonapi.PyCapsule_IsValid(
16+
ctypes.py_object(capsule), b"arrow_array_stream"
17+
)
18+
== 1
19+
)
20+
21+
ca = pa.chunked_array(s)
22+
expected = pa.chunked_array([[1, 4, 2]])
23+
assert ca.equals(expected)

0 commit comments

Comments
 (0)