Skip to content

Commit 2e25207

Browse files
Merge branch 'main' into fix_docstring_groupby
2 parents 9d2d4cf + a36c44e commit 2e25207

File tree

12 files changed

+97
-41
lines changed

12 files changed

+97
-41
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -73,8 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7373
-i "pandas.Period.freq GL08" \
7474
-i "pandas.Period.ordinal GL08" \
7575
-i "pandas.RangeIndex.from_range PR01,SA01" \
76-
-i "pandas.Series.dt.unit GL08" \
77-
-i "pandas.Series.pad PR01,SA01" \
7876
-i "pandas.Timedelta.max PR02" \
7977
-i "pandas.Timedelta.min PR02" \
8078
-i "pandas.Timedelta.resolution PR02" \
@@ -87,12 +85,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8785
-i "pandas.arrays.NumpyExtensionArray SA01" \
8886
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
8987
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
90-
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
91-
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
9288
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
93-
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
9489
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
95-
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
9690
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
9791
-i "pandas.core.resample.Resampler.mean SA01" \
9892
-i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -797,6 +797,7 @@ Other
797797
- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
798798
- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
799799
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
800+
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
800801

801802
.. ***DO NOT USE THIS SECTION***
802803

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ dependencies:
3535
- hypothesis>=6.84.0
3636
- gcsfs>=2022.11.0
3737
- ipython
38+
- pickleshare # Needed for IPython Sphinx directive in the docs GH#60429
3839
- jinja2>=3.1.2
3940
- lxml>=4.9.2
4041
- matplotlib>=3.6.3

pandas/core/arrays/datetimelike.py

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2073,7 +2073,29 @@ def _creso(self) -> int:
20732073

20742074
@cache_readonly
20752075
def unit(self) -> str:
2076-
# e.g. "ns", "us", "ms"
2076+
"""
2077+
The precision unit of the datetime data.
2078+
2079+
Returns the precision unit for the dtype.
2080+
It means the smallest time frame that can be stored within this dtype.
2081+
2082+
Returns
2083+
-------
2084+
str
2085+
Unit string representation (e.g. "ns").
2086+
2087+
See Also
2088+
--------
2089+
TimelikeOps.as_unit : Converts to a specific unit.
2090+
2091+
Examples
2092+
--------
2093+
>>> idx = pd.DatetimeIndex(["2020-01-02 01:02:03.004005006"])
2094+
>>> idx.unit
2095+
'ns'
2096+
>>> idx.as_unit("s").unit
2097+
's'
2098+
"""
20772099
# error: Argument 1 to "dtype_to_unit" has incompatible type
20782100
# "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
20792101
return dtype_to_unit(self.dtype) # type: ignore[arg-type]

pandas/core/dtypes/cast.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -87,8 +87,8 @@
8787

8888
if TYPE_CHECKING:
8989
from collections.abc import (
90+
Collection,
9091
Sequence,
91-
Sized,
9292
)
9393

9494
from pandas._typing import (
@@ -1581,7 +1581,7 @@ def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
15811581
return _maybe_unbox_datetimelike(value, dtype)
15821582

15831583

1584-
def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
1584+
def construct_1d_object_array_from_listlike(values: Collection) -> np.ndarray:
15851585
"""
15861586
Transform any list-like object in a 1-dimensional numpy array of object
15871587
dtype.
@@ -1599,11 +1599,9 @@ def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
15991599
-------
16001600
1-dimensional numpy array of dtype object
16011601
"""
1602-
# numpy will try to interpret nested lists as further dimensions, hence
1603-
# making a 1D array that contains list-likes is a bit tricky:
1604-
result = np.empty(len(values), dtype="object")
1605-
result[:] = values
1606-
return result
1602+
# numpy will try to interpret nested lists as further dimensions in np.array(),
1603+
# hence explicitly making a 1D array using np.fromiter
1604+
return np.fromiter(values, dtype="object", count=len(values))
16071605

16081606

16091607
def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray:

pandas/core/generic.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3878,6 +3878,14 @@ def to_csv(
38783878
>>> import os # doctest: +SKIP
38793879
>>> os.makedirs("folder/subfolder", exist_ok=True) # doctest: +SKIP
38803880
>>> df.to_csv("folder/subfolder/out.csv") # doctest: +SKIP
3881+
3882+
Format floats to two decimal places:
3883+
3884+
>>> df.to_csv("out1.csv", float_format="%.2f") # doctest: +SKIP
3885+
3886+
Format floats using scientific notation:
3887+
3888+
>>> df.to_csv("out2.csv", float_format="{{:.2e}}".format) # doctest: +SKIP
38813889
"""
38823890
df = self if isinstance(self, ABCDataFrame) else self.to_frame()
38833891

pandas/core/groupby/generic.py

Lines changed: 30 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1321,8 +1321,8 @@ def idxmin(self, skipna: bool = True) -> Series:
13211321
13221322
Returns
13231323
-------
1324-
Index
1325-
Label of the minimum value.
1324+
Series
1325+
Indexes of minima in each group.
13261326
13271327
Raises
13281328
------
@@ -1374,8 +1374,8 @@ def idxmax(self, skipna: bool = True) -> Series:
13741374
13751375
Returns
13761376
-------
1377-
Index
1378-
Label of the maximum value.
1377+
Series
1378+
Indexes of maxima in each group.
13791379
13801380
Raises
13811381
------
@@ -2453,6 +2453,10 @@ def nunique(self, dropna: bool = True) -> DataFrame:
24532453
nunique: DataFrame
24542454
Counts of unique elements in each position.
24552455
2456+
See Also
2457+
--------
2458+
DataFrame.nunique : Count number of distinct elements in specified axis.
2459+
24562460
Examples
24572461
--------
24582462
>>> df = pd.DataFrame(
@@ -2508,8 +2512,8 @@ def idxmax(
25082512
25092513
Returns
25102514
-------
2511-
Series
2512-
Indexes of maxima in each group.
2515+
DataFrame
2516+
Indexes of maxima in each column according to the group.
25132517
25142518
Raises
25152519
------
@@ -2519,6 +2523,7 @@ def idxmax(
25192523
See Also
25202524
--------
25212525
Series.idxmax : Return index of the maximum element.
2526+
DataFrame.idxmax : Indexes of maxima along the specified axis.
25222527
25232528
Notes
25242529
-----
@@ -2532,6 +2537,7 @@ def idxmax(
25322537
... {
25332538
... "consumption": [10.51, 103.11, 55.48],
25342539
... "co2_emissions": [37.2, 19.66, 1712],
2540+
... "food_type": ["meat", "plant", "meat"],
25352541
... },
25362542
... index=["Pork", "Wheat Products", "Beef"],
25372543
... )
@@ -2542,12 +2548,14 @@ def idxmax(
25422548
Wheat Products 103.11 19.66
25432549
Beef 55.48 1712.00
25442550
2545-
By default, it returns the index for the maximum value in each column.
2551+
By default, it returns the index for the maximum value in each column
2552+
according to the group.
25462553
2547-
>>> df.idxmax()
2548-
consumption Wheat Products
2549-
co2_emissions Beef
2550-
dtype: object
2554+
>>> df.groupby("food_type").idxmax()
2555+
consumption co2_emissions
2556+
food_type
2557+
animal Beef Beef
2558+
plant Wheat Products Wheat Products
25512559
"""
25522560
return self._idxmax_idxmin("idxmax", numeric_only=numeric_only, skipna=skipna)
25532561

@@ -2570,8 +2578,8 @@ def idxmin(
25702578
25712579
Returns
25722580
-------
2573-
Series
2574-
Indexes of minima in each group.
2581+
DataFrame
2582+
Indexes of minima in each column according to the group.
25752583
25762584
Raises
25772585
------
@@ -2581,6 +2589,7 @@ def idxmin(
25812589
See Also
25822590
--------
25832591
Series.idxmin : Return index of the minimum element.
2592+
DataFrame.idxmin : Indexes of minima along the specified axis.
25842593
25852594
Notes
25862595
-----
@@ -2594,6 +2603,7 @@ def idxmin(
25942603
... {
25952604
... "consumption": [10.51, 103.11, 55.48],
25962605
... "co2_emissions": [37.2, 19.66, 1712],
2606+
... "food_type": ["meat", "plant", "meat"],
25972607
... },
25982608
... index=["Pork", "Wheat Products", "Beef"],
25992609
... )
@@ -2604,12 +2614,14 @@ def idxmin(
26042614
Wheat Products 103.11 19.66
26052615
Beef 55.48 1712.00
26062616
2607-
By default, it returns the index for the minimum value in each column.
2617+
By default, it returns the index for the minimum value in each column
2618+
according to the group.
26082619
2609-
>>> df.idxmin()
2610-
consumption Pork
2611-
co2_emissions Wheat Products
2612-
dtype: object
2620+
>>> df.groupby("food_type").idxmin()
2621+
consumption co2_emissions
2622+
food_type
2623+
animal Pork Pork
2624+
plant Wheat Products Wheat Products
26132625
"""
26142626
return self._idxmax_idxmin("idxmin", numeric_only=numeric_only, skipna=skipna)
26152627

pandas/core/groupby/groupby.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -715,7 +715,19 @@ def get_group(self, name) -> DataFrame | Series:
715715
716716
Returns
717717
-------
718-
DataFrame or Series
718+
Series or DataFrame
719+
Get the respective Series or DataFrame corresponding to the group provided.
720+
721+
See Also
722+
--------
723+
DataFrameGroupBy.groups: Dictionary representation of the groupings formed
724+
during a groupby operation.
725+
DataFrameGroupBy.indices: Provides a mapping of group rows to positions
726+
of the elements.
727+
SeriesGroupBy.groups: Dictionary representation of the groupings formed
728+
during a groupby operation.
729+
SeriesGroupBy.indices: Provides a mapping of group rows to positions
730+
of the elements.
719731
720732
Examples
721733
--------
@@ -2660,8 +2672,8 @@ def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT:
26602672
26612673
See Also
26622674
--------
2663-
Series.sem : Return unbiased standard error of the mean over requested axis.
26642675
DataFrame.sem : Return unbiased standard error of the mean over requested axis.
2676+
Series.sem : Return unbiased standard error of the mean over requested axis.
26652677
26662678
Examples
26672679
--------

pandas/io/formats/format.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -669,9 +669,9 @@ def _truncate_horizontally(self) -> None:
669669
assert self.max_cols_fitted is not None
670670
col_num = self.max_cols_fitted // 2
671671
if col_num >= 1:
672-
left = self.tr_frame.iloc[:, :col_num]
673-
right = self.tr_frame.iloc[:, -col_num:]
674-
self.tr_frame = concat((left, right), axis=1)
672+
_len = len(self.tr_frame.columns)
673+
_slice = np.hstack([np.arange(col_num), np.arange(_len - col_num, _len)])
674+
self.tr_frame = self.tr_frame.iloc[:, _slice]
675675

676676
# truncate formatter
677677
if isinstance(self.formatters, (list, tuple)):
@@ -682,7 +682,7 @@ def _truncate_horizontally(self) -> None:
682682
else:
683683
col_num = cast(int, self.max_cols)
684684
self.tr_frame = self.tr_frame.iloc[:, :col_num]
685-
self.tr_col_num = col_num
685+
self.tr_col_num: int = col_num
686686

687687
def _truncate_vertically(self) -> None:
688688
"""Remove rows, which are not to be displayed.

pandas/io/stata.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2207,14 +2207,14 @@ def _convert_datetime_to_stata_type(fmt: str) -> np.dtype:
22072207
def _maybe_convert_to_int_keys(convert_dates: dict, varlist: list[Hashable]) -> dict:
22082208
new_dict = {}
22092209
for key, value in convert_dates.items():
2210-
if not value.startswith("%"): # make sure proper fmts
2210+
if not convert_dates[key].startswith("%"): # make sure proper fmts
22112211
convert_dates[key] = "%" + value
22122212
if key in varlist:
2213-
new_dict[varlist.index(key)] = value
2213+
new_dict[varlist.index(key)] = convert_dates[key]
22142214
else:
22152215
if not isinstance(key, int):
22162216
raise ValueError("convert_dates key must be a column or an integer")
2217-
new_dict[key] = value
2217+
new_dict[key] = convert_dates[key]
22182218
return new_dict
22192219

22202220

0 commit comments

Comments (0)