Skip to content

Commit 604de62

Browse files
Merge remote-tracking branch 'upstream/main' into string-dtype-ensure_string_array
2 parents cf40333 + 47b56ea commit 604de62

File tree

26 files changed

+274
-158
lines changed

26 files changed

+274
-158
lines changed

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ jobs:
380380
fetch-depth: 0
381381

382382
- name: Set up Python Free-threading Version
383-
uses: deadsnakes/action@v3.1.0
383+
uses: deadsnakes/action@v3.2.0
384384
with:
385385
python-version: 3.13-dev
386386
nogil: true

ci/code_checks.sh

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7575
-i "pandas.Period.ordinal GL08" \
7676
-i "pandas.PeriodDtype.freq SA01" \
7777
-i "pandas.RangeIndex.from_range PR01,SA01" \
78-
-i "pandas.RangeIndex.start SA01" \
7978
-i "pandas.RangeIndex.step SA01" \
80-
-i "pandas.RangeIndex.stop SA01" \
8179
-i "pandas.Series.cat.add_categories PR01,PR02" \
8280
-i "pandas.Series.cat.as_ordered PR01" \
8381
-i "pandas.Series.cat.as_unordered PR01" \
@@ -92,10 +90,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9290
-i "pandas.Series.dt.floor PR01,PR02" \
9391
-i "pandas.Series.dt.freq GL08" \
9492
-i "pandas.Series.dt.month_name PR01,PR02" \
95-
-i "pandas.Series.dt.nanoseconds SA01" \
9693
-i "pandas.Series.dt.normalize PR01" \
9794
-i "pandas.Series.dt.round PR01,PR02" \
98-
-i "pandas.Series.dt.seconds SA01" \
9995
-i "pandas.Series.dt.strftime PR01,PR02" \
10096
-i "pandas.Series.dt.to_period PR01,PR02" \
10197
-i "pandas.Series.dt.total_seconds PR01" \
@@ -113,8 +109,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
113109
-i "pandas.Timedelta.resolution PR02" \
114110
-i "pandas.Timedelta.to_timedelta64 SA01" \
115111
-i "pandas.Timedelta.total_seconds SA01" \
116-
-i "pandas.TimedeltaIndex.nanoseconds SA01" \
117-
-i "pandas.TimedeltaIndex.seconds SA01" \
118112
-i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
119113
-i "pandas.Timestamp.max PR02" \
120114
-i "pandas.Timestamp.min PR02" \
@@ -123,13 +117,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
123117
-i "pandas.Timestamp.tzinfo GL08" \
124118
-i "pandas.Timestamp.year GL08" \
125119
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
126-
-i "pandas.api.types.is_bool PR01,SA01" \
127-
-i "pandas.api.types.is_categorical_dtype SA01" \
128-
-i "pandas.api.types.is_complex PR01,SA01" \
129-
-i "pandas.api.types.is_complex_dtype SA01" \
130-
-i "pandas.api.types.is_datetime64_dtype SA01" \
131-
-i "pandas.api.types.is_datetime64_ns_dtype SA01" \
132-
-i "pandas.api.types.is_datetime64tz_dtype SA01" \
133120
-i "pandas.api.types.is_dict_like PR07,SA01" \
134121
-i "pandas.api.types.is_extension_array_dtype SA01" \
135122
-i "pandas.api.types.is_file_like PR07,SA01" \
@@ -163,7 +150,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
163150
-i "pandas.core.groupby.DataFrameGroupBy.agg RT03" \
164151
-i "pandas.core.groupby.DataFrameGroupBy.aggregate RT03" \
165152
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
166-
-i "pandas.core.groupby.DataFrameGroupBy.filter SA01" \
167153
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
168154
-i "pandas.core.groupby.DataFrameGroupBy.groups SA01" \
169155
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
@@ -179,7 +165,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
179165
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
180166
-i "pandas.core.groupby.SeriesGroupBy.agg RT03" \
181167
-i "pandas.core.groupby.SeriesGroupBy.aggregate RT03" \
182-
-i "pandas.core.groupby.SeriesGroupBy.filter PR01,SA01" \
183168
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
184169
-i "pandas.core.groupby.SeriesGroupBy.groups SA01" \
185170
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \

pandas/_libs/lib.pyx

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,10 +1129,21 @@ def is_bool(obj: object) -> bool:
11291129
"""
11301130
Return True if given object is boolean.
11311131

1132+
Parameters
1133+
----------
1134+
obj : object
1135+
Object to check.
1136+
11321137
Returns
11331138
-------
11341139
bool
11351140

1141+
See Also
1142+
--------
1143+
api.types.is_scalar : Check if the input is a scalar.
1144+
api.types.is_integer : Check if the input is an integer.
1145+
api.types.is_float : Check if the input is a float.
1146+
11361147
Examples
11371148
--------
11381149
>>> pd.api.types.is_bool(True)
@@ -1148,10 +1159,22 @@ def is_complex(obj: object) -> bool:
11481159
"""
11491160
Return True if given object is complex.
11501161

1162+
Parameters
1163+
----------
1164+
obj : object
1165+
Object to check.
1166+
11511167
Returns
11521168
-------
11531169
bool
11541170

1171+
See Also
1172+
--------
1173+
api.types.is_complex_dtype: Check whether the provided array or
1174+
dtype is of a complex dtype.
1175+
api.types.is_number: Check if the object is a number.
1176+
api.types.is_integer: Return True if given object is integer.
1177+
11551178
Examples
11561179
--------
11571180
>>> pd.api.types.is_complex(1 + 1j)

pandas/core/arrays/string_.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,10 @@ def __setitem__(self, key, value) -> None:
715715
else:
716716
if not is_array_like(value):
717717
value = np.asarray(value, dtype=object)
718+
else:
719+
# cast categories and friends to arrays to see if values are
720+
# compatible; this keeps behavior consistent with arrow-backed strings
721+
value = np.asarray(value)
718722
if len(value) and not lib.is_string_array(value, skipna=True):
719723
raise TypeError("Must provide strings.")
720724

pandas/core/arrays/string_arrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ def _maybe_convert_setitem_value(self, value):
240240
value[isna(value)] = None
241241
for v in value:
242242
if not (v is None or isinstance(v, str)):
243-
raise TypeError("Scalar must be NA or str")
243+
raise TypeError("Must provide strings")
244244
return super()._maybe_convert_setitem_value(value)
245245

246246
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:

pandas/core/arrays/timedeltas.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,11 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]:
842842
seconds_docstring = textwrap.dedent(
843843
"""Number of seconds (>= 0 and less than 1 day) for each element.
844844
845+
See Also
846+
--------
847+
Series.dt.seconds : Return number of seconds for each element.
848+
Series.dt.nanoseconds : Return number of nanoseconds for each element.
849+
845850
Examples
846851
--------
847852
For Series:
@@ -917,6 +922,11 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]:
917922
nanoseconds_docstring = textwrap.dedent(
918923
"""Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.
919924
925+
See Also
926+
--------
927+
Series.dt.seconds : Return number of seconds for each element.
928+
Series.dt.microseconds : Return number of microseconds for each element.
929+
920930
Examples
921931
--------
922932
For Series:

pandas/core/construction.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,10 @@ def sanitize_array(
611611
dtype = StringDtype(na_value=np.nan)
612612
subarr = dtype.construct_array_type()._from_sequence(data, dtype=dtype)
613613

614-
if subarr is data and copy:
614+
if (
615+
subarr is data
616+
or (subarr.dtype == "str" and subarr.dtype.storage == "python") # type: ignore[union-attr]
617+
) and copy:
615618
subarr = subarr.copy()
616619

617620
else:

pandas/core/dtypes/common.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,13 @@ def is_datetime64_dtype(arr_or_dtype) -> bool:
279279
boolean
280280
Whether or not the array-like or dtype is of the datetime64 dtype.
281281
282+
See Also
283+
--------
284+
api.types.is_datetime64_ns_dtype: Check whether the provided array or
285+
dtype is of the datetime64[ns] dtype.
286+
api.types.is_datetime64_any_dtype: Check whether the provided array or
287+
dtype is of the datetime64 dtype.
288+
282289
Examples
283290
--------
284291
>>> from pandas.api.types import is_datetime64_dtype
@@ -316,6 +323,13 @@ def is_datetime64tz_dtype(arr_or_dtype) -> bool:
316323
boolean
317324
Whether or not the array-like or dtype is of a DatetimeTZDtype dtype.
318325
326+
See Also
327+
--------
328+
api.types.is_datetime64_dtype: Check whether an array-like or
329+
dtype is of the datetime64 dtype.
330+
api.types.is_datetime64_any_dtype: Check whether the provided array or
331+
dtype is of the datetime64 dtype.
332+
319333
Examples
320334
--------
321335
>>> from pandas.api.types import is_datetime64tz_dtype
@@ -514,6 +528,12 @@ def is_categorical_dtype(arr_or_dtype) -> bool:
514528
boolean
515529
Whether or not the array-like or dtype is of the Categorical dtype.
516530
531+
See Also
532+
--------
533+
api.types.is_list_like: Check if the object is list-like.
534+
api.types.is_complex_dtype: Check whether the provided array or
535+
dtype is of a complex dtype.
536+
517537
Examples
518538
--------
519539
>>> from pandas.api.types import is_categorical_dtype
@@ -977,6 +997,13 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
977997
bool
978998
Whether or not the array or dtype is of the datetime64[ns] dtype.
979999
1000+
See Also
1001+
--------
1002+
api.types.is_datetime64_dtype: Check whether an array-like or
1003+
dtype is of the datetime64 dtype.
1004+
api.types.is_datetime64_any_dtype: Check whether the provided array or
1005+
dtype is of the datetime64 dtype.
1006+
9801007
Examples
9811008
--------
9821009
>>> from pandas.api.types import is_datetime64_ns_dtype
@@ -1436,6 +1463,14 @@ def is_complex_dtype(arr_or_dtype) -> bool:
14361463
boolean
14371464
Whether or not the array or dtype is of a complex dtype.
14381465
1466+
See Also
1467+
--------
1468+
api.types.is_complex: Return True if given object is complex.
1469+
api.types.is_numeric_dtype: Check whether the provided array or
1470+
dtype is of a numeric dtype.
1471+
api.types.is_integer_dtype: Check whether the provided array or
1472+
dtype is of an integer dtype.
1473+
14391474
Examples
14401475
--------
14411476
>>> from pandas.api.types import is_complex_dtype

pandas/core/groupby/generic.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -600,15 +600,23 @@ def filter(self, func, dropna: bool = True, *args, **kwargs):
600600
----------
601601
func : function
602602
Criterion to apply to each group. Should return True or False.
603-
dropna : bool
603+
dropna : bool, optional
604604
Drop groups that do not pass the filter. True by default; if False,
605605
groups that evaluate False are filled with NaNs.
606+
*args : tuple
607+
Optional positional arguments to pass to `func`.
608+
**kwargs : dict
609+
Optional keyword arguments to pass to `func`.
606610
607611
Returns
608612
-------
609613
Series
610614
The filtered subset of the original Series.
611615
616+
See Also
617+
--------
618+
DataFrameGroupBy.filter : Filter elements from groups based on criterion.
619+
612620
Notes
613621
-----
614622
Functions that mutate the passed object can produce unexpected
@@ -1943,16 +1951,20 @@ def filter(self, func, dropna: bool = True, *args, **kwargs) -> DataFrame:
19431951
dropna : bool
19441952
Drop groups that do not pass the filter. True by default; if False,
19451953
groups that evaluate False are filled with NaNs.
1946-
*args
1954+
*args : tuple
19471955
Additional positional arguments to pass to `func`.
1948-
**kwargs
1956+
**kwargs : dict
19491957
Additional keyword arguments to pass to `func`.
19501958
19511959
Returns
19521960
-------
19531961
DataFrame
19541962
The filtered subset of the original DataFrame.
19551963
1964+
See Also
1965+
--------
1966+
SeriesGroupBy.filter : Filter elements from groups based on criterion.
1967+
19561968
Notes
19571969
-----
19581970
Each subframe is endowed with the attribute 'name' in case you need to know

pandas/core/indexes/base.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,8 @@ def __new__(
504504

505505
elif is_ea_or_datetimelike_dtype(dtype):
506506
# non-EA dtype indexes have special casting logic, so we punt here
507-
pass
507+
if isinstance(data, (set, frozenset)):
508+
data = list(data)
508509

509510
elif is_ea_or_datetimelike_dtype(data_dtype):
510511
pass
@@ -6877,6 +6878,9 @@ def insert(self, loc: int, item) -> Index:
68776878
# We cannot keep the same dtype, so cast to the (often object)
68786879
# minimal shared dtype before doing the insert.
68796880
dtype = self._find_common_type_compat(item)
6881+
if dtype == self.dtype:
6882+
# EAs might run into recursion errors if loc is invalid
6883+
raise
68806884
return self.astype(dtype).insert(loc, item)
68816885

68826886
if arr.dtype != object or not isinstance(

0 commit comments

Comments
 (0)