Skip to content

Commit ee91760

Browse files
Merge remote-tracking branch 'upstream/main' into string-dtype-honor-storage-option
2 parents 1e646a3 + 9c776ae commit ee91760

File tree

29 files changed

+205
-142
lines changed

29 files changed

+205
-142
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -158,26 +158,15 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
158158
-i "pandas.Series.sparse.sp_values SA01" \
159159
-i "pandas.Series.sparse.to_coo PR07,RT03,SA01" \
160160
-i "pandas.Series.std PR01,RT03,SA01" \
161-
-i "pandas.Series.str.capitalize RT03" \
162-
-i "pandas.Series.str.casefold RT03" \
163-
-i "pandas.Series.str.center RT03,SA01" \
164-
-i "pandas.Series.str.decode PR07,RT03,SA01" \
165-
-i "pandas.Series.str.encode PR07,RT03,SA01" \
166-
-i "pandas.Series.str.ljust RT03,SA01" \
167-
-i "pandas.Series.str.lower RT03" \
168161
-i "pandas.Series.str.lstrip RT03" \
169162
-i "pandas.Series.str.match RT03" \
170163
-i "pandas.Series.str.normalize RT03,SA01" \
171164
-i "pandas.Series.str.partition RT03" \
172165
-i "pandas.Series.str.repeat SA01" \
173166
-i "pandas.Series.str.replace SA01" \
174-
-i "pandas.Series.str.rjust RT03,SA01" \
175167
-i "pandas.Series.str.rpartition RT03" \
176168
-i "pandas.Series.str.rstrip RT03" \
177169
-i "pandas.Series.str.strip RT03" \
178-
-i "pandas.Series.str.swapcase RT03" \
179-
-i "pandas.Series.str.title RT03" \
180-
-i "pandas.Series.str.upper RT03" \
181170
-i "pandas.Series.str.wrap RT03,SA01" \
182171
-i "pandas.Series.str.zfill RT03" \
183172
-i "pandas.Series.struct.dtypes SA01" \

doc/source/development/contributing_docstring.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ backticks. The following are considered inline code:
142142
143143
With several mistakes in the docstring.
144144
145-
It has a blank like after the signature ``def func():``.
145+
It has a blank line after the signature ``def func():``.
146146
147147
The text 'Some function' should go in the line after the
148148
opening quotes of the docstring, not in the same line.

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Other enhancements
5050
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
5151
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5252
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
53+
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
5354
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
5455
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
5556
- Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)

pandas/_libs/tslibs/nattype.pyx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,7 +1047,7 @@ class NaTType(_NaT):
10471047
* 'NaT' will return NaT for an ambiguous time.
10481048
* 'raise' will raise an AmbiguousTimeError for an ambiguous time.
10491049
1050-
nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \
1050+
nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \
10511051
timedelta}, default 'raise'
10521052
A nonexistent time does not exist in a particular timezone
10531053
where clocks moved forward due to DST.
@@ -1148,7 +1148,7 @@ timedelta}, default 'raise'
11481148
* 'NaT' will return NaT for an ambiguous time.
11491149
* 'raise' will raise an AmbiguousTimeError for an ambiguous time.
11501150
1151-
nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \
1151+
nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \
11521152
timedelta}, default 'raise'
11531153
A nonexistent time does not exist in a particular timezone
11541154
where clocks moved forward due to DST.
@@ -1243,7 +1243,7 @@ timedelta}, default 'raise'
12431243
* 'NaT' will return NaT for an ambiguous time.
12441244
* 'raise' will raise an AmbiguousTimeError for an ambiguous time.
12451245
1246-
nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \
1246+
nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \
12471247
timedelta}, default 'raise'
12481248
A nonexistent time does not exist in a particular timezone
12491249
where clocks moved forward due to DST.
@@ -1407,7 +1407,7 @@ timedelta}, default 'raise'
14071407
* 'NaT' will return NaT for an ambiguous time.
14081408
* 'raise' will raise an AmbiguousTimeError for an ambiguous time.
14091409
1410-
nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \
1410+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \
14111411
default 'raise'
14121412
A nonexistent time does not exist in a particular timezone
14131413
where clocks moved forward due to DST.

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2106,7 +2106,7 @@ class Timestamp(_Timestamp):
21062106
* 'NaT' will return NaT for an ambiguous time.
21072107
* 'raise' will raise an AmbiguousTimeError for an ambiguous time.
21082108
2109-
nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \
2109+
nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \
21102110
timedelta}, default 'raise'
21112111
A nonexistent time does not exist in a particular timezone
21122112
where clocks moved forward due to DST.
@@ -2209,7 +2209,7 @@ timedelta}, default 'raise'
22092209
* 'NaT' will return NaT for an ambiguous time.
22102210
* 'raise' will raise an AmbiguousTimeError for an ambiguous time.
22112211
2212-
nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \
2212+
nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \
22132213
timedelta}, default 'raise'
22142214
A nonexistent time does not exist in a particular timezone
22152215
where clocks moved forward due to DST.
@@ -2304,7 +2304,7 @@ timedelta}, default 'raise'
23042304
* 'NaT' will return NaT for an ambiguous time.
23052305
* 'raise' will raise an AmbiguousTimeError for an ambiguous time.
23062306
2307-
nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \
2307+
nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \
23082308
timedelta}, default 'raise'
23092309
A nonexistent time does not exist in a particular timezone
23102310
where clocks moved forward due to DST.
@@ -2443,7 +2443,7 @@ timedelta}, default 'raise'
24432443
* 'NaT' will return NaT for an ambiguous time.
24442444
* 'raise' will raise an AmbiguousTimeError for an ambiguous time.
24452445
2446-
nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \
2446+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \
24472447
default 'raise'
24482448
A nonexistent time does not exist in a particular timezone
24492449
where clocks moved forward due to DST.

pandas/core/arrays/string_arrow.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -130,18 +130,22 @@ class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringAr
130130

131131
def __init__(self, values) -> None:
132132
_chk_pyarrow_available()
133-
if isinstance(values, (pa.Array, pa.ChunkedArray)) and pa.types.is_string(
134-
values.type
133+
if isinstance(values, (pa.Array, pa.ChunkedArray)) and (
134+
pa.types.is_string(values.type)
135+
or (
136+
pa.types.is_dictionary(values.type)
137+
and (
138+
pa.types.is_string(values.type.value_type)
139+
or pa.types.is_large_string(values.type.value_type)
140+
)
141+
)
135142
):
136143
values = pc.cast(values, pa.large_string())
137144

138145
super().__init__(values)
139146
self._dtype = StringDtype(storage=self._storage, na_value=self._na_value)
140147

141-
if not pa.types.is_large_string(self._pa_array.type) and not (
142-
pa.types.is_dictionary(self._pa_array.type)
143-
and pa.types.is_large_string(self._pa_array.type.value_type)
144-
):
148+
if not pa.types.is_large_string(self._pa_array.type):
145149
raise ValueError(
146150
"ArrowStringArray requires a PyArrow (chunked) array of "
147151
"large_string type"

pandas/core/dtypes/cast.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,17 +1014,17 @@ def convert_dtypes(
10141014
Back-end data type applied to the resultant :class:`DataFrame`
10151015
(still experimental). Behaviour is as follows:
10161016
1017-
* ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
1018-
(default).
1017+
* ``"numpy_nullable"``: returns nullable-dtype
10191018
* ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
1020-
DataFrame.
10211019
10221020
.. versionadded:: 2.0
10231021
10241022
Returns
10251023
-------
10261024
np.dtype, or ExtensionDtype
10271025
"""
1026+
from pandas.core.arrays.string_ import StringDtype
1027+
10281028
inferred_dtype: str | DtypeObj
10291029

10301030
if (
@@ -1103,6 +1103,13 @@ def convert_dtypes(
11031103
# If we couldn't do anything else, then we retain the dtype
11041104
inferred_dtype = input_array.dtype
11051105

1106+
elif (
1107+
convert_string
1108+
and isinstance(input_array.dtype, StringDtype)
1109+
and input_array.dtype.na_value is np.nan
1110+
):
1111+
inferred_dtype = pandas_dtype_func("string")
1112+
11061113
else:
11071114
inferred_dtype = input_array.dtype
11081115

pandas/core/generic.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6670,10 +6670,10 @@ def convert_dtypes(
66706670
Back-end data type applied to the resultant :class:`DataFrame` or
66716671
:class:`Series` (still experimental). Behaviour is as follows:
66726672
6673-
* ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
6674-
or :class:`Series` (default).
6673+
* ``"numpy_nullable"``: returns nullable-dtype-backed
6674+
:class:`DataFrame` or :class:`Series`.
66756675
* ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
6676-
DataFrame or Series.
6676+
:class:`DataFrame` or :class:`Series`.
66776677
66786678
.. versionadded:: 2.0
66796679

pandas/core/internals/blocks.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,11 @@ def convert(self) -> list[Block]:
512512
convert_non_numeric=True,
513513
)
514514
refs = None
515-
if res_values is values:
515+
if (
516+
res_values is values
517+
or isinstance(res_values, NumpyExtensionArray)
518+
and res_values._ndarray is values
519+
):
516520
refs = self.refs
517521

518522
res_values = ensure_block_shape(res_values, self.ndim)

pandas/core/reshape/concat.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,11 @@ def concat(
379379
0 1 2
380380
1 3 4
381381
"""
382+
if ignore_index and keys is not None:
383+
raise ValueError(
384+
f"Cannot set {ignore_index=} and specify keys. Either should be used."
385+
)
386+
382387
if copy is not lib.no_default:
383388
warnings.warn(
384389
"The copy keyword is deprecated and will be removed in a future "

0 commit comments

Comments
 (0)