Skip to content

Commit 1024ac5

Browse files
committed
Merge branch 'main' into api-nan-vs-na
2 parents b2a64bb + aabbbc5 commit 1024ac5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+631
-5116
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ repos:
121121
types: [python]
122122
stages: [manual]
123123
additional_dependencies: &pyright_dependencies
124-
124+
125125
- id: pyright
126126
# note: assumes python env is setup and activated
127127
name: pyright reportGeneralTypeIssues

asv_bench/benchmarks/strings.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
DataFrame,
99
Index,
1010
Series,
11+
StringDtype,
1112
)
1213
from pandas.arrays import StringArray
1314

@@ -290,10 +291,10 @@ def setup(self):
290291
self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
291292

292293
def time_string_array_construction(self):
293-
StringArray(self.series_arr)
294+
StringArray(self.series_arr, dtype=StringDtype())
294295

295296
def time_string_array_with_nan_construction(self):
296-
StringArray(self.series_arr_nan)
297+
StringArray(self.series_arr_nan, dtype=StringDtype())
297298

298299
def peakmem_stringarray_construction(self):
299-
StringArray(self.series_arr)
300+
StringArray(self.series_arr, dtype=StringDtype())

doc/source/user_guide/migration-3-strings.rst

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,37 @@ the :meth:`~pandas.Series.str.decode` method now has a ``dtype`` parameter to be
315315
able to specify object dtype instead of the default of string dtype for this use
316316
case.
317317

318+
:attr:`Series.values` now returns an :class:`~pandas.api.extensions.ExtensionArray`
319+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
320+
321+
With object dtype, using ``.values`` on a Series will return the underlying NumPy array.
322+
323+
.. code-block:: python
324+
325+
>>> ser = pd.Series(["a", "b", np.nan], dtype="object")
326+
>>> type(ser.values)
327+
<class 'numpy.ndarray'>
328+
329+
However, with the new string dtype, the underlying ExtensionArray is returned instead.
330+
331+
.. code-block:: python
332+
333+
>>> ser = pd.Series(["a", "b", pd.NA], dtype="str")
334+
>>> ser.values
335+
<ArrowStringArray>
336+
['a', 'b', nan]
337+
Length: 3, dtype: str
338+
339+
If your code requires a NumPy array, you should use :meth:`Series.to_numpy`.
340+
341+
.. code-block:: python
342+
343+
>>> ser = pd.Series(["a", "b", pd.NA], dtype="str")
344+
>>> ser.to_numpy()
345+
array(['a', 'b', nan], dtype=object)
346+
347+
In general, instead of :attr:`Series.values`, you should prefer :meth:`Series.to_numpy` to get a NumPy array or :attr:`Series.array` to get an ExtensionArray.
348+
318349
Notable bug fixes
319350
~~~~~~~~~~~~~~~~~
320351

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,7 @@ Indexing
994994
- Bug in :meth:`Series.__setitem__` raising ``LossySetitemError`` when assigning a boolean series with a boolean indexer (:issue:`57338`)
995995
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
996996
- Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
997+
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
997998

998999
Missing
9991000
^^^^^^^

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ dependencies:
7777

7878
# code checks
7979
- flake8=7.1.0 # run in subprocess over docstring examples
80-
- mypy=1.13.0 # pre-commit uses locally installed mypy
80+
- mypy=1.17.1 # pre-commit uses locally installed mypy
8181
- tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
8282
- pre-commit>=4.2.0
8383

pandas/_libs/src/datetime/pd_datetime.c

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,23 @@ static int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
5555
out->month = 1;
5656
out->day = 1;
5757

58-
out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year"));
59-
out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month"));
60-
out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day"));
58+
tmp = PyObject_GetAttrString(obj, "year");
59+
if (tmp == NULL)
60+
return -1;
61+
out->year = PyLong_AsLong(tmp);
62+
Py_DECREF(tmp);
63+
64+
tmp = PyObject_GetAttrString(obj, "month");
65+
if (tmp == NULL)
66+
return -1;
67+
out->month = PyLong_AsLong(tmp);
68+
Py_DECREF(tmp);
69+
70+
tmp = PyObject_GetAttrString(obj, "day");
71+
if (tmp == NULL)
72+
return -1;
73+
out->day = PyLong_AsLong(tmp);
74+
Py_DECREF(tmp);
6175

6276
// TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use
6377
// PyDateTime_Check here, and less verbose attribute lookups.
@@ -70,10 +84,29 @@ static int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
7084
return 0;
7185
}
7286

73-
out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour"));
74-
out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute"));
75-
out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second"));
76-
out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond"));
87+
tmp = PyObject_GetAttrString(obj, "hour");
88+
if (tmp == NULL)
89+
return -1;
90+
out->hour = PyLong_AsLong(tmp);
91+
Py_DECREF(tmp);
92+
93+
tmp = PyObject_GetAttrString(obj, "minute");
94+
if (tmp == NULL)
95+
return -1;
96+
out->min = PyLong_AsLong(tmp);
97+
Py_DECREF(tmp);
98+
99+
tmp = PyObject_GetAttrString(obj, "second");
100+
if (tmp == NULL)
101+
return -1;
102+
out->sec = PyLong_AsLong(tmp);
103+
Py_DECREF(tmp);
104+
105+
tmp = PyObject_GetAttrString(obj, "microsecond");
106+
if (tmp == NULL)
107+
return -1;
108+
out->us = PyLong_AsLong(tmp);
109+
Py_DECREF(tmp);
77110

78111
if (PyObject_HasAttrString(obj, "tzinfo")) {
79112
PyObject *offset = extract_utc_offset(obj);

pandas/_libs/tslibs/dtypes.pyi

Lines changed: 40 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -28,33 +28,35 @@ class PeriodDtypeBase:
2828
def _td64_unit(self) -> str: ...
2929

3030
class FreqGroup(Enum):
31-
FR_ANN: int
32-
FR_QTR: int
33-
FR_MTH: int
34-
FR_WK: int
35-
FR_BUS: int
36-
FR_DAY: int
37-
FR_HR: int
38-
FR_MIN: int
39-
FR_SEC: int
40-
FR_MS: int
41-
FR_US: int
42-
FR_NS: int
43-
FR_UND: int
31+
_value_: int
32+
FR_ANN = ...
33+
FR_QTR = ...
34+
FR_MTH = ...
35+
FR_WK = ...
36+
FR_BUS = ...
37+
FR_DAY = ...
38+
FR_HR = ...
39+
FR_MIN = ...
40+
FR_SEC = ...
41+
FR_MS = ...
42+
FR_US = ...
43+
FR_NS = ...
44+
FR_UND = ...
4445
@staticmethod
4546
def from_period_dtype_code(code: int) -> FreqGroup: ...
4647

4748
class Resolution(Enum):
48-
RESO_NS: int
49-
RESO_US: int
50-
RESO_MS: int
51-
RESO_SEC: int
52-
RESO_MIN: int
53-
RESO_HR: int
54-
RESO_DAY: int
55-
RESO_MTH: int
56-
RESO_QTR: int
57-
RESO_YR: int
49+
_value_: int
50+
RESO_NS = ...
51+
RESO_US = ...
52+
RESO_MS = ...
53+
RESO_SEC = ...
54+
RESO_MIN = ...
55+
RESO_HR = ...
56+
RESO_DAY = ...
57+
RESO_MTH = ...
58+
RESO_QTR = ...
59+
RESO_YR = ...
5860
def __lt__(self, other: Resolution) -> bool: ...
5961
def __ge__(self, other: Resolution) -> bool: ...
6062
@property
@@ -67,17 +69,18 @@ class Resolution(Enum):
6769
def attr_abbrev(self) -> str: ...
6870

6971
class NpyDatetimeUnit(Enum):
70-
NPY_FR_Y: int
71-
NPY_FR_M: int
72-
NPY_FR_W: int
73-
NPY_FR_D: int
74-
NPY_FR_h: int
75-
NPY_FR_m: int
76-
NPY_FR_s: int
77-
NPY_FR_ms: int
78-
NPY_FR_us: int
79-
NPY_FR_ns: int
80-
NPY_FR_ps: int
81-
NPY_FR_fs: int
82-
NPY_FR_as: int
83-
NPY_FR_GENERIC: int
72+
_value_: int
73+
NPY_FR_Y = ...
74+
NPY_FR_M = ...
75+
NPY_FR_W = ...
76+
NPY_FR_D = ...
77+
NPY_FR_h = ...
78+
NPY_FR_m = ...
79+
NPY_FR_s = ...
80+
NPY_FR_ms = ...
81+
NPY_FR_us = ...
82+
NPY_FR_ns = ...
83+
NPY_FR_ps = ...
84+
NPY_FR_fs = ...
85+
NPY_FR_as = ...
86+
NPY_FR_GENERIC = ...

pandas/_testing/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,8 +348,9 @@ class SubclassedDataFrame(DataFrame):
348348
def _constructor(self):
349349
return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs)
350350

351+
# error: Cannot override writeable attribute with read-only property
351352
@property
352-
def _constructor_sliced(self):
353+
def _constructor_sliced(self): # type: ignore[override]
353354
return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs)
354355

355356

pandas/_testing/_io.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,12 @@ def round_trip_pathlib(writer, reader, path: str | None = None):
8181
if path is None:
8282
path = "___pathlib___"
8383
with ensure_clean(path) as path:
84-
writer(Path(path)) # type: ignore[arg-type]
85-
obj = reader(Path(path)) # type: ignore[arg-type]
84+
writer(Path(path))
85+
obj = reader(Path(path))
8686
return obj
8787

8888

89-
def write_to_compressed(compression, path, data, dest: str = "test") -> None:
89+
def write_to_compressed(compression, path: str, data, dest: str = "test") -> None:
9090
"""
9191
Write data to a compressed file.
9292
@@ -138,5 +138,9 @@ def write_to_compressed(compression, path, data, dest: str = "test") -> None:
138138
else:
139139
raise ValueError(f"Unrecognized compression type: {compression}")
140140

141-
with compress_method(path, mode=mode) as f:
141+
# error: No overload variant of "ZipFile" matches argument types "str", "str"
142+
# error: No overload variant of "BZ2File" matches argument types "str", "str"
143+
# error: Argument "mode" to "TarFile" has incompatible type "str";
144+
# expected "Literal['r', 'a', 'w', 'x']"
145+
with compress_method(path, mode=mode) as f: # type: ignore[call-overload, arg-type]
142146
getattr(f, method)(*args)

pandas/core/_numba/executor.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,11 @@ def column_looper(
8787
else:
8888

8989
@numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
90-
def column_looper(
90+
# error: Incompatible redefinition (redefinition with type
91+
# "Callable[[ndarray[Any, Any], ndarray[Any, Any], ndarray[Any, Any],
92+
# int, VarArg(Any)], Any]", original type "Callable[[ndarray[Any, Any],
93+
# ndarray[Any, Any], int, int, VarArg(Any)], Any]")
94+
def column_looper( # type: ignore[misc]
9195
values: np.ndarray,
9296
start: np.ndarray,
9397
end: np.ndarray,

0 commit comments

Comments
 (0)