Skip to content

Commit 1ba2259

Browse files
committed
Merge branch 'main' into fix-dt-overflow
2 parents 842b7c1 + ec9be9d commit 1ba2259

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+360
-230
lines changed

doc/source/whatsnew/v2.2.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ Datetimelike
355355
- Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)
356356
- Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
357357
- Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`)
358+
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` (or :class:`DatetimeTZDtype`) dtype from mixed-numeric inputs treating those inputs as nanoseconds instead of as multiples of the dtype's unit (which is how non-mixed numeric inputs are treated) (:issue:`56004`)
358359
- Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`)
359360
-
360361

pandas/_libs/lib.pyx

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ from cpython.object cimport (
2525
Py_EQ,
2626
PyObject,
2727
PyObject_RichCompareBool,
28-
PyTypeObject,
2928
)
3029
from cpython.ref cimport Py_INCREF
3130
from cpython.sequence cimport PySequence_Check
@@ -67,10 +66,6 @@ from numpy cimport (
6766

6867
cnp.import_array()
6968

70-
cdef extern from "Python.h":
71-
# Note: importing extern-style allows us to declare these as nogil
72-
# functions, whereas `from cpython cimport` does not.
73-
bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil
7469

7570
cdef extern from "numpy/arrayobject.h":
7671
# cython's numpy.dtype specification is incorrect, which leads to
@@ -89,9 +84,6 @@ cdef extern from "numpy/arrayobject.h":
8984
object fields
9085
tuple names
9186

92-
PyTypeObject PySignedIntegerArrType_Type
93-
PyTypeObject PyUnsignedIntegerArrType_Type
94-
9587
cdef extern from "pandas/parser/pd_parser.h":
9688
int floatify(object, float64_t *result, int *maybe_int) except -1
9789
void PandasParser_IMPORT()
@@ -1437,14 +1429,12 @@ cdef class Seen:
14371429
self.sint_ = (
14381430
self.sint_
14391431
or (oINT64_MIN <= val < 0)
1440-
# Cython equivalent of `isinstance(val, np.signedinteger)`
1441-
or PyObject_TypeCheck(val, &PySignedIntegerArrType_Type)
1432+
or isinstance(val, cnp.signedinteger)
14421433
)
14431434
self.uint_ = (
14441435
self.uint_
14451436
or (oINT64_MAX < val <= oUINT64_MAX)
1446-
# Cython equivalent of `isinstance(val, np.unsignedinteger)`
1447-
or PyObject_TypeCheck(val, &PyUnsignedIntegerArrType_Type)
1437+
or isinstance(val, cnp.unsignedinteger)
14481438
)
14491439

14501440
@property

pandas/_libs/src/vendored/numpy/datetime/np_datetime.c

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -40,31 +40,19 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
4040
#else
4141
#if defined __has_builtin
4242
#if __has_builtin(__builtin_add_overflow)
43-
#if _LP64 || __LP64__ || _ILP64 || __ILP64__
44-
#define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res)
45-
#define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res)
46-
#define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res)
47-
#else
48-
#define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res)
49-
#define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res)
50-
#define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res)
51-
#endif
43+
#define checked_int64_add(a, b, res) __builtin_add_overflow(a, b, res)
44+
#define checked_int64_sub(a, b, res) __builtin_sub_overflow(a, b, res)
45+
#define checked_int64_mul(a, b, res) __builtin_mul_overflow(a, b, res)
5246
#else
5347
_Static_assert(0,
5448
"Overflow checking not detected; please try a newer compiler");
5549
#endif
5650
// __has_builtin was added in gcc 10, but our musllinux_1_1 build environment
5751
// only has gcc-9.3, so fall back to __GNUC__ macro as long as we have that
5852
#elif __GNUC__ > 7
59-
#if _LP64 || __LP64__ || _ILP64 || __ILP64__
60-
#define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res)
61-
#define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res)
62-
#define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res)
63-
#else
64-
#define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res)
65-
#define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res)
66-
#define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res)
67-
#endif
53+
#define checked_int64_add(a, b, res) __builtin_add_overflow(a, b, res)
54+
#define checked_int64_sub(a, b, res) __builtin_sub_overflow(a, b, res)
55+
#define checked_int64_mul(a, b, res) __builtin_mul_overflow(a, b, res)
6856
#else
6957
_Static_assert(0, "__has_builtin not detected; please try a newer compiler");
7058
#endif

pandas/_libs/tslib.pyx

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,9 @@ cpdef array_to_datetime(
530530
state.update_creso(item_reso)
531531
if infer_reso:
532532
creso = state.creso
533-
iresult[i] = cast_from_unit(val, "ns", out_reso=creso)
533+
534+
# we now need to parse this as if unit=abbrev
535+
iresult[i] = cast_from_unit(val, abbrev, out_reso=creso)
534536
state.found_other = True
535537

536538
elif isinstance(val, str):
@@ -779,6 +781,13 @@ def array_to_datetime_with_tz(
779781
_TSObject tsobj
780782
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
781783
DatetimeParseState state = DatetimeParseState(creso)
784+
str abbrev
785+
786+
if infer_reso:
787+
# We treat ints/floats as nanoseconds
788+
abbrev = "ns"
789+
else:
790+
abbrev = npy_unit_to_abbrev(creso)
782791

783792
for i in range(n):
784793
# Analogous to `item = values[i]`
@@ -790,7 +799,12 @@ def array_to_datetime_with_tz(
790799

791800
else:
792801
tsobj = convert_to_tsobject(
793-
item, tz=tz, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst, nanos=0
802+
item,
803+
tz=tz,
804+
unit=abbrev,
805+
dayfirst=dayfirst,
806+
yearfirst=yearfirst,
807+
nanos=0,
794808
)
795809
if tsobj.value != NPY_NAT:
796810
state.update_creso(tsobj.creso)

pandas/_libs/tslibs/util.pxd

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ cdef extern from "Python.h":
2222
object PyUnicode_DecodeLocale(const char *str, const char *errors) nogil
2323

2424

25+
cimport numpy as cnp
2526
from numpy cimport (
2627
PyArray_Check,
2728
float64_t,
@@ -54,7 +55,7 @@ cdef inline bint is_integer_object(object obj) noexcept:
5455
"""
5556
Cython equivalent of
5657
57-
`isinstance(val, (int, long, np.integer)) and not isinstance(val, bool)`
58+
`isinstance(val, (int, np.integer)) and not isinstance(val, (bool, np.timedelta64))`
5859
5960
Parameters
6061
----------
@@ -68,13 +69,13 @@ cdef inline bint is_integer_object(object obj) noexcept:
6869
-----
6970
np.timedelta64 objects are not counted as integers: np.timedelta64 subclasses np.signedinteger, so it is excluded explicitly.
7071
"""
71-
return (not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj)
72+
return (not PyBool_Check(obj) and isinstance(obj, (int, cnp.integer))
7273
and not is_timedelta64_object(obj))
7374

7475

7576
cdef inline bint is_float_object(object obj) noexcept nogil:
7677
"""
77-
Cython equivalent of `isinstance(val, (float, np.float64))`
78+
Cython equivalent of `isinstance(val, (float, np.floating))`
7879
7980
Parameters
8081
----------
@@ -90,7 +91,7 @@ cdef inline bint is_float_object(object obj) noexcept nogil:
9091

9192
cdef inline bint is_complex_object(object obj) noexcept nogil:
9293
"""
93-
Cython equivalent of `isinstance(val, (complex, np.complex128))`
94+
Cython equivalent of `isinstance(val, (complex, np.complexfloating))`
9495
9596
Parameters
9697
----------

pandas/core/arrays/datetimes.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2220,6 +2220,7 @@ def _sequence_to_dt64(
22202220
data = cast(np.ndarray, data)
22212221
copy = False
22222222
if lib.infer_dtype(data, skipna=False) == "integer":
2223+
# Much more performant than going through array_to_datetime
22232224
data = data.astype(np.int64)
22242225
elif tz is not None and ambiguous == "raise":
22252226
obj_data = np.asarray(data, dtype=object)

pandas/core/computation/eval.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,9 @@ def eval(
389389
# to use a non-numeric indexer
390390
try:
391391
with warnings.catch_warnings(record=True):
392+
warnings.filterwarnings(
393+
"always", "Setting a value on a view", FutureWarning
394+
)
392395
# TODO: Filter the warnings we actually care about here.
393396
if inplace and isinstance(target, NDFrame):
394397
target.loc[:, assigner] = ret

pandas/core/dtypes/cast.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1133,7 +1133,7 @@ def convert_dtypes(
11331133
base_dtype = inferred_dtype
11341134
if (
11351135
base_dtype.kind == "O" # type: ignore[union-attr]
1136-
and len(input_array) > 0
1136+
and input_array.size > 0
11371137
and isna(input_array).all()
11381138
):
11391139
import pyarrow as pa

pandas/core/frame.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -909,8 +909,8 @@ def __dataframe__(
909909
Parameters
910910
----------
911911
nan_as_null : bool, default False
912-
Whether to tell the DataFrame to overwrite null values in the data
913-
with ``NaN`` (or ``NaT``).
912+
`nan_as_null` is DEPRECATED and has no effect. Please avoid using
913+
it; it will be removed in a future release.
914914
allow_copy : bool, default True
915915
Whether to allow memory copying when exporting. If set to False
916916
it would cause non-zero-copy exports to fail.
@@ -925,9 +925,6 @@ def __dataframe__(
925925
Details on the interchange protocol:
926926
https://data-apis.org/dataframe-protocol/latest/index.html
927927
928-
`nan_as_null` currently has no effect; once support for nullable extension
929-
dtypes is added, this value should be propagated to columns.
930-
931928
Examples
932929
--------
933930
>>> df_not_necessarily_pandas = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
@@ -947,7 +944,7 @@ def __dataframe__(
947944

948945
from pandas.core.interchange.dataframe import PandasDataFrameXchg
949946

950-
return PandasDataFrameXchg(self, nan_as_null, allow_copy)
947+
return PandasDataFrameXchg(self, allow_copy=allow_copy)
951948

952949
def __dataframe_consortium_standard__(
953950
self, *, api_version: str | None = None
@@ -4857,6 +4854,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
48574854

48584855
inplace = validate_bool_kwarg(inplace, "inplace")
48594856
kwargs["level"] = kwargs.pop("level", 0) + 1
4857+
# TODO(CoW) those index/column resolvers create unnecessary refs to `self`
48604858
index_resolvers = self._get_index_resolvers()
48614859
column_resolvers = self._get_cleaned_column_resolvers()
48624860
resolvers = column_resolvers, index_resolvers

pandas/core/generic.py

Lines changed: 10 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6940,36 +6940,16 @@ def convert_dtypes(
69406940
dtype: string
69416941
"""
69426942
check_dtype_backend(dtype_backend)
6943-
if self.ndim == 1:
6944-
return self._convert_dtypes(
6945-
infer_objects,
6946-
convert_string,
6947-
convert_integer,
6948-
convert_boolean,
6949-
convert_floating,
6950-
dtype_backend=dtype_backend,
6951-
)
6952-
else:
6953-
results = [
6954-
col._convert_dtypes(
6955-
infer_objects,
6956-
convert_string,
6957-
convert_integer,
6958-
convert_boolean,
6959-
convert_floating,
6960-
dtype_backend=dtype_backend,
6961-
)
6962-
for col_name, col in self.items()
6963-
]
6964-
if len(results) > 0:
6965-
result = concat(results, axis=1, copy=False, keys=self.columns)
6966-
cons = cast(type["DataFrame"], self._constructor)
6967-
result = cons(result)
6968-
result = result.__finalize__(self, method="convert_dtypes")
6969-
# https://github.com/python/mypy/issues/8354
6970-
return cast(Self, result)
6971-
else:
6972-
return self.copy(deep=None)
6943+
new_mgr = self._mgr.convert_dtypes( # type: ignore[union-attr]
6944+
infer_objects=infer_objects,
6945+
convert_string=convert_string,
6946+
convert_integer=convert_integer,
6947+
convert_boolean=convert_boolean,
6948+
convert_floating=convert_floating,
6949+
dtype_backend=dtype_backend,
6950+
)
6951+
res = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
6952+
return res.__finalize__(self, method="convert_dtypes")
69736953

69746954
# ----------------------------------------------------------------------
69756955
# Filling NA's

0 commit comments

Comments
 (0)