Skip to content

Commit e1837a4

Browse files
authored
Merge branch 'main' into checks_extensions.ExtensionArray
2 parents a298b1f + ca2b8c3 commit e1837a4

File tree

4 files changed

+58
-22
lines changed

4 files changed

+58
-22
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ Datetimelike
543543
- Bug in :attr:`is_year_start` where a DatetimeIndex constructed via a date_range with frequency 'MS' wouldn't have the correct year or quarter start attributes (:issue:`57377`)
544544
- Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`)
545545
- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
546-
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
546+
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`)
547547
- Bug in :func:`tseries.api.guess_datetime_format` where it would fail to infer the time format when "%Y" == "%H%M" (:issue:`57452`)
548548
- Bug in :func:`tseries.frequencies.to_offset` where it would fail to parse frequency strings starting with "LWOM" (:issue:`59218`)
549549
- Bug in :meth:`DataFrame.agg` where a DataFrame with missing values would raise an ``IndexError`` (:issue:`58810`)

pandas/_libs/src/vendored/numpy/datetime/np_datetime.c

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,12 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
2020
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
2121
#endif // NPY_NO_DEPRECATED_API
2222

23-
#include <Python.h>
24-
2523
#include "pandas/vendored/numpy/datetime/np_datetime.h"
26-
2724
#define NO_IMPORT_ARRAY
2825
#define PY_ARRAY_UNIQUE_SYMBOL PANDAS_DATETIME_NUMPY
2926
#include <numpy/ndarrayobject.h>
3027
#include <numpy/npy_common.h>
28+
#include <stdbool.h>
3129

3230
#if defined(_WIN32)
3331
#ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS
@@ -58,12 +56,15 @@ _Static_assert(0, "__has_builtin not detected; please try a newer compiler");
5856
#endif
5957
#endif
6058

59+
#define XSTR(a) STR(a)
60+
#define STR(a) #a
61+
6162
#define PD_CHECK_OVERFLOW(FUNC) \
6263
do { \
6364
if ((FUNC) != 0) { \
6465
PyGILState_STATE gstate = PyGILState_Ensure(); \
6566
PyErr_SetString(PyExc_OverflowError, \
66-
"Overflow occurred in npy_datetimestruct_to_datetime"); \
67+
"Overflow occurred at " __FILE__ ":" XSTR(__LINE__)); \
6768
PyGILState_Release(gstate); \
6869
return -1; \
6970
} \
@@ -139,53 +140,53 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) {
139140
npy_int64 year, days = 0;
140141
const int *month_lengths;
141142

142-
year = dts->year - 1970;
143-
days = year * 365;
143+
PD_CHECK_OVERFLOW(checked_int64_sub(dts->year, 1970, &year));
144+
PD_CHECK_OVERFLOW(checked_int64_mul(year, 365, &days));
144145

145146
/* Adjust for leap years */
146147
if (days >= 0) {
147148
/*
148149
* 1968 is the closest leap year before 1970.
149150
* Exclude the current year, so add 1.
150151
*/
151-
year += 1;
152+
PD_CHECK_OVERFLOW(checked_int64_add(year, 1, &year));
152153
/* Add one day for each 4 years */
153-
days += year / 4;
154+
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 4, &days));
154155
/* 1900 is the closest previous year divisible by 100 */
155-
year += 68;
156+
PD_CHECK_OVERFLOW(checked_int64_add(year, 68, &year));
156157
/* Subtract one day for each 100 years */
157-
days -= year / 100;
158+
PD_CHECK_OVERFLOW(checked_int64_sub(days, year / 100, &days));
158159
/* 1600 is the closest previous year divisible by 400 */
159-
year += 300;
160+
PD_CHECK_OVERFLOW(checked_int64_add(year, 300, &year));
160161
/* Add one day for each 400 years */
161-
days += year / 400;
162+
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 400, &days));
162163
} else {
163164
/*
164165
* 1972 is the closest later year after 1970.
165166
* Include the current year, so subtract 2.
166167
*/
167-
year -= 2;
168+
PD_CHECK_OVERFLOW(checked_int64_sub(year, 2, &year));
168169
/* Subtract one day for each 4 years */
169-
days += year / 4;
170+
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 4, &days));
170171
/* 2000 is the closest later year divisible by 100 */
171-
year -= 28;
172+
PD_CHECK_OVERFLOW(checked_int64_sub(year, 28, &year));
172173
/* Add one day for each 100 years */
173-
days -= year / 100;
174+
PD_CHECK_OVERFLOW(checked_int64_sub(days, year / 100, &days));
174175
/* 2000 is also the closest later year divisible by 400 */
175176
/* Subtract one day for each 400 years */
176-
days += year / 400;
177+
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 400, &days));
177178
}
178179

179180
month_lengths = days_per_month_table[is_leapyear(dts->year)];
180181
month = dts->month - 1;
181182

182183
/* Add the months */
183184
for (i = 0; i < month; ++i) {
184-
days += month_lengths[i];
185+
PD_CHECK_OVERFLOW(checked_int64_add(days, month_lengths[i], &days));
185186
}
186187

187188
/* Add the days */
188-
days += dts->day - 1;
189+
PD_CHECK_OVERFLOW(checked_int64_add(days, dts->day - 1, &days));
189190

190191
return days;
191192
}
@@ -430,6 +431,15 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
430431
}
431432

432433
const int64_t days = get_datetimestruct_days(dts);
434+
if (days == -1) {
435+
PyGILState_STATE gstate = PyGILState_Ensure();
436+
bool did_error = PyErr_Occurred() == NULL ? false : true;
437+
PyGILState_Release(gstate);
438+
if (did_error) {
439+
return -1;
440+
}
441+
}
442+
433443
if (base == NPY_FR_D) {
434444
return days;
435445
}

pandas/tests/io/test_parquet.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,6 +1137,21 @@ def test_infer_string_large_string_type(self, tmp_path, pa):
11371137
# assert result["strings"].dtype == "string"
11381138
# FIXME: don't leave commented-out
11391139

1140+
def test_non_nanosecond_timestamps(self, temp_file):
1141+
# GH#49236
1142+
pa = pytest.importorskip("pyarrow", "11.0.0")
1143+
pq = pytest.importorskip("pyarrow.parquet")
1144+
1145+
arr = pa.array([datetime.datetime(1600, 1, 1)], type=pa.timestamp("us"))
1146+
table = pa.table([arr], names=["timestamp"])
1147+
pq.write_table(table, temp_file)
1148+
result = read_parquet(temp_file)
1149+
expected = pd.DataFrame(
1150+
data={"timestamp": [datetime.datetime(1600, 1, 1)]},
1151+
dtype="datetime64[us]",
1152+
)
1153+
tm.assert_frame_equal(result, expected)
1154+
11401155

11411156
class TestParquetFastParquet(Base):
11421157
@pytest.mark.xfail(reason="datetime_with_nat gets incorrect values")
@@ -1178,6 +1193,10 @@ def test_duplicate_columns(self, fp):
11781193
msg = "Cannot create parquet dataset with duplicate column names"
11791194
self.check_error_on_write(df, fp, ValueError, msg)
11801195

1196+
@pytest.mark.xfail(
1197+
Version(np.__version__) >= Version("2.0.0"),
1198+
reason="fastparquet uses np.float_ in numpy2",
1199+
)
11811200
def test_bool_with_none(self, fp):
11821201
df = pd.DataFrame({"a": [True, None, False]})
11831202
expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16")

pandas/tests/plotting/frame/test_frame.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
_check_visible,
4646
get_y_axis,
4747
)
48+
from pandas.util.version import Version
4849

4950
from pandas.io.formats.printing import pprint_thing
5051

@@ -2465,8 +2466,14 @@ def test_group_subplot_invalid_column_name(self):
24652466
d = {"a": np.arange(10), "b": np.arange(10)}
24662467
df = DataFrame(d)
24672468

2468-
with pytest.raises(ValueError, match=r"Column label\(s\) \['bad_name'\]"):
2469-
df.plot(subplots=[("a", "bad_name")])
2469+
if Version(np.__version__) < Version("2.0.0"):
2470+
with pytest.raises(ValueError, match=r"Column label\(s\) \['bad_name'\]"):
2471+
df.plot(subplots=[("a", "bad_name")])
2472+
else:
2473+
with pytest.raises(
2474+
ValueError, match=r"Column label\(s\) \[np\.str\_\('bad_name'\)\]"
2475+
):
2476+
df.plot(subplots=[("a", "bad_name")])
24702477

24712478
def test_group_subplot_duplicated_column(self):
24722479
d = {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)}

0 commit comments

Comments
 (0)