Skip to content

Commit 558488f

Browse files
Merge remote-tracking branch 'upstream/main' into preview_docs
2 parents 56a2fe5 + 16801a1 commit 558488f

File tree

28 files changed

+189
-123
lines changed

28 files changed

+189
-123
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,13 +630,15 @@ Other API changes
630630
Deprecations
631631
~~~~~~~~~~~~
632632
- Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`)
633+
- Deprecated behavior of :func:`to_datetime` with ``unit`` when parsing strings, in a future version these will be parsed as datetimes (matching unit-less behavior) instead of cast to floats. To retain the old behavior, cast strings to numeric types before calling :func:`to_datetime` (:issue:`50735`)
633634
- Deprecated :func:`pandas.io.sql.execute` (:issue:`50185`)
634635
- :meth:`Index.is_boolean` has been deprecated. Use :func:`pandas.api.types.is_bool_dtype` instead (:issue:`50042`)
635636
- :meth:`Index.is_integer` has been deprecated. Use :func:`pandas.api.types.is_integer_dtype` instead (:issue:`50042`)
636637
- :meth:`Index.is_floating` has been deprecated. Use :func:`pandas.api.types.is_float_dtype` instead (:issue:`50042`)
637638
- :meth:`Index.holds_integer` has been deprecated. Use :func:`pandas.api.types.infer_dtype` instead (:issue:`50243`)
638639
- :meth:`Index.is_categorical` has been deprecated. Use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`50042`)
639640
- :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_interval_dtype` instead (:issue:`50042`)
641+
-
640642

641643
.. ---------------------------------------------------------------------------
642644
.. _whatsnew_200.prior_deprecations:

pandas/_libs/tslib.pyx

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
import warnings
2+
3+
from pandas.util._exceptions import find_stack_level
4+
15
cimport cython
26

37
from datetime import timezone
@@ -303,6 +307,16 @@ def array_with_unit_to_datetime(
303307
raise ValueError(
304308
f"non convertible value {val} with the unit '{unit}'"
305309
)
310+
warnings.warn(
311+
"The behavior of 'to_datetime' with 'unit' when parsing "
312+
"strings is deprecated. In a future version, strings will "
313+
"be parsed as datetime strings, matching the behavior "
314+
"without a 'unit'. To retain the old behavior, explicitly "
315+
"cast ints or floats to numeric type before calling "
316+
"to_datetime.",
317+
FutureWarning,
318+
stacklevel=find_stack_level(),
319+
)
306320

307321
iresult[i] = cast_from_unit(fval, unit)
308322

pandas/_testing/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242

4343
import pandas as pd
4444
from pandas import (
45+
ArrowDtype,
4546
Categorical,
4647
CategoricalIndex,
4748
DataFrame,
@@ -198,10 +199,16 @@
198199
UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]
199200
SIGNED_INT_PYARROW_DTYPES = [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
200201
ALL_INT_PYARROW_DTYPES = UNSIGNED_INT_PYARROW_DTYPES + SIGNED_INT_PYARROW_DTYPES
202+
ALL_INT_PYARROW_DTYPES_STR_REPR = [
203+
str(ArrowDtype(typ)) for typ in ALL_INT_PYARROW_DTYPES
204+
]
201205

202206
# pa.float16 doesn't seem supported
203207
# https://github.com/apache/arrow/blob/master/python/pyarrow/src/arrow/python/helpers.cc#L86
204208
FLOAT_PYARROW_DTYPES = [pa.float32(), pa.float64()]
209+
FLOAT_PYARROW_DTYPES_STR_REPR = [
210+
str(ArrowDtype(typ)) for typ in FLOAT_PYARROW_DTYPES
211+
]
205212
STRING_PYARROW_DTYPES = [pa.string()]
206213
BINARY_PYARROW_DTYPES = [pa.binary()]
207214

@@ -234,6 +241,9 @@
234241
+ TIMEDELTA_PYARROW_DTYPES
235242
+ BOOL_PYARROW_DTYPES
236243
)
244+
else:
245+
FLOAT_PYARROW_DTYPES_STR_REPR = []
246+
ALL_INT_PYARROW_DTYPES_STR_REPR = []
237247

238248

239249
EMPTY_STRING_PATTERN = re.compile("^$")

pandas/conftest.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,6 +1527,43 @@ def any_numeric_ea_dtype(request):
15271527
return request.param
15281528

15291529

1530+
# Unsupported operand types for + ("List[Union[str, ExtensionDtype, dtype[Any],
1531+
# Type[object]]]" and "List[str]")
1532+
@pytest.fixture(
1533+
params=tm.ALL_INT_EA_DTYPES
1534+
+ tm.FLOAT_EA_DTYPES
1535+
+ tm.ALL_INT_PYARROW_DTYPES_STR_REPR
1536+
+ tm.FLOAT_PYARROW_DTYPES_STR_REPR # type: ignore[operator]
1537+
)
1538+
def any_numeric_ea_and_arrow_dtype(request):
1539+
"""
1540+
Parameterized fixture for any nullable integer dtype and
1541+
any float ea dtypes.
1542+
1543+
* 'UInt8'
1544+
* 'Int8'
1545+
* 'UInt16'
1546+
* 'Int16'
1547+
* 'UInt32'
1548+
* 'Int32'
1549+
* 'UInt64'
1550+
* 'Int64'
1551+
* 'Float32'
1552+
* 'Float64'
1553+
* 'uint8[pyarrow]'
1554+
* 'int8[pyarrow]'
1555+
* 'uint16[pyarrow]'
1556+
* 'int16[pyarrow]'
1557+
* 'uint32[pyarrow]'
1558+
* 'int32[pyarrow]'
1559+
* 'uint64[pyarrow]'
1560+
* 'int64[pyarrow]'
1561+
* 'float32[pyarrow]'
1562+
* 'float64[pyarrow]'
1563+
"""
1564+
return request.param
1565+
1566+
15301567
@pytest.fixture(params=tm.SIGNED_INT_EA_DTYPES)
15311568
def any_signed_int_ea_dtype(request):
15321569
"""

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3188,10 +3188,10 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, Dtype | None]:
31883188
elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray):
31893189
out = vals.to_numpy(dtype=float, na_value=np.nan)
31903190
elif is_datetime64_dtype(vals.dtype):
3191-
inference = np.dtype("datetime64[ns]")
3191+
inference = vals.dtype
31923192
out = np.asarray(vals).astype(float)
31933193
elif is_timedelta64_dtype(vals.dtype):
3194-
inference = np.dtype("timedelta64[ns]")
3194+
inference = vals.dtype
31953195
out = np.asarray(vals).astype(float)
31963196
elif isinstance(vals, ExtensionArray) and is_float_dtype(vals):
31973197
inference = np.dtype(np.float64)

pandas/core/util/hashing.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,14 @@ def combine_hash_arrays(
7171

7272
mult = np.uint64(1000003)
7373
out = np.zeros_like(first) + np.uint64(0x345678)
74+
last_i = 0
7475
for i, a in enumerate(arrays):
7576
inverse_i = num_items - i
7677
out ^= a
7778
out *= mult
7879
mult += np.uint64(82520 + inverse_i + inverse_i)
79-
assert i + 1 == num_items, "Fed in wrong num_items"
80+
last_i = i
81+
assert last_i + 1 == num_items, "Fed in wrong num_items"
8082
out += np.uint64(97531)
8183
return out
8284

pandas/io/excel/_base.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,9 @@ def parse(
736736

737737
output = {}
738738

739+
last_sheetname = None
739740
for asheetname in sheets:
741+
last_sheetname = asheetname
740742
if verbose:
741743
print(f"Reading sheet {asheetname}")
742744

@@ -888,10 +890,13 @@ def parse(
888890
err.args = (f"{err.args[0]} (sheet: {asheetname})", *err.args[1:])
889891
raise err
890892

893+
if last_sheetname is None:
894+
raise ValueError("Sheet name is an empty list")
895+
891896
if ret_dict:
892897
return output
893898
else:
894-
return output[asheetname]
899+
return output[last_sheetname]
895900

896901

897902
@doc(storage_options=_shared_docs["storage_options"])

pandas/io/json/_json.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1222,7 +1222,9 @@ def _try_convert_to_date(self, data):
12221222
if new_data.dtype == "object":
12231223
try:
12241224
new_data = data.astype("int64")
1225-
except (TypeError, ValueError, OverflowError):
1225+
except OverflowError:
1226+
return data, False
1227+
except (TypeError, ValueError):
12261228
pass
12271229

12281230
# ignore numbers that are out of range

pandas/tests/frame/methods/test_to_dict_of_blocks.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,34 @@ def test_copy_blocks(self, float_frame):
2020
column = df.columns[0]
2121

2222
# use the default copy=True, change a column
23+
_last_df = None
2324
blocks = df._to_dict_of_blocks(copy=True)
2425
for _df in blocks.values():
26+
_last_df = _df
2527
if column in _df:
2628
_df.loc[:, column] = _df[column] + 1
2729

2830
# make sure we did not change the original DataFrame
29-
assert not _df[column].equals(df[column])
31+
assert _last_df is not None and not _last_df[column].equals(df[column])
3032

3133
def test_no_copy_blocks(self, float_frame, using_copy_on_write):
3234
# GH#9607
3335
df = DataFrame(float_frame, copy=True)
3436
column = df.columns[0]
3537

38+
_last_df = None
3639
# use the copy=False, change a column
3740
blocks = df._to_dict_of_blocks(copy=False)
3841
for _df in blocks.values():
42+
_last_df = _df
3943
if column in _df:
4044
_df.loc[:, column] = _df[column] + 1
4145

4246
if not using_copy_on_write:
4347
# make sure we did change the original DataFrame
44-
assert _df[column].equals(df[column])
48+
assert _last_df is not None and _last_df[column].equals(df[column])
4549
else:
46-
assert not _df[column].equals(df[column])
50+
assert _last_df is not None and not _last_df[column].equals(df[column])
4751

4852

4953
def test_to_dict_of_blocks_item_cache(request, using_copy_on_write):

pandas/tests/groupby/test_quantile.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,33 +26,45 @@
2626
([np.nan, 4.0, np.nan, 2.0, np.nan], [np.nan, 4.0, np.nan, 2.0, np.nan]),
2727
# Timestamps
2828
(
29-
list(pd.date_range("1/1/18", freq="D", periods=5)),
30-
list(pd.date_range("1/1/18", freq="D", periods=5))[::-1],
29+
pd.date_range("1/1/18", freq="D", periods=5),
30+
pd.date_range("1/1/18", freq="D", periods=5)[::-1],
31+
),
32+
(
33+
pd.date_range("1/1/18", freq="D", periods=5).as_unit("s"),
34+
pd.date_range("1/1/18", freq="D", periods=5)[::-1].as_unit("s"),
3135
),
3236
# All NA
3337
([np.nan] * 5, [np.nan] * 5),
3438
],
3539
)
3640
@pytest.mark.parametrize("q", [0, 0.25, 0.5, 0.75, 1])
3741
def test_quantile(interpolation, a_vals, b_vals, q, request):
38-
if interpolation == "nearest" and q == 0.5 and b_vals == [4, 3, 2, 1]:
42+
if (
43+
interpolation == "nearest"
44+
and q == 0.5
45+
and isinstance(b_vals, list)
46+
and b_vals == [4, 3, 2, 1]
47+
):
3948
request.node.add_marker(
4049
pytest.mark.xfail(
4150
reason="Unclear numpy expectation for nearest "
4251
"result with equidistant data"
4352
)
4453
)
54+
all_vals = pd.concat([pd.Series(a_vals), pd.Series(b_vals)])
4555

4656
a_expected = pd.Series(a_vals).quantile(q, interpolation=interpolation)
4757
b_expected = pd.Series(b_vals).quantile(q, interpolation=interpolation)
4858

49-
df = DataFrame(
50-
{"key": ["a"] * len(a_vals) + ["b"] * len(b_vals), "val": a_vals + b_vals}
51-
)
59+
df = DataFrame({"key": ["a"] * len(a_vals) + ["b"] * len(b_vals), "val": all_vals})
5260

5361
expected = DataFrame(
5462
[a_expected, b_expected], columns=["val"], index=Index(["a", "b"], name="key")
5563
)
64+
if all_vals.dtype.kind == "M" and expected.dtypes.values[0].kind == "M":
65+
# TODO(non-nano): this should be unnecessary once array_to_datetime
66+
# correctly infers non-nano from Timestamp.unit
67+
expected = expected.astype(all_vals.dtype)
5668
result = df.groupby("key").quantile(q, interpolation=interpolation)
5769

5870
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)