Skip to content

Commit 558488f

Browse files
Merge remote-tracking branch 'upstream/main' into preview_docs
2 parents 56a2fe5 + 16801a1 commit 558488f

File tree

28 files changed

+189
-123
lines changed

28 files changed

+189
-123
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,13 +630,15 @@ Other API changes
630630
Deprecations
631631
~~~~~~~~~~~~
632632
- Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`)
633+
- Deprecated behavior of :func:`to_datetime` with ``unit`` when parsing strings, in a future version these will be parsed as datetimes (matching unit-less behavior) instead of cast to floats. To retain the old behavior, cast strings to numeric types before calling :func:`to_datetime` (:issue:`50735`)
633634
- Deprecated :func:`pandas.io.sql.execute` (:issue:`50185`)
634635
- :meth:`Index.is_boolean` has been deprecated. Use :func:`pandas.api.types.is_bool_dtype` instead (:issue:`50042`)
635636
- :meth:`Index.is_integer` has been deprecated. Use :func:`pandas.api.types.is_integer_dtype` instead (:issue:`50042`)
636637
- :meth:`Index.is_floating` has been deprecated. Use :func:`pandas.api.types.is_float_dtype` instead (:issue:`50042`)
637638
- :meth:`Index.holds_integer` has been deprecated. Use :func:`pandas.api.types.infer_dtype` instead (:issue:`50243`)
638639
- :meth:`Index.is_categorical` has been deprecated. Use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`50042`)
639640
- :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_interval_dtype` instead (:issue:`50042`)
641+
-
640642

641643
.. ---------------------------------------------------------------------------
642644
.. _whatsnew_200.prior_deprecations:

pandas/_libs/tslib.pyx

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
import warnings
2+
3+
from pandas.util._exceptions import find_stack_level
4+
15
cimport cython
26

37
from datetime import timezone
@@ -303,6 +307,16 @@ def array_with_unit_to_datetime(
303307
raise ValueError(
304308
f"non convertible value {val} with the unit '{unit}'"
305309
)
310+
warnings.warn(
311+
"The behavior of 'to_datetime' with 'unit' when parsing "
312+
"strings is deprecated. In a future version, strings will "
313+
"be parsed as datetime strings, matching the behavior "
314+
"without a 'unit'. To retain the old behavior, explicitly "
315+
"cast ints or floats to numeric type before calling "
316+
"to_datetime.",
317+
FutureWarning,
318+
stacklevel=find_stack_level(),
319+
)
306320

307321
iresult[i] = cast_from_unit(fval, unit)
308322

pandas/_testing/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242

4343
import pandas as pd
4444
from pandas import (
45+
ArrowDtype,
4546
Categorical,
4647
CategoricalIndex,
4748
DataFrame,
@@ -198,10 +199,16 @@
198199
UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]
199200
SIGNED_INT_PYARROW_DTYPES = [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
200201
ALL_INT_PYARROW_DTYPES = UNSIGNED_INT_PYARROW_DTYPES + SIGNED_INT_PYARROW_DTYPES
202+
ALL_INT_PYARROW_DTYPES_STR_REPR = [
203+
str(ArrowDtype(typ)) for typ in ALL_INT_PYARROW_DTYPES
204+
]
201205

202206
# pa.float16 doesn't seem supported
203207
# https://github.com/apache/arrow/blob/master/python/pyarrow/src/arrow/python/helpers.cc#L86
204208
FLOAT_PYARROW_DTYPES = [pa.float32(), pa.float64()]
209+
FLOAT_PYARROW_DTYPES_STR_REPR = [
210+
str(ArrowDtype(typ)) for typ in FLOAT_PYARROW_DTYPES
211+
]
205212
STRING_PYARROW_DTYPES = [pa.string()]
206213
BINARY_PYARROW_DTYPES = [pa.binary()]
207214

@@ -234,6 +241,9 @@
234241
+ TIMEDELTA_PYARROW_DTYPES
235242
+ BOOL_PYARROW_DTYPES
236243
)
244+
else:
245+
FLOAT_PYARROW_DTYPES_STR_REPR = []
246+
ALL_INT_PYARROW_DTYPES_STR_REPR = []
237247

238248

239249
EMPTY_STRING_PATTERN = re.compile("^$")

pandas/conftest.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,6 +1527,43 @@ def any_numeric_ea_dtype(request):
15271527
return request.param
15281528

15291529

1530+
# Unsupported operand types for + ("List[Union[str, ExtensionDtype, dtype[Any],
1531+
# Type[object]]]" and "List[str]")
1532+
@pytest.fixture(
1533+
params=tm.ALL_INT_EA_DTYPES
1534+
+ tm.FLOAT_EA_DTYPES
1535+
+ tm.ALL_INT_PYARROW_DTYPES_STR_REPR
1536+
+ tm.FLOAT_PYARROW_DTYPES_STR_REPR # type: ignore[operator]
1537+
)
1538+
def any_numeric_ea_and_arrow_dtype(request):
1539+
"""
1540+
Parameterized fixture for any nullable integer dtype and
1541+
any float ea dtypes.
1542+
1543+
* 'UInt8'
1544+
* 'Int8'
1545+
* 'UInt16'
1546+
* 'Int16'
1547+
* 'UInt32'
1548+
* 'Int32'
1549+
* 'UInt64'
1550+
* 'Int64'
1551+
* 'Float32'
1552+
* 'Float64'
1553+
* 'uint8[pyarrow]'
1554+
* 'int8[pyarrow]'
1555+
* 'uint16[pyarrow]'
1556+
* 'int16[pyarrow]'
1557+
* 'uint32[pyarrow]'
1558+
* 'int32[pyarrow]'
1559+
* 'uint64[pyarrow]'
1560+
* 'int64[pyarrow]'
1561+
* 'float32[pyarrow]'
1562+
* 'float64[pyarrow]'
1563+
"""
1564+
return request.param
1565+
1566+
15301567
@pytest.fixture(params=tm.SIGNED_INT_EA_DTYPES)
15311568
def any_signed_int_ea_dtype(request):
15321569
"""

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3188,10 +3188,10 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, Dtype | None]:
31883188
elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray):
31893189
out = vals.to_numpy(dtype=float, na_value=np.nan)
31903190
elif is_datetime64_dtype(vals.dtype):
3191-
inference = np.dtype("datetime64[ns]")
3191+
inference = vals.dtype
31923192
out = np.asarray(vals).astype(float)
31933193
elif is_timedelta64_dtype(vals.dtype):
3194-
inference = np.dtype("timedelta64[ns]")
3194+
inference = vals.dtype
31953195
out = np.asarray(vals).astype(float)
31963196
elif isinstance(vals, ExtensionArray) and is_float_dtype(vals):
31973197
inference = np.dtype(np.float64)

pandas/core/util/hashing.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,14 @@ def combine_hash_arrays(
7171

7272
mult = np.uint64(1000003)
7373
out = np.zeros_like(first) + np.uint64(0x345678)
74+
last_i = 0
7475
for i, a in enumerate(arrays):
7576
inverse_i = num_items - i
7677
out ^= a
7778
out *= mult
7879
mult += np.uint64(82520 + inverse_i + inverse_i)
79-
assert i + 1 == num_items, "Fed in wrong num_items"
80+
last_i = i
81+
assert last_i + 1 == num_items, "Fed in wrong num_items"
8082
out += np.uint64(97531)
8183
return out
8284

pandas/io/excel/_base.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,9 @@ def parse(
736736

737737
output = {}
738738

739+
last_sheetname = None
739740
for asheetname in sheets:
741+
last_sheetname = asheetname
740742
if verbose:
741743
print(f"Reading sheet {asheetname}")
742744

@@ -888,10 +890,13 @@ def parse(
888890
err.args = (f"{err.args[0]} (sheet: {asheetname})", *err.args[1:])
889891
raise err
890892

893+
if last_sheetname is None:
894+
raise ValueError("Sheet name is an empty list")
895+
891896
if ret_dict:
892897
return output
893898
else:
894-
return output[asheetname]
899+
return output[last_sheetname]
895900

896901

897902
@doc(storage_options=_shared_docs["storage_options"])

pandas/io/json/_json.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1222,7 +1222,9 @@ def _try_convert_to_date(self, data):
12221222
if new_data.dtype == "object":
12231223
try:
12241224
new_data = data.astype("int64")
1225-
except (TypeError, ValueError, OverflowError):
1225+
except OverflowError:
1226+
return data, False
1227+
except (TypeError, ValueError):
12261228
pass
12271229

12281230
# ignore numbers that are out of range

pandas/tests/frame/methods/test_to_dict_of_blocks.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,34 @@ def test_copy_blocks(self, float_frame):
2020
column = df.columns[0]
2121

2222
# use the default copy=True, change a column
23+
_last_df = None
2324
blocks = df._to_dict_of_blocks(copy=True)
2425
for _df in blocks.values():
26+
_last_df = _df
2527
if column in _df:
2628
_df.loc[:, column] = _df[column] + 1
2729

2830
# make sure we did not change the original DataFrame
29-
assert not _df[column].equals(df[column])
31+
assert _last_df is not None and not _last_df[column].equals(df[column])
3032

3133
def test_no_copy_blocks(self, float_frame, using_copy_on_write):
3234
# GH#9607
3335
df = DataFrame(float_frame, copy=True)
3436
column = df.columns[0]
3537

38+
_last_df = None
3639
# use the copy=False, change a column
3740
blocks = df._to_dict_of_blocks(copy=False)
3841
for _df in blocks.values():
42+
_last_df = _df
3943
if column in _df:
4044
_df.loc[:, column] = _df[column] + 1
4145

4246
if not using_copy_on_write:
4347
# make sure we did change the original DataFrame
44-
assert _df[column].equals(df[column])
48+
assert _last_df is not None and _last_df[column].equals(df[column])
4549
else:
46-
assert not _df[column].equals(df[column])
50+
assert _last_df is not None and not _last_df[column].equals(df[column])
4751

4852

4953
def test_to_dict_of_blocks_item_cache(request, using_copy_on_write):

pandas/tests/groupby/test_quantile.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,33 +26,45 @@
2626
([np.nan, 4.0, np.nan, 2.0, np.nan], [np.nan, 4.0, np.nan, 2.0, np.nan]),
2727
# Timestamps
2828
(
29-
list(pd.date_range("1/1/18", freq="D", periods=5)),
30-
list(pd.date_range("1/1/18", freq="D", periods=5))[::-1],
29+
pd.date_range("1/1/18", freq="D", periods=5),
30+
pd.date_range("1/1/18", freq="D", periods=5)[::-1],
31+
),
32+
(
33+
pd.date_range("1/1/18", freq="D", periods=5).as_unit("s"),
34+
pd.date_range("1/1/18", freq="D", periods=5)[::-1].as_unit("s"),
3135
),
3236
# All NA
3337
([np.nan] * 5, [np.nan] * 5),
3438
],
3539
)
3640
@pytest.mark.parametrize("q", [0, 0.25, 0.5, 0.75, 1])
3741
def test_quantile(interpolation, a_vals, b_vals, q, request):
38-
if interpolation == "nearest" and q == 0.5 and b_vals == [4, 3, 2, 1]:
42+
if (
43+
interpolation == "nearest"
44+
and q == 0.5
45+
and isinstance(b_vals, list)
46+
and b_vals == [4, 3, 2, 1]
47+
):
3948
request.node.add_marker(
4049
pytest.mark.xfail(
4150
reason="Unclear numpy expectation for nearest "
4251
"result with equidistant data"
4352
)
4453
)
54+
all_vals = pd.concat([pd.Series(a_vals), pd.Series(b_vals)])
4555

4656
a_expected = pd.Series(a_vals).quantile(q, interpolation=interpolation)
4757
b_expected = pd.Series(b_vals).quantile(q, interpolation=interpolation)
4858

49-
df = DataFrame(
50-
{"key": ["a"] * len(a_vals) + ["b"] * len(b_vals), "val": a_vals + b_vals}
51-
)
59+
df = DataFrame({"key": ["a"] * len(a_vals) + ["b"] * len(b_vals), "val": all_vals})
5260

5361
expected = DataFrame(
5462
[a_expected, b_expected], columns=["val"], index=Index(["a", "b"], name="key")
5563
)
64+
if all_vals.dtype.kind == "M" and expected.dtypes.values[0].kind == "M":
65+
# TODO(non-nano): this should be unnecessary once array_to_datetime
66+
# correctly infers non-nano from Timestamp.unit
67+
expected = expected.astype(all_vals.dtype)
5668
result = df.groupby("key").quantile(q, interpolation=interpolation)
5769

5870
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)