Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Fixed regressions
- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`)
- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pyarrow nullable on with missing values (:issue:`57664`)
- Avoid issuing a spurious ``DeprecationWarning`` when a custom :class:`DataFrame` or :class:`Series` subclass method is called (:issue:`57553`)
- Fixed regression in :meth:`Index.map` that would not change the dtype when the provided mapping would change data from tz-aware to tz-agnostic or tz-agnostic to tz-aware (:issue:`57192`)
- Fixed regression in precision of :func:`to_datetime` with string and ``unit`` input (:issue:`57051`)

.. ---------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def is_timedelta_or_timedelta64_array(
values: np.ndarray, skipna: bool = True
) -> bool: ...
def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ...
def is_datetime_naive_array(values: np.ndarray) -> bool: ...
def is_time_array(values: np.ndarray, skipna: bool = ...): ...
def is_date_array(values: np.ndarray, skipna: bool = ...): ...
def is_datetime_array(values: np.ndarray, skipna: bool = ...): ...
Expand Down
24 changes: 24 additions & 0 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2068,6 +2068,30 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool:
return True


def is_datetime_naive_array(values: ndarray) -> bool:
"""
Check values have are datetime naive.
Doesn't check values are datetime-like types.
"""
cdef:
Py_ssize_t j, n = len(values)
object tz

if n == 0:
return False

for j in range(n):
# Compare val's timezone with the reference timezone
# NaT can coexist with tz-aware datetimes, so skip if encountered
# val = values[j]
# if val is not NaT and val is not None and not util.is_nan(val):
tz = getattr(values[j], "tzinfo", None)
if tz is not None:
return False

return True


@cython.internal
cdef class TimedeltaValidator(TemporalValidator):
cdef bint is_value_typed(self, object value) except -1:
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,9 +290,15 @@ def _scalar_type(self) -> type[Timestamp]:

@classmethod
def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
if lib.infer_dtype(scalars, skipna=True) not in ["datetime", "datetime64"]:
# TODO: require any NAs be valid-for-DTA
# TODO: if dtype is passed, check for tzawareness compat?
# TODO: require any NAs be valid-for-DTA
# TODO: if dtype is passed, check for tzawareness compat?
if not lib.is_datetime64_array(scalars):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we need this (or something like it) because e.g. lib.is_datetime_with_singletz_array does not check if there are datetimes:

print(lib.is_datetime_with_singletz_array(np.array([1, 2, 3])))
# True

However, I do not currently understand the behavior differences between this and the previous lib.infer_dtype and need to look into this more. Any guidance here is much appreciated.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we get that behavior from is_datetime_with_singletz_array bc it implicitly assumes it is called from maybe_convert_objects, where it will only get called if the first non-NaT it sees is a datetime with a tz.

I'm not a fan of infer_dtype bc the string returns are clunky (and hard to reason about, e.g. when do you expect "datetime" vs "datetime64") and fragile when we add new return values (e.g. "integer-na"). Also in cases when you only care about datetimes, it will not short-circuit when it sees a non-datetime.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks - makes perfect sense to move away from infer_dtype. For the old behavior, I'm seeing the use of infer_dtype allow the following to not raise on a non-empty input:

  • If the scalars has the attribute type, kind, name, or base set to one of "datetime64[ns]", "M", datetime
  • is_datetime64_array returns True
  • is_datetime_array returns True

As far as I can tell, the input scalars here must be an ndarray, Index, or ExtensionArray, but I don't feel confident about this from the code path in Index.map.

With this, I'm wondering if the following is a good equivalent:

if (
    not is_datetime64_any_dtype(scalars)
    and not lib.is_datetime64_array(scalars)
    and not lib.is_datetime_array(scalars)
):
    raise ValueError

raise ValueError
elif isinstance(
dtype, DatetimeTZDtype
) and not lib.is_datetime_with_singletz_array(scalars):
raise ValueError
elif isinstance(dtype, np.dtype) and not lib.is_datetime_naive_array(scalars):
raise ValueError
return cls._from_sequence(scalars, dtype=dtype)

Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/indexes/datetimes/methods/test_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,16 @@ def test_index_map(self, name):
)
exp_index = MultiIndex.from_product(((2018,), range(1, 7)), names=[name, name])
tm.assert_index_equal(index, exp_index)

@pytest.mark.parametrize("input_tz", ["UTC", None])
@pytest.mark.parametrize("output_tz", ["UTC", None])
def test_mapping_tz_to_tz_agnostic(self, input_tz, output_tz):
# GH#57192
index = date_range("2018-01-01", periods=6, freq="ME", tz=input_tz)
expected = date_range("2018-01-01", periods=6, freq="ME", tz=output_tz)
if input_tz == "UTC" and output_tz == "UTC":
method = "tz_convert"
else:
method = "tz_localize"
result = index.map(lambda x: getattr(x, method)(output_tz))
tm.assert_index_equal(result, expected)