Skip to content

Commit be6594e

Browse files
Emit a warning when converting datetime or timedelta values to nanosecond precision (#7201)
* [test-upstream] Emit a warning when converting to nanosecond precision * Assert nanosecond dtype after conversion in all cases * Apply suggestions from code review Co-authored-by: Illviljan <[email protected]> * [test-upstream] add separate test for second-precision TimedeltaIndex Co-authored-by: Illviljan <[email protected]>
1 parent 4944b9e commit be6594e

File tree

4 files changed

+197
-4
lines changed

4 files changed

+197
-4
lines changed

doc/whats-new.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,13 @@ Internal Changes
7676
encoding times to preserve existing behavior and prevent future errors when it
7777
is eventually set to ``True`` by default in cftime (:pull:`7171`). By
7878
`Spencer Clark <https://github.com/spencerkclark>`_.
79+
- Emit a warning under the development version of pandas when we convert
80+
non-nanosecond precision datetime or timedelta values to nanosecond precision.
81+
This was required in the past, because pandas previously was not compatible
82+
with non-nanosecond precision values. However, pandas is currently working
83+
towards removing this restriction. When things stabilize in pandas we will
84+
likely consider relaxing this behavior in xarray as well (:issue:`7175`,
85+
:pull:`7201`). By `Spencer Clark <https://github.com/spencerkclark>`_.
7986

8087
.. _whats-new.2022.10.0:
8188

xarray/core/variable.py

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@
7474
T_Variable,
7575
)
7676

77+
# Message template for the warning issued when datetime or timedelta values
# are cast to nanosecond precision. The ``{case}`` placeholder is filled in
# with the kind of value being converted before the warning is emitted.
NON_NANOSECOND_WARNING = (
    "Converting non-nanosecond precision {case} values to nanosecond precision. "
    "This behavior can eventually be relaxed in xarray, as it is an artifact from "
    "pandas which is now beginning to support non-nanosecond precision values."
)
82+
7783

7884
class MissingDimensionsError(ValueError):
7985
"""Error class used when we can't safely guess a dimension name."""
@@ -180,13 +186,58 @@ def _maybe_wrap_data(data):
180186
return data
181187

182188

189+
def _as_nanosecond_precision(data):
    """Cast datetime or timedelta data to nanosecond precision if needed.

    Parameters
    ----------
    data
        Any object exposing ``dtype`` and ``astype`` (the callers in this
        module pass a ``pandas.Series``, ``pandas.DatetimeIndex`` or
        ``pandas.TimedeltaIndex``).

    Returns
    -------
    ``data.astype(...)`` with a nanosecond-precision dtype when ``data`` holds
    non-nanosecond datetime64, timezone-aware datetime, or timedelta64 values;
    otherwise ``data`` itself, unchanged.

    Warns
    -----
    UserWarning
        Emitted (via ``NON_NANOSECOND_WARNING``) whenever a conversion is
        actually performed.
    """
    dtype = data.dtype
    # Plain NumPy datetime64 with a unit other than nanoseconds.  The
    # isinstance check excludes pandas extension dtypes, whose kind is also "M".
    non_ns_datetime64 = (
        dtype.kind == "M"
        and isinstance(dtype, np.dtype)
        and dtype != np.dtype("datetime64[ns]")
    )
    # Timezone-aware pandas dtype with a unit other than nanoseconds.
    non_ns_datetime_tz_dtype = (
        isinstance(dtype, pd.DatetimeTZDtype) and dtype.unit != "ns"
    )
    if non_ns_datetime64 or non_ns_datetime_tz_dtype:
        warnings.warn(NON_NANOSECOND_WARNING.format(case="datetime"))
        if isinstance(dtype, pd.DatetimeTZDtype):
            # Preserve the timezone; only the unit changes.
            nanosecond_precision_dtype = pd.DatetimeTZDtype("ns", dtype.tz)
        else:
            nanosecond_precision_dtype = "datetime64[ns]"
        return data.astype(nanosecond_precision_dtype)
    elif dtype.kind == "m" and dtype != np.dtype("timedelta64[ns]"):
        warnings.warn(NON_NANOSECOND_WARNING.format(case="timedelta"))
        return data.astype("timedelta64[ns]")
    else:
        return data
211+
212+
183213
def _possibly_convert_objects(values):
    """Convert arrays of datetime.datetime and datetime.timedelta objects into
    datetime64 and timedelta64, according to the pandas convention.

    For the time being, any resulting non-nanosecond precision datetime or
    timedelta values are also converted to nanosecond precision. While pandas
    is relaxing this in version 2.0.0, in xarray we will need to make sure we
    are ready to handle non-nanosecond precision datetimes or timedeltas in
    our code before allowing such values to pass through unchanged.
    Converting to nanosecond precision through pandas.Series objects ensures
    that datetimes and timedeltas are within the valid date range for ns
    precision, as pandas will raise an error if they are not.

    Parameters
    ----------
    values
        Array-like exposing ``ravel()`` and ``shape``.

    Returns
    -------
    numpy.ndarray
        The converted values, reshaped to ``values.shape``.
    """
    # pandas.Series is one-dimensional, so flatten first and restore the shape
    # at the end.
    as_series = pd.Series(values.ravel())
    if as_series.dtype.kind in "mM":
        as_series = _as_nanosecond_precision(as_series)
    return np.asarray(as_series).reshape(values.shape)
229+
230+
231+
def _possibly_convert_datetime_or_timedelta_index(data):
    """For the time being, convert any non-nanosecond precision DatetimeIndex or
    TimedeltaIndex objects to nanosecond precision.

    While pandas is relaxing this in version 2.0.0, in xarray we will need to
    make sure we are ready to handle non-nanosecond precision datetimes or
    timedeltas in our code before allowing such values to pass through
    unchanged.

    Parameters
    ----------
    data
        Any object; only ``pandas.DatetimeIndex`` and ``pandas.TimedeltaIndex``
        instances are touched.

    Returns
    -------
    The result of ``_as_nanosecond_precision(data)`` for a DatetimeIndex or
    TimedeltaIndex; otherwise ``data`` itself, unchanged.
    """
    if isinstance(data, (pd.DatetimeIndex, pd.TimedeltaIndex)):
        return _as_nanosecond_precision(data)
    else:
        return data
190241

191242

192243
def as_compatible_data(data, fastpath=False):
@@ -210,6 +261,7 @@ def as_compatible_data(data, fastpath=False):
210261
return data.data
211262

212263
if isinstance(data, NON_NUMPY_SUPPORTED_ARRAY_TYPES):
264+
data = _possibly_convert_datetime_or_timedelta_index(data)
213265
return _maybe_wrap_data(data)
214266

215267
if isinstance(data, tuple):

xarray/tests/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ def _importorskip(
8989
requires_scipy_or_netCDF4 = pytest.mark.skipif(
9090
not has_scipy_or_netCDF4, reason="requires scipy or netCDF4"
9191
)
92+
# _importorskip does not work for development versions
93+
has_pandas_version_two = Version(pd.__version__).major >= 2
94+
requires_pandas_version_two = pytest.mark.skipif(
95+
not has_pandas_version_two, reason="requires pandas 2.0.0"
96+
)
9297

9398
# change some global options for tests
9499
set_options(warn_for_unclosed_files=True)

xarray/tests/test_variable.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,11 @@
3636
assert_equal,
3737
assert_identical,
3838
assert_no_warnings,
39+
has_pandas_version_two,
3940
raise_if_dask_computes,
4041
requires_cupy,
4142
requires_dask,
43+
requires_pandas_version_two,
4244
requires_pint,
4345
requires_sparse,
4446
source_ndarray,
@@ -2520,6 +2522,26 @@ def test_datetime(self):
25202522
assert np.ndarray == type(actual)
25212523
assert np.dtype("datetime64[ns]") == actual.dtype
25222524

2525+
@requires_pandas_version_two
def test_tz_datetime(self) -> None:
    """Second-precision tz-aware datetimes come out of as_compatible_data at
    nanosecond precision, both as an index and as a Series."""
    tz = pytz.timezone("US/Eastern")
    times_ns = pd.date_range("2000", periods=1, tz=tz)

    times_s = times_ns.astype(pd.DatetimeTZDtype("s", tz))
    # The conversion warning is covered by dedicated tests; silence it here.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        actual = as_compatible_data(times_s)
    # Element-wise comparison yields an array; reduce it explicitly so the
    # assertion does not rely on the index having exactly one element.
    assert (actual.array == times_s).all()
    assert actual.array.dtype == pd.DatetimeTZDtype("ns", tz)

    series = pd.Series(times_s)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        actual = as_compatible_data(series)

    np.testing.assert_array_equal(actual, series.values)
    assert actual.dtype == np.dtype("datetime64[ns]")
2544+
25232545
def test_full_like(self) -> None:
25242546
# For more thorough tests, see test_variable.py
25252547
orig = Variable(
@@ -2790,3 +2812,110 @@ def test_from_pint_wrapping_dask(self, Var):
27902812
result = v.as_numpy()
27912813
assert_identical(result, Var("x", arr))
27922814
np.testing.assert_equal(v.to_numpy(), arr)
2815+
2816+
2817+
@pytest.mark.parametrize(
    ("values", "warns_under_pandas_version_two"),
    [
        (np.datetime64("2000-01-01", "ns"), False),
        (np.datetime64("2000-01-01", "s"), True),
        (np.array([np.datetime64("2000-01-01", "ns")]), False),
        (np.array([np.datetime64("2000-01-01", "s")]), True),
        (pd.date_range("2000", periods=1), False),
        (datetime(2000, 1, 1), False),
        (np.array([datetime(2000, 1, 1)]), False),
        (pd.date_range("2000", periods=1, tz=pytz.timezone("US/Eastern")), False),
        (
            pd.Series(pd.date_range("2000", periods=1, tz=pytz.timezone("US/Eastern"))),
            False,
        ),
    ],
    ids=lambda x: f"{x}",
)
def test_datetime_conversion_warning(values, warns_under_pandas_version_two) -> None:
    """Building a Variable from datetime-like input warns only for the cases
    flagged in the parametrization (and only under pandas >= 2), and always
    ends up at nanosecond precision."""
    # Scalars become 0-d variables; array-likes get a single "time" dimension.
    dims = ["time"] if isinstance(values, (np.ndarray, pd.Index, pd.Series)) else []
    if warns_under_pandas_version_two and has_pandas_version_two:
        with pytest.warns(UserWarning, match="non-nanosecond precision datetime"):
            var = Variable(dims, values)
    else:
        # Escalate warnings to errors so that any unexpected warning fails
        # the test.
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            var = Variable(dims, values)

    if var.dtype.kind == "M":
        assert var.dtype == np.dtype("datetime64[ns]")
    else:
        # The only case where a non-datetime64 dtype can occur currently is in
        # the case that the variable is backed by a timezone-aware
        # DatetimeIndex, and thus is hidden within the PandasIndexingAdapter class.
        assert var._data.array.dtype == pd.DatetimeTZDtype(
            "ns", pytz.timezone("US/Eastern")
        )
2854+
2855+
2856+
@requires_pandas_version_two
def test_pandas_two_only_datetime_conversion_warnings() -> None:
    """Second-precision pandas datetime inputs warn on conversion and end up
    at nanosecond precision."""
    # Note these tests rely on pandas features that are only present in pandas
    # 2.0.0 and above, and so for now cannot be parametrized.
    cases = [
        (pd.date_range("2000", periods=1), "datetime64[s]"),
        (pd.Series(pd.date_range("2000", periods=1)), "datetime64[s]"),
        (
            pd.date_range("2000", periods=1, tz=pytz.timezone("US/Eastern")),
            pd.DatetimeTZDtype("s", pytz.timezone("US/Eastern")),
        ),
        (
            pd.Series(pd.date_range("2000", periods=1, tz=pytz.timezone("US/Eastern"))),
            pd.DatetimeTZDtype("s", pytz.timezone("US/Eastern")),
        ),
    ]
    for data, dtype in cases:
        with pytest.warns(UserWarning, match="non-nanosecond precision datetime"):
            var = Variable(["time"], data.astype(dtype))

        if var.dtype.kind == "M":
            assert var.dtype == np.dtype("datetime64[ns]")
        else:
            # The only case where a non-datetime64 dtype can occur currently is in
            # the case that the variable is backed by a timezone-aware
            # DatetimeIndex, and thus is hidden within the PandasIndexingAdapter class.
            assert var._data.array.dtype == pd.DatetimeTZDtype(
                "ns", pytz.timezone("US/Eastern")
            )
2885+
2886+
2887+
@pytest.mark.parametrize(
    ("values", "warns_under_pandas_version_two"),
    [
        (np.timedelta64(10, "ns"), False),
        (np.timedelta64(10, "s"), True),
        (np.array([np.timedelta64(10, "ns")]), False),
        (np.array([np.timedelta64(10, "s")]), True),
        (pd.timedelta_range("1", periods=1), False),
        (timedelta(days=1), False),
        (np.array([timedelta(days=1)]), False),
    ],
    ids=lambda x: f"{x}",
)
def test_timedelta_conversion_warning(values, warns_under_pandas_version_two) -> None:
    """Building a Variable from timedelta-like input warns only for the cases
    flagged in the parametrization (and only under pandas >= 2), and always
    ends up as timedelta64[ns]."""
    # Scalars become 0-d variables; array-likes get a single "time" dimension.
    dims = ["time"] if isinstance(values, (np.ndarray, pd.Index)) else []
    if warns_under_pandas_version_two and has_pandas_version_two:
        with pytest.warns(UserWarning, match="non-nanosecond precision timedelta"):
            var = Variable(dims, values)
    else:
        # Escalate warnings to errors so that any unexpected warning fails
        # the test.
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            var = Variable(dims, values)

    assert var.dtype == np.dtype("timedelta64[ns]")
2911+
2912+
2913+
@requires_pandas_version_two
def test_pandas_two_only_timedelta_conversion_warning() -> None:
    """A second-precision TimedeltaIndex warns on conversion and ends up as
    timedelta64[ns]."""
    # Note this test relies on a pandas feature that is only present in pandas
    # 2.0.0 and above, and so for now cannot be parametrized.
    data = pd.timedelta_range("1", periods=1).astype("timedelta64[s]")
    with pytest.warns(UserWarning, match="non-nanosecond precision timedelta"):
        var = Variable(["time"], data)

    assert var.dtype == np.dtype("timedelta64[ns]")

0 commit comments

Comments
 (0)