Skip to content

Commit 9e912ba

Browse files
authored
clib.conversion._to_numpy: Add tests for pandas.Series with datetime dtypes (#3670)
1 parent 0bf733f commit 9e912ba

File tree

2 files changed

+118
-0
lines changed

2 files changed

+118
-0
lines changed

pygmt/clib/conversion.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,17 @@ def _to_numpy(data: Any) -> np.ndarray:
192192
numpy_dtype = np.float64
193193
data = data.to_numpy(na_value=np.nan)
194194

195+
# Deal with timezone-aware datetime dtypes.
196+
if isinstance(dtype, pd.DatetimeTZDtype): # pandas.DatetimeTZDtype
197+
numpy_dtype = getattr(dtype, "base", None)
198+
elif isinstance(dtype, pd.ArrowDtype) and hasattr(dtype.pyarrow_dtype, "tz"):
199+
# pd.ArrowDtype[pa.Timestamp]
200+
numpy_dtype = getattr(dtype, "numpy_dtype", None)
201+
# TODO(pandas>=2.1): Remove the workaround for pandas<2.1.
202+
if Version(pd.__version__) < Version("2.1"):
203+
# In pandas 2.0, dtype.numpy_dtype is dtype("O").
204+
numpy_dtype = np.dtype(f"M8[{dtype.pyarrow_dtype.unit}]") # type: ignore[assignment, attr-defined]
205+
195206
array = np.ascontiguousarray(data, dtype=numpy_dtype)
196207

197208
# Check if a np.object_ array can be converted to np.str_.

pygmt/tests/test_clib_to_numpy.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,113 @@ def test_to_numpy_pandas_date(dtype, expected_dtype):
365365
)
366366

367367

368+
pandas_old_version = pytest.mark.xfail(
369+
condition=Version(pd.__version__) < Version("2.1"),
370+
reason="pandas 2.0 bug reported in https://github.com/pandas-dev/pandas/issues/52705",
371+
)
372+
373+
374+
@pytest.mark.parametrize(
375+
("dtype", "expected_dtype"),
376+
[
377+
# NumPy datetime64 types. Only units 's'/'ms'/'us'/'ns' are supported.
378+
pytest.param("datetime64[s]", "datetime64[s]", id="datetime64[s]"),
379+
pytest.param("datetime64[ms]", "datetime64[ms]", id="datetime64[ms]"),
380+
pytest.param("datetime64[us]", "datetime64[us]", id="datetime64[us]"),
381+
pytest.param("datetime64[ns]", "datetime64[ns]", id="datetime64[ns]"),
382+
# pandas.DatetimeTZDtype can be given in two ways [tz is required]:
383+
# 1. pandas.DatetimeTZDtype(unit, tz)
384+
# 2. String aliases: "datetime64[unit, tz]"
385+
pytest.param(
386+
"datetime64[s, UTC]",
387+
"datetime64[s]",
388+
id="datetime64[s, tz=UTC]",
389+
marks=pandas_old_version,
390+
),
391+
pytest.param(
392+
"datetime64[s, America/New_York]",
393+
"datetime64[s]",
394+
id="datetime64[s, tz=America/New_York]",
395+
marks=pandas_old_version,
396+
),
397+
pytest.param(
398+
"datetime64[s, +07:30]",
399+
"datetime64[s]",
400+
id="datetime64[s, +07:30]",
401+
marks=pandas_old_version,
402+
),
403+
# PyArrow timestamp types can be given in two ways [tz is optional]:
404+
# 1. pd.ArrowDtype(pyarrow.Timestamp(unit, tz=tz))
405+
# 2. String aliases: "timestamp[unit, tz][pyarrow]"
406+
pytest.param(
407+
"timestamp[s][pyarrow]",
408+
"datetime64[s]",
409+
id="timestamp[s][pyarrow]",
410+
marks=skip_if_no(package="pyarrow"),
411+
),
412+
pytest.param(
413+
"timestamp[ms][pyarrow]",
414+
"datetime64[ms]",
415+
id="timestamp[ms][pyarrow]",
416+
marks=[skip_if_no(package="pyarrow"), pandas_old_version],
417+
),
418+
pytest.param(
419+
"timestamp[us][pyarrow]",
420+
"datetime64[us]",
421+
id="timestamp[us][pyarrow]",
422+
marks=[skip_if_no(package="pyarrow"), pandas_old_version],
423+
),
424+
pytest.param(
425+
"timestamp[ns][pyarrow]",
426+
"datetime64[ns]",
427+
id="timestamp[ns][pyarrow]",
428+
marks=skip_if_no(package="pyarrow"),
429+
),
430+
pytest.param(
431+
"timestamp[s, UTC][pyarrow]",
432+
"datetime64[s]",
433+
id="timestamp[s, UTC][pyarrow]",
434+
marks=skip_if_no(package="pyarrow"),
435+
),
436+
pytest.param(
437+
"timestamp[s, America/New_York][pyarrow]",
438+
"datetime64[s]",
439+
id="timestamp[s, America/New_York][pyarrow]",
440+
marks=skip_if_no(package="pyarrow"),
441+
),
442+
pytest.param(
443+
"timestamp[s, +08:00][pyarrow]",
444+
"datetime64[s]",
445+
id="timestamp[s, +08:00][pyarrow]",
446+
marks=skip_if_no(package="pyarrow"),
447+
),
448+
],
449+
)
450+
def test_to_numpy_pandas_datetime(dtype, expected_dtype):
451+
"""
452+
Test the _to_numpy function with pandas.Series of datetime types.
453+
"""
454+
series = pd.Series(
455+
[pd.Timestamp("2024-01-02T03:04:05"), pd.Timestamp("2024-01-02T03:04:06")],
456+
dtype=dtype,
457+
)
458+
result = _to_numpy(series)
459+
_check_result(result, np.datetime64)
460+
assert result.dtype == expected_dtype
461+
462+
# Convert to UTC if the dtype is timezone-aware
463+
if "," in str(dtype): # A hacky way to decide if the dtype is timezone-aware.
464+
# TODO(pandas>=2.1): Simplify the if-else statement.
465+
if Version(pd.__version__) < Version("2.1") and dtype.startswith("timestamp"):
466+
# pandas 2.0 doesn't have the dt.tz_convert method for pyarrow.Timestamp.
467+
series = pd.to_datetime(series, utc=True)
468+
else:
469+
series = series.dt.tz_convert("UTC")
470+
# Remove time zone information and preserve local time.
471+
expected_series = series.dt.tz_localize(tz=None)
472+
npt.assert_array_equal(result, np.array(expected_series, dtype=expected_dtype))
473+
474+
368475
########################################################################################
369476
# Test the _to_numpy function with PyArrow arrays.
370477
#

0 commit comments

Comments
 (0)