diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index 847acccbda1..e77dd53ea4a 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -10,6 +10,7 @@ import pytest from packaging.version import Version from pygmt.clib.conversion import _to_numpy +from pygmt.helpers.testing import skip_if_no try: import pyarrow as pa @@ -18,6 +19,9 @@ except ImportError: _HAS_PYARROW = False +# Mark tests that require pyarrow +pa_marks = {"marks": skip_if_no(package="pyarrow")} + def _check_result(result, expected_dtype): """ @@ -145,6 +149,11 @@ def test_to_numpy_ndarray_numpy_dtypes_numeric(dtype, expected_dtype): # - BooleanDtype # - ArrowDtype: a special dtype used to store data in the PyArrow format. # +# In pandas, PyArrow types can be specified using the following formats: +# +# - The name of the dtype followed by "[pyarrow]" (e.g., "int8[pyarrow]") +# - Specified using ``ArrowDtype`` (e.g., "pd.ArrowDtype(pa.int8())") +# # References: # 1. https://pandas.pydata.org/docs/reference/arrays.html # 2. 
https://pandas.pydata.org/docs/user_guide/basics.html#basics-dtypes @@ -174,13 +183,30 @@ def test_to_numpy_pandas_series_numpy_dtypes_numeric(dtype, expected_dtype): pytest.param(pd.UInt64Dtype(), np.uint64, id="UInt64"), pytest.param(pd.Float32Dtype(), np.float32, id="Float32"), pytest.param(pd.Float64Dtype(), np.float64, id="Float64"), + pytest.param("int8[pyarrow]", np.int8, id="int8[pyarrow]", **pa_marks), + pytest.param("int16[pyarrow]", np.int16, id="int16[pyarrow]", **pa_marks), + pytest.param("int32[pyarrow]", np.int32, id="int32[pyarrow]", **pa_marks), + pytest.param("int64[pyarrow]", np.int64, id="int64[pyarrow]", **pa_marks), + pytest.param("uint8[pyarrow]", np.uint8, id="uint8[pyarrow]", **pa_marks), + pytest.param("uint16[pyarrow]", np.uint16, id="uint16[pyarrow]", **pa_marks), + pytest.param("uint32[pyarrow]", np.uint32, id="uint32[pyarrow]", **pa_marks), + pytest.param("uint64[pyarrow]", np.uint64, id="uint64[pyarrow]", **pa_marks), + pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks), + pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks), + pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks), ], ) def test_to_numpy_pandas_series_pandas_dtypes_numeric(dtype, expected_dtype): """ - Test the _to_numpy function with pandas.Series of pandas numeric dtypes. + Test the _to_numpy function with pandas.Series of pandas/PyArrow numeric dtypes. """ - series = pd.Series([1, 2, 3, 4, 5, 6], dtype=dtype)[::2] # Not C-contiguous + data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"): + # float16 needs special handling for pandas < 2.2. 
+ # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + data = np.array(data, dtype=np.float16) + + series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous result = _to_numpy(series) _check_result(result, expected_dtype) npt.assert_array_equal(result, series) @@ -199,13 +225,30 @@ def test_to_numpy_pandas_series_pandas_dtypes_numeric(dtype, expected_dtype): pytest.param(pd.UInt64Dtype(), np.float64, id="UInt64"), pytest.param(pd.Float32Dtype(), np.float32, id="Float32"), pytest.param(pd.Float64Dtype(), np.float64, id="Float64"), + pytest.param("int8[pyarrow]", np.float64, id="int8[pyarrow]", **pa_marks), + pytest.param("int16[pyarrow]", np.float64, id="int16[pyarrow]", **pa_marks), + pytest.param("int32[pyarrow]", np.float64, id="int32[pyarrow]", **pa_marks), + pytest.param("int64[pyarrow]", np.float64, id="int64[pyarrow]", **pa_marks), + pytest.param("uint8[pyarrow]", np.float64, id="uint8[pyarrow]", **pa_marks), + pytest.param("uint16[pyarrow]", np.float64, id="uint16[pyarrow]", **pa_marks), + pytest.param("uint32[pyarrow]", np.float64, id="uint32[pyarrow]", **pa_marks), + pytest.param("uint64[pyarrow]", np.float64, id="uint64[pyarrow]", **pa_marks), + pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks), + pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks), + pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks), ], ) def test_to_numpy_pandas_series_pandas_dtypes_numeric_with_na(dtype, expected_dtype): """ - Test the _to_numpy function with pandas.Series of pandas numeric dtypes and NA. + Test the _to_numpy function with pandas.Series of pandas/PyArrow numeric dtypes and + missing values (NA). 
""" - series = pd.Series([1, 2, pd.NA, 4, 5, 6], dtype=dtype)[::2] # Not C-contiguous + data = [1.0, 2.0, None, 4.0, 5.0, 6.0] + if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"): + # float16 needs special handling for pandas < 2.2. + # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + data = np.array(data, dtype=np.float16) + series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous assert series.isna().any() result = _to_numpy(series) _check_result(result, expected_dtype)