diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 40d90ed71c4..bb616c27296 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -132,6 +132,66 @@ def dataarray_to_matrix( return matrix, region, inc +def _to_ndarray(array: Any) -> np.ndarray: + """ + Convert an array-like object to a C contiguous numpy array. + + The function aims to convert any array-like objects (e.g., Python lists or tuples, + NumPy arrays with various dtypes, pandas.Series with NumPy/Pandas/PyArrow dtypes, + PyArrow arrays with various dtypes) to a numpy array. + + The function is internally used in the ``vectors_to_arrays`` function, which is + responsible for converting a sequence of vectors to a list of C contiguous numpy + arrays. Thus, the function uses the :numpy:func:`numpy.ascontiguousarray` function + rather than the :numpy:func:`numpy.asarray`/:func:`numpy.asanyarray` functions, to + ensure the returned numpy array is C contiguous. + + Parameters + ---------- + array + The array-like object to convert. + + Returns + ------- + array + The C contiguous numpy array. + """ + # A dictionary mapping unsupported dtypes to the expected numpy dtype. + dtypes: dict[str, type] = { + # "string" for "string[python]", "string[pyarrow]", "string[pyarrow_numpy]", and + # pa.string() + "string": np.str_, + "date32[day][pyarrow]": np.datetime64, + "date64[ms][pyarrow]": np.datetime64, + } + # pandas nullable types and pyarrow types were converted to object dtype prior to + # pandas 2.2, and these dtypes are now converted to suitable numpy dtypes. + # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#to-numpy-for-numpy-nullable-and-arrow-types-converts-to-suitable-numpy-dtype + # Following SPEC 0, pandas 2.1 will be dropped in 2025 Q3, so it's likely we can + # remove the workaround in PyGMT v0.17.0. 
+ if Version(pd.__version__) < Version("2.2"): + dtypes.update( + { + "Int8": np.int8, + "Int16": np.int16, + "Int32": np.int32, + "Int64": np.int64, + "UInt8": np.uint8, + "UInt16": np.uint16, + "UInt32": np.uint32, + "UInt64": np.uint64, + "Float32": np.float32, + "Float64": np.float64, + } + ) + if hasattr(array, "isna") and array.isna().any(): + array = array.astype(np.float64) + + vec_dtype = str(getattr(array, "dtype", getattr(array, "type", ""))) + array = np.ascontiguousarray(array, dtype=dtypes.get(vec_dtype)) + return array + + def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: """ Convert 1-D vectors (scalars, lists, or array-like) to C contiguous 1-D arrays. @@ -171,27 +231,7 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: >>> all(i.ndim == 1 for i in arrays) True """ - dtypes = { - "date32[day][pyarrow]": np.datetime64, - "date64[ms][pyarrow]": np.datetime64, - } - arrays = [] - for vector in vectors: - if ( - hasattr(vector, "isna") - and vector.isna().any() - and Version(pd.__version__) < Version("2.2") - ): - # Workaround for dealing with pd.NA with pandas < 2.2. - # Bug report at: https://github.com/GenericMappingTools/pygmt/issues/2844 - # Following SPEC0, pandas 2.1 will be dropped in 2025 Q3, so it's likely - # we can remove the workaround in PyGMT v0.17.0. - array = np.ascontiguousarray(vector.astype(float)) - else: - vec_dtype = str(getattr(vector, "dtype", "")) - array = np.ascontiguousarray(vector, dtype=dtypes.get(vec_dtype)) - arrays.append(array) - return arrays + return [_to_ndarray(vector) for vector in vectors] def sequence_to_ctypes_array( diff --git a/pygmt/tests/test_clib_to_ndarray.py b/pygmt/tests/test_clib_to_ndarray.py new file mode 100644 index 00000000000..b48bc548d6c --- /dev/null +++ b/pygmt/tests/test_clib_to_ndarray.py @@ -0,0 +1,300 @@ +""" +Test the _to_ndarray function in the clib.conversion module. 
+""" + +import numpy as np +import numpy.testing as npt +import pandas as pd +import pytest +from pygmt.clib.conversion import _to_ndarray +from pygmt.helpers.testing import skip_if_no + +try: + import pyarrow as pa + + _HAS_PYARROW = True +except ImportError: + _HAS_PYARROW = False + +dtypes_numpy = [ + np.int8, + np.int16, + np.int32, + np.int64, + np.longlong, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.ulonglong, + np.float16, + np.float32, + np.float64, + np.longdouble, + np.complex64, + np.complex128, + np.clongdouble, +] + + +def _check_result(result): + """ + A helper function to check the result of the _to_ndarray function. + + Check the following: + + 1. The result is a NumPy array. + 2. The result is C-contiguous. + 3. The result dtype is not np.object_. + """ + assert isinstance(result, np.ndarray) + assert result.flags.c_contiguous is True + assert result.dtype != np.object_ + + +@pytest.mark.parametrize("dtype", dtypes_numpy) +def test_to_ndarray_numpy_ndarray_numpy_numeric(dtype): + """ + Test the _to_ndarray function with 1-D and 2-D NumPy arrays of numeric dtypes. + """ + # 1-D array + array = np.array([1, 2, 3], dtype=dtype) + assert array.dtype == dtype + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + # 2-D array + array = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype) + assert array.dtype == dtype + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + +@pytest.mark.parametrize("dtype", [None, np.str_]) +def test_to_ndarray_numpy_ndarray_numpy_string(dtype): + """ + Test the _to_ndarray function with 1-D NumPy arrays of strings. 
+ """ + array = np.array(["a", "b", "c"], dtype=dtype) + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + +@pytest.mark.parametrize( + "dtype", + [ + np.datetime64, + "datetime64[Y]", + "datetime64[M]", + "datetime64[W]", + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + "datetime64[ps]", + "datetime64[fs]", + "datetime64[as]", + ], +) +def test_to_ndarray_numpy_ndarray_numpy_datetime(dtype): + """ + Test the _to_ndarray function with 1-D NumPy arrays of datetime. + """ + array = np.array(["2024-01-01", "2024-01-02", "2024-01-03"], dtype=dtype) + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + +@pytest.mark.parametrize( + "dtype", + [ + *dtypes_numpy, + pytest.param(pd.Int8Dtype(), id="Int8"), + pytest.param(pd.Int16Dtype(), id="Int16"), + pytest.param(pd.Int32Dtype(), id="Int32"), + pytest.param(pd.Int64Dtype(), id="Int64"), + pytest.param(pd.UInt8Dtype(), id="UInt8"), + pytest.param(pd.UInt16Dtype(), id="UInt16"), + pytest.param(pd.UInt32Dtype(), id="UInt32"), + pytest.param(pd.UInt64Dtype(), id="UInt64"), + pytest.param(pd.Float32Dtype(), id="Float32"), + pytest.param(pd.Float64Dtype(), id="Float64"), + pytest.param("int8[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int16[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int64[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint8[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint16[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint64[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("float32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("float64[pyarrow]", 
marks=skip_if_no(package="pyarrow")), + ], +) +def test_to_ndarray_pandas_series_numeric(dtype): + """ + Test the _to_ndarray function with pandas Series with NumPy dtypes, pandas dtypes, + and pandas dtypes with pyarrow backend. + """ + series = pd.Series([1, 2, 3], dtype=dtype) + assert series.dtype == dtype + result = _to_ndarray(series) + _check_result(result) + npt.assert_array_equal(result, series) + + +@pytest.mark.parametrize( + "dtype", + [ + pytest.param(pd.Int8Dtype(), id="Int8"), + pytest.param(pd.Int16Dtype(), id="Int16"), + pytest.param(pd.Int32Dtype(), id="Int32"), + pytest.param(pd.Int64Dtype(), id="Int64"), + pytest.param(pd.UInt8Dtype(), id="UInt8"), + pytest.param(pd.UInt16Dtype(), id="UInt16"), + pytest.param(pd.UInt32Dtype(), id="UInt32"), + pytest.param(pd.UInt64Dtype(), id="UInt64"), + pytest.param(pd.Float32Dtype(), id="Float32"), + pytest.param(pd.Float64Dtype(), id="Float64"), + pytest.param("int8[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int16[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int64[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint8[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint16[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint64[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("float32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("float64[pyarrow]", marks=skip_if_no(package="pyarrow")), + ], +) +def test_to_ndarray_pandas_series_numeric_with_na(dtype): + """ + Test the _to_ndarray function with pandas Series with pandas nullable dtypes or + pyarrow-backed dtypes that contain pandas NA. 
+ """ + series = pd.Series([1, pd.NA, 3], dtype=dtype) + assert series.dtype == dtype + result = _to_ndarray(series) + _check_result(result) + npt.assert_array_equal(result, np.array([1, np.nan, 3], dtype=np.float64)) + + +@pytest.mark.parametrize( + "dtype", + [ + # None, + # np.str_, + "string[python]", + pytest.param("string[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("string[pyarrow_numpy]", marks=skip_if_no(package="pyarrow")), + ], +) +def test_to_ndarray_pandas_series_string(dtype): + """ + Test the _to_ndarray function with pandas Series with string dtype. + """ + series = pd.Series(["a", "bcd", "12345"], dtype=dtype) + result = _to_ndarray(series) + _check_result(result) + npt.assert_array_equal(result, series) + + +@pytest.mark.parametrize( + "dtype", + [ + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + # pd.DatetimeTZDtype(tz="UTC"), + pytest.param("date32[day][pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("date64[ms][pyarrow]", marks=skip_if_no(package="pyarrow")), + ], +) +def test_to_ndarray_pandas_series_datetime(dtype): + """ + Test the _to_ndarray function with pandas Series with datetime dtype. + """ + series = pd.Series( + ["2024-01-01T00:00:00", "2024-01-02T00:00:00", "2024-01-03T00:00:00"], + dtype=dtype, + ) + result = _to_ndarray(series) + _check_result(result) + npt.assert_array_equal(result, series) + + +# @pytest.mark.parametrize( +# "dtype", +# [ +# pytest.param("time32[s][pyarrow]", marks=skip_if_no(package="pyarrow")), +# pytest.param("time32[ms][pyarrow]", marks=skip_if_no(package="pyarrow")), +# pytest.param("time64[us][pyarrow]", marks=skip_if_no(package="pyarrow")), +# pytest.param("time64[ns][pyarrow]", marks=skip_if_no(package="pyarrow")), +# ], +# ) +# def test_to_ndarray_pandas_series_time(dtype): +# """ +# Test the _to_ndarray function with pandas Series with time dtype. 
+# """ +# series = pd.Series(["00:00:00", "01:02:03", "23:59:59"], dtype=dtype) +# result = _to_ndarray(series) +# _check_result(result) + + +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") +@pytest.mark.parametrize( + "dtype", + [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float32", + "float64", + ], +) +def test_to_ndarray_pyarrow_array(dtype): + """ + Test the _to_ndarray function with pyarrow arrays with numeric types. + """ + array = pa.array([1, 2, 3], type=dtype) + assert array.type == dtype + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") +def test_to_ndarray_pyarrow_array_float16(): + """ + Test the _to_ndarray function with pyarrow float16 array. + + Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + """ + array = pa.array(np.array([1.5, 2.5, 3.5], dtype=np.float16), type=pa.float16()) + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") +def test_to_ndarray_pyarrow_array_string(): + """ + Test the _to_ndarray function with pyarrow string array. + """ + array = pa.array(["a", "bcd", "12345"], type=pa.string()) + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array)