From 35230fa712eea44f802071ef5d02c9cea28b58e9 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 1 Nov 2024 14:52:45 +0800 Subject: [PATCH 1/8] Add the private _to_ndarray function for converting an object to a numpy array The new function is extracted from `vectors_to_arrays` without any changes. --- pygmt/clib/conversion.py | 68 +++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 40d90ed71c4..3d3618738ad 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -132,6 +132,52 @@ def dataarray_to_matrix( return matrix, region, inc +def _to_ndarray(array: Any) -> np.ndarray: + """ + Convert an array-like object to a C contiguous numpy array. + + The function aims to convert any array-like objects (e.g., Python lists or tuples, + NumPy arrays with various dtypes, pandas.Series with NumPy/Pandas/PyArrow dtypes, + PyArrow arrays with various dtypes) to a numpy array. + + The function is internally used in the ``vectors_to_arrays`` function, which is + responsible for converting a sequence of vectors to a list of C contiguous numpy + arrays. Thus, the function uses the :numpy:func:`numpy.ascontiguousarray` function + rather than the :numpy:func:`numpy.asarray`/:func:`numpy.asanyarray` functions, to + ensure the returned numpy array is C contiguous. + + Parameters + ---------- + array + The array-like object to convert. + + Returns + ------- + array + The C contiguous numpy array. + """ + # A dictionary mapping unsupported dtypes to the expected numpy dtype. + dtypes = { + "date32[day][pyarrow]": np.datetime64, + "date64[ms][pyarrow]": np.datetime64, + } + + if ( + hasattr(array, "isna") + and array.isna().any() + and Version(pd.__version__) < Version("2.2") + ): + # Workaround for dealing with pd.NA with pandas < 2.2. + # Bug report at: https://github.com/GenericMappingTools/pygmt/issues/2844 + # Following SPEC0, pandas 2.1 will be dropped in 2025 Q3, so it's likely + # we can remove the workaround in PyGMT v0.17.0. + array = np.ascontiguousarray(array.astype(float)) + else: + vec_dtype = str(getattr(array, "dtype", "")) + array = np.ascontiguousarray(array, dtype=dtypes.get(vec_dtype)) + return array + + def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: """ Convert 1-D vectors (scalars, lists, or array-like) to C contiguous 1-D arrays. @@ -171,27 +217,7 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: >>> all(i.ndim == 1 for i in arrays) True """ - dtypes = { - "date32[day][pyarrow]": np.datetime64, - "date64[ms][pyarrow]": np.datetime64, - } - arrays = [] - for vector in vectors: - if ( - hasattr(vector, "isna") - and vector.isna().any() - and Version(pd.__version__) < Version("2.2") - ): - # Workaround for dealing with pd.NA with pandas < 2.2. - # Bug report at: https://github.com/GenericMappingTools/pygmt/issues/2844 - # Following SPEC0, pandas 2.1 will be dropped in 2025 Q3, so it's likely - # we can remove the workaround in PyGMT v0.17.0. - array = np.ascontiguousarray(vector.astype(float)) - else: - vec_dtype = str(getattr(vector, "dtype", "")) - array = np.ascontiguousarray(vector, dtype=dtypes.get(vec_dtype)) - arrays.append(array) - return arrays + return [_to_ndarray(vector) for vector in vectors] def sequence_to_ctypes_array( From e75e894b721b1d87a880d7f6d8c38a0d3b56fccb Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 1 Nov 2024 16:07:35 +0800 Subject: [PATCH 2/8] Add tests for _to_ndarray with various numeric dtypes --- pygmt/tests/test_clib_to_ndarray.py | 185 ++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 pygmt/tests/test_clib_to_ndarray.py diff --git a/pygmt/tests/test_clib_to_ndarray.py b/pygmt/tests/test_clib_to_ndarray.py new file mode 100644 index 00000000000..5592033adfd --- /dev/null +++ b/pygmt/tests/test_clib_to_ndarray.py @@ -0,0 +1,185 @@ +""" +Test the _to_ndarray function in the clib.conversion module. +""" + +import numpy as np +import numpy.testing as npt +import pandas as pd +import pytest +from pygmt.clib.conversion import _to_ndarray + +try: + import pyarrow as pa + + _HAS_PYARROW = True +except ImportError: + _HAS_PYARROW = False + + +@pytest.fixture(scope="module", name="dtypes_numpy_numeric") +def fixture_dtypes_numpy_numeric(): + """ + List of NumPy numeric dtypes. + + Reference: https://numpy.org/doc/stable/reference/arrays.scalars.html + """ + return [ + np.int8, + np.int16, + np.int32, + np.int64, + np.longlong, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.ulonglong, + np.float16, + np.float32, + np.float64, + np.longdouble, + np.complex64, + np.complex128, + np.clongdouble, + ] + + +@pytest.fixture(scope="module", name="dtypes_pandas_numeric") +def fixture_dtypes_pandas_numeric(): + """ + List of pandas numeric dtypes. + + Reference: https://pandas.pydata.org/docs/reference/arrays.html + """ + return [ + pd.Int8Dtype(), + pd.Int16Dtype(), + pd.Int32Dtype(), + pd.Int64Dtype(), + pd.UInt8Dtype(), + pd.UInt16Dtype(), + pd.UInt32Dtype(), + pd.UInt64Dtype(), + pd.Float32Dtype(), + pd.Float64Dtype(), + ] + + +@pytest.fixture(scope="module", name="dtypes_pandas_numeric_pyarrow_backend") +def fixture_dtypes_pandas_numeric_pyarrow_backend(): + """ + List of pandas dtypes that use pyarrow as the backend. + + Reference: https://pandas.pydata.org/docs/user_guide/pyarrow.html + """ + return [ + "int8[pyarrow]", + "int16[pyarrow]", + "int32[pyarrow]", + "int64[pyarrow]", + "uint8[pyarrow]", + "uint16[pyarrow]", + "uint32[pyarrow]", + "uint64[pyarrow]", + "float32[pyarrow]", + "float64[pyarrow]", + ] + + +@pytest.fixture(scope="module", name="dtypes_pyarrow_numeric") +def fixture_dtypes_pyarrow_numeric(): + """ + List of pyarrow numeric dtypes. + + Reference: https://arrow.apache.org/docs/python/api/datatypes.html + """ + if not _HAS_PYARROW: + return [] + return [ + pa.int8(), + pa.int16(), + pa.int32(), + pa.int64(), + pa.uint8(), + pa.uint16(), + pa.uint32(), + pa.uint64(), + # pa.float16(), # Need special handling. + pa.float32(), + pa.float64(), + ] + + +def _check_result(result): + """ + A helper function to check the result of the _to_ndarray function. + + Check the following: + + 1. The result is a NumPy array. + 2. The result is C-contiguous. + 3. The result dtype is not np.object_. + """ + assert isinstance(result, np.ndarray) + assert result.flags.c_contiguous is True + assert result.dtype != np.object_ + + +def test_to_ndarray_numpy_ndarray_numpy_numeric(dtypes_numpy_numeric): + """ + Test the _to_ndarray function with 1-D NumPy arrays. + """ + # 1-D array + for dtype in dtypes_numpy_numeric: + array = np.array([1, 2, 3], dtype=dtype) + assert array.dtype == dtype + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + # 2-D array + for dtype in dtypes_numpy_numeric: + array = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype) + assert array.dtype == dtype + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + +def test_to_ndarray_pandas_series_numeric( + dtypes_numpy_numeric, dtypes_pandas_numeric, dtypes_pandas_numeric_pyarrow_backend +): + """ + Test the _to_ndarray function with pandas Series with NumPy dtypes, pandas dtypes, + and pandas dtypes with pyarrow backend. + """ + for dtype in ( + dtypes_numpy_numeric + + dtypes_pandas_numeric + + dtypes_pandas_numeric_pyarrow_backend + ): + series = pd.Series([1, 2, 3], dtype=dtype) + assert series.dtype == dtype + result = _to_ndarray(series) + _check_result(result) + npt.assert_array_equal(result, series) + + +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") +def test_to_ndarray_pandas_series_pyarrow_dtype(dtypes_pyarrow_numeric): + """ + Test the _to_ndarray function with pandas Series with pyarrow dtypes. + """ + for dtype in dtypes_pyarrow_numeric: + array = pa.array([1, 2, 3], type=dtype) + assert array.type == dtype + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + # Special handling for float16. + # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + array = pa.array(np.array([1.5, 2.5, 3.5], dtype=np.float16), type=pa.float16()) + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) From 44bb82c201c20d22a0dbfb2ba51bc0c7b6e5a3eb Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 2 Nov 2024 22:33:06 +0800 Subject: [PATCH 3/8] Use pytest.mark.parametrize --- pygmt/tests/test_clib_to_ndarray.py | 223 ++++++++++++---------------- 1 file changed, 94 insertions(+), 129 deletions(-) diff --git a/pygmt/tests/test_clib_to_ndarray.py b/pygmt/tests/test_clib_to_ndarray.py index 5592033adfd..3452fce56fc 100644 --- a/pygmt/tests/test_clib_to_ndarray.py +++ b/pygmt/tests/test_clib_to_ndarray.py @@ -7,6 +7,7 @@ import pandas as pd import pytest from pygmt.clib.conversion import _to_ndarray +from pygmt.helpers.testing import skip_if_no try: import pyarrow as pa @@ -15,99 +16,25 @@ except ImportError: _HAS_PYARROW = False - -@pytest.fixture(scope="module", name="dtypes_numpy_numeric") -def fixture_dtypes_numpy_numeric(): - """ - List of NumPy numeric dtypes. - - Reference: https://numpy.org/doc/stable/reference/arrays.scalars.html - """ - return [ - np.int8, - np.int16, - np.int32, - np.int64, - np.longlong, - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.ulonglong, - np.float16, - np.float32, - np.float64, - np.longdouble, - np.complex64, - np.complex128, - np.clongdouble, - ] - - -@pytest.fixture(scope="module", name="dtypes_pandas_numeric") -def fixture_dtypes_pandas_numeric(): - """ - List of pandas numeric dtypes. - - Reference: https://pandas.pydata.org/docs/reference/arrays.html - """ - return [ - pd.Int8Dtype(), - pd.Int16Dtype(), - pd.Int32Dtype(), - pd.Int64Dtype(), - pd.UInt8Dtype(), - pd.UInt16Dtype(), - pd.UInt32Dtype(), - pd.UInt64Dtype(), - pd.Float32Dtype(), - pd.Float64Dtype(), - ] - - -@pytest.fixture(scope="module", name="dtypes_pandas_numeric_pyarrow_backend") -def fixture_dtypes_pandas_numeric_pyarrow_backend(): - """ - List of pandas dtypes that use pyarrow as the backend. - - Reference: https://pandas.pydata.org/docs/user_guide/pyarrow.html - """ - return [ - "int8[pyarrow]", - "int16[pyarrow]", - "int32[pyarrow]", - "int64[pyarrow]", - "uint8[pyarrow]", - "uint16[pyarrow]", - "uint32[pyarrow]", - "uint64[pyarrow]", - "float32[pyarrow]", - "float64[pyarrow]", - ] - - -@pytest.fixture(scope="module", name="dtypes_pyarrow_numeric") -def fixture_dtypes_pyarrow_numeric(): - """ - List of pyarrow numeric dtypes. - - Reference: https://arrow.apache.org/docs/python/api/datatypes.html - """ - if not _HAS_PYARROW: - return [] - return [ - pa.int8(), - pa.int16(), - pa.int32(), - pa.int64(), - pa.uint8(), - pa.uint16(), - pa.uint32(), - pa.uint64(), - # pa.float16(), # Need special handling. - pa.float32(), - pa.float64(), - ] +dtypes_numpy = [ + np.int8, + np.int16, + np.int32, + np.int64, + np.longlong, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.ulonglong, + np.float16, + np.float32, + np.float64, + np.longdouble, + np.complex64, + np.complex128, + np.clongdouble, +] def _check_result(result): @@ -125,60 +52,98 @@ def _check_result(result): assert result.dtype != np.object_ -def test_to_ndarray_numpy_ndarray_numpy_numeric(dtypes_numpy_numeric): +@pytest.mark.parametrize("dtype", dtypes_numpy) +def test_to_ndarray_numpy_ndarray_numpy_numeric(dtype): """ Test the _to_ndarray function with 1-D NumPy arrays. """ # 1-D array - for dtype in dtypes_numpy_numeric: - array = np.array([1, 2, 3], dtype=dtype) - assert array.dtype == dtype - result = _to_ndarray(array) - _check_result(result) - npt.assert_array_equal(result, array) + array = np.array([1, 2, 3], dtype=dtype) + assert array.dtype == dtype + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) # 2-D array - for dtype in dtypes_numpy_numeric: - array = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype) - assert array.dtype == dtype - result = _to_ndarray(array) - _check_result(result) - npt.assert_array_equal(result, array) + array = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype) + assert array.dtype == dtype + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) -def test_to_ndarray_pandas_series_numeric( - dtypes_numpy_numeric, dtypes_pandas_numeric, dtypes_pandas_numeric_pyarrow_backend -): +@pytest.mark.parametrize( + "dtype", + [ + *dtypes_numpy, + pytest.param(pd.Int8Dtype(), id="Int8"), + pytest.param(pd.Int16Dtype(), id="Int16"), + pytest.param(pd.Int32Dtype(), id="Int32"), + pytest.param(pd.Int64Dtype(), id="Int64"), + pytest.param(pd.UInt8Dtype(), id="UInt8"), + pytest.param(pd.UInt16Dtype(), id="UInt16"), + pytest.param(pd.UInt32Dtype(), id="UInt32"), + pytest.param(pd.UInt64Dtype(), id="UInt64"), + pytest.param(pd.Float32Dtype(), id="Float32"), + pytest.param(pd.Float64Dtype(), id="Float64"), + pytest.param("int8[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int16[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int64[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint8[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint16[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint64[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("float32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("float64[pyarrow]", marks=skip_if_no(package="pyarrow")), + ], +) +def test_to_ndarray_pandas_series_numeric(dtype): """ Test the _to_ndarray function with pandas Series with NumPy dtypes, pandas dtypes, and pandas dtypes with pyarrow backend. """ - for dtype in ( - dtypes_numpy_numeric - + dtypes_pandas_numeric - + dtypes_pandas_numeric_pyarrow_backend - ): - series = pd.Series([1, 2, 3], dtype=dtype) - assert series.dtype == dtype - result = _to_ndarray(series) - _check_result(result) - npt.assert_array_equal(result, series) + series = pd.Series([1, 2, 3], dtype=dtype) + assert series.dtype == dtype + result = _to_ndarray(series) + _check_result(result) + npt.assert_array_equal(result, series) @pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") -def test_to_ndarray_pandas_series_pyarrow_dtype(dtypes_pyarrow_numeric): +@pytest.mark.parametrize( + "dtype", + [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float32", + "float64", + ], +) +def test_to_ndarray_pyarrow_array(dtype): """ Test the _to_ndarray function with pandas Series with pyarrow dtypes. """ - for dtype in dtypes_pyarrow_numeric: - array = pa.array([1, 2, 3], type=dtype) - assert array.type == dtype - result = _to_ndarray(array) - _check_result(result) - npt.assert_array_equal(result, array) - - # Special handling for float16. - # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + array = pa.array([1, 2, 3], type=dtype) + assert array.type == dtype + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") +def test_to_ndarray_pyarrow_array_float16(): + """ + Test the _to_ndarray function with pyarrow float16 array. + + Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + """ array = pa.array(np.array([1.5, 2.5, 3.5], dtype=np.float16), type=pa.float16()) result = _to_ndarray(array) _check_result(result) From fa6903d4d037ee8bf5c250c47a29841094f59eba Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 3 Nov 2024 00:19:43 +0800 Subject: [PATCH 4/8] Backward compatibility with pandas 2.0/2.1 --- pygmt/clib/conversion.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 3d3618738ad..74d2b9dd949 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -157,21 +157,37 @@ def _to_ndarray(array: Any) -> np.ndarray: The C contiguous numpy array. """ # A dictionary mapping unsupported dtypes to the expected numpy dtype. - dtypes = { + dtypes: dict[str, type] = { "date32[day][pyarrow]": np.datetime64, "date64[ms][pyarrow]": np.datetime64, } + # pandas nullable types and pyarrow types were converted to object dtype prior to + # pandas 2.2, and these dtypes are now converted to suitable numpy dtypes. + # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#to-numpy-for-numpy-nullable-and-arrow-types-converts-to-suitable-numpy-dtype + # Following SPEC 0, pandas 2.1 will be dropped in 2025 Q3, so it's likely we can + # remove the workaround in PyGMT v0.17.0. + if Version(pd.__version__) < Version("2.2"): + dtypes.update( + { + "Int8": np.int8, + "Int16": np.int16, + "Int32": np.int32, + "Int64": np.int64, + "UInt8": np.uint8, + "UInt16": np.uint16, + "UInt32": np.uint32, + "UInt64": np.uint64, + "Float32": np.float32, + "Float64": np.float64, + } + ) if ( hasattr(array, "isna") and array.isna().any() and Version(pd.__version__) < Version("2.2") ): - # Workaround for dealing with pd.NA with pandas < 2.2. - # Bug report at: https://github.com/GenericMappingTools/pygmt/issues/2844 - # Following SPEC0, pandas 2.1 will be dropped in 2025 Q3, so it's likely - # we can remove the workaround in PyGMT v0.17.0. - array = np.ascontiguousarray(array.astype(float)) + array = np.ascontiguousarray(array.astype(np.float64)) else: vec_dtype = str(getattr(array, "dtype", "")) array = np.ascontiguousarray(array, dtype=dtypes.get(vec_dtype)) From 7ae437f4511a420433e4939ac1ac557724f6bc8b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 3 Nov 2024 00:41:32 +0800 Subject: [PATCH 5/8] Add tests for pd.NA --- pygmt/tests/test_clib_to_ndarray.py | 36 +++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pygmt/tests/test_clib_to_ndarray.py b/pygmt/tests/test_clib_to_ndarray.py index 3452fce56fc..8b2f09cec16 100644 --- a/pygmt/tests/test_clib_to_ndarray.py +++ b/pygmt/tests/test_clib_to_ndarray.py @@ -110,6 +110,42 @@ def test_to_ndarray_pandas_series_numeric(dtype): npt.assert_array_equal(result, series) +@pytest.mark.parametrize( + "dtype", + [ + pytest.param(pd.Int8Dtype(), id="Int8"), + pytest.param(pd.Int16Dtype(), id="Int16"), + pytest.param(pd.Int32Dtype(), id="Int32"), + pytest.param(pd.Int64Dtype(), id="Int64"), + pytest.param(pd.UInt8Dtype(), id="UInt8"), + pytest.param(pd.UInt16Dtype(), id="UInt16"), + pytest.param(pd.UInt32Dtype(), id="UInt32"), + pytest.param(pd.UInt64Dtype(), id="UInt64"), + pytest.param(pd.Float32Dtype(), id="Float32"), + pytest.param(pd.Float64Dtype(), id="Float64"), + pytest.param("int8[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int16[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("int64[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint8[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint16[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("uint64[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("float32[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("float64[pyarrow]", marks=skip_if_no(package="pyarrow")), + ], +) +def test_to_ndarray_pandas_series_numeric_with_na(dtype): + """ + Test the _to_ndarray function with pandas Series with NumPy dtypes and pandas NA. + """ + series = pd.Series([1, pd.NA, 3], dtype=dtype) + assert series.dtype == dtype + result = _to_ndarray(series) + _check_result(result) + npt.assert_array_equal(result, np.array([1, np.nan, 3], dtype=np.float64)) + + @pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") @pytest.mark.parametrize( "dtype", From bf8c9a5f8f9e693a8cc1559dc8535777f1aa585e Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 3 Nov 2024 00:59:25 +0800 Subject: [PATCH 6/8] Simplify --- pygmt/clib/conversion.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 74d2b9dd949..1865881bad2 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -181,16 +181,11 @@ def _to_ndarray(array: Any) -> np.ndarray: "Float64": np.float64, } ) + if hasattr(array, "isna") and array.isna().any(): + array = array.astype(np.float64) - if ( - hasattr(array, "isna") - and array.isna().any() - and Version(pd.__version__) < Version("2.2") - ): - array = np.ascontiguousarray(array.astype(np.float64)) - else: - vec_dtype = str(getattr(array, "dtype", "")) - array = np.ascontiguousarray(array, dtype=dtypes.get(vec_dtype)) + vec_dtype = str(getattr(array, "dtype", "")) + array = np.ascontiguousarray(array, dtype=dtypes.get(vec_dtype)) return array From e4807e207301903fac43e57a931e016360893e04 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 3 Nov 2024 16:21:03 +0800 Subject: [PATCH 7/8] Add tests for string arrays --- pygmt/clib/conversion.py | 5 +++- pygmt/tests/test_clib_to_ndarray.py | 42 +++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 1865881bad2..bb616c27296 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -158,6 +158,9 @@ def _to_ndarray(array: Any) -> np.ndarray: """ # A dictionary mapping unsupported dtypes to the expected numpy dtype. dtypes: dict[str, type] = { + # "string" for "string[python]", "string[pyarrow]", "string[pyarrow_numpy]", and + # pa.string() + "string": np.str_, "date32[day][pyarrow]": np.datetime64, "date64[ms][pyarrow]": np.datetime64, } @@ -184,7 +187,7 @@ def _to_ndarray(array: Any) -> np.ndarray: if hasattr(array, "isna") and array.isna().any(): array = array.astype(np.float64) - vec_dtype = str(getattr(array, "dtype", "")) + vec_dtype = str(getattr(array, "dtype", getattr(array, "type", ""))) array = np.ascontiguousarray(array, dtype=dtypes.get(vec_dtype)) return array diff --git a/pygmt/tests/test_clib_to_ndarray.py b/pygmt/tests/test_clib_to_ndarray.py index 8b2f09cec16..f67c87dc19c 100644 --- a/pygmt/tests/test_clib_to_ndarray.py +++ b/pygmt/tests/test_clib_to_ndarray.py @@ -72,6 +72,17 @@ def test_to_ndarray_numpy_ndarray_numpy_numeric(dtype): npt.assert_array_equal(result, array) +@pytest.mark.parametrize("dtype", [None, np.str_]) +def test_to_ndarray_numpy_ndarray_numpy_string(dtype): + """ + Test the _to_ndarray function with 1-D NumPy arrays of strings. + """ + array = np.array(["a", "b", "c"], dtype=dtype) + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + @pytest.mark.parametrize( "dtype", [ @@ -146,6 +157,26 @@ def test_to_ndarray_pandas_series_numeric_with_na(dtype): npt.assert_array_equal(result, np.array([1, np.nan, 3], dtype=np.float64)) +@pytest.mark.parametrize( + "dtype", + [ + # None, + # np.str_, + "string[python]", + pytest.param("string[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("string[pyarrow_numpy]", marks=skip_if_no(package="pyarrow")), + ], +) +def test_to_ndarray_pandas_series_string(dtype): + """ + Test the _to_ndarray function with pandas Series with string dtype. + """ + series = pd.Series(["a", "bcd", "12345"], dtype=dtype) + result = _to_ndarray(series) + _check_result(result) + npt.assert_array_equal(result, series) + + @pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") @pytest.mark.parametrize( "dtype", @@ -184,3 +215,14 @@ def test_to_ndarray_pyarrow_array_float16(): result = _to_ndarray(array) _check_result(result) npt.assert_array_equal(result, array) + + +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") +def test_to_ndarray_pyarrow_array_string(): + """ + Test the _to_ndarray function with pyarrow string array. + """ + array = pa.array(["a", "bcd", "12345"], type=pa.string()) + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) From ead07e4e874a509db9415e6a3039231092656fd2 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 3 Nov 2024 20:29:06 +0800 Subject: [PATCH 8/8] Add tests for datetime --- pygmt/tests/test_clib_to_ndarray.py | 72 +++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/pygmt/tests/test_clib_to_ndarray.py b/pygmt/tests/test_clib_to_ndarray.py index f67c87dc19c..b48bc548d6c 100644 --- a/pygmt/tests/test_clib_to_ndarray.py +++ b/pygmt/tests/test_clib_to_ndarray.py @@ -83,6 +83,35 @@ def test_to_ndarray_numpy_ndarray_numpy_string(dtype): npt.assert_array_equal(result, array) +@pytest.mark.parametrize( + "dtype", + [ + np.datetime64, + "datetime64[Y]", + "datetime64[M]", + "datetime64[W]", + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + "datetime64[ps]", + "datetime64[fs]", + "datetime64[as]", + ], +) +def test_to_ndarray_numpy_ndarray_numpy_datetime(dtype): + """ + Test the _to_ndarray function with 1-D NumPy arrays of datetime. + """ + array = np.array(["2024-01-01", "2024-01-02", "2024-01-03"], dtype=dtype) + result = _to_ndarray(array) + _check_result(result) + npt.assert_array_equal(result, array) + + @pytest.mark.parametrize( "dtype", [ @@ -177,6 +206,49 @@ def test_to_ndarray_pandas_series_string(dtype): npt.assert_array_equal(result, series) +@pytest.mark.parametrize( + "dtype", + [ + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + # pd.DatetimeTZDtype(tz="UTC"), + pytest.param("date32[day][pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("date64[ms][pyarrow]", marks=skip_if_no(package="pyarrow")), + ], +) +def test_to_ndarray_pandas_series_datetime(dtype): + """ + Test the _to_ndarray function with pandas Series with datetime dtype. + """ + series = pd.Series( + ["2024-01-01T00:00:00", "2024-01-02T00:00:00", "2024-01-03T00:00:00"], + dtype=dtype, + ) + result = _to_ndarray(series) + _check_result(result) + npt.assert_array_equal(result, series) + + +# @pytest.mark.parametrize( +# "dtype", +# [ +# pytest.param("time32[s][pyarrow]", marks=skip_if_no(package="pyarrow")), +# pytest.param("time32[ms][pyarrow]", marks=skip_if_no(package="pyarrow")), +# pytest.param("time64[us][pyarrow]", marks=skip_if_no(package="pyarrow")), +# pytest.param("time64[ns][pyarrow]", marks=skip_if_no(package="pyarrow")), +# ], +# ) +# def test_to_ndarray_pandas_series_time(dtype): +# """ +# Test the _to_ndarray function with pandas Series with time dtype. +# """ +# series = pd.Series(["00:00:00", "01:02:03", "23:59:59"], dtype=dtype) +# result = _to_ndarray(series) +# _check_result(result) + + @pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") @pytest.mark.parametrize( "dtype",