From a9b10d668d49170a66d5669da4e28a745acc003b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 5 Nov 2024 22:11:31 +0800 Subject: [PATCH 01/14] clib.conversion._to_numpy: Add tests for pandas.Series and pyarrow.array with pyarrow numeric dtypes --- pygmt/tests/test_clib_to_numpy.py | 146 ++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index 12b8c1d782d..c693a36f73b 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -11,6 +11,13 @@ from packaging.version import Version from pygmt.clib.conversion import _to_numpy +try: + import pyarrow as pa + + _HAS_PYARROW = True +except ImportError: + _HAS_PYARROW = False + def _check_result(result, expected_dtype): """ @@ -138,6 +145,11 @@ def test_to_numpy_ndarray_numpy_dtypes_numeric(dtype, expected_dtype): # - BooleanDtype # - ArrowDtype: a special dtype used to store data in the PyArrow format. # +# PyArrow dtypes can be specified using the following formats: +# +# - Prefixed with the name of the dtype and "[pyarrow]" (e.g., "int8[pyarrow]") +# - Specified using ``ArrowDType`` (e.g., "pd.ArrowDtype(pa.int8())") +# # References: # 1. https://pandas.pydata.org/docs/reference/arrays.html # 2. https://pandas.pydata.org/docs/user_guide/basics.html#basics-dtypes @@ -152,3 +164,137 @@ def test_to_numpy_pandas_series_numpy_dtypes_numeric(dtype, expected_dtype): result = _to_numpy(series) _check_result(result, expected_dtype) npt.assert_array_equal(result, series) + + +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") +@pytest.mark.parametrize( + ("dtype", "expected_dtype"), + [ + pytest.param("int8[pyarrow]", np.int8, id="int8[pyarrow]"), + pytest.param("int16[pyarrow]", np.int16, id="int16[pyarrow]"), + pytest.param("int32[pyarrow]", np.int32, id="int32[pyarrow]"), + pytest.param("int64[pyarrow]", np.int64, id="int64[pyarrow]"), + pytest.param("uint8[pyarrow]", np.uint8, id="uint8[pyarrow]"), + pytest.param("uint16[pyarrow]", np.uint16, id="uint16[pyarrow]"), + pytest.param("uint32[pyarrow]", np.uint32, id="uint32[pyarrow]"), + pytest.param("uint64[pyarrow]", np.uint64, id="uint64[pyarrow]"), + pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]"), + pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]"), + pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]"), + ], +) +def test_to_numpy_pandas_series_pyarrow_dtypes_numeric(dtype, expected_dtype): + """ + Test the _to_numpy function with pandas.Series of PyArrow numeric dtypes. + """ + series = pd.Series([1, 2, 3, 4, 5, 6], dtype=dtype)[::2] # Not C-contiguous + result = _to_numpy(series) + _check_result(result, expected_dtype) + npt.assert_array_equal(result, series) + + +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") +@pytest.mark.parametrize( + ("dtype", "expected_dtype"), + [ + pytest.param("int8[pyarrow]", np.float64, id="int8[pyarrow]"), + pytest.param("int16[pyarrow]", np.float64, id="int16[pyarrow]"), + pytest.param("int32[pyarrow]", np.float64, id="int32[pyarrow]"), + pytest.param("int64[pyarrow]", np.float64, id="int64[pyarrow]"), + pytest.param("uint8[pyarrow]", np.float64, id="uint8[pyarrow]"), + pytest.param("uint16[pyarrow]", np.float64, id="uint16[pyarrow]"), + pytest.param("uint32[pyarrow]", np.float64, id="uint32[pyarrow]"), + pytest.param("uint64[pyarrow]", np.float64, id="uint64[pyarrow]"), + # pytest.param("float16[pyarrow]", np.float64, id="float16[pyarrow]"), + pytest.param("float32[pyarrow]", np.float64, id="float32[pyarrow]"), + pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]"), + ], +) +def test_to_numpy_pandas_series_pyarrow_dtypes_numeric_with_na(dtype, expected_dtype): + """ + Test the _to_numpy function with pandas.Series of PyArrow numeric dtypes and NA. + """ + series = pd.Series([1, 2, pd.NA, 4, 5, 6], dtype=dtype)[::2] + assert series.isna().any() + result = _to_numpy(series) + _check_result(result, expected_dtype) + npt.assert_array_equal(result, np.array([1.0, np.nan, 5.0], dtype=expected_dtype)) + + +######################################################################################## +# Test the _to_numpy function with PyArrow arrays. +# +# PyArrow provides the following dtypes: +# +# - Numeric dtypes: +# - int8, int16, int32, int64 +# - uint8, uint16, uint32, uint64 +# - float16, float32, float64 +# +# In PyArrow, array types can be specified in two ways: +# +# - Using string aliases (e.g., "int8") +# - Using pyarrow.DataType (e.g., ``pa.int8()``) +# +# Reference: https://arrow.apache.org/docs/python/api/datatypes.html +######################################################################################## +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") +@pytest.mark.parametrize( + ("dtype", "expected_dtype"), + [ + pytest.param("int8", np.int8, id="int8"), + pytest.param("int16", np.int16, id="int16"), + pytest.param("int32", np.int32, id="int32"), + pytest.param("int64", np.int64, id="int64"), + pytest.param("uint8", np.uint8, id="uint8"), + pytest.param("uint16", np.uint16, id="uint16"), + pytest.param("uint32", np.uint32, id="uint32"), + pytest.param("uint64", np.uint64, id="uint64"), + pytest.param("float16", np.float16, id="float16"), + pytest.param("float32", np.float32, id="float32"), + pytest.param("float64", np.float64, id="float64"), + ], +) +def test_to_numpy_pyarrow_array_pyarrow_dtypes_numeric(dtype, expected_dtype): + """ + Test the _to_numpy function with PyArrow arrays of PyArrow numeric dtypes. + """ + data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + if dtype == "float16": # float16 needs special handling + # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + data = np.array(data, dtype=np.float16) + array = pa.array(data, type=dtype)[::2] + result = _to_numpy(array) + _check_result(result, expected_dtype) + npt.assert_array_equal(result, array) + + +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") +@pytest.mark.parametrize( + ("dtype", "expected_dtype"), + [ + pytest.param("int8", np.float64, id="int8"), + pytest.param("int16", np.float64, id="int16"), + pytest.param("int32", np.float64, id="int32"), + pytest.param("int64", np.float64, id="int64"), + pytest.param("uint8", np.float64, id="uint8"), + pytest.param("uint16", np.float64, id="uint16"), + pytest.param("uint32", np.float64, id="uint32"), + pytest.param("uint64", np.float64, id="uint64"), + pytest.param("float16", np.float16, id="float16"), + pytest.param("float32", np.float32, id="float32"), + pytest.param("float64", np.float64, id="float64"), + ], +) +def test_to_numpy_pyarrow_array_pyarrow_dtypes_numeric_with_na(dtype, expected_dtype): + """ + Test the _to_numpy function with PyArrow arrays of PyArrow numeric dtypes and NA. + """ + data = [1.0, 2.0, None, 4.0, 5.0, 6.0] + if dtype == "float16": # float16 needs special handling + # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + data = np.array(data, dtype=np.float16) + array = pa.array(data, type=dtype)[::2] + result = _to_numpy(array) + _check_result(result, expected_dtype) + npt.assert_array_equal(result, array) From 54d1a37c17564e07fc78aea667823f9997466139 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 7 Nov 2024 08:19:38 +0800 Subject: [PATCH 02/14] Add workarounds for pandas nullable dtypes prior pandas v2.1 --- pygmt/clib/conversion.py | 43 ++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index af8eb3458d4..b93822e2d4c 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -162,19 +162,36 @@ def _to_numpy(data: Any) -> np.ndarray: "date64[ms][pyarrow]": np.datetime64, } - if ( - hasattr(data, "isna") - and data.isna().any() - and Version(pd.__version__) < Version("2.2") - ): - # Workaround for dealing with pd.NA with pandas < 2.2. - # Bug report at: https://github.com/GenericMappingTools/pygmt/issues/2844 - # Following SPEC0, pandas 2.1 will be dropped in 2025 Q3, so it's likely - # we can remove the workaround in PyGMT v0.17.0. - array = np.ascontiguousarray(data.astype(float)) - else: - vec_dtype = str(getattr(data, "dtype", "")) - array = np.ascontiguousarray(data, dtype=dtypes.get(vec_dtype)) + # pandas nullable dtypes and pyarrow types were converted to np.object_ dtype + # before, and are converted to suitable numpy dtypes since pandas 2.2. + # Refer to the following link for details: + # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#to-numpy-for-numpy-nullable-and-arrow-types-converts-to-suitable-numpy-dtype + # Here are the workarounds for pandas < 2.2. + # Following SPEC 0, pandas 2.1 should be dropped in 2025 Q3, so it's likely we can + # remove the workaround in PyGMT v0.17.0. + if Version(pd.__version__) < Version("2.2"): + dtypes.update( + { + "Int8": np.int8, + "Int16": np.int16, + "Int32": np.int32, + "Int64": np.int64, + "UInt8": np.uint8, + "UInt16": np.uint16, + "UInt32": np.uint32, + "UInt64": np.uint64, + "Float32": np.float32, + "Float64": np.float64, + } + ) + if hasattr(data, "isna") and data.isna().any(): + # Integer dtypes with missing values are cast to NumPy float dtypes and NaN + # is used as missing value indicator. + dtype = np.float64 if data.dtype.kind in "iu" else data.dtype.numpy_dtype + data = data.to_numpy(dtype=dtype, na_value=np.nan) + + vec_dtype = str(getattr(data, "dtype", "")) + array = np.ascontiguousarray(data, dtype=dtypes.get(vec_dtype)) return array From 50e6872a02f365872e893a24f0d0580cdb768117 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 7 Nov 2024 23:36:57 +0800 Subject: [PATCH 03/14] Install pandas 2.0 + pyarrow in the Python 3.11 job --- .github/workflows/ci_tests.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci_tests.yaml b/.github/workflows/ci_tests.yaml index bcdd3d14304..1aec8e29220 100644 --- a/.github/workflows/ci_tests.yaml +++ b/.github/workflows/ci_tests.yaml @@ -85,9 +85,9 @@ jobs: - os: 'ubuntu-latest' python-version: '3.11' # Can't be 3.10 or 3.12. numpy-version: '1.24' - pandas-version: '' + pandas-version: '=2.0' xarray-version: '' - optional-packages: ' geopandas<1' + optional-packages: ' geopandas<1 pyarrow' timeout-minutes: 30 defaults: From 1317e11655d97145fbe2c7659b9301f5de91f071 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 7 Nov 2024 23:40:48 +0800 Subject: [PATCH 04/14] float32[pyarrow] with NA should be mapped to float32 --- pygmt/tests/test_clib_to_numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index c693a36f73b..d08337969a1 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -206,7 +206,7 @@ def test_to_numpy_pandas_series_pyarrow_dtypes_numeric(dtype, expected_dtype): pytest.param("uint32[pyarrow]", np.float64, id="uint32[pyarrow]"), pytest.param("uint64[pyarrow]", np.float64, id="uint64[pyarrow]"), # pytest.param("float16[pyarrow]", np.float64, id="float16[pyarrow]"), - pytest.param("float32[pyarrow]", np.float64, id="float32[pyarrow]"), + pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]"), pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]"), ], ) From e09aa75f78f4a99a44b6d62bd8c44adba0bb2896 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 7 Nov 2024 23:41:06 +0800 Subject: [PATCH 05/14] Enable the test for float16[pyarrow] --- pygmt/tests/test_clib_to_numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index d08337969a1..7a433ce570d 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -205,7 +205,7 @@ def test_to_numpy_pandas_series_pyarrow_dtypes_numeric(dtype, expected_dtype): pytest.param("uint16[pyarrow]", np.float64, id="uint16[pyarrow]"), pytest.param("uint32[pyarrow]", np.float64, id="uint32[pyarrow]"), pytest.param("uint64[pyarrow]", np.float64, id="uint64[pyarrow]"), - # pytest.param("float16[pyarrow]", np.float64, id="float16[pyarrow]"), + pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]"), pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]"), pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]"), ], From 06e6958c6785df971ca01e31940543f0a2adf404 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 8 Nov 2024 00:11:06 +0800 Subject: [PATCH 06/14] Use data.to_numpy(na_value=np.nan).astype(dtype=dtype) for float16 --- pygmt/clib/conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index b93822e2d4c..acf43782a73 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -188,7 +188,7 @@ def _to_numpy(data: Any) -> np.ndarray: # Integer dtypes with missing values are cast to NumPy float dtypes and NaN # is used as missing value indicator. dtype = np.float64 if data.dtype.kind in "iu" else data.dtype.numpy_dtype - data = data.to_numpy(dtype=dtype, na_value=np.nan) + data = data.to_numpy(na_value=np.nan).astype(dtype=dtype) vec_dtype = str(getattr(data, "dtype", "")) array = np.ascontiguousarray(data, dtype=dtypes.get(vec_dtype)) From 94f335bcad19c81de49e3dc658c6c90ee707c73f Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 8 Nov 2024 00:28:08 +0800 Subject: [PATCH 07/14] Add special handling for float16 --- pygmt/tests/test_clib_to_numpy.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index 7a433ce570d..6540eef4bdc 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -187,7 +187,12 @@ def test_to_numpy_pandas_series_pyarrow_dtypes_numeric(dtype, expected_dtype): """ Test the _to_numpy function with pandas.Series of PyArrow numeric dtypes. """ - series = pd.Series([1, 2, 3, 4, 5, 6], dtype=dtype)[::2] # Not C-contiguous + data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"): + # float16 needs special handling for pandas < 2.2. + # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + data = np.array(data, dtype=np.float16) + series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous result = _to_numpy(series) _check_result(result, expected_dtype) npt.assert_array_equal(result, series) @@ -214,7 +219,12 @@ def test_to_numpy_pandas_series_pyarrow_dtypes_numeric_with_na(dtype, expected_d """ Test the _to_numpy function with pandas.Series of PyArrow numeric dtypes and NA. """ - series = pd.Series([1, 2, pd.NA, 4, 5, 6], dtype=dtype)[::2] + data = [1.0, 2.0, None, 4.0, 5.0, 6.0] + if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"): + # float16 needs special handling for pandas < 2.2. + # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + data = np.array(data, dtype=np.float16) + series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous assert series.isna().any() result = _to_numpy(series) _check_result(result, expected_dtype) From bba729642bbb7a3e14c067274712be800a5377d0 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 8 Nov 2024 11:51:54 +0800 Subject: [PATCH 08/14] Remove tests for pyarrow.array. Already in PR #3599 --- pygmt/tests/test_clib_to_numpy.py | 79 ------------------------------- 1 file changed, 79 deletions(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index 6540eef4bdc..6e51d0c9f2a 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -229,82 +229,3 @@ def test_to_numpy_pandas_series_pyarrow_dtypes_numeric_with_na(dtype, expected_d result = _to_numpy(series) _check_result(result, expected_dtype) npt.assert_array_equal(result, np.array([1.0, np.nan, 5.0], dtype=expected_dtype)) - - -######################################################################################## -# Test the _to_numpy function with PyArrow arrays. -# -# PyArrow provides the following dtypes: -# -# - Numeric dtypes: -# - int8, int16, int32, int64 -# - uint8, uint16, uint32, uint64 -# - float16, float32, float64 -# -# In PyArrow, array types can be specified in two ways: -# -# - Using string aliases (e.g., "int8") -# - Using pyarrow.DataType (e.g., ``pa.int8()``) -# -# Reference: https://arrow.apache.org/docs/python/api/datatypes.html -######################################################################################## -@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") -@pytest.mark.parametrize( - ("dtype", "expected_dtype"), - [ - pytest.param("int8", np.int8, id="int8"), - pytest.param("int16", np.int16, id="int16"), - pytest.param("int32", np.int32, id="int32"), - pytest.param("int64", np.int64, id="int64"), - pytest.param("uint8", np.uint8, id="uint8"), - pytest.param("uint16", np.uint16, id="uint16"), - pytest.param("uint32", np.uint32, id="uint32"), - pytest.param("uint64", np.uint64, id="uint64"), - pytest.param("float16", np.float16, id="float16"), - pytest.param("float32", np.float32, id="float32"), - pytest.param("float64", np.float64, id="float64"), - ], -) -def test_to_numpy_pyarrow_array_pyarrow_dtypes_numeric(dtype, expected_dtype): - """ - Test the _to_numpy function with PyArrow arrays of PyArrow numeric dtypes. - """ - data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] - if dtype == "float16": # float16 needs special handling - # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html - data = np.array(data, dtype=np.float16) - array = pa.array(data, type=dtype)[::2] - result = _to_numpy(array) - _check_result(result, expected_dtype) - npt.assert_array_equal(result, array) - - -@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") -@pytest.mark.parametrize( - ("dtype", "expected_dtype"), - [ - pytest.param("int8", np.float64, id="int8"), - pytest.param("int16", np.float64, id="int16"), - pytest.param("int32", np.float64, id="int32"), - pytest.param("int64", np.float64, id="int64"), - pytest.param("uint8", np.float64, id="uint8"), - pytest.param("uint16", np.float64, id="uint16"), - pytest.param("uint32", np.float64, id="uint32"), - pytest.param("uint64", np.float64, id="uint64"), - pytest.param("float16", np.float16, id="float16"), - pytest.param("float32", np.float32, id="float32"), - pytest.param("float64", np.float64, id="float64"), - ], -) -def test_to_numpy_pyarrow_array_pyarrow_dtypes_numeric_with_na(dtype, expected_dtype): - """ - Test the _to_numpy function with PyArrow arrays of PyArrow numeric dtypes and NA. - """ - data = [1.0, 2.0, None, 4.0, 5.0, 6.0] - if dtype == "float16": # float16 needs special handling - # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html - data = np.array(data, dtype=np.float16) - array = pa.array(data, type=dtype)[::2] - result = _to_numpy(array) - _check_result(result, expected_dtype) - npt.assert_array_equal(result, array) From 41509930e99ce0104c63535c163c2249073eacd3 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 9 Nov 2024 16:47:05 +0800 Subject: [PATCH 09/14] Change dtype to type for PyArrow --- pygmt/tests/test_clib_to_numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index 5921c69498f..c00c3326c44 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -145,7 +145,7 @@ def test_to_numpy_ndarray_numpy_dtypes_numeric(dtype, expected_dtype): # - BooleanDtype # - ArrowDtype: a special dtype used to store data in the PyArrow format. # -# PyArrow dtypes can be specified using the following formats: +# In pandas, PyArrow types can be specified using the following formats: # # - Prefixed with the name of the dtype and "[pyarrow]" (e.g., "int8[pyarrow]") # - Specified using ``ArrowDType`` (e.g., "pd.ArrowDtype(pa.int8())") From b947b83b0c3a96e09b7e83858e659f0681e5c954 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 9 Nov 2024 22:06:55 +0800 Subject: [PATCH 10/14] Revert "Install pandas 2.0 + pyarrow in the Python 3.11 job" This reverts commit 50e6872a02f365872e893a24f0d0580cdb768117. --- .github/workflows/ci_tests.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci_tests.yaml b/.github/workflows/ci_tests.yaml index 1aec8e29220..bcdd3d14304 100644 --- a/.github/workflows/ci_tests.yaml +++ b/.github/workflows/ci_tests.yaml @@ -85,9 +85,9 @@ jobs: - os: 'ubuntu-latest' python-version: '3.11' # Can't be 3.10 or 3.12. numpy-version: '1.24' - pandas-version: '=2.0' + pandas-version: '' xarray-version: '' - optional-packages: ' geopandas<1 pyarrow' + optional-packages: ' geopandas<1' timeout-minutes: 30 defaults: From 98dfb29ea7be841ecbb41d57edb8847b8d1b2f0c Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 10 Nov 2024 12:53:29 +0800 Subject: [PATCH 11/14] Format --- pygmt/tests/test_clib_to_numpy.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index c343f7818e2..b0ab559ff7e 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -165,6 +165,7 @@ def test_to_numpy_pandas_series_numpy_dtypes_numeric(dtype, expected_dtype): _check_result(result, expected_dtype) npt.assert_array_equal(result, series) + @pytest.mark.parametrize( ("dtype", "expected_dtype"), [ @@ -189,6 +190,7 @@ def test_to_numpy_pandas_series_pandas_dtypes_numeric(dtype, expected_dtype): _check_result(result, expected_dtype) npt.assert_array_equal(result, series) + @pytest.mark.parametrize( ("dtype", "expected_dtype"), [ From 10526dadea0f2c0b189c36a3770c9760b649a3ee Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 7 Nov 2024 23:36:57 +0800 Subject: [PATCH 12/14] Install pandas 2.0 + pyarrow in the Python 3.11 job --- .github/workflows/ci_tests.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci_tests.yaml b/.github/workflows/ci_tests.yaml index bcdd3d14304..1aec8e29220 100644 --- a/.github/workflows/ci_tests.yaml +++ b/.github/workflows/ci_tests.yaml @@ -85,9 +85,9 @@ jobs: - os: 'ubuntu-latest' python-version: '3.11' # Can't be 3.10 or 3.12. numpy-version: '1.24' - pandas-version: '' + pandas-version: '=2.0' xarray-version: '' - optional-packages: ' geopandas<1' + optional-packages: ' geopandas<1 pyarrow' timeout-minutes: 30 defaults: From c504bf2dcebcae1c0ed9d858ef35655b871087cb Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 12 Nov 2024 07:35:22 +0800 Subject: [PATCH 13/14] Merge pyarrow-backed tests into pandas ones --- pygmt/tests/test_clib_to_numpy.py | 99 +++++++++++-------------------- 1 file changed, 36 insertions(+), 63 deletions(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index b0ab559ff7e..dbefa5953db 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -10,6 +10,7 @@ import pytest from packaging.version import Version from pygmt.clib.conversion import _to_numpy +from pygmt.tests.helpers import skip_if_no try: import pyarrow as pa @@ -18,6 +19,9 @@ except ImportError: _HAS_PYARROW = False +# Mark tests that require pyarrow +pa_marks = {"marks": skip_if_no(package="pyarrow")} + def _check_result(result, expected_dtype): """ @@ -179,13 +183,30 @@ def test_to_numpy_pandas_series_numpy_dtypes_numeric(dtype, expected_dtype): pytest.param(pd.UInt64Dtype(), np.uint64, id="UInt64"), pytest.param(pd.Float32Dtype(), np.float32, id="Float32"), pytest.param(pd.Float64Dtype(), np.float64, id="Float64"), + pytest.param("int8[pyarrow]", np.int8, id="int8[pyarrow]", **pa_marks), + pytest.param("int16[pyarrow]", np.int16, id="int16[pyarrow]", **pa_marks), + pytest.param("int32[pyarrow]", np.int32, id="int32[pyarrow]", **pa_marks), + pytest.param("int64[pyarrow]", np.int64, id="int64[pyarrow]", **pa_marks), + pytest.param("uint8[pyarrow]", np.uint8, id="uint8[pyarrow]", **pa_marks), + pytest.param("uint16[pyarrow]", np.uint16, id="uint16[pyarrow]", **pa_marks), + pytest.param("uint32[pyarrow]", np.uint32, id="uint32[pyarrow]", **pa_marks), + pytest.param("uint64[pyarrow]", np.uint64, id="uint64[pyarrow]", **pa_marks), + pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks), + pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks), + pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks), ], ) def test_to_numpy_pandas_series_pandas_dtypes_numeric(dtype, expected_dtype): """ - Test the _to_numpy function with pandas.Series of pandas numeric dtypes. + Test the _to_numpy function with pandas.Series of pandas/PyArrow numeric dtypes. """ - series = pd.Series([1, 2, 3, 4, 5, 6], dtype=dtype)[::2] # Not C-contiguous + data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"): + # float16 needs special handling for pandas < 2.2. + # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html + data = np.array(data, dtype=np.float16) + + series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous result = _to_numpy(series) _check_result(result, expected_dtype) npt.assert_array_equal(result, series) @@ -204,71 +225,23 @@ def test_to_numpy_pandas_series_pandas_dtypes_numeric(dtype, expected_dtype): pytest.param(pd.UInt64Dtype(), np.float64, id="UInt64"), pytest.param(pd.Float32Dtype(), np.float32, id="Float32"), pytest.param(pd.Float64Dtype(), np.float64, id="Float64"), + pytest.param("int8[pyarrow]", np.float64, id="int8[pyarrow]", **pa_marks), + pytest.param("int16[pyarrow]", np.float64, id="int16[pyarrow]", **pa_marks), + pytest.param("int32[pyarrow]", np.float64, id="int32[pyarrow]", **pa_marks), + pytest.param("int64[pyarrow]", np.float64, id="int64[pyarrow]", **pa_marks), + pytest.param("uint8[pyarrow]", np.float64, id="uint8[pyarrow]", **pa_marks), + pytest.param("uint16[pyarrow]", np.float64, id="uint16[pyarrow]", **pa_marks), + pytest.param("uint32[pyarrow]", np.float64, id="uint32[pyarrow]", **pa_marks), + pytest.param("uint64[pyarrow]", np.float64, id="uint64[pyarrow]", **pa_marks), + pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks), + pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks), + pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks), ], ) def test_to_numpy_pandas_series_pandas_dtypes_numeric_with_na(dtype, expected_dtype): """ - Test the _to_numpy function with pandas.Series of pandas numeric dtypes and NA. - """ - series = pd.Series([1, 2, pd.NA, 4, 5, 6], dtype=dtype)[::2] # Not C-contiguous - assert series.isna().any() - result = _to_numpy(series) - _check_result(result, expected_dtype) - npt.assert_array_equal(result, np.array([1.0, np.nan, 5.0], dtype=expected_dtype)) - - -@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") -@pytest.mark.parametrize( - ("dtype", "expected_dtype"), - [ - pytest.param("int8[pyarrow]", np.int8, id="int8[pyarrow]"), - pytest.param("int16[pyarrow]", np.int16, id="int16[pyarrow]"), - pytest.param("int32[pyarrow]", np.int32, id="int32[pyarrow]"), - pytest.param("int64[pyarrow]", np.int64, id="int64[pyarrow]"), - pytest.param("uint8[pyarrow]", np.uint8, id="uint8[pyarrow]"), - pytest.param("uint16[pyarrow]", np.uint16, id="uint16[pyarrow]"), - pytest.param("uint32[pyarrow]", np.uint32, id="uint32[pyarrow]"), - pytest.param("uint64[pyarrow]", np.uint64, id="uint64[pyarrow]"), - pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]"), - pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]"), - pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]"), - ], -) -def test_to_numpy_pandas_series_pyarrow_dtypes_numeric(dtype, expected_dtype): - """ - Test the _to_numpy function with pandas.Series of PyArrow numeric dtypes. - """ - data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] - if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"): - # float16 needs special handling for pandas < 2.2. - # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html - data = np.array(data, dtype=np.float16) - series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous - result = _to_numpy(series) - _check_result(result, expected_dtype) - npt.assert_array_equal(result, series) - - -@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") -@pytest.mark.parametrize( - ("dtype", "expected_dtype"), - [ - pytest.param("int8[pyarrow]", np.float64, id="int8[pyarrow]"), - pytest.param("int16[pyarrow]", np.float64, id="int16[pyarrow]"), - pytest.param("int32[pyarrow]", np.float64, id="int32[pyarrow]"), - pytest.param("int64[pyarrow]", np.float64, id="int64[pyarrow]"), - pytest.param("uint8[pyarrow]", np.float64, id="uint8[pyarrow]"), - pytest.param("uint16[pyarrow]", np.float64, id="uint16[pyarrow]"), - pytest.param("uint32[pyarrow]", np.float64, id="uint32[pyarrow]"), - pytest.param("uint64[pyarrow]", np.float64, id="uint64[pyarrow]"), - pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]"), - pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]"), - pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]"), - ], -) -def test_to_numpy_pandas_series_pyarrow_dtypes_numeric_with_na(dtype, expected_dtype): - """ - Test the _to_numpy function with pandas.Series of PyArrow numeric dtypes and NA. + Test the _to_numpy function with pandas.Series of pandas/PyArrow numeric dtypes and + missing values (NA). """ data = [1.0, 2.0, None, 4.0, 5.0, 6.0] if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"): From cf397b712bc71757bfb47f75bb31bb9458b2e32f Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 12 Nov 2024 08:01:55 +0800 Subject: [PATCH 14/14] Fix a typo --- pygmt/tests/test_clib_to_numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index dbefa5953db..e77dd53ea4a 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -10,7 +10,7 @@ import pytest from packaging.version import Version from pygmt.clib.conversion import _to_numpy -from pygmt.tests.helpers import skip_if_no +from pygmt.helpers.testing import skip_if_no try: import pyarrow as pa