|
| 1 | +""" |
| 2 | +Tests for the _to_numpy function in the clib.conversion module. |
| 3 | +""" |
| 4 | + |
| 5 | +import sys |
| 6 | +from datetime import date, datetime |
| 7 | + |
| 8 | +import numpy as np |
| 9 | +import numpy.testing as npt |
| 10 | +import pandas as pd |
| 11 | +import pytest |
| 12 | +from packaging.version import Version |
| 13 | +from pygmt.clib.conversion import _to_numpy |
| 14 | +from pygmt.helpers.testing import skip_if_no |
| 15 | + |
# Module-wide marker: every test in this file is skipped when running on macOS.
pytestmark = pytest.mark.skipif(
    sys.platform == "darwin",
    reason="For unknown reasons, tests in the file cause unrelated failures on macOS.",
)

# pyarrow is optional; remember whether the import succeeded so that the tests
# needing it can be skipped when it is missing.
try:
    import pyarrow as pa
except ImportError:
    _HAS_PYARROW = False
else:
    _HAS_PYARROW = True
| 28 | + |
| 29 | + |
def _check_result(result, expected_dtype):
    """
    Assert that ``result`` is a C-contiguous NumPy array whose scalar type equals
    ``expected_dtype``.

    Used by the tests in this module to validate what _to_numpy returns.
    """
    assert isinstance(result, np.ndarray)
    assert result.flags["C_CONTIGUOUS"]
    assert result.dtype.type == expected_dtype
| 38 | + |
| 39 | + |
| 40 | +######################################################################################## |
| 41 | +# Test the _to_numpy function with Python built-in types. |
| 42 | +######################################################################################## |
@pytest.mark.parametrize(
    ("data", "expected_dtype"),
    [
        pytest.param(
            [1, 2, 3],
            # int32 is expected only on Windows with NumPy older than 2.0.
            np.int64
            if sys.platform != "win32" or Version(np.__version__) >= Version("2.0")
            else np.int32,
            id="int",
        ),
        pytest.param([1.0, 2.0, 3.0], np.float64, id="float"),
        pytest.param(
            [complex(+1), complex(-2j), complex("-Infinity+NaNj")],
            np.complex128,
            id="complex",
        ),
        pytest.param(["abc", "defg", "12345"], np.str_, id="string"),
    ],
)
def test_to_numpy_python_types(data, expected_dtype):
    """
    Check that _to_numpy converts lists of Python built-in scalars (int, float,
    complex, str) into arrays with the expected dtype and unchanged values.
    """
    converted = _to_numpy(data)
    _check_result(converted, expected_dtype)
    npt.assert_array_equal(converted, data)
| 69 | + |
| 70 | + |
| 71 | +######################################################################################## |
| 72 | +# Test the _to_numpy function with NumPy arrays. |
| 73 | +# |
| 74 | +# There are 24 fundamental dtypes in NumPy. Not all of them are supported by PyGMT. |
| 75 | +# |
| 76 | +# - Numeric dtypes: |
| 77 | +# - int8, int16, int32, int64, longlong |
| 78 | +# - uint8, uint16, uint32, uint64, ulonglong |
| 79 | +# - float16, float32, float64, longdouble |
| 80 | +# - complex64, complex128, clongdouble |
| 81 | +# - bool |
| 82 | +# - datetime64, timedelta64 |
| 83 | +# - str_ |
| 84 | +# - bytes_ |
| 85 | +# - object_ |
| 86 | +# - void |
| 87 | +# |
| 88 | +# Reference: https://numpy.org/doc/2.1/reference/arrays.scalars.html |
| 89 | +######################################################################################## |
# Each entry pairs an input NumPy numeric dtype with the dtype expected back from
# _to_numpy; for every dtype listed here the two are identical. The test id is the
# dtype's attribute name on the numpy module.
np_dtype_params = [
    pytest.param(getattr(np, name), getattr(np, name), id=name)
    for name in (
        "int8",
        "int16",
        "int32",
        "int64",
        "longlong",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "ulonglong",
        "float16",
        "float32",
        "float64",
        "longdouble",
        "complex64",
        "complex128",
        "clongdouble",
    )
]
| 109 | + |
| 110 | + |
@pytest.mark.parametrize(("dtype", "expected_dtype"), np_dtype_params)
def test_to_numpy_ndarray_numpy_dtypes_numeric(dtype, expected_dtype):
    """
    Check _to_numpy on NumPy arrays of NumPy numeric dtypes.

    Both a 1-D and a 2-D strided view are used so that the inputs are not
    C-contiguous.
    """
    strided_views = (
        # 1-D: every other element.
        np.array([1, 2, 3, 4, 5, 6], dtype=dtype)[::2],
        # 2-D: every other row and every other column.
        np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype)[::2, ::2],
    )
    for view in strided_views:
        # Guard against the input accidentally being contiguous already.
        assert view.flags.c_contiguous is False
        converted = _to_numpy(view)
        _check_result(converted, expected_dtype)
        npt.assert_array_equal(converted, view, strict=True)
| 131 | + |
| 132 | + |
@pytest.mark.parametrize("dtype", [None, np.str_, "U10"])
def test_to_numpy_ndarray_numpy_dtypes_string(dtype):
    """
    Check _to_numpy on NumPy string arrays created with several dtype spellings.
    """
    strings = np.array(["abc", "defg", "12345"], dtype=dtype)
    converted = _to_numpy(strings)
    _check_result(converted, np.str_)
    npt.assert_array_equal(converted, strings)
| 142 | + |
| 143 | + |
| 144 | +######################################################################################## |
| 145 | +# Test the _to_numpy function with pandas.Series. |
| 146 | +# |
| 147 | +# In pandas, dtype can be specified by |
| 148 | +# |
| 149 | +# 1. NumPy dtypes (see above) |
| 150 | +# 2. pandas dtypes |
| 151 | +# 3. PyArrow types (see below) |
| 152 | +# |
| 153 | +# pandas provides the following dtypes:
| 154 | +# |
| 155 | +# - Numeric dtypes: |
| 156 | +# - Int8, Int16, Int32, Int64 |
| 157 | +# - UInt8, UInt16, UInt32, UInt64 |
| 158 | +# - Float32, Float64 |
| 159 | +# - DatetimeTZDtype |
| 160 | +# - PeriodDtype |
| 161 | +# - IntervalDtype |
| 162 | +# - StringDtype |
| 163 | +# - CategoricalDtype |
| 164 | +# - SparseDtype |
| 165 | +# - BooleanDtype |
| 166 | +# - ArrowDtype: a special dtype used to store data in the PyArrow format. |
| 167 | +# |
| 168 | +# References: |
| 169 | +# 1. https://pandas.pydata.org/docs/reference/arrays.html |
| 170 | +# 2. https://pandas.pydata.org/docs/user_guide/basics.html#basics-dtypes |
| 171 | +# 3. https://pandas.pydata.org/docs/user_guide/pyarrow.html |
| 172 | +######################################################################################## |
@pytest.mark.parametrize(("dtype", "expected_dtype"), np_dtype_params)
def test_to_numpy_pandas_series_numpy_dtypes_numeric(dtype, expected_dtype):
    """
    Check _to_numpy on a pandas.Series backed by NumPy numeric dtypes.
    """
    # Take every other element of the series (intended to be not C-contiguous).
    strided = pd.Series([1, 2, 3, 4, 5, 6], dtype=dtype)[::2]
    converted = _to_numpy(strided)
    _check_result(converted, expected_dtype)
    npt.assert_array_equal(converted, strided)
| 182 | + |
| 183 | + |
@pytest.mark.parametrize(
    "dtype",
    [
        None,
        np.str_,
        "U10",
        "string[python]",
        pytest.param("string[pyarrow]", marks=skip_if_no(package="pyarrow")),
        pytest.param(
            "string[pyarrow_numpy]",
            marks=[
                skip_if_no(package="pyarrow"),
                pytest.mark.skipif(
                    Version(pd.__version__) < Version("2.1"),
                    reason="string[pyarrow_numpy] was added since pandas 2.1",
                ),
            ],
        ),
    ],
)
def test_to_numpy_pandas_series_pandas_dtypes_string(dtype):
    """
    Check _to_numpy on a pandas.Series holding strings.

    pandas accepts several spellings for string dtypes; each one is exercised here.

    Reference: https://pandas.pydata.org/docs/reference/api/pandas.StringDtype.html
    """
    series = pd.Series(["abc", "defg", "12345"], dtype=dtype)
    converted = _to_numpy(series)
    _check_result(converted, np.str_)
    npt.assert_array_equal(converted, series)
| 216 | + |
| 217 | + |
@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        pytest.param("date32[day][pyarrow]", "datetime64[D]", id="date32[day]"),
        pytest.param("date64[ms][pyarrow]", "datetime64[ms]", id="date64[ms]"),
    ],
)
def test_to_numpy_pandas_series_pyarrow_dtypes_date(dtype, expected_dtype):
    """
    Check _to_numpy on a pandas.Series using the PyArrow date32/date64 dtypes.
    """
    dates = pd.Series(pd.date_range(start="2024-01-01", periods=3), dtype=dtype)
    expected = np.array(
        ["2024-01-01", "2024-01-02", "2024-01-03"], dtype=expected_dtype
    )

    converted = _to_numpy(dates)
    _check_result(converted, np.datetime64)
    # _check_result only looks at the scalar type, so verify the date unit too.
    assert converted.dtype == expected_dtype
    npt.assert_array_equal(converted, expected)
| 238 | + |
| 239 | + |
| 240 | +######################################################################################## |
| 241 | +# Test the _to_numpy function with PyArrow arrays. |
| 242 | +# |
| 243 | +# PyArrow provides the following types: |
| 244 | +# |
| 245 | +# - Numeric types: |
| 246 | +# - int8, int16, int32, int64 |
| 247 | +# - uint8, uint16, uint32, uint64 |
| 248 | +# - float16, float32, float64 |
| 249 | +# - String types: string/utf8, large_string/large_utf8, string_view |
| 250 | +# - Date types: |
| 251 | +# - date32[day] |
| 252 | +# - date64[ms] |
| 253 | +# |
| 254 | +# In PyArrow, array types can be specified in two ways: |
| 255 | +# |
| 256 | +# - Using string aliases (e.g., "int8") |
| 257 | +# - Using pyarrow.DataType (e.g., ``pa.int8()``) |
| 258 | +# |
| 259 | +# Reference: https://arrow.apache.org/docs/python/api/datatypes.html |
| 260 | +######################################################################################## |
@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        # The PyArrow type alias and the expected NumPy dtype share the same name.
        pytest.param(name, getattr(np, name), id=name)
        for name in (
            "int8",
            "int16",
            "int32",
            "int64",
            "uint8",
            "uint16",
            "uint32",
            "uint64",
            "float16",
            "float32",
            "float64",
        )
    ],
)
def test_to_numpy_pyarrow_array_pyarrow_dtypes_numeric(dtype, expected_dtype):
    """
    Check _to_numpy on PyArrow arrays of PyArrow numeric types.
    """
    values = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
    # float16 needs special handling: the values are passed as a NumPy float16 array.
    # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html
    source = np.array(values, dtype=np.float16) if dtype == "float16" else values
    pa_array = pa.array(source, type=dtype)[::2]
    converted = _to_numpy(pa_array)
    _check_result(converted, expected_dtype)
    npt.assert_array_equal(converted, pa_array)
| 290 | + |
| 291 | + |
@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        # Every integer type is expected to come back as float64 ...
        *(
            pytest.param(name, np.float64, id=name)
            for name in (
                "int8",
                "int16",
                "int32",
                "int64",
                "uint8",
                "uint16",
                "uint32",
                "uint64",
            )
        ),
        # ... while the floating-point types are expected to keep their own dtype.
        pytest.param("float16", np.float16, id="float16"),
        pytest.param("float32", np.float32, id="float32"),
        pytest.param("float64", np.float64, id="float64"),
    ],
)
def test_to_numpy_pyarrow_array_pyarrow_dtypes_numeric_with_na(dtype, expected_dtype):
    """
    Check _to_numpy on PyArrow arrays of PyArrow numeric types that contain NA.
    """
    values = [1.0, 2.0, None, 4.0, 5.0, 6.0]
    # float16 needs special handling: the values are passed as a NumPy float16 array.
    # Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html
    source = np.array(values, dtype=np.float16) if dtype == "float16" else values
    # The [::2] slice keeps the missing value at index 2 in the array under test.
    pa_array = pa.array(source, type=dtype)[::2]
    converted = _to_numpy(pa_array)
    _check_result(converted, expected_dtype)
    npt.assert_array_equal(converted, pa_array)
| 321 | + |
| 322 | + |
@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    "dtype",
    [
        None,
        "string",
        "utf8",  # alias for string
        "large_string",
        "large_utf8",  # alias for large_string
        "string_view",
    ],
)
def test_to_numpy_pyarrow_array_pyarrow_dtypes_string(dtype):
    """
    Check _to_numpy on PyArrow arrays of PyArrow string types.
    """
    pa_strings = pa.array(["abc", "defg", "12345"], type=dtype)
    converted = _to_numpy(pa_strings)
    _check_result(converted, np.str_)
    npt.assert_array_equal(converted, pa_strings)
| 343 | + |
| 344 | + |
@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        pytest.param("date32[day]", "datetime64[D]", id="date32[day]"),
        pytest.param("date64[ms]", "datetime64[ms]", id="date64[ms]"),
    ],
)
def test_to_numpy_pyarrow_array_pyarrow_dtypes_date(dtype, expected_dtype):
    """
    Check _to_numpy on PyArrow arrays of PyArrow date types.

    date32[day] and date64[ms] are stored as 32-bit and 64-bit integers, respectively,
    representing the number of days and milliseconds since the UNIX epoch (1970-01-01).

    Both the dtype and the date unit of the result are verified explicitly.
    """
    # The input mixes a date object with datetime objects.
    days = [date(2024, 1, 1), datetime(2024, 1, 2), datetime(2024, 1, 3)]
    expected = np.array(
        ["2024-01-01", "2024-01-02", "2024-01-03"], dtype=expected_dtype
    )

    converted = _to_numpy(pa.array(days, type=dtype))
    _check_result(converted, np.datetime64)
    # _check_result only looks at the scalar type, so verify the date unit too.
    assert converted.dtype == expected_dtype
    npt.assert_array_equal(converted, expected)
0 commit comments