Skip to content

Commit 7552263

Browse files
committed
Revert "Remove pygmt/tests/test_clib_to_numpy.py"
This reverts commit bbfcd95.
1 parent b0fc357 commit 7552263

File tree

1 file changed

+374
-0
lines changed

1 file changed

+374
-0
lines changed

pygmt/tests/test_clib_to_numpy.py

Lines changed: 374 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
"""
2+
Tests for the _to_numpy function in the clib.conversion module.
3+
"""
4+
5+
import sys
6+
from datetime import date, datetime
7+
8+
import numpy as np
9+
import numpy.testing as npt
10+
import pandas as pd
11+
import pytest
12+
from packaging.version import Version
13+
from pygmt.clib.conversion import _to_numpy
14+
from pygmt.helpers.testing import skip_if_no
15+
16+
# Module-level marker: pytest applies it to every test collected from this file, so
# the whole file is skipped whenever the interpreter reports a macOS platform.
_ON_MACOS = sys.platform == "darwin"
pytestmark = pytest.mark.skipif(
    _ON_MACOS,
    reason="For unknown reasons, tests in the file cause unrelated failures on macOS.",
)
21+
22+
# pyarrow is an optional dependency. Record whether it could be imported so that the
# PyArrow-specific tests below can be skipped when it is missing.
try:
    import pyarrow as pa
except ImportError:
    _HAS_PYARROW = False
else:
    _HAS_PYARROW = True
28+
29+
30+
def _check_result(result, expected_dtype):
    """
    Assert that ``result`` is a C-contiguous NumPy array of the expected scalar type.

    Parameters
    ----------
    result
        The object returned by ``_to_numpy``.
    expected_dtype
        The NumPy scalar type (e.g., ``np.int64``) that ``result.dtype.type`` must
        compare equal to.

    Raises
    ------
    AssertionError
        If ``result`` is not a ``numpy.ndarray``, is not C-contiguous, or has a
        different scalar type.
    """
    # The type check comes first so the attribute lookups below are only made on arrays.
    assert isinstance(result, np.ndarray)
    flags, scalar_type = result.flags, result.dtype.type
    assert flags["C_CONTIGUOUS"]
    assert scalar_type == expected_dtype
38+
39+
40+
########################################################################################
41+
# Test the _to_numpy function with Python built-in types.
42+
########################################################################################
43+
@pytest.mark.parametrize(
    ("data", "expected_dtype"),
    [
        pytest.param(
            [1, 2, 3],
            # int32 is expected only on Windows with NumPy older than 2.0; every
            # other platform/version combination is expected to give int64.
            (
                np.int64
                if sys.platform != "win32" or Version(np.__version__) >= Version("2.0")
                else np.int32
            ),
            id="int",
        ),
        pytest.param([1.0, 2.0, 3.0], np.float64, id="float"),
        pytest.param(
            [complex(+1), complex(-2j), complex("-Infinity+NaNj")],
            np.complex128,
            id="complex",
        ),
        pytest.param(["abc", "defg", "12345"], np.str_, id="string"),
    ],
)
def test_to_numpy_python_types(data, expected_dtype):
    """
    Check that _to_numpy converts lists of Python built-in scalars (int, float,
    complex, str) into arrays of the expected NumPy dtype with unchanged values.
    """
    converted = _to_numpy(data)
    _check_result(converted, expected_dtype)
    npt.assert_array_equal(converted, data)
69+
70+
71+
########################################################################################
72+
# Test the _to_numpy function with NumPy arrays.
73+
#
74+
# There are 24 fundamental dtypes in NumPy. Not all of them are supported by PyGMT.
75+
#
76+
# - Numeric dtypes:
77+
# - int8, int16, int32, int64, longlong
78+
# - uint8, uint16, uint32, uint64, ulonglong
79+
# - float16, float32, float64, longdouble
80+
# - complex64, complex128, clongdouble
81+
# - bool
82+
# - datetime64, timedelta64
83+
# - str_
84+
# - bytes_
85+
# - object_
86+
# - void
87+
#
88+
# Reference: https://numpy.org/doc/2.1/reference/arrays.scalars.html
89+
########################################################################################
90+
# Shared (dtype, expected_dtype) parameter sets for the NumPy numeric dtypes; the
# expected dtype is always the input dtype itself. The test ids are taken from the
# literal names below rather than from ``__name__`` so that each alias keeps its own id.
np_dtype_params = [
    pytest.param(getattr(np, name), getattr(np, name), id=name)
    for name in (
        # Signed integers
        "int8",
        "int16",
        "int32",
        "int64",
        "longlong",
        # Unsigned integers
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "ulonglong",
        # Floating-point numbers
        "float16",
        "float32",
        "float64",
        "longdouble",
        # Complex numbers
        "complex64",
        "complex128",
        "clongdouble",
    )
]
109+
110+
111+
@pytest.mark.parametrize(("dtype", "expected_dtype"), np_dtype_params)
def test_to_numpy_ndarray_numpy_dtypes_numeric(dtype, expected_dtype):
    """
    Check that _to_numpy handles NumPy arrays of every NumPy numeric dtype.

    A strided 1-D view and a strided 2-D view are used as inputs, so the function has
    to produce a C-contiguous result from non-contiguous data.
    """
    cases = (
        # 1-D: every other element of a flat array
        ([1, 2, 3, 4, 5, 6], np.s_[::2]),
        # 2-D: every other row and every other column
        ([[1, 2, 3, 4], [5, 6, 7, 8]], np.s_[::2, ::2]),
    )
    for values, stride in cases:
        array = np.array(values, dtype=dtype)[stride]
        # Guard the premise of the test: the input must not be C-contiguous.
        assert array.flags.c_contiguous is False
        result = _to_numpy(array)
        _check_result(result, expected_dtype)
        npt.assert_array_equal(result, array, strict=True)
131+
132+
133+
@pytest.mark.parametrize("dtype", [None, np.str_, "U10"])
def test_to_numpy_ndarray_numpy_dtypes_string(dtype):
    """
    Check that _to_numpy returns a ``np.str_`` array for NumPy string arrays, whether
    the dtype is inferred (None), given as ``np.str_``, or given as a "U10" string.
    """
    strings = np.array(["abc", "defg", "12345"], dtype=dtype)
    converted = _to_numpy(strings)
    _check_result(converted, np.str_)
    npt.assert_array_equal(converted, strings)
142+
143+
144+
########################################################################################
145+
# Test the _to_numpy function with pandas.Series.
146+
#
147+
# In pandas, dtype can be specified by
148+
#
149+
# 1. NumPy dtypes (see above)
150+
# 2. pandas dtypes
151+
# 3. PyArrow types (see below)
152+
#
153+
# pandas provides the following dtypes:
154+
#
155+
# - Numeric dtypes:
156+
# - Int8, Int16, Int32, Int64
157+
# - UInt8, UInt16, UInt32, UInt64
158+
# - Float32, Float64
159+
# - DatetimeTZDtype
160+
# - PeriodDtype
161+
# - IntervalDtype
162+
# - StringDtype
163+
# - CategoricalDtype
164+
# - SparseDtype
165+
# - BooleanDtype
166+
# - ArrowDtype: a special dtype used to store data in the PyArrow format.
167+
#
168+
# References:
169+
# 1. https://pandas.pydata.org/docs/reference/arrays.html
170+
# 2. https://pandas.pydata.org/docs/user_guide/basics.html#basics-dtypes
171+
# 3. https://pandas.pydata.org/docs/user_guide/pyarrow.html
172+
########################################################################################
173+
@pytest.mark.parametrize(("dtype", "expected_dtype"), np_dtype_params)
def test_to_numpy_pandas_series_numpy_dtypes_numeric(dtype, expected_dtype):
    """
    Check that _to_numpy handles pandas.Series backed by NumPy numeric dtypes.
    """
    full = pd.Series([1, 2, 3, 4, 5, 6], dtype=dtype)
    # Keep every other element so that the input is intended to be non-contiguous.
    series = full[::2]
    converted = _to_numpy(series)
    _check_result(converted, expected_dtype)
    npt.assert_array_equal(converted, series)
182+
183+
184+
@pytest.mark.parametrize(
    "dtype",
    [
        None,
        np.str_,
        "U10",
        "string[python]",
        pytest.param("string[pyarrow]", marks=skip_if_no(package="pyarrow")),
        pytest.param(
            "string[pyarrow_numpy]",
            marks=[
                skip_if_no(package="pyarrow"),
                pytest.mark.skipif(
                    Version(pd.__version__) < Version("2.1"),
                    reason="string[pyarrow_numpy] was added since pandas 2.1",
                ),
            ],
        ),
    ],
)
def test_to_numpy_pandas_series_pandas_dtypes_string(dtype):
    """
    Check that _to_numpy returns a ``np.str_`` array for pandas.Series of strings.

    pandas accepts several spellings for string data; the parameters cover the
    inferred dtype (None), NumPy string dtypes, and the pandas ``string[...]`` dtypes
    (the PyArrow-backed ones are skipped when pyarrow is unavailable).

    Reference: https://pandas.pydata.org/docs/reference/api/pandas.StringDtype.html
    """
    series = pd.Series(["abc", "defg", "12345"], dtype=dtype)
    converted = _to_numpy(series)
    _check_result(converted, np.str_)
    npt.assert_array_equal(converted, series)
216+
217+
218+
@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        pytest.param("date32[day][pyarrow]", "datetime64[D]", id="date32[day]"),
        pytest.param("date64[ms][pyarrow]", "datetime64[ms]", id="date64[ms]"),
    ],
)
def test_to_numpy_pandas_series_pyarrow_dtypes_date(dtype, expected_dtype):
    """
    Check that _to_numpy converts pandas.Series with PyArrow date32/date64 dtypes to
    ``np.datetime64`` arrays that carry the matching date unit.
    """
    dates = pd.date_range(start="2024-01-01", periods=3)
    series = pd.Series(dates, dtype=dtype)
    expected = np.array(
        ["2024-01-01", "2024-01-02", "2024-01-03"], dtype=expected_dtype
    )

    result = _to_numpy(series)
    _check_result(result, np.datetime64)
    # The scalar type alone does not carry the unit, so compare the full dtype too.
    assert result.dtype == expected_dtype
    npt.assert_array_equal(result, expected)
238+
239+
240+
########################################################################################
241+
# Test the _to_numpy function with PyArrow arrays.
242+
#
243+
# PyArrow provides the following types:
244+
#
245+
# - Numeric types:
246+
# - int8, int16, int32, int64
247+
# - uint8, uint16, uint32, uint64
248+
# - float16, float32, float64
249+
# - String types: string/utf8, large_string/large_utf8, string_view
250+
# - Date types:
251+
# - date32[day]
252+
# - date64[ms]
253+
#
254+
# In PyArrow, array types can be specified in two ways:
255+
#
256+
# - Using string aliases (e.g., "int8")
257+
# - Using pyarrow.DataType (e.g., ``pa.int8()``)
258+
#
259+
# Reference: https://arrow.apache.org/docs/python/api/datatypes.html
260+
########################################################################################
261+
@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        pytest.param("int8", np.int8, id="int8"),
        pytest.param("int16", np.int16, id="int16"),
        pytest.param("int32", np.int32, id="int32"),
        pytest.param("int64", np.int64, id="int64"),
        pytest.param("uint8", np.uint8, id="uint8"),
        pytest.param("uint16", np.uint16, id="uint16"),
        pytest.param("uint32", np.uint32, id="uint32"),
        pytest.param("uint64", np.uint64, id="uint64"),
        pytest.param("float16", np.float16, id="float16"),
        pytest.param("float32", np.float32, id="float32"),
        pytest.param("float64", np.float64, id="float64"),
    ],
)
def test_to_numpy_pyarrow_array_pyarrow_dtypes_numeric(dtype, expected_dtype):
    """
    Check that _to_numpy converts PyArrow arrays of PyArrow numeric types to NumPy
    arrays of the corresponding NumPy dtype.
    """
    raw = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
    # float16 is the one case where the input is first converted to a NumPy float16
    # array, following the example at
    # https://arrow.apache.org/docs/python/generated/pyarrow.float16.html
    values = np.array(raw, dtype=np.float16) if dtype == "float16" else raw
    array = pa.array(values, type=dtype)[::2]
    converted = _to_numpy(array)
    _check_result(converted, expected_dtype)
    npt.assert_array_equal(converted, array)
290+
291+
292+
@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        pytest.param("int8", np.float64, id="int8"),
        pytest.param("int16", np.float64, id="int16"),
        pytest.param("int32", np.float64, id="int32"),
        pytest.param("int64", np.float64, id="int64"),
        pytest.param("uint8", np.float64, id="uint8"),
        pytest.param("uint16", np.float64, id="uint16"),
        pytest.param("uint32", np.float64, id="uint32"),
        pytest.param("uint64", np.float64, id="uint64"),
        pytest.param("float16", np.float16, id="float16"),
        pytest.param("float32", np.float32, id="float32"),
        pytest.param("float64", np.float64, id="float64"),
    ],
)
def test_to_numpy_pyarrow_array_pyarrow_dtypes_numeric_with_na(dtype, expected_dtype):
    """
    Check that _to_numpy handles PyArrow numeric arrays that contain a missing value.

    As the parameters spell out, integer inputs are expected to come back as float64,
    while floating-point inputs are expected to keep their own dtype.
    """
    raw = [1.0, 2.0, None, 4.0, 5.0, 6.0]
    # float16 is the one case where the input is first converted to a NumPy float16
    # array, following the example at
    # https://arrow.apache.org/docs/python/generated/pyarrow.float16.html
    values = np.array(raw, dtype=np.float16) if dtype == "float16" else raw
    # The stride of 2 keeps the missing entry (index 2 of the raw data).
    array = pa.array(values, type=dtype)[::2]
    converted = _to_numpy(array)
    _check_result(converted, expected_dtype)
    npt.assert_array_equal(converted, array)
321+
322+
323+
@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    "dtype",
    [
        None,
        "string",
        "utf8",  # alias for string
        "large_string",
        "large_utf8",  # alias for large_string
        "string_view",
    ],
)
def test_to_numpy_pyarrow_array_pyarrow_dtypes_string(dtype):
    """
    Check that _to_numpy returns a ``np.str_`` array for PyArrow arrays of each
    PyArrow string type, including the inferred type (None).
    """
    strings = pa.array(["abc", "defg", "12345"], type=dtype)
    converted = _to_numpy(strings)
    _check_result(converted, np.str_)
    npt.assert_array_equal(converted, strings)
343+
344+
345+
@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        pytest.param("date32[day]", "datetime64[D]", id="date32[day]"),
        pytest.param("date64[ms]", "datetime64[ms]", id="date64[ms]"),
    ],
)
def test_to_numpy_pyarrow_array_pyarrow_dtypes_date(dtype, expected_dtype):
    """
    Check that _to_numpy converts PyArrow date arrays to ``np.datetime64`` arrays.

    date32[day] and date64[ms] are stored as 32-bit and 64-bit integers, respectively,
    counting days and milliseconds since the UNIX epoch (1970-01-01). The test
    therefore verifies the date unit of the result in addition to its scalar type.
    """
    # The input mixes ``date`` and ``datetime`` objects.
    dates = [date(2024, 1, 1), datetime(2024, 1, 2), datetime(2024, 1, 3)]
    expected = np.array(
        ["2024-01-01", "2024-01-02", "2024-01-03"], dtype=expected_dtype
    )

    result = _to_numpy(pa.array(dates, type=dtype))
    _check_result(result, np.datetime64)
    # The scalar type alone does not carry the unit, so compare the full dtype too.
    assert result.dtype == expected_dtype
    npt.assert_array_equal(result, expected)

0 commit comments

Comments
 (0)