Skip to content

clib.conversion._to_numpy: Add tests for pandas.Series with pyarrow numeric dtypes #3585

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Nov 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 47 additions & 4 deletions pygmt/tests/test_clib_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pytest
from packaging.version import Version
from pygmt.clib.conversion import _to_numpy
from pygmt.helpers.testing import skip_if_no

try:
import pyarrow as pa
Expand All @@ -18,6 +19,9 @@
except ImportError:
_HAS_PYARROW = False

# Extra keyword arguments for ``pytest.param`` (unpacked as ``**pa_marks``) that mark
# a parametrized case as requiring the optional pyarrow package.
pa_marks = {
    "marks": skip_if_no(package="pyarrow"),
}


def _check_result(result, expected_dtype):
"""
Expand Down Expand Up @@ -145,6 +149,11 @@ def test_to_numpy_ndarray_numpy_dtypes_numeric(dtype, expected_dtype):
# - BooleanDtype
# - ArrowDtype: a special dtype used to store data in the PyArrow format.
#
# In pandas, PyArrow types can be specified using the following formats:
#
# - Prefixed with the name of the dtype and "[pyarrow]" (e.g., "int8[pyarrow]")
# - Specified using ``ArrowDtype`` (e.g., "pd.ArrowDtype(pa.int8())")
Copy link
Member Author

@seisman seisman Nov 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't use pd.ArrowDtype(pa.int8()) here because pa is not defined when pyarrow is not installed. So we have to use the string aliases.

#
# References:
# 1. https://pandas.pydata.org/docs/reference/arrays.html
# 2. https://pandas.pydata.org/docs/user_guide/basics.html#basics-dtypes
Expand Down Expand Up @@ -174,13 +183,30 @@ def test_to_numpy_pandas_series_numpy_dtypes_numeric(dtype, expected_dtype):
pytest.param(pd.UInt64Dtype(), np.uint64, id="UInt64"),
pytest.param(pd.Float32Dtype(), np.float32, id="Float32"),
pytest.param(pd.Float64Dtype(), np.float64, id="Float64"),
pytest.param("int8[pyarrow]", np.int8, id="int8[pyarrow]", **pa_marks),
pytest.param("int16[pyarrow]", np.int16, id="int16[pyarrow]", **pa_marks),
pytest.param("int32[pyarrow]", np.int32, id="int32[pyarrow]", **pa_marks),
pytest.param("int64[pyarrow]", np.int64, id="int64[pyarrow]", **pa_marks),
pytest.param("uint8[pyarrow]", np.uint8, id="uint8[pyarrow]", **pa_marks),
pytest.param("uint16[pyarrow]", np.uint16, id="uint16[pyarrow]", **pa_marks),
pytest.param("uint32[pyarrow]", np.uint32, id="uint32[pyarrow]", **pa_marks),
pytest.param("uint64[pyarrow]", np.uint64, id="uint64[pyarrow]", **pa_marks),
pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks),
pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks),
pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks),
],
)
def test_to_numpy_pandas_series_pandas_dtypes_numeric(dtype, expected_dtype):
"""
Test the _to_numpy function with pandas.Series of pandas numeric dtypes.
Test the _to_numpy function with pandas.Series of pandas/PyArrow numeric dtypes.
"""
series = pd.Series([1, 2, 3, 4, 5, 6], dtype=dtype)[::2] # Not C-contiguous
data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"):
# float16 needs special handling for pandas < 2.2.
# Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html
data = np.array(data, dtype=np.float16)

series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous
result = _to_numpy(series)
_check_result(result, expected_dtype)
npt.assert_array_equal(result, series)
Expand All @@ -199,13 +225,30 @@ def test_to_numpy_pandas_series_pandas_dtypes_numeric(dtype, expected_dtype):
pytest.param(pd.UInt64Dtype(), np.float64, id="UInt64"),
pytest.param(pd.Float32Dtype(), np.float32, id="Float32"),
pytest.param(pd.Float64Dtype(), np.float64, id="Float64"),
pytest.param("int8[pyarrow]", np.float64, id="int8[pyarrow]", **pa_marks),
pytest.param("int16[pyarrow]", np.float64, id="int16[pyarrow]", **pa_marks),
pytest.param("int32[pyarrow]", np.float64, id="int32[pyarrow]", **pa_marks),
pytest.param("int64[pyarrow]", np.float64, id="int64[pyarrow]", **pa_marks),
pytest.param("uint8[pyarrow]", np.float64, id="uint8[pyarrow]", **pa_marks),
pytest.param("uint16[pyarrow]", np.float64, id="uint16[pyarrow]", **pa_marks),
pytest.param("uint32[pyarrow]", np.float64, id="uint32[pyarrow]", **pa_marks),
pytest.param("uint64[pyarrow]", np.float64, id="uint64[pyarrow]", **pa_marks),
pytest.param("float16[pyarrow]", np.float16, id="float16[pyarrow]", **pa_marks),
pytest.param("float32[pyarrow]", np.float32, id="float32[pyarrow]", **pa_marks),
pytest.param("float64[pyarrow]", np.float64, id="float64[pyarrow]", **pa_marks),
],
)
def test_to_numpy_pandas_series_pandas_dtypes_numeric_with_na(dtype, expected_dtype):
"""
Test the _to_numpy function with pandas.Series of pandas numeric dtypes and NA.
Test the _to_numpy function with pandas.Series of pandas/PyArrow numeric dtypes and
missing values (NA).
"""
series = pd.Series([1, 2, pd.NA, 4, 5, 6], dtype=dtype)[::2] # Not C-contiguous
data = [1.0, 2.0, None, 4.0, 5.0, 6.0]
if dtype == "float16[pyarrow]" and Version(pd.__version__) < Version("2.2"):
# float16 needs special handling for pandas < 2.2.
# Example from https://arrow.apache.org/docs/python/generated/pyarrow.float16.html
data = np.array(data, dtype=np.float16)
series = pd.Series(data, dtype=dtype)[::2] # Not C-contiguous
assert series.isna().any()
result = _to_numpy(series)
_check_result(result, expected_dtype)
Expand Down
Loading