Skip to content

Commit 55628cd

Browse files
raulcd and pitrou authored
GH-46611: [Python][C++] Allow building float16 arrays without numpy (#46618)
### Rationale for this change

When we added Float16, we did not update pyarrow to be able to convert Python objects to Arrow float16 values. Float16 conversion required NumPy, and it crashed if NumPy was not present.

### What changes are included in this PR?

Generating float16 scalars and arrays in pyarrow no longer requires NumPy, and no longer fails if NumPy is not present.

### Are these changes tested?

Yes, new tests have been added.

### Are there any user-facing changes?

No changes to existing functionality. Users can now build float16 values directly from Python objects, without having to use np.float16.

* GitHub Issue: #46611

Lead-authored-by: Raúl Cumplido <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
1 parent fbed20a commit 55628cd

File tree

11 files changed

+66
-49
lines changed

11 files changed

+66
-49
lines changed

python/pyarrow/includes/common.pxd

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,6 @@ cdef extern from "<Python.h>":
8383
void Py_XDECREF(PyObject* o)
8484
Py_ssize_t Py_REFCNT(PyObject* o)
8585

86-
cdef extern from "numpy/halffloat.h":
87-
ctypedef uint16_t npy_half
88-
8986
cdef extern from "arrow/api.h" namespace "arrow" nogil:
9087
# We can later add more of the common status factory methods as needed
9188
cdef CStatus CStatus_OK "arrow::Status::OK"()

python/pyarrow/includes/libarrow.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1314,7 +1314,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
13141314
uint64_t value
13151315

13161316
cdef cppclass CHalfFloatScalar" arrow::HalfFloatScalar"(CScalar):
1317-
npy_half value
1317+
uint16_t value
13181318

13191319
cdef cppclass CFloatScalar" arrow::FloatScalar"(CScalar):
13201320
float value

python/pyarrow/includes/libarrow_python.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ cdef extern from "arrow/python/arrow_to_pandas.h" namespace "arrow::py::MapConve
5555
cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
5656
shared_ptr[CDataType] GetPrimitiveType(Type type)
5757

58-
object PyHalf_FromHalf(npy_half value)
58+
object PyFloat_FromHalf(uint16_t value)
5959

6060
cdef cppclass PyConversionOptions:
6161
PyConversionOptions()

python/pyarrow/scalar.pxi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ cdef class HalfFloatScalar(Scalar):
388388
This parameter is ignored for non-nested Scalars.
389389
"""
390390
cdef CHalfFloatScalar* sp = <CHalfFloatScalar*> self.wrapped.get()
391-
return PyHalf_FromHalf(sp.value) if sp.is_valid else None
391+
return PyFloat_FromHalf(sp.value) if sp.is_valid else None
392392

393393

394394
cdef class FloatScalar(Scalar):

python/pyarrow/src/arrow/python/helpers.cc

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717

1818
// helpers.h includes a NumPy header, so we include this first
19+
#include "arrow/python/numpy_init.h"
1920
#include "arrow/python/numpy_interop.h"
2021

2122
#include "arrow/python/helpers.h"
@@ -31,6 +32,7 @@
3132
#include "arrow/type_fwd.h"
3233
#include "arrow/util/checked_cast.h"
3334
#include "arrow/util/config.h"
35+
#include "arrow/util/float16.h"
3436
#include "arrow/util/logging.h"
3537

3638
namespace arrow {
@@ -73,21 +75,22 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
7375
}
7476
}
7577

76-
PyObject* PyHalf_FromHalf(npy_half value) {
77-
PyObject* result = PyArrayScalar_New(Half);
78-
if (result != NULL) {
79-
PyArrayScalar_ASSIGN(result, Half, value);
80-
}
81-
return result;
78+
PyObject* PyFloat_FromHalf(uint16_t value) {
79+
// Convert the uint16_t Float16 value to a PyFloat object
80+
arrow::util::Float16 half_val = arrow::util::Float16::FromBits(value);
81+
return PyFloat_FromDouble(half_val.ToDouble());
8282
}
8383

84-
Status PyFloat_AsHalf(PyObject* obj, npy_half* out) {
85-
if (PyArray_IsScalar(obj, Half)) {
86-
*out = PyArrayScalar_VAL(obj, Half);
87-
return Status::OK();
84+
Result<uint16_t> PyFloat_AsHalf(PyObject* obj) {
85+
if (PyFloat_Check(obj)) {
86+
arrow::util::Float16 half_val =
87+
arrow::util::Float16::FromDouble(PyFloat_AsDouble(obj));
88+
return half_val.bits();
89+
} else if (has_numpy() && PyArray_IsScalar(obj, Half)) {
90+
return PyArrayScalar_VAL(obj, Half);
8891
} else {
89-
// XXX: cannot use npy_double_to_half() without linking with Numpy
90-
return Status::TypeError("Expected np.float16 instance");
92+
return Status::TypeError("conversion to float16 expects a `float` or ",
93+
"`np.float16` object, got ", Py_TYPE(obj)->tp_name);
9194
}
9295
}
9396

python/pyarrow/src/arrow/python/helpers.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@
2626

2727
#include "arrow/python/numpy_interop.h"
2828

29-
#include <numpy/halffloat.h>
30-
3129
#include "arrow/python/visibility.h"
3230
#include "arrow/type.h"
3331
#include "arrow/util/macros.h"
@@ -43,11 +41,11 @@ class OwnedRef;
4341
// \return A shared pointer to DataType
4442
ARROW_PYTHON_EXPORT std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
4543

46-
// \brief Construct a np.float16 object from a npy_half value.
47-
ARROW_PYTHON_EXPORT PyObject* PyHalf_FromHalf(npy_half value);
44+
// \brief Construct a Python float object from a half-float uint16_t value.
45+
ARROW_PYTHON_EXPORT PyObject* PyFloat_FromHalf(uint16_t value);
4846

49-
// \brief Convert a Python object to a npy_half value.
50-
ARROW_PYTHON_EXPORT Status PyFloat_AsHalf(PyObject* obj, npy_half* out);
47+
// \brief Convert a Python object to a half-float uint16_t value.
48+
ARROW_PYTHON_EXPORT Result<uint16_t> PyFloat_AsHalf(PyObject* obj);
5149

5250
namespace internal {
5351

python/pyarrow/src/arrow/python/python_to_arrow.cc

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,16 @@ class PyValue {
226226
}
227227

228228
static Result<uint16_t> Convert(const HalfFloatType*, const O&, I obj) {
229-
uint16_t value;
230-
RETURN_NOT_OK(PyFloat_AsHalf(obj, &value));
231-
return value;
229+
if (internal::PyFloatScalar_Check(obj)) {
230+
return PyFloat_AsHalf(obj);
231+
} else if (internal::PyIntScalar_Check(obj)) {
232+
double float_val{};
233+
RETURN_NOT_OK(internal::IntegerScalarToDoubleSafe(obj, &float_val));
234+
const auto half_val = arrow::util::Float16::FromDouble(float_val);
235+
return half_val.bits();
236+
} else {
237+
return internal::InvalidValue(obj, "tried to convert to float16");
238+
}
232239
}
233240

234241
static Result<float> Convert(const FloatType*, const O&, I obj) {

python/pyarrow/src/arrow/python/type_traits.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,8 @@
2626

2727
#include "arrow/python/numpy_interop.h"
2828

29-
#include <numpy/halffloat.h>
30-
3129
#include "arrow/type_fwd.h"
30+
#include "arrow/util/float16.h"
3231
#include "arrow/util/logging.h"
3332

3433
namespace arrow {
@@ -87,15 +86,18 @@ NPY_INT_DECL(ULONGLONG, UInt64, uint64_t);
8786

8887
template <>
8988
struct npy_traits<NPY_FLOAT16> {
90-
typedef npy_half value_type;
89+
typedef uint16_t value_type;
9190
using TypeClass = HalfFloatType;
9291
using BuilderClass = HalfFloatBuilder;
9392

94-
static constexpr npy_half na_sentinel = NPY_HALF_NAN;
93+
static constexpr uint16_t na_sentinel =
94+
std::numeric_limits<arrow::util::Float16>::quiet_NaN().bits();
9595

9696
static constexpr bool supports_nulls = true;
9797

98-
static inline bool isnull(npy_half v) { return v == NPY_HALF_NAN; }
98+
static inline bool isnull(uint16_t v) {
99+
return arrow::util::Float16::FromBits(v).is_nan();
100+
}
99101
};
100102

101103
template <>
@@ -201,7 +203,8 @@ template <>
201203
struct arrow_traits<Type::HALF_FLOAT> {
202204
static constexpr int npy_type = NPY_FLOAT16;
203205
static constexpr bool supports_nulls = true;
204-
static constexpr uint16_t na_value = NPY_HALF_NAN;
206+
static constexpr uint16_t na_value =
207+
std::numeric_limits<arrow::util::Float16>::quiet_NaN().bits();
205208
typedef typename npy_traits<NPY_FLOAT16>::value_type T;
206209
};
207210

python/pyarrow/tests/test_array.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1664,6 +1664,16 @@ def test_floating_point_truncate_unsafe():
16641664
_check_cast_case(case, safe=False)
16651665

16661666

1667+
def test_half_float_array_from_python():
1668+
# GH-46611
1669+
arr = pa.array([1.0, 2.0, 3, None, 12345.6789, 1.234567], type=pa.float16())
1670+
assert arr.type == pa.float16()
1671+
assert arr.to_pylist() == [1.0, 2.0, 3.0, None, 12344.0, 1.234375]
1672+
msg1 = "Could not convert 'a' with type str: tried to convert to float16"
1673+
with pytest.raises(pa.ArrowInvalid, match=msg1):
1674+
arr = pa.array(['a', 3, None], type=pa.float16())
1675+
1676+
16671677
def test_decimal_to_int_safe():
16681678
safe_cases = [
16691679
(
@@ -2281,10 +2291,11 @@ def test_array_conversions_no_sentinel_values():
22812291

22822292
assert arr2.type == 'int8'
22832293

2284-
arr3 = pa.array(np.array([1, np.nan, 2, 3, np.nan, 4], dtype='float32'),
2285-
type='float32')
2286-
assert arr3.type == 'float32'
2287-
assert arr3.null_count == 0
2294+
for ty in ['float16', 'float32', 'float64']:
2295+
arr3 = pa.array(np.array([1, np.nan, 2, 3, np.nan, 4], dtype=ty),
2296+
type=ty)
2297+
assert arr3.type == ty
2298+
assert arr3.null_count == 0
22882299

22892300

22902301
def test_time32_time64_from_integer():

python/pyarrow/tests/test_pandas.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,14 +1007,14 @@ def test_half_floats_from_numpy(self):
10071007
arr = np.array([1.5, np.nan], dtype=np.float16)
10081008
a = pa.array(arr, type=pa.float16())
10091009
x, y = a.to_pylist()
1010-
assert isinstance(x, np.float16)
1010+
assert isinstance(x, float)
10111011
assert x == 1.5
1012-
assert isinstance(y, np.float16)
1012+
assert isinstance(y, float)
10131013
assert np.isnan(y)
10141014

10151015
a = pa.array(arr, type=pa.float16(), from_pandas=True)
10161016
x, y = a.to_pylist()
1017-
assert isinstance(x, np.float16)
1017+
assert isinstance(x, float)
10181018
assert x == 1.5
10191019
assert y is None
10201020

0 commit comments

Comments
 (0)