From c7bbac1b8731650e60180092ed6297a33b8db2cd Mon Sep 17 00:00:00 2001
From: Juniper Tyree <50025784+juntyr@users.noreply.github.com>
Date: Sun, 7 Sep 2025 11:43:25 +0000
Subject: [PATCH 1/4] Implement cast support for ubyte and half

Implement cast support for ubyte

Use template magic to distinguish npy_bool and npy_half

Implement cast support for half
---
 quaddtype/meson.build                   | 11 ++-
 quaddtype/numpy_quaddtype/src/casts.cpp | 97 +++++++++++++++++++++----
 quaddtype/tests/test_quaddtype.py       | 10 +++
 3 files changed, 101 insertions(+), 17 deletions(-)
diff --git a/quaddtype/meson.build b/quaddtype/meson.build
index 69e4edf7..04b506b7 100644
--- a/quaddtype/meson.build
+++ b/quaddtype/meson.build
@@ -13,6 +13,8 @@ if is_windows
   add_project_arguments('-DWIN32', '-D_WINDOWS', language : ['c', 'cpp'])
 endif
 
+qblas_dep = dependency('qblas', fallback: ['qblas', 'qblas_dep'])
+
 sleef_subproj = subproject('sleef', required: true)
 sleef_dep = sleef_subproj.get_variable('sleef_dep')
 sleefquad_dep = sleef_subproj.get_variable('sleefquad_dep')
@@ -22,10 +24,13 @@ incdir_numpy = run_command(py,
   check : true
 ).stdout().strip()
 
-# OpenMP dependency (optional, for threading)
+npymath_path = incdir_numpy / '..' / 'lib'
+npymath_lib = c.find_library('npymath', dirs: npymath_path)
+
+dependencies = [py_dep, qblas_dep, sleef_dep, sleefquad_dep, npymath_lib]
+
+# Add OpenMP dependency (optional, for threading)
 openmp_dep = dependency('openmp', required: false)
-qblas_dep = dependency('qblas', fallback: ['qblas', 'qblas_dep'])
-dependencies = [py_dep, qblas_dep, sleef_dep, sleefquad_dep]
 if openmp_dep.found()
     dependencies += openmp_dep
 endif
diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp
index c662b4d5..263f4fe9 100644
--- a/quaddtype/numpy_quaddtype/src/casts.cpp
+++ b/quaddtype/numpy_quaddtype/src/casts.cpp
@@ -9,6 +9,7 @@ extern "C" {
 #include <Python.h>
 
 #include "numpy/arrayobject.h"
+#include "numpy/halffloat.h"
 #include "numpy/ndarraytypes.h"
 #include "numpy/dtype_api.h"
 }
@@ -20,7 +21,7 @@ extern "C" {
 #include "casts.h"
 #include "dtype.h"
 
-#define NUM_CASTS 29  // 14 to_casts + 14 from_casts + 1 quad_to_quad
+#define NUM_CASTS 33  // 16 to_casts + 16 from_casts + 1 quad_to_quad
 
 static NPY_CASTING
 quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
@@ -150,15 +151,26 @@ quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const da
     return 0;
 }
 
+// Template magic to ensure npy_bool/npy_ubyte and npy_half/npy_ushort do not alias in templates
+struct my_npy_bool {};
+struct my_npy_half {};
+
+template<typename T>
+struct NpyType { typedef T TYPE; };
+template<>
+struct NpyType<my_npy_bool>{ typedef npy_bool TYPE; };
+template<>
+struct NpyType<my_npy_half>{ typedef npy_half TYPE; };
+
 // Casting from other types to QuadDType
 
 template <typename T>
 static inline quad_value
-to_quad(T x, QuadBackendType backend);
+to_quad(typename NpyType<T>::TYPE x, QuadBackendType backend);
 
 template <>
 inline quad_value
-to_quad<npy_bool>(npy_bool x, QuadBackendType backend)
+to_quad<my_npy_bool>(npy_bool x, QuadBackendType backend)
 {
     quad_value result;
     if (backend == BACKEND_SLEEF) {
@@ -184,6 +196,20 @@ to_quad<npy_byte>(npy_byte x, QuadBackendType backend)
     return result;
 }
 
+template <>
+inline quad_value
+to_quad<npy_ubyte>(npy_ubyte x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_uint64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
 template <>
 inline quad_value
 to_quad<npy_short>(npy_short x, QuadBackendType backend)
@@ -295,6 +321,21 @@ to_quad<npy_ulonglong>(npy_ulonglong x, QuadBackendType backend)
     }
     return result;
 }
+
+template <>
+inline quad_value
+to_quad<my_npy_half>(npy_half x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_doubleq1(npy_half_to_double(x));
+    }
+    else {
+        result.longdouble_value = (long double)npy_half_to_double(x);
+    }
+    return result;
+}
+
 template <>
 inline quad_value
 to_quad<float>(float x, QuadBackendType backend)
@@ -374,10 +415,10 @@ numpy_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const
     size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
 
     while (N--) {
-        T in_val;
+        typename NpyType<T>::TYPE in_val;
         quad_value out_val;
 
-        memcpy(&in_val, in_ptr, sizeof(T));
+        memcpy(&in_val, in_ptr, sizeof(typename NpyType<T>::TYPE));
         out_val = to_quad<T>(in_val, backend);
         memcpy(out_ptr, &out_val, elem_size);
 
@@ -401,7 +442,7 @@ numpy_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const d
     QuadBackendType backend = descr_out->backend;
 
     while (N--) {
-        T in_val = *(T *)in_ptr;
+        typename NpyType<T>::TYPE in_val = *(typename NpyType<T>::TYPE *)in_ptr;
         quad_value out_val = to_quad<T>(in_val, backend);
 
         if (backend == BACKEND_SLEEF) {
@@ -420,12 +461,12 @@ numpy_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const d
 // Casting from QuadDType to other types
 
 template <typename T>
-static inline T
+static inline typename NpyType<T>::TYPE
 from_quad(quad_value x, QuadBackendType backend);
 
 template <>
 inline npy_bool
-from_quad<npy_bool>(quad_value x, QuadBackendType backend)
+from_quad<my_npy_bool>(quad_value x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
         return Sleef_cast_to_int64q1(x.sleef_value) != 0;
@@ -447,6 +488,18 @@ from_quad<npy_byte>(quad_value x, QuadBackendType backend)
     }
 }
 
+template <>
+inline npy_ubyte
+from_quad<npy_ubyte>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_ubyte)Sleef_cast_to_uint64q1(x.sleef_value);
+    }
+    else {
+        return (npy_ubyte)x.longdouble_value;
+    }
+}
+
 template <>
 inline npy_short
 from_quad<npy_short>(quad_value x, QuadBackendType backend)
@@ -543,6 +596,18 @@ from_quad<npy_ulonglong>(quad_value x, QuadBackendType backend)
     }
 }
 
+template <>
+inline npy_half
+from_quad<my_npy_half>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return npy_double_to_half(Sleef_cast_to_doubleq1(x.sleef_value));
+    }
+    else {
+        return npy_double_to_half((double)x.longdouble_value);
+    }
+}
+
 template <>
 inline float
 from_quad<float>(quad_value x, QuadBackendType backend)
@@ -611,8 +676,8 @@ quad_to_numpy_strided_loop_unaligned(PyArrayMethod_Context *context, char *const
         quad_value in_val;
         memcpy(&in_val, in_ptr, elem_size);
 
-        T out_val = from_quad<T>(in_val, backend);
-        memcpy(out_ptr, &out_val, sizeof(T));
+        typename NpyType<T>::TYPE out_val = from_quad<T>(in_val, backend);
+        memcpy(out_ptr, &out_val, sizeof(typename NpyType<T>::TYPE));
 
         in_ptr += strides[0];
         out_ptr += strides[1];
@@ -642,8 +707,8 @@ quad_to_numpy_strided_loop_aligned(PyArrayMethod_Context *context, char *const d
             in_val.longdouble_value = *(long double *)in_ptr;
         }
 
-        T out_val = from_quad<T>(in_val, backend);
-        *(T *)(out_ptr) = out_val;
+        typename NpyType<T>::TYPE out_val = from_quad<T>(in_val, backend);
+        *(typename NpyType<T>::TYPE *)(out_ptr) = out_val;
 
         in_ptr += strides[0];
         out_ptr += strides[1];
@@ -739,8 +804,9 @@ init_casts_internal(void)
 
     add_spec(quad2quad_spec);
 
-    add_cast_to<npy_bool>(&PyArray_BoolDType);
+    add_cast_to<my_npy_bool>(&PyArray_BoolDType);
     add_cast_to<npy_byte>(&PyArray_ByteDType);
+    add_cast_to<npy_ubyte>(&PyArray_UByteDType);
     add_cast_to<npy_short>(&PyArray_ShortDType);
     add_cast_to<npy_ushort>(&PyArray_UShortDType);
     add_cast_to<npy_int>(&PyArray_IntDType);
@@ -749,12 +815,14 @@ init_casts_internal(void)
     add_cast_to<npy_ulong>(&PyArray_ULongDType);
     add_cast_to<npy_longlong>(&PyArray_LongLongDType);
     add_cast_to<npy_ulonglong>(&PyArray_ULongLongDType);
+    add_cast_to<my_npy_half>(&PyArray_HalfDType);
     add_cast_to<float>(&PyArray_FloatDType);
     add_cast_to<double>(&PyArray_DoubleDType);
     add_cast_to<long double>(&PyArray_LongDoubleDType);
 
-    add_cast_from<npy_bool>(&PyArray_BoolDType);
+    add_cast_from<my_npy_bool>(&PyArray_BoolDType);
     add_cast_from<npy_byte>(&PyArray_ByteDType);
+    add_cast_from<npy_ubyte>(&PyArray_UByteDType);
     add_cast_from<npy_short>(&PyArray_ShortDType);
     add_cast_from<npy_ushort>(&PyArray_UShortDType);
     add_cast_from<npy_int>(&PyArray_IntDType);
@@ -763,6 +831,7 @@ init_casts_internal(void)
     add_cast_from<npy_ulong>(&PyArray_ULongDType);
     add_cast_from<npy_longlong>(&PyArray_LongLongDType);
     add_cast_from<npy_ulonglong>(&PyArray_ULongLongDType);
+    add_cast_from<my_npy_half>(&PyArray_HalfDType);
     add_cast_from<float>(&PyArray_FloatDType);
     add_cast_from<double>(&PyArray_DoubleDType);
     add_cast_from<long double>(&PyArray_LongDoubleDType);
diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py
index 1833c4e6..29f677b7 100644
--- a/quaddtype/tests/test_quaddtype.py
+++ b/quaddtype/tests/test_quaddtype.py
@@ -39,6 +39,16 @@ def test_finfo_int_constant(name, value):
     assert getattr(numpy_quaddtype, name) == value
 
 
+@pytest.mark.parametrize("dtype", ["bool", "byte", "int8", "ubyte", "uint8", "short", "int16", "ushort", "uint16", "int", "int32", "uint", "uint32", "long", "ulong", "longlong", "int64", "ulonglong", "uint64", "half", "float16", "float", "float32", "double", "float64", "longdouble"])
+def test_astype(dtype):
+    orig = np.array(1, dtype=dtype)
+    quad = orig.astype(QuadPrecDType, casting="safe")
+    back = quad.astype(dtype, casting="unsafe")
+
+    assert quad == 1
+    assert back == orig
+
+
 def test_basic_equality():
     assert QuadPrecision("12") == QuadPrecision(
         "12.0") == QuadPrecision("12.00")

From d398884d20615bb5f55e1785c1da3898eb394572 Mon Sep 17 00:00:00 2001
From: Juniper Tyree <50025784+juntyr@users.noreply.github.com>
Date: Tue, 9 Sep 2025 06:02:08 +0000
Subject: [PATCH 2/4] Test casting for all dtypes

---
 quaddtype/numpy_quaddtype/src/casts.cpp | 27 ++++++++++++-----------
 quaddtype/tests/test_quaddtype.py       | 29 +++++++++++++++++++++++--
 2 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp
index 263f4fe9..e351e608 100644
--- a/quaddtype/numpy_quaddtype/src/casts.cpp
+++ b/quaddtype/numpy_quaddtype/src/casts.cpp
@@ -151,16 +151,17 @@ quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const da
     return 0;
 }
 
-// Template magic to ensure npy_bool/npy_ubyte and npy_half/npy_ushort do not alias in templates
-struct my_npy_bool {};
-struct my_npy_half {};
+// Tag dispatching to ensure npy_bool/npy_ubyte and npy_half/npy_ushort do not alias in templates
+// see e.g. https://stackoverflow.com/q/32522279
+struct spec_npy_bool {};
+struct spec_npy_half {};
 
 template<typename T>
 struct NpyType { typedef T TYPE; };
 template<>
-struct NpyType<my_npy_bool>{ typedef npy_bool TYPE; };
+struct NpyType<spec_npy_bool>{ typedef npy_bool TYPE; };
 template<>
-struct NpyType<my_npy_half>{ typedef npy_half TYPE; };
+struct NpyType<spec_npy_half>{ typedef npy_half TYPE; };
 
 // Casting from other types to QuadDType
 
@@ -170,7 +171,7 @@ to_quad(typename NpyType<T>::TYPE x, QuadBackendType backend);
 
 template <>
 inline quad_value
-to_quad<my_npy_bool>(npy_bool x, QuadBackendType backend)
+to_quad<spec_npy_bool>(npy_bool x, QuadBackendType backend)
 {
     quad_value result;
     if (backend == BACKEND_SLEEF) {
@@ -324,7 +325,7 @@ to_quad<npy_ulonglong>(npy_ulonglong x, QuadBackendType backend)
 
 template <>
 inline quad_value
-to_quad<my_npy_half>(npy_half x, QuadBackendType backend)
+to_quad<spec_npy_half>(npy_half x, QuadBackendType backend)
 {
     quad_value result;
     if (backend == BACKEND_SLEEF) {
@@ -466,7 +467,7 @@ from_quad(quad_value x, QuadBackendType backend);
 
 template <>
 inline npy_bool
-from_quad<my_npy_bool>(quad_value x, QuadBackendType backend)
+from_quad<spec_npy_bool>(quad_value x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
         return Sleef_cast_to_int64q1(x.sleef_value) != 0;
@@ -598,7 +599,7 @@ from_quad<npy_ulonglong>(quad_value x, QuadBackendType backend)
 
 template <>
 inline npy_half
-from_quad<my_npy_half>(quad_value x, QuadBackendType backend)
+from_quad<spec_npy_half>(quad_value x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
         return npy_double_to_half(Sleef_cast_to_doubleq1(x.sleef_value));
@@ -804,7 +805,7 @@ init_casts_internal(void)
 
     add_spec(quad2quad_spec);
 
-    add_cast_to<my_npy_bool>(&PyArray_BoolDType);
+    add_cast_to<spec_npy_bool>(&PyArray_BoolDType);
     add_cast_to<npy_byte>(&PyArray_ByteDType);
     add_cast_to<npy_ubyte>(&PyArray_UByteDType);
     add_cast_to<npy_short>(&PyArray_ShortDType);
@@ -815,12 +816,12 @@ init_casts_internal(void)
     add_cast_to<npy_ulong>(&PyArray_ULongDType);
     add_cast_to<npy_longlong>(&PyArray_LongLongDType);
     add_cast_to<npy_ulonglong>(&PyArray_ULongLongDType);
-    add_cast_to<my_npy_half>(&PyArray_HalfDType);
+    add_cast_to<spec_npy_half>(&PyArray_HalfDType);
     add_cast_to<float>(&PyArray_FloatDType);
     add_cast_to<double>(&PyArray_DoubleDType);
     add_cast_to<long double>(&PyArray_LongDoubleDType);
 
-    add_cast_from<my_npy_bool>(&PyArray_BoolDType);
+    add_cast_from<spec_npy_bool>(&PyArray_BoolDType);
     add_cast_from<npy_byte>(&PyArray_ByteDType);
     add_cast_from<npy_ubyte>(&PyArray_UByteDType);
     add_cast_from<npy_short>(&PyArray_ShortDType);
@@ -831,7 +832,7 @@ init_casts_internal(void)
     add_cast_from<npy_ulong>(&PyArray_ULongDType);
     add_cast_from<npy_longlong>(&PyArray_LongLongDType);
     add_cast_from<npy_ulonglong>(&PyArray_ULongLongDType);
-    add_cast_from<my_npy_half>(&PyArray_HalfDType);
+    add_cast_from<spec_npy_half>(&PyArray_HalfDType);
     add_cast_from<float>(&PyArray_FloatDType);
     add_cast_from<double>(&PyArray_DoubleDType);
     add_cast_from<long double>(&PyArray_LongDoubleDType);
diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py
index 29f677b7..275541a0 100644
--- a/quaddtype/tests/test_quaddtype.py
+++ b/quaddtype/tests/test_quaddtype.py
@@ -39,8 +39,22 @@ def test_finfo_int_constant(name, value):
     assert getattr(numpy_quaddtype, name) == value
 
 
-@pytest.mark.parametrize("dtype", ["bool", "byte", "int8", "ubyte", "uint8", "short", "int16", "ushort", "uint16", "int", "int32", "uint", "uint32", "long", "ulong", "longlong", "int64", "ulonglong", "uint64", "half", "float16", "float", "float32", "double", "float64", "longdouble"])
-def test_astype(dtype):
+@pytest.mark.parametrize("dtype", [
+    "bool",
+    "byte", "int8", "ubyte", "uint8",
+    "short", "int16", "ushort", "uint16",
+    "int", "int32", "uint", "uint32",
+    "long", "ulong",
+    "longlong", "int64", "ulonglong", "uint64",
+    "half", "float16",
+    "float", "float32",
+    "double", "float64",
+    "longdouble", "float96", "float128",
+])
+def test_supported_astype(dtype):
+    if dtype in ("float96", "float128") and getattr(np, dtype, None) is None:
+        pytest.skip(f"{dtype} is unsupported on the current platform")
+
     orig = np.array(1, dtype=dtype)
     quad = orig.astype(QuadPrecDType, casting="safe")
     back = quad.astype(dtype, casting="unsafe")
@@ -49,6 +63,17 @@ def test_astype(dtype):
     assert back == orig
 
 
+@pytest.mark.parametrize("dtype", ["S10", "U10", "T", "V10", "datetime64[ms]", "timedelta64[ms]"])
+def test_unsupported_astype(dtype):
+    val = 1 if dtype != "V10" else b"1"
+
+    with pytest.raises(TypeError if dtype != "V10" else ValueError, match="cast"):
+        np.array(val, dtype=dtype).astype(QuadPrecDType, casting="unsafe")
+
+    with pytest.raises(TypeError if dtype != "V10" else ValueError, match="cast"):
+        np.array(QuadPrecision(1)).astype(dtype, casting="unsafe")
+
+
 def test_basic_equality():
     assert QuadPrecision("12") == QuadPrecision(
         "12.0") == QuadPrecision("12.00")

From bc9412c340378e144c0b838a66a354470b21823a Mon Sep 17 00:00:00 2001
From: Juniper Tyree <50025784+juntyr@users.noreply.github.com>
Date: Tue, 9 Sep 2025 06:11:16 +0000
Subject: [PATCH 3/4] Skip segfault test

---
 quaddtype/tests/test_quaddtype.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py
index 275541a0..4ade4e3a 100644
--- a/quaddtype/tests/test_quaddtype.py
+++ b/quaddtype/tests/test_quaddtype.py
@@ -70,7 +70,10 @@ def test_unsupported_astype(dtype):
     with pytest.raises(TypeError if dtype != "V10" else ValueError, match="cast"):
         np.array(val, dtype=dtype).astype(QuadPrecDType, casting="unsafe")
 
-    with pytest.raises(TypeError if dtype != "V10" else ValueError, match="cast"):
+    if dtype == "V10":
+        pytest.xfail("cast from QuadPrecision to V10 segfaults")
+
+    with pytest.raises(TypeError, match="cast"):
         np.array(QuadPrecision(1)).astype(dtype, casting="unsafe")
 
 

From 4f6fbe6e92816937bcc9eef9cf2976af2e084dbb Mon Sep 17 00:00:00 2001
From: Juniper Tyree <50025784+juntyr@users.noreply.github.com>
Date: Tue, 9 Sep 2025 06:25:52 +0000
Subject: [PATCH 4/4] More segfaults?

---
 quaddtype/tests/test_quaddtype.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py
index 4ade4e3a..1319ac86 100644
--- a/quaddtype/tests/test_quaddtype.py
+++ b/quaddtype/tests/test_quaddtype.py
@@ -65,13 +65,11 @@ def test_supported_astype(dtype):
 
 @pytest.mark.parametrize("dtype", ["S10", "U10", "T", "V10", "datetime64[ms]", "timedelta64[ms]"])
 def test_unsupported_astype(dtype):
-    val = 1 if dtype != "V10" else b"1"
-
-    with pytest.raises(TypeError if dtype != "V10" else ValueError, match="cast"):
-        np.array(val, dtype=dtype).astype(QuadPrecDType, casting="unsafe")
-
     if dtype == "V10":
-        pytest.xfail("cast from QuadPrecision to V10 segfaults")
+        pytest.xfail("casts to and from V10 segfault")
+
+    with pytest.raises(TypeError, match="cast"):
+        np.array(1, dtype=dtype).astype(QuadPrecDType, casting="unsafe")
 
     with pytest.raises(TypeError, match="cast"):
         np.array(QuadPrecision(1)).astype(dtype, casting="unsafe")