diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp
index 9b842c5f..e50ba7ee 100644
--- a/quaddtype/numpy_quaddtype/src/casts.cpp
+++ b/quaddtype/numpy_quaddtype/src/casts.cpp
@@ -36,31 +36,39 @@ quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
                                  QuadPrecDTypeObject *given_descrs[2],
                                  QuadPrecDTypeObject *loop_descrs[2], npy_intp *view_offset)
 {
-    NPY_CASTING casting = NPY_NO_CASTING;
-
     Py_INCREF(given_descrs[0]);
     loop_descrs[0] = given_descrs[0];
 
     if (given_descrs[1] == NULL) {
         Py_INCREF(given_descrs[0]);
         loop_descrs[1] = given_descrs[0];
+        *view_offset = 0;
+        return NPY_NO_CASTING;
     }
-    else {
-        Py_INCREF(given_descrs[1]);
-        loop_descrs[1] = given_descrs[1];
-        if (given_descrs[0]->backend != given_descrs[1]->backend) {
-            casting = NPY_UNSAFE_CASTING;
+
+    Py_INCREF(given_descrs[1]);
+    loop_descrs[1] = given_descrs[1];
+
+    if (given_descrs[0]->backend != given_descrs[1]->backend) {
+        // Different backends require an actual conversion; no view is possible
+        *view_offset = NPY_MIN_INTP;
+        if (given_descrs[0]->backend == BACKEND_SLEEF) {
+            // SLEEF -> long double may lose precision
+            return NPY_SAME_KIND_CASTING;
         }
+        // long double -> SLEEF preserves the value exactly
+        return NPY_SAFE_CASTING;
     }
 
     *view_offset = 0;
-    return casting;
+    return NPY_NO_CASTING;
 }
 
+template <bool aligned>
 static int
-quad_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
-                                    npy_intp const dimensions[], npy_intp const strides[],
-                                    void *NPY_UNUSED(auxdata))
+quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                          npy_intp const dimensions[], npy_intp const strides[],
+                          void *NPY_UNUSED(auxdata))
 {
     npy_intp N = dimensions[0];
     char *in_ptr = data[0];
@@ -70,93 +78,56 @@ quad_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const
     QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0];
     QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
+    QuadBackendType backend_in = descr_in->backend;
+    QuadBackendType backend_out = descr_out->backend;
 
     // inter-backend casting
-    if (descr_in->backend != descr_out->backend) {
+    if (backend_in != backend_out) {
         while (N--) {
-            quad_value in_val, out_val;
-            if (descr_in->backend == BACKEND_SLEEF) {
-                memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad));
-                out_val.longdouble_value = Sleef_cast_to_doubleq1(in_val.sleef_value);
+            quad_value in_val;
+            load_quad<aligned>(in_ptr, backend_in, &in_val);
+            quad_value out_val;
+            if (backend_in == BACKEND_SLEEF)
+            {
+                out_val.longdouble_value = static_cast<long double>(cast_sleef_to_double(in_val.sleef_value));
             }
-            else {
-                memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double));
-                out_val.sleef_value = Sleef_cast_from_doubleq1(in_val.longdouble_value);
+            else
+            {
+                long double ld = in_val.longdouble_value;
+                if (std::isnan(ld)) {
+                    out_val.sleef_value = (!ld_signbit(&ld)) ? QUAD_PRECISION_NAN : QUAD_PRECISION_NEG_NAN;
+                }
+                else if (std::isinf(ld)) {
+                    out_val.sleef_value = (ld > 0) ? QUAD_PRECISION_INF : QUAD_PRECISION_NINF;
+                }
+                else
+                {
+                    // memcpy sidesteps compiler-optimization and ABI issues with __float128 on x86-64;
+                    // it is not expensive, since for a fixed size the compiler can lower it to a movq
+                    Sleef_quad temp = Sleef_cast_from_doubleq1(static_cast<double>(ld));
+                    std::memcpy(&out_val.sleef_value, &temp, sizeof(Sleef_quad));
+                }
             }
-            memcpy(out_ptr, &out_val,
-                   (descr_out->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad)
-                                                         : sizeof(long double));
+
+            store_quad<aligned>(out_ptr, &out_val, backend_out);
 
             in_ptr += in_stride;
             out_ptr += out_stride;
         }
-
         return 0;
     }
 
-    size_t elem_size =
-            (descr_in->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
-
-    while (N--) {
-        memcpy(out_ptr, in_ptr, elem_size);
+    // same backend: direct copy
+    // same_value casting not needed here, as the values are identical
+    while (N--) {
+        quad_value val;
+        load_quad<aligned>(in_ptr, backend_in, &val);
+        store_quad<aligned>(out_ptr, &val, backend_out);
         in_ptr += in_stride;
         out_ptr += out_stride;
     }
 
     return 0;
 }
 
-static int
-quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
-                                  npy_intp const dimensions[], npy_intp const strides[],
-                                  void *NPY_UNUSED(auxdata))
-{
-    npy_intp N = dimensions[0];
-    char *in_ptr = data[0];
-    char *out_ptr = data[1];
-    npy_intp in_stride = strides[0];
-    npy_intp out_stride = strides[1];
-
-    QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0];
-    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
-
-    // inter-backend casting
-    if (descr_in->backend != descr_out->backend) {
-        if (descr_in->backend == BACKEND_SLEEF) {
-            while (N--) {
-                Sleef_quad in_val = *(Sleef_quad *)in_ptr;
-                *(long double *)out_ptr = Sleef_cast_to_doubleq1(in_val);
-                in_ptr += in_stride;
-                out_ptr += out_stride;
-            }
-        }
-        else {
-            while (N--) {
-                long double in_val = *(long double *)in_ptr;
-                *(Sleef_quad *)out_ptr = Sleef_cast_from_doubleq1(in_val);
-                in_ptr += in_stride;
-                out_ptr += out_stride;
-            }
-        }
-
-        return 0;
-    }
-
-    if (descr_in->backend == BACKEND_SLEEF) {
-        while (N--) {
-            *(Sleef_quad *)out_ptr = *(Sleef_quad *)in_ptr;
-            in_ptr += in_stride;
-            out_ptr += out_stride;
-        }
-    }
-    else {
-        while (N--) {
-            *(long double *)out_ptr = *(long double *)in_ptr;
-            in_ptr += in_stride;
-            out_ptr += out_stride;
-        }
-    }
-
-    return 0;
-}
 
 static NPY_CASTING
 void_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
@@ -277,7 +248,7 @@ unicode_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
             return -1;
         }
 
-        store_quad(out_ptr, out_val, backend);
+        store_quad(out_ptr, &out_val, backend);
 
         in_ptr += in_stride;
         out_ptr += out_stride;
@@ -419,7 +390,8 @@ quad_to_unicode_loop(PyArrayMethod_Context *context, char *const data[],
     npy_intp unicode_size_chars = descrs[1]->elsize / 4;
 
     while (N--) {
-        quad_value in_val = load_quad(in_ptr, backend);
+        quad_value in_val;
+        load_quad(in_ptr, backend, &in_val);
 
         // Convert to Sleef_quad for Dragon4
         Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend);
@@ -551,7 +523,7 @@ bytes_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
             return -1;
         }
 
-        store_quad(out_ptr, out_val, backend);
+        store_quad(out_ptr, &out_val, backend);
 
         in_ptr += in_stride;
         out_ptr += out_stride;
@@ -612,7 +584,8 @@ quad_to_bytes_loop(PyArrayMethod_Context *context, char *const data[],
     npy_intp bytes_size = descrs[1]->elsize;
 
     while (N--) {
-        quad_value in_val = load_quad(in_ptr, backend);
+        quad_value in_val;
+        load_quad(in_ptr, backend, &in_val);
         Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend);
         PyObject *py_str = quad_to_string_adaptive(&sleef_val, bytes_size);
         if (py_str == NULL) {
@@ -708,7 +681,7 @@ stringdtype_to_quad_strided_loop(PyArrayMethod_Context *context, char *const dat
             return -1;
         }
 
-        store_quad(out_ptr, out_val, backend);
+        store_quad(out_ptr, &out_val, backend);
 
         in_ptr += in_stride;
         out_ptr += out_stride;
@@ -765,7 +738,8 @@ quad_to_stringdtype_strided_loop(PyArrayMethod_Context *context, char *const dat
     npy_string_allocator *allocator = NpyString_acquire_allocator(str_descr);
 
     while (N--) {
-        quad_value in_val = load_quad(in_ptr, backend);
+        quad_value in_val;
+        load_quad(in_ptr, backend, &in_val);
         Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend);
 
         // Get string representation with adaptive notation
@@ -976,11 +950,21 @@ inline quad_value
 to_quad(npy_half x, QuadBackendType backend)
 {
     quad_value result;
+    double d = npy_half_to_double(x);
     if (backend == BACKEND_SLEEF) {
-        result.sleef_value = Sleef_cast_from_doubleq1(npy_half_to_double(x));
+        if (std::isnan(d)) {
+            result.sleef_value = std::signbit(d) ? QUAD_PRECISION_NEG_NAN : QUAD_PRECISION_NAN;
+        }
+        else if (std::isinf(d)) {
+            result.sleef_value = (d > 0) ? QUAD_PRECISION_INF : QUAD_PRECISION_NINF;
+        }
+        else {
+            Sleef_quad temp = Sleef_cast_from_doubleq1(d);
+            std::memcpy(&result.sleef_value, &temp, sizeof(Sleef_quad));
+        }
     }
     else {
-        result.longdouble_value = (long double)npy_half_to_double(x);
+        result.longdouble_value = (long double)d;
     }
     return result;
 }
@@ -990,8 +974,18 @@ inline quad_value
 to_quad(float x, QuadBackendType backend)
 {
     quad_value result;
-    if (backend == BACKEND_SLEEF) {
-        result.sleef_value = Sleef_cast_from_doubleq1(x);
+    if (backend == BACKEND_SLEEF)
+    {
+        if (std::isnan(x)) {
+            result.sleef_value = std::signbit(x) ? QUAD_PRECISION_NEG_NAN : QUAD_PRECISION_NAN;
+        }
+        else if (std::isinf(x)) {
+            result.sleef_value = (x > 0) ? QUAD_PRECISION_INF : QUAD_PRECISION_NINF;
+        }
+        else {
+            Sleef_quad temp = Sleef_cast_from_doubleq1(static_cast<double>(x));
+            std::memcpy(&result.sleef_value, &temp, sizeof(Sleef_quad));
+        }
     }
     else {
         result.longdouble_value = (long double)x;
@@ -1004,8 +998,18 @@ inline quad_value
 to_quad(double x, QuadBackendType backend)
 {
     quad_value result;
-    if (backend == BACKEND_SLEEF) {
-        result.sleef_value = Sleef_cast_from_doubleq1(x);
+    if (backend == BACKEND_SLEEF)
+    {
+        if (std::isnan(x)) {
+            result.sleef_value = std::signbit(x) ? QUAD_PRECISION_NEG_NAN : QUAD_PRECISION_NAN;
+        }
+        else if (std::isinf(x)) {
+            result.sleef_value = (x > 0) ? QUAD_PRECISION_INF : QUAD_PRECISION_NINF;
+        }
+        else {
+            Sleef_quad temp = Sleef_cast_from_doubleq1(x);
+            std::memcpy(&result.sleef_value, &temp, sizeof(Sleef_quad));
+        }
     }
     else {
         result.longdouble_value = (long double)x;
@@ -1018,8 +1022,18 @@ inline quad_value
 to_quad(long double x, QuadBackendType backend)
 {
     quad_value result;
-    if (backend == BACKEND_SLEEF) {
-        result.sleef_value = Sleef_cast_from_doubleq1(x);
+    if (backend == BACKEND_SLEEF)
+    {
+        if (std::isnan(x)) {
+            result.sleef_value = std::signbit(x) ? QUAD_PRECISION_NEG_NAN : QUAD_PRECISION_NAN;
+        }
+        else if (std::isinf(x)) {
+            result.sleef_value = (x > 0) ? QUAD_PRECISION_INF : QUAD_PRECISION_NINF;
+        }
+        else {
+            Sleef_quad temp = Sleef_cast_from_doubleq1(static_cast<double>(x));
+            std::memcpy(&result.sleef_value, &temp, sizeof(Sleef_quad));
+        }
     }
     else {
         result.longdouble_value = x;
@@ -1111,188 +1125,188 @@ numpy_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const d
 
 template <typename T>
 static inline typename NpyType<T>::TYPE
-from_quad(quad_value x, QuadBackendType backend);
+from_quad(const quad_value *x, QuadBackendType backend);
 
 template <>
 inline npy_bool
-from_quad<npy_bool>(quad_value x, QuadBackendType backend)
+from_quad<npy_bool>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return Sleef_cast_to_int64q1(x.sleef_value) != 0;
+        return Sleef_cast_to_int64q1(x->sleef_value) != 0;
     }
     else {
-        return x.longdouble_value != 0;
+        return x->longdouble_value != 0;
     }
 }
 
 template <>
 inline npy_byte
-from_quad<npy_byte>(quad_value x, QuadBackendType backend)
+from_quad<npy_byte>(const quad_value *x, QuadBackendType backend)
 {
     // runtime warnings often come from casting NaN or inf values;
     // these casts are used by ops in several places, which leads to warnings.
     // A fix could catch those cases and return the corresponding type's value without casting.
     if (backend == BACKEND_SLEEF) {
-        return (npy_byte)Sleef_cast_to_int64q1(x.sleef_value);
+        return (npy_byte)Sleef_cast_to_int64q1(x->sleef_value);
     }
     else {
-        return (npy_byte)x.longdouble_value;
+        return (npy_byte)x->longdouble_value;
    }
 }
 
 template <>
 inline npy_ubyte
-from_quad<npy_ubyte>(quad_value x, QuadBackendType backend)
+from_quad<npy_ubyte>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return (npy_ubyte)Sleef_cast_to_uint64q1(x.sleef_value);
+        return (npy_ubyte)Sleef_cast_to_uint64q1(x->sleef_value);
     }
     else {
-        return (npy_ubyte)x.longdouble_value;
     }
 }
 
 template <>
 inline npy_short
-from_quad<npy_short>(quad_value x, QuadBackendType backend)
+from_quad<npy_short>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return (npy_short)Sleef_cast_to_int64q1(x.sleef_value);
+        return (npy_short)Sleef_cast_to_int64q1(x->sleef_value);
     }
     else {
-        return (npy_short)x.longdouble_value;
+        return (npy_short)x->longdouble_value;
     }
 }
 
 template <>
 inline npy_ushort
-from_quad<npy_ushort>(quad_value x, QuadBackendType backend)
+from_quad<npy_ushort>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return (npy_ushort)Sleef_cast_to_uint64q1(x.sleef_value);
+        return (npy_ushort)Sleef_cast_to_uint64q1(x->sleef_value);
     }
     else {
-        return (npy_ushort)x.longdouble_value;
+        return (npy_ushort)x->longdouble_value;
     }
 }
 
 template <>
 inline npy_int
-from_quad<npy_int>(quad_value x, QuadBackendType backend)
+from_quad<npy_int>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return (npy_int)Sleef_cast_to_int64q1(x.sleef_value);
+        return (npy_int)Sleef_cast_to_int64q1(x->sleef_value);
     }
     else {
-        return (npy_int)x.longdouble_value;
+        return (npy_int)x->longdouble_value;
     }
 }
 
 template <>
 inline npy_uint
-from_quad<npy_uint>(quad_value x, QuadBackendType backend)
+from_quad<npy_uint>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return (npy_uint)Sleef_cast_to_uint64q1(x.sleef_value);
+        return (npy_uint)Sleef_cast_to_uint64q1(x->sleef_value);
     }
     else {
-        return (npy_uint)x.longdouble_value;
+        return (npy_uint)x->longdouble_value;
     }
 }
 
 template <>
 inline npy_long
-from_quad<npy_long>(quad_value x, QuadBackendType backend)
+from_quad<npy_long>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return (npy_long)Sleef_cast_to_int64q1(x.sleef_value);
+        return (npy_long)Sleef_cast_to_int64q1(x->sleef_value);
     }
     else {
-        return (npy_long)x.longdouble_value;
+        return (npy_long)x->longdouble_value;
     }
 }
 
 template <>
 inline npy_ulong
-from_quad<npy_ulong>(quad_value x, QuadBackendType backend)
+from_quad<npy_ulong>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return (npy_ulong)Sleef_cast_to_uint64q1(x.sleef_value);
+        return (npy_ulong)Sleef_cast_to_uint64q1(x->sleef_value);
     }
     else {
-        return (npy_ulong)x.longdouble_value;
+        return (npy_ulong)x->longdouble_value;
     }
 }
 
 template <>
 inline npy_longlong
-from_quad<npy_longlong>(quad_value x, QuadBackendType backend)
+from_quad<npy_longlong>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return Sleef_cast_to_int64q1(x.sleef_value);
+        return Sleef_cast_to_int64q1(x->sleef_value);
     }
     else {
-        return (npy_longlong)x.longdouble_value;
+        return (npy_longlong)x->longdouble_value;
     }
 }
 
 template <>
 inline npy_ulonglong
-from_quad<npy_ulonglong>(quad_value x, QuadBackendType backend)
+from_quad<npy_ulonglong>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return Sleef_cast_to_uint64q1(x.sleef_value);
+        return Sleef_cast_to_uint64q1(x->sleef_value);
     }
     else {
-        return (npy_ulonglong)x.longdouble_value;
+        return (npy_ulonglong)x->longdouble_value;
     }
 }
 
 template <>
 inline npy_half
-from_quad<npy_half>(quad_value x, QuadBackendType backend)
+from_quad<npy_half>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return npy_double_to_half(Sleef_cast_to_doubleq1(x.sleef_value));
+        return npy_double_to_half(cast_sleef_to_double(x->sleef_value));
     }
     else {
-        return npy_double_to_half((double)x.longdouble_value);
+        return npy_double_to_half((double)x->longdouble_value);
     }
 }
 
 template <>
 inline float
-from_quad<float>(quad_value x, QuadBackendType backend)
+from_quad<float>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return (float)Sleef_cast_to_doubleq1(x.sleef_value);
+        return (float)cast_sleef_to_double(x->sleef_value);
     }
     else {
-        return (float)x.longdouble_value;
+        return (float)x->longdouble_value;
     }
 }
 
 template <>
 inline double
-from_quad<double>(quad_value x, QuadBackendType backend)
+from_quad<double>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return Sleef_cast_to_doubleq1(x.sleef_value);
+        return cast_sleef_to_double(x->sleef_value);
     }
     else {
-        return (double)x.longdouble_value;
+        return (double)x->longdouble_value;
     }
 }
 
 template <>
 inline long double
-from_quad<long double>(quad_value x, QuadBackendType backend)
+from_quad<long double>(const quad_value *x, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return (long double)Sleef_cast_to_doubleq1(x.sleef_value);
+        return (long double)cast_sleef_to_double(x->sleef_value);
     }
     else {
-        return x.longdouble_value;
+        return x->longdouble_value;
     }
 }
 
@@ -1328,7 +1342,7 @@ quad_to_numpy_strided_loop_unaligned(PyArrayMethod_Context *context, char *const
         quad_value in_val;
         memcpy(&in_val, in_ptr, elem_size);
 
-        typename NpyType<T>::TYPE out_val = from_quad<T>(in_val, backend);
+        typename NpyType<T>::TYPE out_val = from_quad<T>(&in_val, backend);
         memcpy(out_ptr, &out_val, sizeof(typename NpyType<T>::TYPE));
 
         in_ptr += strides[0];
@@ -1359,7 +1373,7 @@ quad_to_numpy_strided_loop_aligned(PyArrayMethod_Context *context, char *const d
             in_val.longdouble_value = *(long double *)in_ptr;
         }
 
-        typename NpyType<T>::TYPE out_val = from_quad<T>(in_val, backend);
+        typename NpyType<T>::TYPE out_val = from_quad<T>(&in_val, backend);
         *(typename NpyType<T>::TYPE *)(out_ptr) = out_val;
 
         in_ptr += strides[0];
@@ -1440,8 +1454,8 @@ init_casts_internal(void)
     PyArray_DTypeMeta **quad2quad_dtypes = new PyArray_DTypeMeta *[2]{nullptr, nullptr};
     PyType_Slot *quad2quad_slots = new PyType_Slot[4]{
             {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors},
-            {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop_aligned},
-            {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop_unaligned},
+            {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop<true>},
+            {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop<false>},
             {0, nullptr}};
 
     PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{
diff --git a/quaddtype/numpy_quaddtype/src/constants.hpp b/quaddtype/numpy_quaddtype/src/constants.hpp
index 25bcabe9..e9733f04 100644
--- a/quaddtype/numpy_quaddtype/src/constants.hpp
+++ b/quaddtype/numpy_quaddtype/src/constants.hpp
@@ -12,10 +12,12 @@ extern "C" {
 
 // Quad precision constants using sleef_q macro
 #define QUAD_PRECISION_ZERO sleef_q(+0x0000000000000LL, 0x0000000000000000ULL, -16383)
+#define QUAD_PRECISION_NEG_ZERO sleef_q(-0x0000000000000LL, 0x0000000000000000ULL, -16383)
 #define QUAD_PRECISION_ONE sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 0)
 #define QUAD_PRECISION_INF sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 16384)
 #define QUAD_PRECISION_NINF sleef_q(-0x1000000000000LL, 0x0000000000000000ULL, 16384)
 #define QUAD_PRECISION_NAN sleef_q(+0x1ffffffffffffLL, 0xffffffffffffffffULL, 16384)
+#define QUAD_PRECISION_NEG_NAN sleef_q(-0x1ffffffffffffLL, 0xffffffffffffffffULL, 16384)
 
 // Additional constants
 #define QUAD_PRECISION_MAX_FINITE SLEEF_QUAD_MAX
diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c
index b47e292f..02818207 100644
--- a/quaddtype/numpy_quaddtype/src/dragon4.c
+++ b/quaddtype/numpy_quaddtype/src/dragon4.c
@@ -974,22 +974,22 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, npy_uint32
 
     DEBUG_ASSERT(bufferSize > 0);
 
+    /* Print the sign for both inf and nan values */
+    if (signbit == '+') {
+        if (pos < maxPrintLen - 1) {
+            buffer[pos++] = '+';
+        }
+    }
+    else if (signbit == '-') {
+        if (pos < maxPrintLen - 1) {
+            buffer[pos++] = '-';
+        }
+    }
+
     /* Check for infinity */
     if (mantissa == 0) {
         npy_uint32 printLen;
 
-        /* only print sign for inf values (though nan can have a sign set) */
-        if (signbit == '+') {
-            if (pos < maxPrintLen - 1) {
-                buffer[pos++] = '+';
-            }
-        }
-        else if (signbit == '-') {
-            if (pos < maxPrintLen - 1) {
-                buffer[pos++] = '-';
-            }
-        }
-
         /* copy and make sure the buffer is terminated */
         printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos;
         memcpy(buffer + pos, "inf", printLen);
diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp
index df9c4dd5..6649e7a4 100644
--- a/quaddtype/numpy_quaddtype/src/ops.hpp
+++ b/quaddtype/numpy_quaddtype/src/ops.hpp
@@ -1,12 +1,7 @@
 #include <sleef.h>
 #include <sleefquad.h>
 #include <cmath>
-
-// Quad Constants, generated with qutil
-#define QUAD_ZERO sleef_q(+0x0000000000000LL, 0x0000000000000000ULL, -16383)
-#define QUAD_ONE sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 0)
-#define QUAD_POS_INF sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 16384)
-#define QUAD_NAN sleef_q(+0x1ffffffffffffLL, 0xffffffffffffffffULL, 16384)
+#include "constants.hpp"
 
 // Unary Quad Operations
 typedef Sleef_quad (*unary_op_quad_def)(const Sleef_quad *);
@@ -28,7 +23,7 @@ quad_positive(const Sleef_quad *op)
 static inline Sleef_quad
 quad_sign(const Sleef_quad *op)
 {
-    int sign = Sleef_icmpq1(*op, QUAD_ZERO);
+    int sign = Sleef_icmpq1(*op, QUAD_PRECISION_ZERO);
     // sign(x=NaN) = x; otherwise sign(x) in { -1.0; 0.0; +1.0 }
     return Sleef_iunordq1(*op, *op) ? *op : Sleef_cast_from_int64q1(sign);
 }
@@ -96,20 +91,20 @@ quad_cbrt(const Sleef_quad *op)
     if (Sleef_iunordq1(*op, *op)) {
         return *op;  // NaN
     }
-    if (Sleef_icmpeqq1(*op, QUAD_ZERO)) {
+    if (Sleef_icmpeqq1(*op, QUAD_PRECISION_ZERO)) {
         return *op;  // ±0
     }
     // Check if op is ±inf: isinf(x) = abs(x) == inf
-    if (Sleef_icmpeqq1(Sleef_fabsq1(*op), QUAD_POS_INF)) {
+    if (Sleef_icmpeqq1(Sleef_fabsq1(*op), QUAD_PRECISION_INF)) {
         return *op;  // ±inf
     }
 
     // Compute 1/3 as a quad precision constant
     Sleef_quad three = Sleef_cast_from_int64q1(3);
-    Sleef_quad one_third = Sleef_divq1_u05(QUAD_ONE, three);
+    Sleef_quad one_third = Sleef_divq1_u05(QUAD_PRECISION_ONE, three);
 
     // Handle negative values: cbrt(-x) = -cbrt(x)
-    if (Sleef_icmpltq1(*op, QUAD_ZERO)) {
+    if (Sleef_icmpltq1(*op, QUAD_PRECISION_ZERO)) {
         Sleef_quad abs_val = Sleef_fabsq1(*op);
         Sleef_quad result = Sleef_powq1_u10(abs_val, one_third);
         return Sleef_negq1(result);
@@ -128,7 +123,7 @@ quad_square(const Sleef_quad *op)
 static inline Sleef_quad
 quad_reciprocal(const Sleef_quad *op)
 {
-    return Sleef_divq1_u05(QUAD_ONE, *op);
+    return Sleef_divq1_u05(QUAD_PRECISION_ONE, *op);
 }
 
 static inline Sleef_quad
@@ -494,23 +489,23 @@ quad_signbit(const Sleef_quad *op)
 {
     // FIXME @juntyr or @SwayamInSync: replace with binary implementation
     // once we test big and little endian in CI
-    Sleef_quad one_signed = Sleef_copysignq1(QUAD_ONE, *op);
+    Sleef_quad one_signed = Sleef_copysignq1(QUAD_PRECISION_ONE, *op);
     // signbit(x) = 1 iff copysign(1, x) == -1
-    return Sleef_icmpltq1(one_signed, QUAD_ZERO);
+    return Sleef_icmpltq1(one_signed, QUAD_PRECISION_ZERO);
 }
 
 static inline npy_bool
 quad_isfinite(const Sleef_quad *op)
 {
     // isfinite(x) = abs(x) < inf
-    return Sleef_icmpltq1(Sleef_fabsq1(*op), QUAD_POS_INF);
+    return Sleef_icmpltq1(Sleef_fabsq1(*op), QUAD_PRECISION_INF);
 }
 
 static inline npy_bool
 quad_isinf(const Sleef_quad *op)
 {
     // isinf(x) = abs(x) == inf
-    return Sleef_icmpeqq1(Sleef_fabsq1(*op), QUAD_POS_INF);
+    return Sleef_icmpeqq1(Sleef_fabsq1(*op), QUAD_PRECISION_INF);
 }
 
 static inline npy_bool
@@ -585,13 +580,13 @@ quad_floor_divide(const Sleef_quad *a, const Sleef_quad *b)
 
     // inf / finite_nonzero or -inf / finite_nonzero -> NaN
     // But inf / 0 -> inf
-    if (quad_isinf(a) && quad_isfinite(b) && !Sleef_icmpeqq1(*b, QUAD_ZERO)) {
-        return QUAD_NAN;
+    if (quad_isinf(a) && quad_isfinite(b) && !Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) {
+        return QUAD_PRECISION_NAN;
     }
 
     // 0 / 0 (including -0.0 / 0.0, 0.0 / -0.0, -0.0 / -0.0) -> NaN
-    if (Sleef_icmpeqq1(*a, QUAD_ZERO) && Sleef_icmpeqq1(*b, QUAD_ZERO)) {
-        return QUAD_NAN;
+    if (Sleef_icmpeqq1(*a, QUAD_PRECISION_ZERO) && Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) {
+        return QUAD_PRECISION_NAN;
     }
 
     Sleef_quad quotient = Sleef_divq1_u05(*a, *b);
@@ -600,9 +595,9 @@ quad_floor_divide(const Sleef_quad *a, const Sleef_quad *b)
     // floor_divide semantics: when result is -0.0 from non-zero numerator, convert to -1.0
     // This happens when: (negative & non-zero)/+inf, (positive & non-zero)/-inf
     // But NOT when numerator is ±0.0 (then result stays as ±0.0)
-    if (Sleef_icmpeqq1(result, QUAD_ZERO) && quad_signbit(&result) &&
-        !Sleef_icmpeqq1(*a, QUAD_ZERO)) {
-        return Sleef_negq1(QUAD_ONE);  // -1.0
+    if (Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO) && quad_signbit(&result) &&
+        !Sleef_icmpeqq1(*a, QUAD_PRECISION_ZERO)) {
+        return Sleef_negq1(QUAD_PRECISION_ONE);  // -1.0
     }
 
     return result;
@@ -618,8 +613,8 @@ static inline Sleef_quad
 quad_mod(const Sleef_quad *a, const Sleef_quad *b)
 {
     // division by zero
-    if (Sleef_icmpeqq1(*b, QUAD_ZERO)) {
-        return QUAD_NAN;
+    if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) {
+        return QUAD_PRECISION_NAN;
     }
 
     // NaN inputs
@@ -629,7 +624,7 @@ quad_mod(const Sleef_quad *a, const Sleef_quad *b)
 
     // infinity dividend -> NaN
     if (quad_isinf(a)) {
-        return QUAD_NAN;
+        return QUAD_PRECISION_NAN;
     }
 
     // finite % inf
@@ -649,12 +644,12 @@ quad_mod(const Sleef_quad *a, const Sleef_quad *b)
 
     // Handle zero result sign: when result is exactly zero,
     // it should have the same sign as the divisor (NumPy convention)
-    if (Sleef_icmpeqq1(result, QUAD_ZERO)) {
-        if (Sleef_icmpltq1(*b, QUAD_ZERO)) {
-            return Sleef_negq1(QUAD_ZERO);  // -0.0
+    if (Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO)) {
+        if (Sleef_icmpltq1(*b, QUAD_PRECISION_ZERO)) {
+            return Sleef_negq1(QUAD_PRECISION_ZERO);  // -0.0
         }
         else {
-            return QUAD_ZERO;  // +0.0
+            return QUAD_PRECISION_ZERO;  // +0.0
         }
     }
 
@@ -670,13 +665,13 @@ quad_fmod(const Sleef_quad *a, const Sleef_quad *b)
     }
 
     // Division by zero -> NaN
-    if (Sleef_icmpeqq1(*b, QUAD_ZERO)) {
-        return QUAD_NAN;
+    if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) {
+        return QUAD_PRECISION_NAN;
     }
 
     // Infinity dividend -> NaN
     if (quad_isinf(a)) {
-        return QUAD_NAN;
+        return QUAD_PRECISION_NAN;
     }
 
     // Finite % infinity -> return dividend (same as a)
@@ -687,14 +682,14 @@ quad_fmod(const Sleef_quad *a, const Sleef_quad *b)
     // x - trunc(x/y) * y
     Sleef_quad result = Sleef_fmodq1(*a, *b);
 
-    if (Sleef_icmpeqq1(result, QUAD_ZERO)) {
+    if (Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO)) {
         // Preserve sign of dividend (first argument)
-        Sleef_quad sign_test = Sleef_copysignq1(QUAD_ONE, *a);
-        if (Sleef_icmpltq1(sign_test, QUAD_ZERO)) {
-            return Sleef_negq1(QUAD_ZERO);  // -0.0
+        Sleef_quad sign_test = Sleef_copysignq1(QUAD_PRECISION_ONE, *a);
+        if (Sleef_icmpltq1(sign_test, QUAD_PRECISION_ZERO)) {
+            return Sleef_negq1(QUAD_PRECISION_ZERO);  // -0.0
         }
         else {
-            return QUAD_ZERO;  // +0.0
+            return QUAD_PRECISION_ZERO;  // +0.0
         }
     }
 
@@ -716,8 +711,8 @@ quad_minimum(const Sleef_quad *in1, const Sleef_quad *in2)
         return Sleef_iunordq1(*in1, *in1) ? *in1 : *in2;
     }
     // minimum(-0.0, +0.0) = -0.0
-    if (Sleef_icmpeqq1(*in1, QUAD_ZERO) && Sleef_icmpeqq1(*in2, QUAD_ZERO)) {
-        return Sleef_icmpleq1(Sleef_copysignq1(QUAD_ONE, *in1), Sleef_copysignq1(QUAD_ONE, *in2)) ? *in1 : *in2;
+    if (Sleef_icmpeqq1(*in1, QUAD_PRECISION_ZERO) && Sleef_icmpeqq1(*in2, QUAD_PRECISION_ZERO)) {
+        return Sleef_icmpleq1(Sleef_copysignq1(QUAD_PRECISION_ONE, *in1), Sleef_copysignq1(QUAD_PRECISION_ONE, *in2)) ? *in1 : *in2;
     }
     return Sleef_fminq1(*in1, *in2);
 }
@@ -729,8 +724,8 @@ quad_maximum(const Sleef_quad *in1, const Sleef_quad *in2)
         return Sleef_iunordq1(*in1, *in1) ? *in1 : *in2;
     }
     // maximum(-0.0, +0.0) = +0.0
-    if (Sleef_icmpeqq1(*in1, QUAD_ZERO) && Sleef_icmpeqq1(*in2, QUAD_ZERO)) {
-        return Sleef_icmpgeq1(Sleef_copysignq1(QUAD_ONE, *in1), Sleef_copysignq1(QUAD_ONE, *in2)) ? *in1 : *in2;
+    if (Sleef_icmpeqq1(*in1, QUAD_PRECISION_ZERO) && Sleef_icmpeqq1(*in2, QUAD_PRECISION_ZERO)) {
+        return Sleef_icmpgeq1(Sleef_copysignq1(QUAD_PRECISION_ONE, *in1), Sleef_copysignq1(QUAD_PRECISION_ONE, *in2)) ? *in1 : *in2;
     }
     return Sleef_fmaxq1(*in1, *in2);
 }
@@ -742,8 +737,8 @@ quad_fmin(const Sleef_quad *in1, const Sleef_quad *in2)
         return Sleef_iunordq1(*in2, *in2) ? *in1 : *in2;
     }
     // fmin(-0.0, +0.0) = -0.0
-    if (Sleef_icmpeqq1(*in1, QUAD_ZERO) && Sleef_icmpeqq1(*in2, QUAD_ZERO)) {
-        return Sleef_icmpleq1(Sleef_copysignq1(QUAD_ONE, *in1), Sleef_copysignq1(QUAD_ONE, *in2)) ? *in1 : *in2;
+    if (Sleef_icmpeqq1(*in1, QUAD_PRECISION_ZERO) && Sleef_icmpeqq1(*in2, QUAD_PRECISION_ZERO)) {
+        return Sleef_icmpleq1(Sleef_copysignq1(QUAD_PRECISION_ONE, *in1), Sleef_copysignq1(QUAD_PRECISION_ONE, *in2)) ? *in1 : *in2;
     }
     return Sleef_fminq1(*in1, *in2);
 }
@@ -755,8 +750,8 @@ quad_fmax(const Sleef_quad *in1, const Sleef_quad *in2)
         return Sleef_iunordq1(*in2, *in2) ? *in1 : *in2;
     }
     // fmax(-0.0, +0.0) = +0.0
-    if (Sleef_icmpeqq1(*in1, QUAD_ZERO) && Sleef_icmpeqq1(*in2, QUAD_ZERO)) {
-        return Sleef_icmpgeq1(Sleef_copysignq1(QUAD_ONE, *in1), Sleef_copysignq1(QUAD_ONE, *in2)) ? *in1 : *in2;
+    if (Sleef_icmpeqq1(*in1, QUAD_PRECISION_ZERO) && Sleef_icmpeqq1(*in2, QUAD_PRECISION_ZERO)) {
+        return Sleef_icmpgeq1(Sleef_copysignq1(QUAD_PRECISION_ONE, *in1), Sleef_copysignq1(QUAD_PRECISION_ONE, *in2)) ? *in1 : *in2;
     }
     return Sleef_fmaxq1(*in1, *in2);
 }
@@ -786,14 +781,14 @@ quad_logaddexp(const Sleef_quad *x, const Sleef_quad *y)
 
     // Handle infinities
     // If both are -inf, result is -inf
-    Sleef_quad neg_inf = Sleef_negq1(QUAD_POS_INF);
+    Sleef_quad neg_inf = Sleef_negq1(QUAD_PRECISION_INF);
     if (Sleef_icmpeqq1(*x, neg_inf) && Sleef_icmpeqq1(*y, neg_inf)) {
         return neg_inf;
     }
 
     // If either is +inf, result is +inf
-    if (Sleef_icmpeqq1(*x, QUAD_POS_INF) || Sleef_icmpeqq1(*y, QUAD_POS_INF)) {
-        return QUAD_POS_INF;
+    if (Sleef_icmpeqq1(*x, QUAD_PRECISION_INF) || Sleef_icmpeqq1(*y, QUAD_PRECISION_INF)) {
+        return QUAD_PRECISION_INF;
     }
 
     // If one is -inf, result is the other value
@@ -828,14 +823,14 @@ quad_logaddexp2(const Sleef_quad *x, const Sleef_quad *y)
 
     // Handle infinities
     // If both are -inf, result is -inf
-    Sleef_quad neg_inf = Sleef_negq1(QUAD_POS_INF);
+    Sleef_quad neg_inf = Sleef_negq1(QUAD_PRECISION_INF);
     if (Sleef_icmpeqq1(*x, neg_inf) && Sleef_icmpeqq1(*y, neg_inf)) {
         return neg_inf;
     }
 
     // If either is +inf, result is +inf
-    if (Sleef_icmpeqq1(*x, QUAD_POS_INF) || Sleef_icmpeqq1(*y, QUAD_POS_INF)) {
-        return QUAD_POS_INF;
+    if (Sleef_icmpeqq1(*x, QUAD_PRECISION_INF) || Sleef_icmpeqq1(*y, QUAD_PRECISION_INF)) {
+        return QUAD_PRECISION_INF;
     }
 
     // If one is -inf, result is the other value
@@ -851,7 +846,7 @@ quad_logaddexp2(const Sleef_quad *x, const Sleef_quad *y)
     Sleef_quad abs_diff = Sleef_fabsq1(diff);
     Sleef_quad neg_abs_diff = Sleef_negq1(abs_diff);
     Sleef_quad exp2_term = Sleef_exp2q1_u10(neg_abs_diff);
-    Sleef_quad one_plus_exp2 = Sleef_addq1_u05(QUAD_ONE, exp2_term);
+    Sleef_quad one_plus_exp2 = Sleef_addq1_u05(QUAD_PRECISION_ONE, exp2_term);
     Sleef_quad log2_term = Sleef_log2q1_u10(one_plus_exp2);
 
     Sleef_quad max_val = Sleef_icmpgtq1(*x, *y) ? *x : *y;
@@ -867,14 +862,14 @@ quad_heaviside(const Sleef_quad *x1, const Sleef_quad *x2)
         return *x1;  // x1 is NaN, return NaN
     }
 
-    if (Sleef_icmpltq1(*x1, QUAD_ZERO)) {
-        return QUAD_ZERO;
+    if (Sleef_icmpltq1(*x1, QUAD_PRECISION_ZERO)) {
+        return QUAD_PRECISION_ZERO;
     }
-    else if (Sleef_icmpeqq1(*x1, QUAD_ZERO)) {
+    else if (Sleef_icmpeqq1(*x1, QUAD_PRECISION_ZERO)) {
         return *x2;  // When x1 == 0, return x2 (even if x2 is NaN)
     }
     else {
-        return QUAD_ONE;
+        return QUAD_PRECISION_ONE;
     }
 }
 
@@ -1025,17 +1020,17 @@ quad_spacing(const Sleef_quad *x)
 
     // Handle infinity -> NaN (numpy convention)
     if (quad_isinf(x)) {
-        return QUAD_NAN;
+        return QUAD_PRECISION_NAN;
    }
 
     // Determine direction based on sign of x
     Sleef_quad direction;
-    if (Sleef_icmpltq1(*x, QUAD_ZERO)) {
+    if (Sleef_icmpltq1(*x, QUAD_PRECISION_ZERO)) {
         // Negative: move toward -inf
-        direction = Sleef_negq1(QUAD_POS_INF);
+        direction = Sleef_negq1(QUAD_PRECISION_INF);
     }
     else {
         // Positive or zero: move toward +inf
-        direction = QUAD_POS_INF;
+        direction = QUAD_PRECISION_INF;
     }
 
     // Compute nextafter(x, direction)
@@ -1067,7 +1062,7 @@ quad_ldexp(const Sleef_quad *x, const int *exp)
     }
 
     // ±0 * 2^exp = ±0 (preserves sign of zero)
-    if (Sleef_icmpeqq1(*x, QUAD_ZERO)) {
+    if (Sleef_icmpeqq1(*x, QUAD_PRECISION_ZERO)) {
         return *x;
     }
 
@@ -1124,7 +1119,7 @@ quad_frexp(const Sleef_quad *x, int *exp)
     }
 
     // ±0 -> mantissa ±0 with exponent 0 (preserves sign of zero)
-    if (Sleef_icmpeqq1(*x, QUAD_ZERO)) {
+    if (Sleef_icmpeqq1(*x, QUAD_PRECISION_ZERO)) {
         *exp = 0;
         return *x;
     }
@@ -1587,7 +1582,7 @@ quad_is_nonzero(const Sleef_quad *a)
 {
     // A value is falsy if it's exactly zero (positive or negative)
     // NaN and inf are truthy
-    npy_bool is_zero = Sleef_icmpeqq1(*a, QUAD_ZERO);
+    npy_bool is_zero = Sleef_icmpeqq1(*a, QUAD_PRECISION_ZERO);
     return !is_zero;
 }
 
@@ -1657,4 +1652,21 @@ static inline npy_bool
 ld_logical_not(const long double *a)
 {
     return !ld_is_nonzero(a);
+}
+
+// Casting operations
+static inline double
+cast_sleef_to_double(const Sleef_quad in)
+{
+    if (quad_isnan(&in)) {
+        return quad_signbit(&in) ? -NAN : NAN;
+    }
+    if (quad_isinf(&in)) {
+        return quad_signbit(&in) ? -INFINITY : INFINITY;
+    }
+    if (Sleef_icmpeqq1(in, QUAD_PRECISION_ZERO))
+    {
+        return quad_signbit(&in) ? -0.0 : 0.0;
+    }
+    return Sleef_cast_to_doubleq1(in);
 }
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/quadblas_interface.cpp b/quaddtype/numpy_quaddtype/src/quadblas_interface.cpp
index b51851a3..9d8c7627 100644
--- a/quaddtype/numpy_quaddtype/src/quadblas_interface.cpp
+++ b/quaddtype/numpy_quaddtype/src/quadblas_interface.cpp
@@ -3,7 +3,7 @@
 #include
 
 #ifndef DISABLE_QUADBLAS
-#include "../subprojects/qblas/include/quadblas/quadblas.hpp"
+#include "quadblas/quadblas.hpp"
 #endif  // DISABLE_QUADBLAS
 
 extern "C" {
diff --git a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
index ef0fa843..2898034f 100644
--- a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
+++ b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
@@ -210,7 +210,7 @@ static PyObject *
 QuadPrecision_float(QuadPrecisionObject *self)
 {
     if (self->backend == BACKEND_SLEEF) {
-        return PyFloat_FromDouble(Sleef_cast_to_doubleq1(self->value.sleef_value));
+        return PyFloat_FromDouble(cast_sleef_to_double(self->value.sleef_value));
     }
     else {
         return PyFloat_FromDouble((double)self->value.longdouble_value);
diff --git a/quaddtype/numpy_quaddtype/src/utilities.c b/quaddtype/numpy_quaddtype/src/utilities.c
index 1842a7ff..33f07987 100644
--- a/quaddtype/numpy_quaddtype/src/utilities.c
+++ b/quaddtype/numpy_quaddtype/src/utilities.c
@@ -262,9 +262,13 @@ Sleef_quad
 quad_to_sleef_quad(const quad_value *in_val, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        return in_val->sleef_value;
+        // returning the union member directly can trigger heisenbugs;
+        // this helper is rarely used, so the extra memcpy is acceptable
+        Sleef_quad result;
+        memcpy(&result, &in_val->sleef_value, sizeof(Sleef_quad));
+        return result;
     }
     else {
-        return Sleef_cast_from_doubleq1(in_val->longdouble_value);
+        return Sleef_cast_from_doubleq1((double)(in_val->longdouble_value));
     }
 }
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/utilities.h b/quaddtype/numpy_quaddtype/src/utilities.h
index f3513328..4697d6eb 100644
--- a/quaddtype/numpy_quaddtype/src/utilities.h
+++ b/quaddtype/numpy_quaddtype/src/utilities.h
@@ -48,7 +48,7 @@ load(const char *ptr)
 // Store a value to memory, handling alignment
 template <typename T, bool Aligned>
 static inline void
-store(char *ptr, T val)
+store(char *ptr, const T &val)
 {
     if constexpr (Aligned) {
         *(T *)ptr = val;
@@ -60,29 +60,27 @@ store(char *ptr, T val)
 
 // Load quad_value from memory based on backend and alignment
 template <bool aligned>
-static inline quad_value
-load_quad(const char *ptr, QuadBackendType backend)
+static inline void
+load_quad(const char *ptr, QuadBackendType backend, quad_value *out)
 {
-    quad_value val;
     if (backend == BACKEND_SLEEF) {
-        val.sleef_value = load<Sleef_quad, aligned>(ptr);
+        out->sleef_value = load<Sleef_quad, aligned>(ptr);
     }
     else {
-        val.longdouble_value = load<long double, aligned>(ptr);
+        out->longdouble_value = load<long double, aligned>(ptr);
     }
-    return val;
 }
 
 // Store quad_value to memory based on backend and alignment
 template <bool aligned>
 static inline void
-store_quad(char *ptr, const quad_value &val, QuadBackendType backend)
+store_quad(char *ptr, const quad_value *val, QuadBackendType backend)
 {
     if (backend == BACKEND_SLEEF) {
-        store<Sleef_quad, aligned>(ptr, val.sleef_value);
+        store<Sleef_quad, aligned>(ptr, val->sleef_value);
     }
     else {
-        store<long double, aligned>(ptr, val.longdouble_value);
+        store<long double, aligned>(ptr, val->longdouble_value);
     }
 }
 
diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py
index dabe9744..d7b43687 100644
--- a/quaddtype/tests/test_quaddtype.py
+++ b/quaddtype/tests/test_quaddtype.py
@@ -5368,3 +5368,47 @@ def test_hash_backends(self, backend):
backends.""" quad_val = QuadPrecision(1.5, backend=backend) assert hash(quad_val) == hash(1.5) + +@pytest.mark.parametrize("src_backend,dst_backend", [ + ("sleef", "longdouble"), + ("longdouble", "sleef"), + ("sleef", "sleef"), + ("longdouble", "longdouble"), +]) +@pytest.mark.parametrize("value", [ + "0.0", "-0.0", "1.0", "-1.0", "3.14159265358979323846", + "inf", "-inf", "nan", "1e100", "1e-100", "-nan" +]) +def test_quad_to_quad_backend_casting(src_backend, dst_backend, value): + """Test casting between QuadPrecDType with different backends.""" + + src_arr = np.array([value], dtype=QuadPrecDType(backend=src_backend)) + dst_arr = src_arr.astype(QuadPrecDType(backend=dst_backend)) + res_arr = np.array([value], dtype=QuadPrecDType(backend=dst_backend)) + + expected_backend = 0 if dst_backend == 'sleef' else 1 + assert dst_arr.dtype.backend == expected_backend + + assert np.signbit(src_arr[0]) == np.signbit(dst_arr[0]) + if np.isnan(src_arr[0]): + assert np.isnan(dst_arr[0]) + elif np.isinf(src_arr[0]): + assert np.isinf(dst_arr[0]) + elif src_backend != dst_backend: + np.testing.assert_allclose(dst_arr, res_arr, rtol=1e-15) + else: + np.testing.assert_array_equal(dst_arr, res_arr) + +# quad -> float will be tested in same_values tests +@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64, np.longdouble]) +@pytest.mark.parametrize("val", [0.0, -0.0, float('inf'), float('-inf'), float('nan'), float("-nan")]) +def test_float_to_quad_sign_preserve(dtype, val): + """Test that special floating-point values roundtrip correctly.""" + src = np.array([val], dtype=dtype) + result = src.astype(QuadPrecDType()) + + assert np.signbit(result) == np.signbit(val), f"Sign bit failed for {dtype} with value {val}" + if np.isnan(val): + assert np.isnan(result), f"NaN failed for {dtype}" + else: + assert result == val, f"{val} failed for {dtype}" \ No newline at end of file