From 76d823f59a464e98f8fbdd8ec28b5a8bd97aaaa7 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 13:39:02 +0530
Subject: [PATCH 01/13] fixing subnormal cross-platform

---
 .../numpy_quaddtype/src/quaddtype_main.c      | 19 +++++++++++++++++++
 quaddtype/tests/test_quaddtype.py             | 10 ++++++++++
 2 files changed, 29 insertions(+)

diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index 929927a4..9e98faa3 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -71,7 +71,26 @@ get_sleef_constant(PyObject *self, PyObject *args)
         result->value.sleef_value = SLEEF_QUAD_MIN;
     }
     else if (strcmp(constant_name, "smallest_subnormal") == 0) {
+#ifdef SLEEF_QUAD_C
+        // On platforms with native __float128 support, use the correct literal
         result->value.sleef_value = SLEEF_QUAD_DENORM_MIN;
+#else
+        // On platforms without native __float128, SLEEF_QUAD_DENORM_MIN is broken
+        // Manually constructing the smallest subnormal: 1 * 2^(-16382-112) = 2^(-16494)
+        // This represents 0x0.0000000000000000000000000001p-16382
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+        struct {
+            uint64_t h, l;
+        } c;
+#else
+        struct {
+            uint64_t l, h;
+        } c;
+#endif
+        c.h = 0x0000000000000000ULL;  // exponent = 0 (subnormal), mantissa high = 0
+        c.l = 0x0000000000000001ULL;  // mantissa low = 1 (smallest possible)
+        memcpy(&result->value.sleef_value, &c, 16);
+#endif
     }
     else if (strcmp(constant_name, "bits") == 0) {
         Py_DECREF(result);
diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py
index 0a2cf3ff..1833c4e6 100644
--- a/quaddtype/tests/test_quaddtype.py
+++ b/quaddtype/tests/test_quaddtype.py
@@ -24,6 +24,16 @@ def test_finfo_constant(name):
     assert isinstance(getattr(numpy_quaddtype, name), QuadPrecision)
 
 
+def test_smallest_subnormal_value():
+    """Check that smallest_subnormal is positive and that its repr contains '6.0e-4966'."""
+    smallest_sub = numpy_quaddtype.smallest_subnormal
+    repr_str = repr(smallest_sub)
+    
+    # Smallest quad subnormal is 2**-16494 (~6.5e-4966); expected repr: QuadPrecision('6.0e-4966', backend='sleef')
+    assert "6.0e-4966" in repr_str, f"Expected '6.0e-4966' in repr, got {repr_str}"
+    
+    assert smallest_sub > 0, "smallest_subnormal should be positive"
+
 @pytest.mark.parametrize("name,value", [("bits", 128), ("precision", 33)])
 def test_finfo_int_constant(name, value):
     assert getattr(numpy_quaddtype, name) == value

From 41bce9a7ec3400f824bb2b73eadf26984044e57c Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 18:10:30 +0530
Subject: [PATCH 02/13] using mutex before memcpy

---
 quaddtype/numpy_quaddtype/src/quaddtype_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index 9e98faa3..76709c6a 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -18,6 +18,9 @@
 #include "quad_common.h"
 #include "quadblas_interface.h"
 #include "float.h"
+#include <pthread.h>
+
+static pthread_mutex_t constant_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 static PyObject *
 py_is_longdouble_128(PyObject *self, PyObject *args)
@@ -75,6 +78,7 @@ get_sleef_constant(PyObject *self, PyObject *args)
         // On platforms with native __float128 support, use the correct literal
         result->value.sleef_value = SLEEF_QUAD_DENORM_MIN;
 #else
+        pthread_mutex_lock(&constant_mutex);
         // On platforms without native __float128, SLEEF_QUAD_DENORM_MIN is broken
         // Manually constructing the smallest subnormal: 1 * 2^(-16382-112) = 2^(-16494)
         // This represents 0x0.0000000000000000000000000001p-16382
@@ -90,6 +94,7 @@ get_sleef_constant(PyObject *self, PyObject *args)
         c.h = 0x0000000000000000ULL;  // exponent = 0 (subnormal), mantissa high = 0
         c.l = 0x0000000000000001ULL;  // mantissa low = 1 (smallest possible)
         memcpy(&result->value.sleef_value, &c, 16);
+        pthread_mutex_unlock(&constant_mutex);
 #endif
     }
     else if (strcmp(constant_name, "bits") == 0) {

From e41c8321c15329d43be0ecb4bc340eb2e52a717e Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 18:22:55 +0530
Subject: [PATCH 03/13] precomputing the value

---
 .../numpy_quaddtype/src/quaddtype_main.c      | 36 ++++++++++---------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index 76709c6a..d5565fd4 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -33,6 +33,24 @@ py_is_longdouble_128(PyObject *self, PyObject *args)
     }
 }
 
+#ifndef SLEEF_QUAD_C
+static const union {
+    struct {
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+        uint64_t h, l;
+#else
+        uint64_t l, h;
+#endif
+    } parts;
+    Sleef_quad quad_value;
+    long double alignment_dummy __attribute__((aligned(16)));
+} SMALLEST_SUBNORMAL_CONST = {
+        .parts = {
+                .h = 0x0000000000000000ULL,  // exponent = 0 (subnormal), mantissa high = 0
+                .l = 0x0000000000000001ULL   // mantissa low = 1 (smallest possible)
+        }};
+#endif
+
 static PyObject *
 get_sleef_constant(PyObject *self, PyObject *args)
 {
@@ -78,23 +96,7 @@ get_sleef_constant(PyObject *self, PyObject *args)
         // On platforms with native __float128 support, use the correct literal
         result->value.sleef_value = SLEEF_QUAD_DENORM_MIN;
 #else
-        pthread_mutex_lock(&constant_mutex);
-        // On platforms without native __float128, SLEEF_QUAD_DENORM_MIN is broken
-        // Manually constructing the smallest subnormal: 1 * 2^(-16382-112) = 2^(-16494)
-        // This represents 0x0.0000000000000000000000000001p-16382
-#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-        struct {
-            uint64_t h, l;
-        } c;
-#else
-        struct {
-            uint64_t l, h;
-        } c;
-#endif
-        c.h = 0x0000000000000000ULL;  // exponent = 0 (subnormal), mantissa high = 0
-        c.l = 0x0000000000000001ULL;  // mantissa low = 1 (smallest possible)
-        memcpy(&result->value.sleef_value, &c, 16);
-        pthread_mutex_unlock(&constant_mutex);
+        result->value.sleef_value = SMALLEST_SUBNORMAL_CONST.quad_value;
 #endif
     }
     else if (strcmp(constant_name, "bits") == 0) {

From 1addb5286c222bad42452d8e74c03092df8f3c42 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 19:10:38 +0530
Subject: [PATCH 04/13] precomputing the value

---
 quaddtype/numpy_quaddtype/src/quaddtype_main.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index d5565fd4..fc697c99 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -18,9 +18,6 @@
 #include "quad_common.h"
 #include "quadblas_interface.h"
 #include "float.h"
-#include <pthread.h>
-
-static pthread_mutex_t constant_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 static PyObject *
 py_is_longdouble_128(PyObject *self, PyObject *args)

From 1ff0191bd299afdc92c8376bd6420e56b9530717 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 19:26:28 +0530
Subject: [PATCH 05/13] static union

---
 .../numpy_quaddtype/src/quaddtype_main.c      | 22 ++++++++-----------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index fc697c99..e91016b0 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -30,22 +30,18 @@ py_is_longdouble_128(PyObject *self, PyObject *args)
     }
 }
 
-#ifndef SLEEF_QUAD_C
+#ifdef SLEEF_QUAD_C
+// Native __float128 support
+static const Sleef_quad SMALLEST_SUBNORMAL_VALUE = SLEEF_QUAD_DENORM_MIN;
+#else
+// Use static union for thread-safe initialization
 static const union {
     struct {
-#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-        uint64_t h, l;
-#else
         uint64_t l, h;
-#endif
     } parts;
-    Sleef_quad quad_value;
-    long double alignment_dummy __attribute__((aligned(16)));
-} SMALLEST_SUBNORMAL_CONST = {
-        .parts = {
-                .h = 0x0000000000000000ULL,  // exponent = 0 (subnormal), mantissa high = 0
-                .l = 0x0000000000000001ULL   // mantissa low = 1 (smallest possible)
-        }};
+    Sleef_quad value;
+} smallest_subnormal_const = {.parts = {.l = 0x0000000000000001ULL, .h = 0x0000000000000000ULL}};
+#define SMALLEST_SUBNORMAL_VALUE (smallest_subnormal_const.value)
 #endif
 
 static PyObject *
@@ -93,7 +89,7 @@ get_sleef_constant(PyObject *self, PyObject *args)
         // On platforms with native __float128 support, use the correct literal
         result->value.sleef_value = SLEEF_QUAD_DENORM_MIN;
 #else
-        result->value.sleef_value = SMALLEST_SUBNORMAL_CONST.quad_value;
+        result->value.sleef_value = SMALLEST_SUBNORMAL_VALUE;
 #endif
     }
     else if (strcmp(constant_name, "bits") == 0) {

From e7104b7d50b24f6f0f2a9a02d355c53514b0c0b1 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 19:53:05 +0530
Subject: [PATCH 06/13] precompile cache

---
 quaddtype/numpy_quaddtype/src/quaddtype_main.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index e91016b0..6d68c5af 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -31,16 +31,25 @@ py_is_longdouble_128(PyObject *self, PyObject *args)
 }
 
 #ifdef SLEEF_QUAD_C
-// Native __float128 support
 static const Sleef_quad SMALLEST_SUBNORMAL_VALUE = SLEEF_QUAD_DENORM_MIN;
 #else
-// Use static union for thread-safe initialization
+// Build the smallest subnormal (2^-16494) from raw 64-bit halves; their order follows the target's byte order
 static const union {
     struct {
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+        uint64_t h, l;
+#else
         uint64_t l, h;
+#endif
     } parts;
     Sleef_quad value;
-} smallest_subnormal_const = {.parts = {.l = 0x0000000000000001ULL, .h = 0x0000000000000000ULL}};
+} smallest_subnormal_const = {.parts = {
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+                                      .h = 0x0000000000000000ULL, .l = 0x0000000000000001ULL
+#else
+                                      .l = 0x0000000000000001ULL, .h = 0x0000000000000000ULL
+#endif
+                              }};
 #define SMALLEST_SUBNORMAL_VALUE (smallest_subnormal_const.value)
 #endif
 

From ea212dde1724f6b0a6b7ded808b0480cf0c5daa8 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 20:11:01 +0530
Subject: [PATCH 07/13] using pymutex on object creation

---
 quaddtype/numpy_quaddtype/src/scalar.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index 6c2e4b51..6292423a 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -15,10 +15,22 @@
 #include "scalar_ops.h"
 #include "dragon4.h"
 
+#ifdef Py_GIL_DISABLED
+static PyMutex quad_creation_mutex = {0};
+#endif
+
 QuadPrecisionObject *
 QuadPrecision_raw_new(QuadBackendType backend)
 {
-    QuadPrecisionObject *new = PyObject_New(QuadPrecisionObject, &QuadPrecision_Type);
+    QuadPrecisionObject *new;
+#ifdef Py_GIL_DISABLED
+    PyMutex_Lock(&quad_creation_mutex);
+#endif
+    new = PyObject_New(QuadPrecisionObject, &QuadPrecision_Type);
+#ifdef Py_GIL_DISABLED
+    PyMutex_Unlock(&quad_creation_mutex);
+#endif
+
     if (!new)
         return NULL;
     new->backend = backend;

From 4b250290d494d5129f509880973cc5f62920ec4c Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 20:21:57 +0530
Subject: [PATCH 08/13] repr string building with mutex

---
 quaddtype/numpy_quaddtype/src/scalar.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index 6292423a..79dbc1c6 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -16,7 +16,7 @@
 #include "dragon4.h"
 
 #ifdef Py_GIL_DISABLED
-static PyMutex quad_creation_mutex = {0};
+static PyMutex scalar_mutex = {0};
 #endif
 
 QuadPrecisionObject *
@@ -24,11 +24,11 @@ QuadPrecision_raw_new(QuadBackendType backend)
 {
     QuadPrecisionObject *new;
 #ifdef Py_GIL_DISABLED
-    PyMutex_Lock(&quad_creation_mutex);
+    PyMutex_Lock(&scalar_mutex);
 #endif
     new = PyObject_New(QuadPrecisionObject, &QuadPrecision_Type);
 #ifdef Py_GIL_DISABLED
-    PyMutex_Unlock(&quad_creation_mutex);
+    PyMutex_Unlock(&scalar_mutex);
 #endif
 
     if (!new)
@@ -196,6 +196,9 @@ QuadPrecision_str(QuadPrecisionObject *self)
 static PyObject *
 QuadPrecision_repr(QuadPrecisionObject *self)
 {
+#ifdef Py_GIL_DISABLED
+    PyMutex_Lock(&scalar_mutex);
+#endif
     PyObject *str = QuadPrecision_str(self);
     if (str == NULL) {
         return NULL;
@@ -203,6 +206,9 @@ QuadPrecision_repr(QuadPrecisionObject *self)
     const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble";
     PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S', backend='%s')", str, backend_str);
     Py_DECREF(str);
+#ifdef Py_GIL_DISABLED
+    PyMutex_Unlock(&scalar_mutex);
+#endif
     return res;
 }
 

From 378e86a245624bffa4d3eb0ecf1aefd2b5e6b047 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 20:36:55 +0530
Subject: [PATCH 09/13] repr string building with mutex

---
 quaddtype/numpy_quaddtype/src/scalar.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index 79dbc1c6..07b18bf0 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -215,6 +215,9 @@ QuadPrecision_repr(QuadPrecisionObject *self)
 static PyObject *
 QuadPrecision_repr_dragon4(QuadPrecisionObject *self)
 {
+#ifdef Py_GIL_DISABLED
+    PyMutex_Lock(&scalar_mutex);
+#endif
     Dragon4_Options opt = {.scientific = 1,
                            .digit_mode = DigitMode_Unique,
                            .cutoff_mode = CutoffMode_TotalLength,
@@ -244,6 +247,9 @@ QuadPrecision_repr_dragon4(QuadPrecisionObject *self)
     const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble";
     PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S', backend='%s')", str, backend_str);
     Py_DECREF(str);
+#ifdef Py_GIL_DISABLED
+    PyMutex_Unlock(&scalar_mutex);
+#endif
     return res;
 }
 

From 1d9514ba909121e3f55bc31da52d1769ee21bb3b Mon Sep 17 00:00:00 2001
From: SwayamInSync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 17:23:25 +0000
Subject: [PATCH 10/13] selecting TLS support

---
 quaddtype/meson.build                   | 40 ++++++++++++++++++++++++-
 quaddtype/numpy_quaddtype/src/dragon4.c | 12 ++++++++
 quaddtype/numpy_quaddtype/src/scalar.c  | 21 -------------
 3 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/quaddtype/meson.build b/quaddtype/meson.build
index e50f6937..735db3fc 100644
--- a/quaddtype/meson.build
+++ b/quaddtype/meson.build
@@ -30,6 +30,44 @@ if openmp_dep.found()
     dependencies += openmp_dep
 endif
 
+# Thread-local storage detection (borrowed from NumPy)
+optional_variable_attributes = [
+  ['thread_local', 'HAVE_THREAD_LOCAL'],    # C23
+  ['_Thread_local', 'HAVE__THREAD_LOCAL'],  # C11/C17
+  ['__thread', 'HAVE___THREAD'],           # GCC/Clang
+  ['__declspec(thread)', 'HAVE___DECLSPEC_THREAD_']  # MSVC
+]
+
+if not is_variable('cdata')
+  cdata = configuration_data()
+endif
+
+foreach optional_attr: optional_variable_attributes
+  attr = optional_attr[0]
+  code = '''
+    #pragma GCC diagnostic error "-Wattributes"
+    #pragma clang diagnostic error "-Wattributes"
+    
+    int @0@ foo;
+    
+    int main() {
+      return 0;
+    }
+  '''.format(attr)
+  
+  if c.compiles(code, name: optional_attr[0])
+    cdata.set10(optional_attr[1], true)
+    message('Thread-local storage support found: @0@'.format(attr))
+  endif
+endforeach
+
+configure_file(
+  output: 'quaddtype_config.h',
+  configuration: cdata
+)
+
+build_includes = include_directories('.')
+
 includes = include_directories(
     [
         incdir_numpy,
@@ -84,5 +122,5 @@ py.extension_module('_quaddtype_main',
   dependencies: dependencies,
   install: true,
   subdir: 'numpy_quaddtype',
-  include_directories: includes
+  include_directories: [includes, build_includes]
 )
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c
index 34ad4cbb..fc60a5f1 100644
--- a/quaddtype/numpy_quaddtype/src/dragon4.c
+++ b/quaddtype/numpy_quaddtype/src/dragon4.c
@@ -11,6 +11,9 @@ Modifications are specific to support the SLEEF_QUAD
 #include <sleef.h>
 #include <sleefquad.h>
 
+#include "quaddtype_config.h"
+
+
 #define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API
 #define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API
 #define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION
@@ -22,6 +25,15 @@ Modifications are specific to support the SLEEF_QUAD
 #include "dtype.h"
 #include "scalar.h"
 
+
+#if !defined(HAVE_THREAD_LOCAL) && !defined(HAVE__THREAD_LOCAL) && \
+    !defined(HAVE___THREAD) && !defined(HAVE___DECLSPEC_THREAD_) && \
+    !defined(__cplusplus)
+#warning "No thread-local storage support detected! NPY_TLS will be empty, causing thread safety issues."
+#else
+#warning "NPY_TLS Thread-local storage support detected."
+#endif
+
 #if 0
 #define DEBUG_ASSERT(stmnt) assert(stmnt)
 #else
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index 07b18bf0..6d82d198 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -15,21 +15,12 @@
 #include "scalar_ops.h"
 #include "dragon4.h"
 
-#ifdef Py_GIL_DISABLED
-static PyMutex scalar_mutex = {0};
-#endif
 
 QuadPrecisionObject *
 QuadPrecision_raw_new(QuadBackendType backend)
 {
     QuadPrecisionObject *new;
-#ifdef Py_GIL_DISABLED
-    PyMutex_Lock(&scalar_mutex);
-#endif
     new = PyObject_New(QuadPrecisionObject, &QuadPrecision_Type);
-#ifdef Py_GIL_DISABLED
-    PyMutex_Unlock(&scalar_mutex);
-#endif
 
     if (!new)
         return NULL;
@@ -196,9 +187,6 @@ QuadPrecision_str(QuadPrecisionObject *self)
 static PyObject *
 QuadPrecision_repr(QuadPrecisionObject *self)
 {
-#ifdef Py_GIL_DISABLED
-    PyMutex_Lock(&scalar_mutex);
-#endif
     PyObject *str = QuadPrecision_str(self);
     if (str == NULL) {
         return NULL;
@@ -206,18 +194,12 @@ QuadPrecision_repr(QuadPrecisionObject *self)
     const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble";
     PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S', backend='%s')", str, backend_str);
     Py_DECREF(str);
-#ifdef Py_GIL_DISABLED
-    PyMutex_Unlock(&scalar_mutex);
-#endif
     return res;
 }
 
 static PyObject *
 QuadPrecision_repr_dragon4(QuadPrecisionObject *self)
 {
-#ifdef Py_GIL_DISABLED
-    PyMutex_Lock(&scalar_mutex);
-#endif
     Dragon4_Options opt = {.scientific = 1,
                            .digit_mode = DigitMode_Unique,
                            .cutoff_mode = CutoffMode_TotalLength,
@@ -247,9 +229,6 @@ QuadPrecision_repr_dragon4(QuadPrecisionObject *self)
     const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble";
     PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S', backend='%s')", str, backend_str);
     Py_DECREF(str);
-#ifdef Py_GIL_DISABLED
-    PyMutex_Unlock(&scalar_mutex);
-#endif
     return res;
 }
 

From 501cf7ef85f15bcadcdce69a90e16c96a2247c6a Mon Sep 17 00:00:00 2001
From: SwayamInSync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 17:31:32 +0000
Subject: [PATCH 11/13] adding explicit macro def inside dragon4.c

---
 quaddtype/numpy_quaddtype/src/dragon4.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c
index fc60a5f1..7f3904a3 100644
--- a/quaddtype/numpy_quaddtype/src/dragon4.c
+++ b/quaddtype/numpy_quaddtype/src/dragon4.c
@@ -25,7 +25,6 @@ Modifications are specific to support the SLEEF_QUAD
 #include "dtype.h"
 #include "scalar.h"
 
-
 #if !defined(HAVE_THREAD_LOCAL) && !defined(HAVE__THREAD_LOCAL) && \
     !defined(HAVE___THREAD) && !defined(HAVE___DECLSPEC_THREAD_) && \
     !defined(__cplusplus)
@@ -34,6 +33,20 @@ Modifications are specific to support the SLEEF_QUAD
 #warning "NPY_TLS Thread-local storage support detected."
 #endif
 
+#ifdef __cplusplus
+    #define NPY_TLS thread_local
+#elif defined(HAVE_THREAD_LOCAL)
+    #define NPY_TLS thread_local
+#elif defined(HAVE__THREAD_LOCAL)
+    #define NPY_TLS _Thread_local
+#elif defined(HAVE___THREAD)
+    #define NPY_TLS __thread
+#elif defined(HAVE___DECLSPEC_THREAD_)
+    #define NPY_TLS __declspec(thread)
+#else
+    #define NPY_TLS
+#endif
+
 #if 0
 #define DEBUG_ASSERT(stmnt) assert(stmnt)
 #else

From 0a9d1e3bc32ffe7ed56d2ee888c45841598c108b Mon Sep 17 00:00:00 2001
From: SwayamInSync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 17:48:33 +0000
Subject: [PATCH 12/13] removing inline code warnings

---
 quaddtype/numpy_quaddtype/src/dragon4.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c
index 7f3904a3..8e96c0bc 100644
--- a/quaddtype/numpy_quaddtype/src/dragon4.c
+++ b/quaddtype/numpy_quaddtype/src/dragon4.c
@@ -25,13 +25,6 @@ Modifications are specific to support the SLEEF_QUAD
 #include "dtype.h"
 #include "scalar.h"
 
-#if !defined(HAVE_THREAD_LOCAL) && !defined(HAVE__THREAD_LOCAL) && \
-    !defined(HAVE___THREAD) && !defined(HAVE___DECLSPEC_THREAD_) && \
-    !defined(__cplusplus)
-#warning "No thread-local storage support detected! NPY_TLS will be empty, causing thread safety issues."
-#else
-#warning "NPY_TLS Thread-local storage support detected."
-#endif
 
 #ifdef __cplusplus
     #define NPY_TLS thread_local

From 1ced8a6cfa82b7c19c6e4e0addc1318832ae586c Mon Sep 17 00:00:00 2001
From: SwayamInSync <hawkempire007@gmail.com>
Date: Thu, 4 Sep 2025 19:11:30 +0000
Subject: [PATCH 13/13] removing redundant SLEEF_QUAD compile check

---
 quaddtype/numpy_quaddtype/src/quaddtype_main.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index 6d68c5af..734de21b 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -94,12 +94,7 @@ get_sleef_constant(PyObject *self, PyObject *args)
         result->value.sleef_value = SLEEF_QUAD_MIN;
     }
     else if (strcmp(constant_name, "smallest_subnormal") == 0) {
-#ifdef SLEEF_QUAD_C
-        // On platforms with native __float128 support, use the correct literal
-        result->value.sleef_value = SLEEF_QUAD_DENORM_MIN;
-#else
         result->value.sleef_value = SMALLEST_SUBNORMAL_VALUE;
-#endif
     }
     else if (strcmp(constant_name, "bits") == 0) {
         Py_DECREF(result);