Add a thread-local variable that tells each thread how many NUMA nodes are active in mirror mode (for future cross-NUMA data slicing)

dbsanfte · dbsanfte · commit 23c978471273 · 2025-09-17T21:02:21.000Z
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
@@ -665,8 +665,9 @@ extern "C" {
 
     // Tensor data accessor functions for NUMA model mirroring compatibility:
     
-    // External thread-local variable set at OMP threadpool creation time
+    // External thread-local variables set at OMP threadpool creation time
     extern __thread int ggml_current_numa_node;
+    extern __thread int ggml_numa_nodes_active;
     
     static inline void * tensor_data(const struct ggml_tensor * tensor) {
         // Fast path: if no NUMA mirrors exist, avoid thread-local access entirely
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -29,8 +29,9 @@
 #include <string.h>
 #include <stdint.h>
 
-// External thread-local variable for NUMA node binding
+// External thread-local variables for NUMA node binding
 extern __thread int ggml_current_numa_node;
+extern __thread int ggml_numa_nodes_active;
 #include <inttypes.h>
 #include <stdio.h>
 #include <float.h>
@@ -615,13 +616,10 @@ static void ggml_openmp_bind_thread_to_numa_node(int thread_id, int n_threads) {
     // Cache strategy check to avoid repeated calls
     static bool strategy_checked = false;
     static bool is_numa_mirror = false;
-    static int num_numa_nodes = 0;
+    static int num_numa_nodes = 1;
     
     if (!strategy_checked) {
         is_numa_mirror = (g_state.numa.numa_strategy == GGML_NUMA_STRATEGY_MIRROR);
-        if (is_numa_mirror) {
-            num_numa_nodes = numa_max_node() + 1;
-        }
         strategy_checked = true;
     }
     
@@ -635,6 +633,9 @@ static void ggml_openmp_bind_thread_to_numa_node(int thread_id, int n_threads) {
         return;
     }
 
+    // Set ggml_numa_nodes_active for all threads, regardless of NUMA mode
+    ggml_numa_nodes_active = numa_max_node() + 1;
+
     // Round-robin assignment of threads to NUMA nodes
     int target_numa_node = thread_id % num_numa_nodes;
     
@@ -669,8 +670,9 @@ static void ggml_openmp_bind_thread_to_numa_node(int thread_id, int n_threads) {
             ggml_thread_numa_node = target_numa_node;
             ggml_thread_numa_initialized = true;
             
-            // Update the global thread-local variable for tensor data access
+            // Update the global thread-local variables for tensor data access
             ggml_current_numa_node = target_numa_node;
+            ggml_numa_nodes_active = num_numa_nodes;
             
             // Debug output using standard GGML logging
             GGML_LOG_DEBUG("NUMA: Bound OpenMP thread %d to NUMA node %d (total threads: %d)\n", 
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
@@ -20,8 +20,9 @@
 #include <alloca.h>
 #endif
 
-// Thread-local variable for NUMA node binding (used by tensor_data())
+// Thread-local variables for NUMA node binding (used by tensor_data())
 __thread int ggml_current_numa_node = 0;
+__thread int ggml_numa_nodes_active = 1;
 
 #include <assert.h>
 #include <errno.h>