2929#include <string.h>
3030#include <stdint.h>
3131
32- // External thread-local variable for NUMA node binding
32+ // External thread-local variables for NUMA node binding
3333extern __thread int ggml_current_numa_node ;
34+ extern __thread int ggml_numa_nodes_active ;
3435#include <inttypes.h>
3536#include <stdio.h>
3637#include <float.h>
@@ -615,13 +616,10 @@ static void ggml_openmp_bind_thread_to_numa_node(int thread_id, int n_threads) {
615616 // Cache strategy check to avoid repeated calls
616617 static bool strategy_checked = false;
617618 static bool is_numa_mirror = false;
618- static int num_numa_nodes = 0 ;
619+ static int num_numa_nodes = 1 ;
619620
620621 if (!strategy_checked ) {
621622 is_numa_mirror = (g_state .numa .numa_strategy == GGML_NUMA_STRATEGY_MIRROR );
622- if (is_numa_mirror ) {
623- num_numa_nodes = numa_max_node () + 1 ;
624- }
625623 strategy_checked = true;
626624 }
627625
@@ -635,6 +633,9 @@ static void ggml_openmp_bind_thread_to_numa_node(int thread_id, int n_threads) {
635633 return ;
636634 }
637635
636+ // Set the numa_nodes_active for all threads, regardless of NUMA mode
637+ ggml_numa_nodes_active = numa_max_node () + 1 ;
638+
638639 // Round-robin assignment of threads to NUMA nodes
639640 int target_numa_node = thread_id % num_numa_nodes ;
640641
@@ -669,8 +670,9 @@ static void ggml_openmp_bind_thread_to_numa_node(int thread_id, int n_threads) {
669670 ggml_thread_numa_node = target_numa_node ;
670671 ggml_thread_numa_initialized = true;
671672
672- // Update the global thread-local variable for tensor data access
673+ // Update the global thread-local variables for tensor data access
673674 ggml_current_numa_node = target_numa_node ;
675+ ggml_numa_nodes_active = num_numa_nodes ;
674676
675677 // Debug output using standard GGML logging
676678 GGML_LOG_DEBUG ("NUMA: Bound OpenMP thread %d to NUMA node %d (total threads: %d)\n" ,
0 commit comments