Skip to content

Commit 23c9784

Browse files
committed
Add a thread-local variable that tells each thread how many NUMA nodes are active in mirror mode (for future cross-NUMA data slicing)
1 parent fa3a5b4 commit 23c9784

File tree

3 files changed

+12
-8
lines changed

3 files changed

+12
-8
lines changed

ggml/include/ggml.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -665,8 +665,9 @@ extern "C" {
665665

666666
// Tensor data accessor functions for NUMA model mirroring compatibility:
667667

668-
// External thread-local variable set at OMP threadpool creation time
668+
// External thread-local variables set at OMP threadpool creation time
669669
extern __thread int ggml_current_numa_node;
670+
extern __thread int ggml_numa_nodes_active;
670671

671672
static inline void * tensor_data(const struct ggml_tensor * tensor) {
672673
// Fast path: if no NUMA mirrors exist, avoid thread-local access entirely

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@
2929
#include <string.h>
3030
#include <stdint.h>
3131

32-
// External thread-local variable for NUMA node binding
32+
// External thread-local variables for NUMA node binding
3333
extern __thread int ggml_current_numa_node;
34+
extern __thread int ggml_numa_nodes_active;
3435
#include <inttypes.h>
3536
#include <stdio.h>
3637
#include <float.h>
@@ -615,13 +616,10 @@ static void ggml_openmp_bind_thread_to_numa_node(int thread_id, int n_threads) {
615616
// Cache strategy check to avoid repeated calls
616617
static bool strategy_checked = false;
617618
static bool is_numa_mirror = false;
618-
static int num_numa_nodes = 0;
619+
static int num_numa_nodes = 1;
619620

620621
if (!strategy_checked) {
621622
is_numa_mirror = (g_state.numa.numa_strategy == GGML_NUMA_STRATEGY_MIRROR);
622-
if (is_numa_mirror) {
623-
num_numa_nodes = numa_max_node() + 1;
624-
}
625623
strategy_checked = true;
626624
}
627625

@@ -635,6 +633,9 @@ static void ggml_openmp_bind_thread_to_numa_node(int thread_id, int n_threads) {
635633
return;
636634
}
637635

636+
// Set ggml_numa_nodes_active for all threads, regardless of NUMA mode
637+
ggml_numa_nodes_active = numa_max_node() + 1;
638+
638639
// Round-robin assignment of threads to NUMA nodes
639640
int target_numa_node = thread_id % num_numa_nodes;
640641

@@ -669,8 +670,9 @@ static void ggml_openmp_bind_thread_to_numa_node(int thread_id, int n_threads) {
669670
ggml_thread_numa_node = target_numa_node;
670671
ggml_thread_numa_initialized = true;
671672

672-
// Update the global thread-local variable for tensor data access
673+
// Update the global thread-local variables for tensor data access
673674
ggml_current_numa_node = target_numa_node;
675+
ggml_numa_nodes_active = num_numa_nodes;
674676

675677
// Debug output using standard GGML logging
676678
GGML_LOG_DEBUG("NUMA: Bound OpenMP thread %d to NUMA node %d (total threads: %d)\n",

ggml/src/ggml.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@
2020
#include <alloca.h>
2121
#endif
2222

23-
// Thread-local variable for NUMA node binding (used by tensor_data())
23+
// Thread-local variables for NUMA node binding (used by tensor_data())
2424
__thread int ggml_current_numa_node = 0;
25+
__thread int ggml_numa_nodes_active = 1;
2526

2627
#include <assert.h>
2728
#include <errno.h>

0 commit comments

Comments
 (0)