@@ -445,7 +445,7 @@ struct ggml_threadpool {
 #ifdef GGML_USE_NUMA_MIGRATE
     atomic_int GGML_CACHE_ALIGN * n_barrier_node[GGML_NUMA_MIGRATE_NODES];
     atomic_int GGML_CACHE_ALIGN * n_barrier_passed_node[GGML_NUMA_MIGRATE_NODES];
-    atomic_int GGML_CACHE_ALIGN * n_barrier_passed_last;
+    atomic_int GGML_CACHE_ALIGN * n_barrier_passed_last[GGML_BARRIER_NODE_CNTS];
 #endif

     atomic_int GGML_CACHE_ALIGN current_chunk; // currently processing chunk during Mat_Mul, shared between all the threads.
@@ -568,8 +568,8 @@ void ggml_barrier(struct ggml_threadpool * tp) {
 }

 #ifdef GGML_USE_NUMA_MIGRATE
-static int get_node_from_cpu(int cpu, int cores_per_numa) {
-    return cpu / cores_per_numa;
+int ggml_get_node_from_cpu(int cpu) {
+    return cpu / g_state.numa.nodes[0].n_cpus;
 }

 int ggml_cores_per_numa(void) {
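Note on the renamed helper above: `ggml_get_node_from_cpu()` maps a CPU index to its NUMA node by integer division with the per-node core count read from node 0. A minimal standalone sketch of that mapping, assuming 8 cores per node purely for illustration (the real count comes from `g_state.numa`):

```c
// Sketch only: how the cpu -> node mapping by integer division behaves.
// The 8-cores-per-node value is an assumption for this example.
#include <stdio.h>

static int node_from_cpu(int cpu, int cores_per_numa) {
    return cpu / cores_per_numa;
}

int main(void) {
    const int cores_per_numa = 8;   // assumed; ggml reads this from NUMA node 0
    for (int cpu = 0; cpu < 16; cpu += 7) {
        printf("cpu %2d -> node %d\n", cpu, node_from_cpu(cpu, cores_per_numa));
    }
    // prints: cpu 0 -> node 0, cpu 7 -> node 0, cpu 14 -> node 1
    return 0;
}
```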
@@ -594,15 +594,15 @@ void ggml_barrier_numa_aware(struct ggml_threadpool * tp, int ith, int node_n) {
         return;
     }

-    int cores_per_numa = g_state.numa.nodes[0].n_cpus;
+    int cores_per_numa = ggml_cores_per_numa();
     int numa_nodes = n_threads / cores_per_numa;
     int remaining_cores = n_threads % cores_per_numa;
     if ((numa_nodes != GGML_NUMA_MIGRATE_NODES) || remaining_cores) {
         ggml_barrier(tp);
         return;
     }

-    int node = get_node_from_cpu(ith, cores_per_numa);
+    int node = ggml_get_node_from_cpu(ith);

     int n_passed = atomic_load_explicit(tp->n_barrier_passed_node[node], memory_order_relaxed);

@@ -613,13 +613,13 @@ void ggml_barrier_numa_aware(struct ggml_threadpool * tp, int ith, int node_n) {
         // last thread of current numa node
         atomic_store_explicit(tp->n_barrier_node[node], 0, memory_order_seq_cst);

-        int n_passed_node = atomic_fetch_add_explicit(&tp->n_barrier_passed_last[node_n], 1, memory_order_seq_cst);
+        int n_passed_node = atomic_fetch_add_explicit(tp->n_barrier_passed_last[node_n], 1, memory_order_seq_cst);

         if (n_passed_node == (numa_nodes - 1)) { // last numa node cpu
             atomic_fetch_add_explicit(tp->n_barrier_passed_node[node], 1, memory_order_seq_cst);
-            atomic_store_explicit(&tp->n_barrier_passed_last[node_n], 0, memory_order_seq_cst);
+            atomic_store_explicit(tp->n_barrier_passed_last[node_n], 0, memory_order_seq_cst);
         } else {
-            while (atomic_load_explicit(&tp->n_barrier_passed_last[node_n], memory_order_relaxed)) {
+            while (atomic_load_explicit(tp->n_barrier_passed_last[node_n], memory_order_relaxed)) {
                 ggml_thread_cpu_relax();
             }
             atomic_fetch_add_explicit(tp->n_barrier_passed_node[node], 1, memory_order_seq_cst);
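The `&` dropped throughout this hunk follows directly from the struct change in the first hunk: `n_barrier_passed_last` is now an array of `atomic_int *`, so indexing it already yields the pointer that the C11 atomic calls expect. A minimal sketch of the before/after types, using stand-in struct names and static storage instead of ggml's real allocation:

```c
// Sketch only (not ggml code): why the leading `&` disappears after the type change.
// SLOTS stands in for GGML_BARRIER_NODE_CNTS, whose value is defined elsewhere.
#include <stdatomic.h>

#define SLOTS 2

struct old_tp { atomic_int * n_barrier_passed_last; };        // before: one array of atomic_int
struct new_tp { atomic_int * n_barrier_passed_last[SLOTS]; }; // after: one pointer per slot

static atomic_int storage[SLOTS];   // stand-in for the separately malloc'd counters

int main(void) {
    struct old_tp o = { storage };
    struct new_tp n = { { &storage[0], &storage[1] } };

    // before: o.n_barrier_passed_last[0] is an atomic_int, so its address must be taken
    atomic_fetch_add_explicit(&o.n_barrier_passed_last[0], 1, memory_order_seq_cst);
    // after: n.n_barrier_passed_last[0] is already an atomic_int *, so it is passed as-is
    atomic_fetch_add_explicit(n.n_barrier_passed_last[0], 1, memory_order_seq_cst);
    return 0;
}
```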
@@ -2968,7 +2968,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {

 #ifdef GGML_USE_NUMA_MIGRATE
     for (int i = 0; i < GGML_NUMA_MIGRATE_NODES; i++) {
-        params.wdata_numa[i] = cplan->work_data_numa[numa_node_of_cpu(state->ith)];
+        params.wdata_numa[i] = cplan->work_data_numa[ggml_get_node_from_cpu(state->ith)];
     }
 #endif

@@ -3161,9 +3161,9 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
         *threadpool->n_barrier_passed_node[node] = 0;
     }

-    threadpool->n_barrier_passed_last = (atomic_int *) malloc(GGML_BARRIER_NODE_LAST * sizeof(atomic_int));
-    for (int i = 0; i < GGML_BARRIER_NODE_LAST; i++) {
-        threadpool->n_barrier_passed_last[i] = 0;
+    for (int i = 0; i < GGML_BARRIER_NODE_CNTS; i++) {
+        threadpool->n_barrier_passed_last[i] = (atomic_int *) malloc(sizeof(atomic_int));
+        *threadpool->n_barrier_passed_last[i] = 0;
     }
 #endif

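For reference, a standalone sketch of the per-slot allocation pattern this hunk introduces: each counter gets its own `malloc` and is zeroed through the stored pointer. `NUM_SLOTS` stands in for `GGML_BARRIER_NODE_CNTS`, and the `free()` teardown is an assumption, since that side is not part of this diff:

```c
// Standalone sketch of the per-slot counter allocation pattern (not ggml code).
// NUM_SLOTS is an assumed stand-in for GGML_BARRIER_NODE_CNTS; the free() path
// is illustrative only and does not appear in the diff above.
#include <stdatomic.h>
#include <stdlib.h>

#define NUM_SLOTS 2

int main(void) {
    atomic_int * n_barrier_passed_last[NUM_SLOTS];

    // one malloc per slot, mirroring the new init loop: every counter lives in
    // its own allocation and starts at 0
    for (int i = 0; i < NUM_SLOTS; i++) {
        n_barrier_passed_last[i] = (atomic_int *) malloc(sizeof(atomic_int));
        if (n_barrier_passed_last[i] == NULL) {
            return 1;
        }
        *n_barrier_passed_last[i] = 0;
    }

    // counters are used through the stored pointer, as in ggml_barrier_numa_aware
    atomic_fetch_add_explicit(n_barrier_passed_last[0], 1, memory_order_seq_cst);

    // assumed teardown (not shown in the diff)
    for (int i = 0; i < NUM_SLOTS; i++) {
        free(n_barrier_passed_last[i]);
    }
    return 0;
}
```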