@@ -2954,62 +2954,18 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
29542954 /*.threadpool=*/ tp ,
29552955 };
29562956
2957- #ifdef GGML_USE_NUMA_MIGRATE_DEBUG
2958- struct timespec t0 , t1 , t2 , t3 , t4 ;
2959- long d12 , d32 , d43 ;
2960- bool log_time = true;
2961- int log_node_n = 0 ;
2962- if (log_time ) {
2963- clock_gettime (CLOCK_MONOTONIC , & t0 );
2964- }
2965- #endif
2966-
29672957 for (int node_n = 0 ; node_n < cgraph -> n_nodes && atomic_load_explicit (& tp -> abort , memory_order_relaxed ) != node_n ; node_n ++ ) {
29682958 struct ggml_tensor * node = cgraph -> nodes [node_n ];
2969- #ifdef GGML_USE_NUMA_MIGRATE_DEBUG
2970- if ((node -> op == GGML_OP_MUL_MAT )) {
2971- log_node_n = node_n ;
2972- log_time = true;
2973- } else {
2974- log_time = false;
2975- }
2976- if (log_time ) {
2977- clock_gettime (CLOCK_MONOTONIC , & t1 );
2978- }
2979- #endif
29802959 ggml_compute_forward (& params , node );
29812960
2982- #ifdef GGML_USE_NUMA_MIGRATE_DEBUG
2983- if (log_time ) {
2984- clock_gettime (CLOCK_MONOTONIC , & t2 );
2985- }
2986- #endif
2987-
29882961 if (state -> ith == 0 && cplan -> abort_callback &&
29892962 cplan -> abort_callback (cplan -> abort_callback_data )) {
29902963 atomic_store_explicit (& tp -> abort , node_n + 1 , memory_order_relaxed );
29912964 tp -> ec = GGML_STATUS_ABORTED ;
29922965 }
29932966
29942967 if (node_n + 1 < cgraph -> n_nodes ) {
2995- #ifdef GGML_USE_NUMA_MIGRATE_DEBUG
2996- if (log_time ) {
2997- clock_gettime (CLOCK_MONOTONIC , & t3 );
2998- }
2999- #endif
3000-
30012968 ggml_barrier_numa_aware (state -> threadpool , state -> ith , node_n % GGML_BARRIER_NODE_LAST );
3002-
3003- #ifdef GGML_USE_NUMA_MIGRATE_DEBUG
3004- if (log_time ) {
3005- clock_gettime (CLOCK_MONOTONIC , & t4 );
3006- d12 = (t2 .tv_sec - t1 .tv_sec ) * 1e9 + (t2 .tv_nsec - t1 .tv_nsec );
3007- d32 = (t3 .tv_sec - t2 .tv_sec ) * 1e9 + (t3 .tv_nsec - t2 .tv_nsec );
3008- d43 = (t4 .tv_sec - t3 .tv_sec ) * 1e9 + (t4 .tv_nsec - t3 .tv_nsec );
3009- printf ("%s, op: %d, ith: %d, cpu: %d, d12: %ld, d32: %ld, d43: %ld, t1: %ld, t2: %ld, t3: %ld, t4: %ld\n" , \
3010- __func__ , node -> op , state -> ith , sched_getcpu (), d12 , d32 , d43 , t1 .tv_nsec , t2 .tv_nsec , t3 .tv_nsec , t4 .tv_nsec );
3011- }
3012- #endif
30132969 }
30142970 }
30152971
0 commit comments