@@ -57,7 +57,7 @@ enum ggml_status ggml_numa_executor_call_direct_kernel(struct ggml_tensor * tens
         return GGML_STATUS_FAILED;
     }
 
-    GGML_LOG_DEBUG("Direct Kernel: Dispatching operation %s directly\n", ggml_op_name(tensor->op));
+    NUMA_LOG_DEBUG("Direct Kernel: Dispatching operation %s directly\n", ggml_op_name(tensor->op));
 
     // Direct kernel dispatch based on operation type - no temporary graph overhead
     switch (tensor->op) {
@@ -341,52 +341,6 @@ struct mmid_row_mapping {
 // Core Executor Implementation
 // ============================================================================
 
-/**
- * @brief Compute graph execution with NUMA-aware optimization
- *
- * Processes a complete compute graph by analyzing each node and dispatching
- * to appropriate NUMA kernels or fallback mechanisms. This function provides
- * the main execution loop for NUMA-optimized computation.
- *
- * Execution Flow:
- * 1. Validates input parameters and initializes kernel registry
- * 2. Iterates through all graph nodes in dependency order
- * 3. For each node, selects optimal execution strategy
- * 4. Delegates to NUMA kernels or fallback as appropriate
- * 5. Collects performance statistics and handles errors
- *
- * @param cgraph The compute graph to execute
- * @param cplan The compute plan with threading and buffer information
- * @return GGML_STATUS_SUCCESS on success, error code on failure
- */
-enum ggml_status ggml_numa_executor_execute_graph(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
-    if (!cgraph || !cplan) {
-        return GGML_STATUS_FAILED;
-    }
-
-    GGML_LOG_DEBUG("NUMA Executor: Processing compute graph with %d nodes\n", cgraph->n_nodes);
-
-    // Initialize kernel registry if not already done
-    if (ggml_numa_kernels_init() != GGML_STATUS_SUCCESS) {
-        GGML_LOG_ERROR("NUMA Executor: Failed to initialize kernel registry\n");
-        return GGML_STATUS_FAILED;
-    }
-
-    // Process each node in the graph
-    for (int i = 0; i < cgraph->n_nodes; i++) {
-        struct ggml_tensor * node = cgraph->nodes[i];
-
-        enum ggml_status result = ggml_numa_executor_execute_tensor(node, cplan);
-        if (result != GGML_STATUS_SUCCESS) {
-            GGML_LOG_ERROR("NUMA Executor: Failed to execute node %d (%s)\n", i, ggml_op_name(node->op));
-            return result;
-        }
-    }
-
-    GGML_LOG_DEBUG("NUMA Executor: Successfully completed graph execution\n");
-    return GGML_STATUS_SUCCESS;
-}
-
 // ============================================================================
 // Public API Implementation
 // ============================================================================
@@ -507,13 +461,24 @@ enum ggml_status ggml_numa_executor_execute_tensor(struct ggml_tensor * tensor,
     NUMA_LOG_DEBUG("DEBUG: NUMA Executor: NUMA info not available (not Linux)\n");
 #endif
 
+    // Initialize kernel registry if not already done (critical for first operation)
+    if (ggml_numa_kernels_init() != GGML_STATUS_SUCCESS) {
+        GGML_LOG_ERROR("NUMA Executor: Failed to initialize kernel registry\n");
+        NUMA_PERF_END();
+        return GGML_STATUS_FAILED;
+    }
+
     // Get cache entry and query for execution strategy (hot path - must be fast)
     NUMA_PERF_START(NUMA_PERF_EXECUTOR_QUERY, op_name, "kernel_registry", -1, 0, 0);
     const ggml_numa_kernel_cache_entry_t * cache_entry = ggml_numa_lookup_kernel_direct(tensor->op);
     ggml_numa_execution_strategy_t strategy = ggml_numa_kernels_query(tensor);
     NUMA_PERF_END();
 
     if (!cache_entry || !cache_entry->supported) {
+        NUMA_LOG_DEBUG("NUMA Executor: Operation %s fallback analysis: cache_entry=%p, supported=%s\n",
+                       op_name,
+                       (void *) cache_entry,
+                       cache_entry ? (cache_entry->supported ? "true" : "false") : "N/A");
         GGML_LOG_DEBUG("NUMA Executor: Operation %s not supported by NUMA kernels, using direct kernel dispatch\n",
                        op_name);
         enum ggml_status result = ggml_numa_executor_direct_kernel_dispatch(tensor, cplan);
@@ -552,6 +517,8 @@ enum ggml_status ggml_numa_executor_execute_tensor(struct ggml_tensor * tensor,
     NUMA_PERF_START(NUMA_PERF_COORDINATOR_INIT, op_name, kernel_name, -1, 0, cplan->n_threads);
     if (!ggml_numa_openmp_coordinator_init()) {
         NUMA_PERF_END();
+        NUMA_LOG_DEBUG("NUMA Executor: OpenMP coordinator init failed for %s, using direct kernel dispatch\n",
+                       op_name);
         GGML_LOG_DEBUG("NUMA Executor: Failed to initialize OpenMP coordinator, using direct kernel dispatch for %s\n",
                        op_name);
         enum ggml_status result = ggml_numa_executor_direct_kernel_dispatch(tensor, cplan);
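For reference, the deleted graph-level entry point amounted to a per-node loop over cgraph->nodes. Below is a minimal caller-side sketch of that loop, assuming the per-tensor entry point keeps the signature shown in the hunk headers above and, after this change, performs the lazy kernel-registry initialization itself. The helper name run_graph_per_node is hypothetical and not part of the diff.

// Hypothetical sketch, not part of this diff: dispatch each graph node through
// the per-tensor executor, which now lazily initializes the kernel registry on
// the first call.
static enum ggml_status run_graph_per_node(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
    if (!cgraph || !cplan) {
        return GGML_STATUS_FAILED;
    }
    for (int i = 0; i < cgraph->n_nodes; i++) {
        struct ggml_tensor * node = cgraph->nodes[i];
        enum ggml_status result = ggml_numa_executor_execute_tensor(node, cplan);
        if (result != GGML_STATUS_SUCCESS) {
            return result;
        }
    }
    return GGML_STATUS_SUCCESS;
}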