add guard clause: --numa mirror requires OpenMP; remove cpu_mask_set_physical_cores_only

dbsanfte · dbsanfte · commit c95135768fef · 2025-09-15T13:51:38.000Z
diff --git a/common/arg.cpp b/common/arg.cpp
@@ -2505,14 +2505,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         "- distribute: spread execution evenly over all nodes\n"
         "- isolate: only spawn threads on CPUs on the node that execution started on\n"
         "- numactl: use the CPU map provided by numactl\n"
-        "- mirror: enable NUMA-aware model mirroring\n"
+        "- mirror: enable NUMA-aware model mirroring (requires OpenMP)\n"
         "if run without this previously, it is recommended to drop the system page cache before using this\n"
         "see https://github.com/ggml-org/llama.cpp/issues/1437",
         [](common_params & params, const std::string & value) {
             /**/ if (value == "distribute" || value == "") { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; }
             else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; }
             else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
-            else if (value == "mirror") { params.numa = GGML_NUMA_STRATEGY_MIRROR; }
+            else if (value == "mirror") {
+                // Mirroring is only accepted in OpenMP builds; otherwise reject it while parsing arguments.
+                // NOTE(review): confirm GGML_USE_OPENMP is defined when compiling common/arg.cpp;
+                // if it is not visible in this translation unit, this branch always throws.
+#ifdef GGML_USE_OPENMP
+                params.numa = GGML_NUMA_STRATEGY_MIRROR;
+#else
+                throw std::invalid_argument("--numa mirror requires OpenMP support (compile with -DGGML_OPENMP=ON)");
+#endif
+            }
             else { throw std::invalid_argument("invalid value"); }
         }
     ).set_env("LLAMA_ARG_NUMA"));
diff --git a/common/common.cpp b/common/common.cpp
@@ -361,44 +361,6 @@ void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model)
     }
 }
 
-bool cpu_mask_set_physical_cores_only(bool (&boolmask)[GGML_MAX_N_THREADS]) {
-#ifdef _WIN32
-    // Windows implementation would require different approach
-    LOG_WRN("Physical core detection is not supported on Windows\n");
-    return false;
-#else
-    std::memset(boolmask, false, sizeof(bool) * GGML_MAX_N_THREADS);
-    
-    // Use the common topology detection logic
-    std::vector<int> physical_cores;
-    if (!cpu_get_physical_cores_topology(physical_cores)) {
-        // Fallback: if we couldn't detect topology, just use all CPUs
-        int num_cpus = std::thread::hardware_concurrency();
-        for (int cpu = 0; cpu < num_cpus && cpu < GGML_MAX_N_THREADS; cpu++) {
-            boolmask[cpu] = true;
-        }
-        LOG_WRN("Could not detect CPU topology, using all CPUs\n");
-        return false;
-    }
-    
-    // Set the mask for detected physical cores
-    for (int core_id : physical_cores) {
-        if (core_id < GGML_MAX_N_THREADS) {
-            boolmask[core_id] = true;
-        }
-    }
-    
-    LOG("Detected %zu physical cores (excluding hyperthreads): ", physical_cores.size());
-    for (size_t i = 0; i < physical_cores.size(); i++) {
-        if (i > 0) LOG(", ");
-        LOG("%d", physical_cores[i]);
-    }
-    LOG("\n");
-    
-    return true;
-#endif
-}
-
 bool cpu_mask_set_physical_cores_with_hyperthreading(bool (&boolmask)[GGML_MAX_N_THREADS]) {
 #ifdef _WIN32
     // Windows implementation would require different approach
diff --git a/common/common.h b/common/common.h
@@ -67,7 +67,6 @@ int32_t cpu_get_num_physical_cores();
 int32_t cpu_get_num_math();
 int32_t cpu_detect_physical_cores_topology(); // Detect actual physical cores using CPU topology
 bool cpu_get_physical_cores_topology(std::vector<int> & physical_cores); // Get list of physical core IDs
-bool cpu_mask_set_physical_cores_only(bool(&boolmask)[GGML_MAX_N_THREADS]);
 bool cpu_mask_set_physical_cores_with_hyperthreading(bool(&boolmask)[GGML_MAX_N_THREADS]); // Set mask to include physical cores + hyperthread siblings
 
 //