@@ -135,10 +135,12 @@ double CNTime::elapsedsec() const
135135 return nowsec - initial;
136136}
137137
138+ #ifdef _OPENMP
138139namespace {
139140#if defined(_WIN32)
140141// Returns the number of physical CPU cores on Windows (excluding hyper-threaded cores)
141- static int getPhysicalCoreCount () {
142+ unsigned getPhysicalCoreCount () {
143+ #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
142144 int physicalCoreCount = 0 ;
143145
144146 // Determine the required buffer size.
@@ -161,9 +163,13 @@ static int getPhysicalCoreCount() {
161163 return MY_OMP_GET_MAX_THREADS ();
162164 }
163165 return physicalCoreCount;
166+ #else
167+ // On non-x86 architectures, there should be no hyper-threading
168+ return MY_OMP_GET_MAX_THREADS ();
169+ #endif
164170}
165171
166- static int getAllowedCoreCount () {
172+ unsigned getAllowedCoreCount () {
167173 DWORD_PTR processMask, systemMask;
168174 if (!GetProcessAffinityMask (GetCurrentProcess (), &processMask, &systemMask)) {
169175 return 0 ; // API call failed (should rarely happen for the current process)
@@ -180,7 +186,7 @@ static int getAllowedCoreCount() {
180186#elif defined(__APPLE__)
181187
182188// Returns the number of physical CPU cores on macOS (excluding hyper-threaded cores)
183- static int getPhysicalCoreCount () {
189+ unsigned getPhysicalCoreCount () {
184190 int physicalCoreCount = 0 ;
185191 int cores = 0 ;
186192 size_t size = sizeof (cores);
@@ -194,81 +200,76 @@ static int getPhysicalCoreCount() {
194200 return physicalCoreCount;
195201}
196202
197- static int getAllowedCoreCount () {
203+ unsigned getAllowedCoreCount () {
198204 // MacOS does not support CPU affinity, so we return the maximum number of threads.
199205 return MY_OMP_GET_MAX_THREADS ();
200206}
201207
202208#elif defined(__linux__)
203209// Returns the number of physical CPU cores on Linux (excluding hyper-threaded cores)
204- static int getPhysicalCoreCount () {
205- int physicalCoreCount = 0 ;
206- std::ifstream cpuinfo (" /proc/cpuinfo" );
207- if (!cpuinfo.is_open ()) return MY_OMP_GET_MAX_THREADS ();
208-
209- std::set<std::string> coreSet;
210- std::string line;
211- int physicalId = -1 , coreId = -1 ;
212- bool foundPhysical = false , foundCore = false ;
213-
214- while (std::getline (cpuinfo, line)) {
215- // An empty line indicates the end of a processor block.
216- if (line.empty ()) {
217- if (foundPhysical && foundCore)
218- coreSet.insert (std::to_string (physicalId) + " -" + std::to_string (coreId));
219- // Reset for the next processor block.
220- foundPhysical = foundCore = false ;
221- physicalId = coreId = -1 ;
222- } else {
223- auto colonPos = line.find (' :' );
224- if (colonPos == std::string::npos) continue ;
225- std::string key = line.substr (0 , colonPos);
226- std::string value = line.substr (colonPos + 1 );
227- // Trim whitespace.
228- key.erase (key.find_last_not_of (" \t " ) + 1 );
229- value.erase (0 , value.find_first_not_of (" \t " ));
230-
231- if (key == " physical id" ) {
232- physicalId = std::stoi (value);
233- foundPhysical = true ;
234- } else if (key == " core id" ) {
235- coreId = std::stoi (value);
236- foundCore = true ;
210+ // Compatibility:
211+ // - Linux kernels 2.6 and later (provides /sys/devices/system/cpu topology interface)
212+ // - Any CPU architecture supported by the kernel (Intel, AMD, ARM, POWER, etc.)
213+ // - Works in containers or cgroups (reflects host topology)
214+ unsigned getPhysicalCoreCount () {
215+ // only x86_64 and x86_32 architectures support HT (hyper-threading)
216+ // in all other cases, we assume no HT and return MY_OMP_GET_MAX_THREADS()
217+ #if defined(__i386__) || defined(__x86_64__)
218+ // Parse strings like "0-3,5,7-9" → {0,1,2,3,5,7,8,9}
219+ auto parseCpuList = [](const std::string &s) {
220+ std::vector<int > cpus;
221+ std::istringstream iss (s);
222+ for (std::string tok; std::getline (iss, tok, ' ,' );) {
223+ auto dash = tok.find (' -' );
224+ if (dash == std::string::npos) {
225+ cpus.push_back (std::stoi (tok));
226+ } else {
227+ int start = std::stoi (tok.substr (0 , dash));
228+ int end = std::stoi (tok.substr (dash + 1 ));
229+ for (int i = start; i <= end; ++i) cpus.push_back (i);
237230 }
238231 }
232+ return cpus;
233+ };
234+
235+ // Read a single integer from the given sysfs file
236+ auto readInt = [&](const std::string &path, int &out) -> bool {
237+ std::ifstream f (path);
238+ if (!f.is_open ()) return false ;
239+ f >> out;
240+ return !f.fail ();
241+ };
242+
243+ // 1) Read list of present CPUs
244+ std::ifstream presentF (" /sys/devices/system/cpu/present" );
245+ if (!presentF.is_open ()) {
246+ return MY_OMP_GET_MAX_THREADS ();
239247 }
240- // In case the file doesn't end with an empty line.
241- if (foundPhysical && foundCore)
242- coreSet.insert (std::to_string (physicalId) + " -" + std::to_string (coreId));
243-
244- if (!coreSet.empty ()) {
245- physicalCoreCount = static_cast <int >(coreSet.size ());
246- } else {
247- // Fallback: try reading "cpu cores" from the first processor block.
248- cpuinfo.clear ();
249- cpuinfo.seekg (0 , std::ios::beg);
250- while (std::getline (cpuinfo, line)) {
251- auto colonPos = line.find (' :' );
252- if (colonPos != std::string::npos) {
253- std::string key = line.substr (0 , colonPos);
254- std::string value = line.substr (colonPos + 1 );
255- key.erase (key.find_last_not_of (" \t " ) + 1 );
256- value.erase (0 , value.find_first_not_of (" \t " ));
257- if (key == " cpu cores" ) {
258- physicalCoreCount = std::stoi (value);
259- break ;
260- }
261- }
262- }
248+ std::string presentLine;
249+ std::getline (presentF, presentLine);
250+ auto cpus = parseCpuList (presentLine);
251+
252+ // 2) For each CPU, read its package & core IDs
253+ std::set<std::pair<int , int >> physicalCores;
254+ for (int cpu : cpus) {
255+ std::string topoBase =
256+ " /sys/devices/system/cpu/cpu" + std::to_string (cpu) + " /topology/" ;
257+ int pkg = -1 , core = -1 ;
258+ if (!readInt (topoBase + " physical_package_id" , pkg)) continue ;
259+ if (!readInt (topoBase + " core_id" , core)) continue ;
260+ physicalCores.emplace (pkg, core);
263261 }
264262
265- if (physicalCoreCount == 0 ) {
266- return MY_OMP_GET_MAX_THREADS ();
263+ // 3) Return count if successful, else fallback
264+ if (!physicalCores.empty ()) {
265+ return static_cast <unsigned >(physicalCores.size ());
267266 }
268- return physicalCoreCount;
267+ #endif
268+ // in ARM and RISKV we only need this
269+ return MY_OMP_GET_MAX_THREADS ();
269270}
270271
271- static int getAllowedCoreCount () {
272+ unsigned getAllowedCoreCount () {
272273 cpu_set_t cpuSet;
273274 CPU_ZERO (&cpuSet);
274275 if (sched_getaffinity (0 , sizeof (cpu_set_t ), &cpuSet) != 0 ) {
@@ -287,44 +288,45 @@ static int getAllowedCoreCount() {
287288
288289#warning "Unknown platform. Impossible to detect the number of physical cores."
289290// Fallback version if none of the above platforms is detected.
290- static int getPhysicalCoreCount () { return MY_OMP_GET_MAX_THREADS (); }
291- static int getAllowedCoreCount () { return MY_OMP_GET_MAX_THREADS (); }
291+ unsigned getPhysicalCoreCount () { return MY_OMP_GET_MAX_THREADS (); }
292+ unsigned getAllowedCoreCount () { return MY_OMP_GET_MAX_THREADS (); }
292293
293294#endif
294295
295296} // namespace
296297
297- int getOptimalThreadCount () {
298+ unsigned getOptimalThreadCount () {
298299 // if the user has set the OMP_NUM_THREADS environment variable, use that value
299- const auto OMP_THREADS = std::getenv (" OMP_NUM_THREADS" );
300- if (OMP_THREADS) {
301- return std::stoi (OMP_THREADS);
302- }
303- // otherwise, use the min between number of physical cores or the number of allowed
304- // cores (e.g. by taskset)
305- const auto physicalCores = getPhysicalCoreCount ();
306- const auto allowedCores = getAllowedCoreCount ();
307- if (physicalCores < allowedCores) {
308- return physicalCores;
309- }
310- return allowedCores;
311- }
300+ static const auto cached_threads = []() -> unsigned {
301+ const auto OMP_THREADS = std::getenv (" OMP_NUM_THREADS" );
302+ if (OMP_THREADS) {
303+ try {
304+ return std::stoi (OMP_THREADS);
305+ } catch (...) {
306+ std::cerr << " Invalid OMP_NUM_THREADS value: " << OMP_THREADS
307+ << " . using default thread count." << std::endl;
308+ }
309+ }
310+ // otherwise, use the min between number of physical cores or the number of allowed
311+ // cores (e.g. by taskset)
312+ try {
313+ const auto physicalCores = getPhysicalCoreCount ();
314+ const auto allowedCores = getAllowedCoreCount ();
315+ if (physicalCores < allowedCores) {
316+ return physicalCores;
317+ }
318+ return allowedCores;
319+ } catch (const std::exception &e) {
320+ std::cerr << " Error determining optimal thread count: " << e.what ()
321+ << " . Using OpenMP default thread count." << std::endl;
322+ }
323+ return MY_OMP_GET_MAX_THREADS ();
324+ }();
312325
313- // -------------------------- openmp helpers -------------------------------
314- int get_num_threads_parallel_block ()
315- // return how many threads an omp parallel block would use.
316- // omp_get_max_threads() does not report this; consider case of NESTED=0.
317- // Why is there no such routine? Barnett 5/22/20
318- {
319- int nth_used;
320- #pragma omp parallel
321- {
322- #pragma omp single
323- nth_used = MY_OMP_GET_NUM_THREADS ();
324- }
325- return nth_used;
326+ return cached_threads;
326327}
327328
329+ #endif // _OPENMP
328330// ---------- thread-safe rand number generator for Windows platform ---------
329331// (note this is used by macros in test_defs.h, and supplied in linux/macosx)
330332#ifdef _WIN32
0 commit comments