Skip to content

Commit 8f746b4

Browse files
committed
Support systems with more than 1024 cores
Dynamically determine the size of the cpu_set_t struct, doubling it on each try
1 parent 7123af1 commit 8f746b4

File tree

1 file changed

+25
-13
lines changed

1 file changed

+25
-13
lines changed

easybuild/tools/systemtools.py

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
@author: Ward Poelmans (Ghent University)
3030
"""
3131
import ctypes
32+
import errno
3233
import fcntl
3334
import grp # @UnresolvedImport
3435
import os
@@ -160,24 +161,35 @@ class SystemToolsException(Exception):
160161
def sched_getaffinity():
161162
"""Determine list of available cores for current process."""
162163
cpu_mask_t = ctypes.c_ulong
163-
cpu_setsize = 1024
164164
n_cpu_bits = 8 * ctypes.sizeof(cpu_mask_t)
165-
n_mask_bits = cpu_setsize // n_cpu_bits
166-
167-
class cpu_set_t(ctypes.Structure):
168-
"""Class that implements the cpu_set_t struct."""
169-
_fields_ = [('bits', cpu_mask_t * n_mask_bits)]
170165

171166
_libc_lib = find_library('c')
172-
_libc = ctypes.cdll.LoadLibrary(_libc_lib)
167+
_libc = ctypes.CDLL(_libc_lib, use_errno=True)
173168

174169
pid = os.getpid()
175-
cs = cpu_set_t()
176-
ec = _libc.sched_getaffinity(os.getpid(), ctypes.sizeof(cpu_set_t), ctypes.pointer(cs))
177-
if ec == 0:
178-
_log.debug("sched_getaffinity for pid %s successful", pid)
179-
else:
180-
raise EasyBuildError("sched_getaffinity failed for pid %s ec %s", pid, ec)
170+
171+
cpu_setsize = 1024 # Max number of CPUs currently detectable
172+
max_cpu_setsize = ctypes.c_ulong(-1).value // 4 # (INT_MAX / 2)
173+
# Limit it to something reasonable but still big enough
174+
max_cpu_setsize = min(max_cpu_setsize, 1e9)
175+
while cpu_setsize < max_cpu_setsize:
176+
n_mask_bits = cpu_setsize // n_cpu_bits
177+
178+
class cpu_set_t(ctypes.Structure):
179+
"""Class that implements the cpu_set_t struct."""
180+
_fields_ = [('bits', cpu_mask_t * n_mask_bits)]
181+
182+
cs = cpu_set_t()
183+
ec = _libc.sched_getaffinity(pid, ctypes.sizeof(cpu_set_t), ctypes.pointer(cs))
184+
if ec == 0:
185+
_log.debug("sched_getaffinity for pid %s successful", pid)
186+
break
187+
elif ctypes.get_errno() != errno.EINVAL:
188+
raise EasyBuildError("sched_getaffinity failed for pid %s errno %s", pid, ctypes.get_errno())
189+
cpu_setsize *= 2
190+
191+
if ec != 0:
192+
raise EasyBuildError("sched_getaffinity failed finding a large enough cpuset for pid %s", pid)
181193

182194
cpus = []
183195
for bitmask in cs.bits:

0 commit comments

Comments
 (0)