Skip to content

Commit 5bc3537

Browse files
committed
Basic level NUMA support (build and runtime availability check).
1 parent 0f55955 commit 5bc3537

File tree

4 files changed

+109
-9
lines changed

4 files changed

+109
-9
lines changed

build/common.mk

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1811,6 +1811,38 @@ ESMF_F90LINKRPATHSTHIRD += $(ESMF_F90RPATHPREFIX)$(ESMF_BABELTRACE_LIBPATH)
18111811
endif
18121812
endif
18131813

1814+
#-------------------------------------------------------------------------------
1815+
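# NUMA
# ESMF_NUMA=ON is an alias for "standard", which defaults ESMF_NUMA_LIBS to
# -lnuma unless ESMF_NUMA_LIBS is supplied through the environment.
# NOTE(review): the ifdef below fires for ANY non-empty ESMF_NUMA value, so a
# setting such as ESMF_NUMA=OFF would still add -DESMF_NUMA=1 -- confirm intended.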
1816+
#-------------------------------------------------------------------------------
1817+
ifeq ($(ESMF_NUMA),ON)
1818+
ESMF_NUMA = standard
1819+
endif
1820+
ifeq ($(ESMF_NUMA),standard)
1821+
ifneq ($(origin ESMF_NUMA_LIBS), environment)
1822+
ESMF_NUMA_LIBS = -lnuma
1823+
endif
1824+
endif
1825+
1826+
ifdef ESMF_NUMA
1827+
ESMF_CPPFLAGS += -DESMF_NUMA=1
1828+
ifdef ESMF_NUMA_INCLUDE
1829+
ESMF_CXXCOMPILEPATHSTHIRD += -I$(ESMF_NUMA_INCLUDE)
1830+
ESMF_F90COMPILEPATHSTHIRD += -I$(ESMF_NUMA_INCLUDE)
1831+
endif
1832+
ifdef ESMF_NUMA_LIBS
1833+
ESMF_CXXLINKLIBS += $(ESMF_NUMA_LIBS)
1834+
ESMF_CXXLINKRPATHSTHIRD += $(addprefix $(ESMF_CXXRPATHPREFIX),$(subst -L,,$(filter -L%,$(ESMF_NUMA_LIBS))))
1835+
ESMF_F90LINKLIBS += $(ESMF_NUMA_LIBS)
1836+
ESMF_F90LINKRPATHSTHIRD += $(addprefix $(ESMF_F90RPATHPREFIX),$(subst -L,,$(filter -L%,$(ESMF_NUMA_LIBS))))
1837+
endif
1838+
ifdef ESMF_NUMA_LIBPATH
1839+
ESMF_CXXLINKPATHSTHIRD += -L$(ESMF_NUMA_LIBPATH)
1840+
ESMF_F90LINKPATHSTHIRD += -L$(ESMF_NUMA_LIBPATH)
1841+
ESMF_CXXLINKRPATHSTHIRD += $(ESMF_CXXRPATHPREFIX)$(ESMF_NUMA_LIBPATH)
1842+
ESMF_F90LINKRPATHSTHIRD += $(ESMF_F90RPATHPREFIX)$(ESMF_NUMA_LIBPATH)
1843+
endif
1844+
endif
1845+
18141846
#-------------------------------------------------------------------------------
18151847
# NVML
18161848
#-------------------------------------------------------------------------------

makefile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,18 @@ endif
304304
echo "ESMF_BABELTRACE_LIBPATH: $(ESMF_BABELTRACE_LIBPATH)" ; \
305305
fi; \
306306
fi
307+
-@if [ -n "$(ESMF_NUMA)" ] ; then \
308+
echo "ESMF_NUMA: $(ESMF_NUMA)" ; \
309+
if [ -n "$(ESMF_NUMA_INCLUDE)" ] ; then \
310+
echo "ESMF_NUMA_INCLUDE: $(ESMF_NUMA_INCLUDE)" ; \
311+
fi; \
312+
if [ -n "$(ESMF_NUMA_LIBS)" ] ; then \
313+
echo "ESMF_NUMA_LIBS: $(ESMF_NUMA_LIBS)" ; \
314+
fi; \
315+
if [ -n "$(ESMF_NUMA_LIBPATH)" ] ; then \
316+
echo "ESMF_NUMA_LIBPATH: $(ESMF_NUMA_LIBPATH)" ; \
317+
fi; \
318+
fi
307319
-@if [ -n "$(ESMF_NVML)" ] ; then \
308320
echo "ESMF_NVML: $(ESMF_NVML)" ; \
309321
if [ -n "$(ESMF_NVML_INCLUDE)" ] ; then \
@@ -709,6 +721,18 @@ endif
709721
echo "# ESMF_BABELTRACE_LIBPATH:$(ESMF_BABELTRACE_LIBPATH)" >> $(MKINFO) ; \
710722
fi; \
711723
fi
724+
-@if [ -n "$(ESMF_NUMA)" ] ; then \
725+
echo "# ESMF_NUMA: $(ESMF_NUMA)" >> $(MKINFO) ; \
726+
if [ -n "$(ESMF_NUMA_INCLUDE)" ] ; then \
727+
echo "# ESMF_NUMA_INCLUDE: $(ESMF_NUMA_INCLUDE)" >> $(MKINFO) ; \
728+
fi; \
729+
if [ -n "$(ESMF_NUMA_LIBS)" ] ; then \
730+
echo "# ESMF_NUMA_LIBS: $(ESMF_NUMA_LIBS)" >> $(MKINFO) ; \
731+
fi; \
732+
if [ -n "$(ESMF_NUMA_LIBPATH)" ] ; then \
733+
echo "# ESMF_NUMA_LIBPATH: $(ESMF_NUMA_LIBPATH)" >> $(MKINFO) ; \
734+
fi; \
735+
fi
712736
-@if [ -n "$(ESMF_NVML)" ] ; then \
713737
echo "# ESMF_NVML: $(ESMF_NVML)" >> $(MKINFO) ; \
714738
if [ -n "$(ESMF_NVML_INCLUDE)" ] ; then \

src/Infrastructure/VM/include/ESMCI_VMKernel.h

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,21 +313,26 @@ class VMK{
313313
int *ncpet; // number of cores this pet references
314314
int *nadevs;//TODO: to be removed // number of accelerator devices accessible from this pet
315315
int **cid; // core id of the cores this pet references
316+
// SSI
316317
int ssiCount; // number of single system images in this VMK
317318
int ssiMinPetCount; // minimum PETs on a single system image
318319
int ssiMaxPetCount; // maximum PETs on a single system image
319320
int ssiLocalPetCount; // number of PETs on the same SSI as localPet (incl.)
320321
int ssiLocalPet; // id of local PET in the local SSI
321322
int *ssiLocalPetList; // PETs that are on the same SSI as localPet (incl.)
323+
// SSI DEV
322324
int devCount; // number of devices associated with this VMK all SSI
323325
int ssiLocalDevCount;// number of devices associated with this VMK on local SSI
324326
int *ssiLocalDevList;// list of SSI-local device indices associated with this VMK
325327
// Use this index to make local association calls (e.g. via
326328
// acc_set_device_num() or omp_set_default_device()), and
327329
// to look up global device index in ssidevs array.
330+
// SSI NUMA
331+
int ssiLocalNumaCount; // number of NUMA nodes on the same SSI as localPet (incl.)
332+
int *ssiLocalNumaList; // NUMA nodes
328333
// general information about this VMK
329-
int mpionly; // 0: there is multi-threading, 1: MPI-only
330-
bool threadsflag; // threaded or none-threaded VM
334+
bool mpionly; // false: there is multi-threading, true: MPI-only
335+
bool threadsflag; // threaded or non-threaded VM
331336
// MPI Communicator handles
332337
MPI_Comm mpi_c; // communicator across the entire VM
333338
MPI_Comm mpi_c_ssi; // communicator holding PETs on the same SSI
@@ -399,6 +404,10 @@ class VMK{
399404
static int argc_mpich;
400405
static char *argv_mpich_store[100];
401406
static char **argv_mpich;
407+
// NVML support
408+
static bool nvmlEnabled; // NVML support enabled or disabled
409+
// NUMA support
410+
static bool numaEnabled; // NUMA support enabled or disabled
402411

403412
// methods
404413
private:
@@ -515,6 +524,12 @@ class VMK{
515524
return false;
516525
#endif
517526
}
527+
static bool isNvmlEnabled(){ // returns the class-wide NVML flag; VMK::init() sets it true only when built with ESMF_NVML
528+
return nvmlEnabled;
529+
}
530+
static bool isNumaEnabled(){ // returns the class-wide NUMA flag; VMK::init() sets it true only when built with ESMF_NUMA and numa_available()>=0
531+
return numaEnabled;
532+
}
518533

519534
#define XSTR(X) STR(X)
520535
#define STR(X) #X

src/Infrastructure/VM/src/ESMCI_VMKernel.C

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,12 @@ using namespace std;
9494
#include <openacc.h>
9595
#endif
9696

97+
// NUMA support
98+
#ifdef ESMF_NUMA
99+
#include <numa.h>
100+
#endif
101+
102+
// NVML support
97103
#ifdef ESMF_NVML
98104
#include <nvml.h>
99105
#endif
@@ -157,6 +163,10 @@ char **VMK::argv = &(argv_store[0]);
157163
int VMK::argc_mpich;
158164
char *VMK::argv_mpich_store[100];
159165
char **VMK::argv_mpich = &(argv_mpich_store[0]);
166+
// NVML support
167+
bool VMK::nvmlEnabled;
168+
// NUMA support
169+
bool VMK::numaEnabled;
160170

161171
} // namespace ESMCI
162172

@@ -447,12 +457,12 @@ void VMK::init(MPI_Comm mpiCommunicator, bool globalResourceControl){
447457
mypthid=0;
448458
#endif
449459
#ifdef ESMF_MPIUNI
450-
mpionly=0; // this way the commtype will be checked in comm calls
460+
mpionly=false; // this way the commtype will be checked in comm calls
451461
#else
452462
if (npets==1)
453-
mpionly=0; // this way the commtype will be checked in comm calls
463+
mpionly=false; // this way the commtype will be checked in comm calls
454464
else
455-
mpionly=1; // normally the default VM can only be MPI-only
465+
mpionly=true; // normally the default VM can only be MPI-only
456466
#endif
457467
// no threading in default global VM
458468
threadsflag = false;
@@ -472,6 +482,19 @@ void VMK::init(MPI_Comm mpiCommunicator, bool globalResourceControl){
472482
MPI_Comm_rank(mpi_c_ssi, &color);
473483
if (color>0) color = MPI_UNDEFINED; // only root PETs on each SSI
474484
MPI_Comm_split(mpi_c, color, 0, &mpi_c_ssi_roots);
485+
#endif
486+
// check whether NVML support is enabled
487+
#ifdef ESMF_NVML
488+
nvmlEnabled = true;
489+
#else
490+
nvmlEnabled = false;
491+
#endif
492+
// check whether NUMA support is enabled
493+
#ifdef ESMF_NUMA
494+
numaEnabled = true;
495+
if (numa_available()<0) numaEnabled = false; // NUMA not available at runtime
496+
#else
497+
numaEnabled = false;
475498
#endif
476499
// initialize the shared memory variables
477500
pth_finish_count = NULL;
@@ -1101,15 +1124,15 @@ void VMK::construct(void *ssarg){
11011124
}
11021125
#ifdef ESMF_MPIUNI
11031126
// don't set mpionly flag so that comm call check for commtype
1104-
mpionly=0;
1127+
mpionly=false;
11051128
#else
11061129
if (npets==1)
1107-
mpionly=0;
1130+
mpionly=false;
11081131
else{
11091132
// determine whether we are dealing with an MPI-only VMK
1110-
mpionly=1; // assume this is MPI-only VMK until found otherwise
1133+
mpionly=true; // assume this is MPI-only VMK until found otherwise
11111134
for (int i=0; i<npets; i++)
1112-
if (tid[i]>0) mpionly=0; // found multi-threading PET
1135+
if (tid[i]>0) mpionly=false; // found multi-threading PET
11131136
}
11141137
#endif
11151138
threadsflag = sarg->threadsflag;
@@ -3096,6 +3119,12 @@ void VMK::logSystem(std::string prefix, ESMC_LogMsgType_Flag msgType){
30963119
msg << prefix << "isSsiSharedMemoryEnabled=" << isSsiSharedMemoryEnabled();
30973120
ESMC_LogDefault.Write(msg.str(), msgType);
30983121
msg.str(""); // clear
3122+
msg << prefix << "isNvmlEnabled=" << isNvmlEnabled();
3123+
ESMC_LogDefault.Write(msg.str(), msgType);
3124+
msg.str(""); // clear
3125+
msg << prefix << "isNumaEnabled=" << isNumaEnabled();
3126+
ESMC_LogDefault.Write(msg.str(), msgType);
3127+
msg.str(""); // clear
30993128
msg << prefix << "ssiCount=" << nssiid << " peCount=" << ncores;
31003129
ESMC_LogDefault.Write(msg.str(), msgType);
31013130
for (int i=0; i<ncores; i++){

0 commit comments

Comments
 (0)