Skip to content

Commit 5b46506

Browse files
rbuchevan-charmworks
authored and committed
Add support for OFI Cray Shasta Build (#3499)
* Remove unnecessary OFI linker flag in configure
* Add ofi-crayshasta build
* Recommend +ofi_runtime_tcp when OFI startup fails
* Add Cray Shasta support to smart-build.pl
1 parent c6c92f3 commit 5b46506

File tree

10 files changed

+168
-7
lines changed

10 files changed

+168
-7
lines changed

CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -387,7 +387,7 @@ set(CHARM_PLATFORM "${NETWORK}-${CHARM_OS}-${CHARM_CPU}")
387387

388388

389389
set(CMK_BUILD_CRAY 0)
390-
if(${NETWORK} MATCHES "gni-")
390+
if(${NETWORK} MATCHES "gni-" OR ${NETWORK} MATCHES "ofi-cray")
391391
set(CHARM_PLATFORM "${NETWORK}")
392392
set(CMK_BUILD_CRAY 1)
393393
elseif(${NETWORK} MATCHES "mpi-cray")
@@ -566,6 +566,8 @@ elseif(${NETWORK} MATCHES "gni-")
566566
set(GDIR "gni")
567567
elseif(${NETWORK} MATCHES "mpi-cray")
568568
set(GDIR "mpi")
569+
elseif(${NETWORK} MATCHES "ofi-cray")
570+
set(GDIR "ofi")
569571
else()
570572
set(GDIR ${NETWORK})
571573
endif()
@@ -751,7 +753,7 @@ endforeach()
751753

752754

753755
# proc_management
754-
if(${NETWORK} STREQUAL "ucx" OR ${NETWORK} STREQUAL "ofi")
756+
if(${NETWORK} MATCHES "ucx" OR ${NETWORK} MATCHES "ofi")
755757
# file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/include/proc_management/simple_pmi)
756758

757759
set(proc_management-sources

buildcmake

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -448,6 +448,10 @@ for c in $opt_lrts_pmi $opt_compiler; do
448448
[[ -n $c ]] && builddir_extra+="-${c}"
449449
done
450450

451+
if [[ "$actual_triplet" = ofi-cray* && -z "$opt_lrts_pmi" ]]; then
452+
opt_lrts_pmi="slurmpmi2"
453+
fi
454+
451455
if [[ -n $opt_destination ]]; then
452456
# Note that $builddir_extra is intentionally ignored here
453457
builddir=$opt_destination$opt_suffix
@@ -492,7 +496,7 @@ if [[ $opt_ampi_only -eq 1 ]]; then
492496
opt_extra_opts="-DCMK_NO_INTEROP=1 -DCMK_NO_MSG_PRIOS=1 -DCMK_FIFO_QUEUE_ONLY=1 -DCMK_OBJID_COLLECTION_BITS=29 $opt_extra_opts "
493497
fi
494498

495-
# Special handling for gni-crayxc, gni-crayxe, mpi-crayxc, mpi-crayxe
499+
# Special handling for gni-crayxc, gni-crayxe, mpi-crayxc, mpi-crayxe, mpi-crayshasta, ofi-crayshasta
496500
if [[ $actual_triplet == gni-* || $actual_triplet == *-cray* ]]; then
497501
opt_network=$actual_triplet
498502
# Need to use Cray's compiler frontends on Cray systems

doc/charm++/manual.rst

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11360,6 +11360,7 @@ appropriate choices for the build one wants to perform.
1136011360
Cray XE6 ``./build charm++ gni-crayxe --with-production -j8``
1136111361
Cray XK7 ``./build charm++ gni-crayxe-cuda --with-production -j8``
1136211362
Cray XC40 ``./build charm++ gni-crayxc --with-production -j8``
11363+
Cray Shasta ``./build charm++ ofi-crayshasta --with-production -j8``
1136311364
================================================================ =====================================================================
1136411365

1136511366
As mentioned earlier, one can also build Charm++ using the precompiled
@@ -11868,6 +11869,13 @@ Parameters that function as boolean flags within Charmrun (taking no
1186811869
other parameters) can be prefixed with "no-" to negate their effect.
1186911870
For example, ``++no-scalable-start``.
1187011871

11872+
.. note::
11873+
11874+
When running on OFI platforms such as Cray Shasta, the OFI runtime parameter
11875+
``+ofi_runtime_tcp`` may be required. By default, the exchange of endpoint (EP) names at
11876+
startup is done via both PMI and OFI. With this flag, it is only done via
11877+
PMI.
11878+
1187111879
.. _command line options:
1187211880

1187311881
Command Line Options

smart-build.pl

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,7 @@ sub promptUserYN {
133133
}
134134

135135

136-
# check for GNI
136+
# check for Cray
137137

138138
if($skip_choosing eq "false"){
139139
my $craycc_found = index(`which CC 2>/dev/null`, "/opt/cray/") != -1;
@@ -143,11 +143,23 @@ sub promptUserYN {
143143
$PE_PRODUCT_LIST = "";
144144
}
145145

146+
my $CRAYPE_NETWORK_TARGET = $ENV{'CRAYPE_NETWORK_TARGET'};
147+
if (not defined $CRAYPE_NETWORK_TARGET) {
148+
$CRAYPE_NETWORK_TARGET = "";
149+
}
150+
146151
my $CRAY_UGNI_found = index(":$PE_PRODUCT_LIST:", ":CRAY_UGNI:") != -1;
147152

148153
my $gni_found = $craycc_found || $CRAY_UGNI_found;
149154

150-
if ($gni_found) {
155+
if ($CRAYPE_NETWORK_TARGET eq "ofi") {
156+
print "\nI found that you have a Cray environment.\nDo you want to build Charm++ targeting Cray Shasta? [Y/n]: ";
157+
my $p = promptUserYN();
158+
if($p eq "yes" || $p eq "default") {
159+
$arch = "ofi-crayshasta";
160+
$skip_choosing = "true";
161+
}
162+
} elsif ($gni_found) {
151163
my $CRAYPE_INTERLAGOS_found = index(":$PE_PRODUCT_LIST:", ":CRAYPE_INTERLAGOS:") != -1;
152164
if ($CRAYPE_INTERLAGOS_found) {
153165
print "\nI found that you have a Cray environment with Interlagos processors.\nDo you want to build Charm++ targeting Cray XE? [Y/n]: ";
Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,12 @@
1+
#define CMK_SMP 1
2+
3+
4+
#undef CMK_SHARED_VARS_UNAVAILABLE
5+
#undef CMK_SHARED_VARS_POSIX_THREADS_SMP
6+
#define CMK_SHARED_VARS_UNAVAILABLE 0
7+
#define CMK_SHARED_VARS_POSIX_THREADS_SMP 1
8+
9+
#undef CMK_TIMER_USE_GETRUSAGE
10+
#undef CMK_TIMER_USE_SPECIAL
11+
#define CMK_TIMER_USE_GETRUSAGE 1
12+
#define CMK_TIMER_USE_SPECIAL 0
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
CMK_DEFS="$CMK_DEFS -D_REENTRANT"
2+
CMK_LIBS=" -lpthread $CMK_LIBS "
3+
CMK_SMP='1'
Lines changed: 89 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,89 @@
1+
#ifndef _CONV_MACH_H
2+
#define _CONV_MACH_H
3+
4+
#define CMK_OFI 1
5+
6+
/* define the default linker, together with its options */
7+
#define CMK_DLL_CC "g++ -shared -O3 -o "
8+
9+
/* 1 if the machine has a function called "getpagesize()", 0 otherwise.
10+
used in the memory files of converse */
11+
#define CMK_GETPAGESIZE_AVAILABLE 1
12+
13+
/* defines which version of memory handlers should be used.
14+
used in conv-core/machine.C */
15+
#define CMK_MALLOC_USE_GNU_MALLOC 0
16+
#define CMK_MALLOC_USE_OS_BUILTIN 1
17+
18+
#define CMK_MEMORY_PAGESIZE 4096
19+
#define CMK_MEMORY_PROTECTABLE 1
20+
21+
/* the following definitions set the type of shared variables to be used. only
22+
one of them must be 1, all the others 0. The different implementations are in
23+
converse.h. Typically used are UNAVAILABLE for non SMP versions and
24+
POSIX_THREADS_SMP for SMP versions. The others are used only in special
25+
cases: NT_THREADS for Windows. */
26+
#define CMK_SHARED_VARS_UNAVAILABLE 1 /* non SMP versions */
27+
#define CMK_SHARED_VARS_POSIX_THREADS_SMP 0 /* SMP versions */
28+
#define CMK_SHARED_VARS_NT_THREADS 0
29+
30+
/* the following define whether signal handlers should be used; both equal to zero
31+
means that signals will not be used. only one of the following can be 1, the
32+
other must be 0. they differ in the fact that the second (_WITH_RESTART)
33+
enables retry on interrupt (a function is recalled upon interrupt and does
34+
not return EINTR as in the first case) */
35+
#define CMK_SIGNAL_USE_SIGACTION 0
36+
#define CMK_SIGNAL_USE_SIGACTION_WITH_RESTART 1
37+
38+
/* specifies whether the CthCpv variables should be defined as Cpv (0) or
39+
directly as normal c variables (1) */
40+
#define CMK_THREADS_REQUIRE_NO_CPV 0
41+
42+
/* decide which is the default implementation of the threads (see threads.C)
43+
Only one of the following can be 1. If none of them is selected, qthreads
44+
will be used as default. This default can be overwritten at compile time
45+
using -DCMK_THREADS_BUILD_"type"=1 */
46+
#define CMK_THREADS_USE_CONTEXT 0
47+
#define CMK_THREADS_USE_FCONTEXT 1
48+
#define CMK_THREADS_USE_JCONTEXT 0
49+
#define CMK_THREADS_USE_PTHREADS 0
50+
51+
/* Specifies what kind of timer to use, and the corresponding headers will be
52+
included in convcore.C. If none is selected, then the machine.C file needs to
53+
implement the timer primitives. */
54+
#define CMK_TIMER_USE_RTC 0
55+
#define CMK_TIMER_USE_RDTSC 0
56+
#define CMK_TIMER_USE_GETRUSAGE 1
57+
#define CMK_TIMER_USE_SPECIAL 0
58+
#define CMK_TIMER_USE_TIMES 0
59+
#define CMK_TIMER_USE_BLUEGENEL 0
60+
61+
/* Specifies what the processor will do when it is idle, either sleep (1) or go
62+
into busy waiting mode (0). In convcore.C there are a few files included if
63+
in sleeping mode, but the real distinct implementation is in the machine.C
64+
file. */
65+
#define CMK_WHEN_PROCESSOR_IDLE_USLEEP 0
66+
67+
/* specifies whether there is a web server collecting utilization statistics (1)
68+
or not (0) */
69+
#define CMK_WEB_MODE 1
70+
71+
#define CMK_DEBUG_MODE 0
72+
73+
/* enables the load balancer framework. set to 1 for almost all the machines */
74+
#define CMK_LBDB_ON 1
75+
76+
#define CMK_64BIT 1
77+
#define CMK_AMD64 1
78+
79+
/* Other possible definitions:
80+
81+
In fault tolerant architectures, CK_MEM_CHECKPOINT can be set. In this case the
82+
extended header must contain also another field called "pn" (phase number).
83+
84+
*/
85+
86+
/* Use PMI2 by default on Cray systems with cray-pmi */
87+
#include "conv-mach-slurmpmi2.h"
88+
89+
#endif
Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,31 @@
1+
CMK_BUILD_CRAY=1
2+
CMK_CRAY_NOGNI=1
3+
. $CHARMINC/conv-mach-craype.sh
4+
5+
# For libfabric
6+
# If the user doesn't pass --basedir, use defaults for libfabric headers and library
7+
if test -z "$USER_OPTS_LD"
8+
then
9+
if test -z "$LIBFABRIC"
10+
then
11+
CMK_INCDIR="$CMK_INCDIR -I/usr/include/"
12+
CMK_LIBDIR="$CMK_LIBDIR -L/usr/lib64/"
13+
else
14+
CMK_INCDIR="$CMK_INCDIR -I$LIBFABRIC/include/"
15+
CMK_LIBDIR="$CMK_LIBDIR -L$LIBFABRIC/lib64/"
16+
fi
17+
fi
18+
19+
# For cray-pmi
20+
if test -n "$CRAY_PMI_PREFIX"
21+
then
22+
CMK_INCDIR="$CMK_INCDIR -I$CRAY_PMI_PREFIX/include"
23+
CMK_LIBDIR="$CMK_LIBDIR -L$CRAY_PMI_PREFIX/lib"
24+
fi
25+
26+
CMK_LIBS="$CMK_LIBS -lfabric"
27+
# Use PMI2 by default on Cray systems with cray-pmi
28+
. $CHARMINC/conv-mach-slurmpmi2.sh
29+
30+
# For runtime
31+
CMK_INCDIR="$CMK_INCDIR -I./proc_management/"

src/arch/ofi/machine.C

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1633,7 +1633,7 @@ int fill_av_ofi(int myid,
16331633
OFI_OP_NAMES,
16341634
&epnames_contexts[i]);
16351635
if (ret) {
1636-
CmiAbort("OFI::LrtsInit::fi_tsend error");
1636+
CmiAbort("OFI::LrtsInit::fi_tsend error (+ofi_runtime_tcp may be needed)");
16371637
}
16381638
}
16391639

src/scripts/configure.ac

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1590,7 +1590,7 @@ int main(int argc, char **argv)
15901590
return 0;
15911591
}
15921592
EOT
1593-
test_cc "whether build on OFI" "yes" "no" "-lfabric"
1593+
test_cc "whether build on OFI" "yes" "no"
15941594
AC_DEFINE_UNQUOTED(CMK_BUILD_ON_OFI, $strictpass, [build OFI.])
15951595
BUILD_OFI=$strictpass
15961596

0 commit comments

Comments
 (0)