Skip to content

Commit b0a8e4d

Browse files
author
Lucas A. Wilson
authored
Merge pull request #8 from TACC/process-binding
Process binding
2 parents cea045e + 7aa5ce7 commit b0a8e4d

File tree

4 files changed

+73 -11 lines changed

4 files changed

+73 -11 lines changed

init_launcher

Lines changed: 24 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,30 @@
88
# The University of Texas at Austin
99
#------------------------------------------------
1010

11-
for i in `seq 0 $(($LAUNCHER_PPN - 1))`
12-
do
13-
#Start the launcher script for every process to be spawned on this host and give it a unique task id
14-
env `$LAUNCHER_DIR/pass_env` LAUNCHER_TSK_ID=$(($LAUNCHER_PPN * $LAUNCHER_HOST_ID + $i)) $LAUNCHER_DIR/launcher &
15-
done
11+
if [ "$LAUNCHER_BIND" -eq "1" ]
12+
then
13+
if [ $LAUNCHER_BIND_HT -eq 1 ]
14+
then
15+
bind_obj="pu"
16+
else
17+
bind_obj="core"
18+
fi
19+
20+
for i in `seq 0 $(($LAUNCHER_PPN - 1))`
21+
do
22+
spu=$(($i * $LAUNCHER_PUPT))
23+
epu=$(($spu + $LAUNCHER_PUPT - 1))
24+
#Start the launcher script for every process to be spawned on this host and give it a unique task id
25+
env `$LAUNCHER_DIR/pass_env` LAUNCHER_HWLOC_CMD="hwloc-bind ${bind_obj}:$spu-$epu" LAUNCHER_TSK_ID=$(($LAUNCHER_PPN * $LAUNCHER_HOST_ID + $i)) $LAUNCHER_DIR/launcher &
26+
done
27+
else
28+
for i in `seq 0 $(($LAUNCHER_PPN - 1))`
29+
do
30+
#Start the launcher script for every process to be spawned on this host and give it a unique task id
31+
env `$LAUNCHER_DIR/pass_env` LAUNCHER_TSK_ID=$(($LAUNCHER_PPN * $LAUNCHER_HOST_ID + $i)) $LAUNCHER_DIR/launcher &
32+
done
33+
fi
34+
1635

1736
if [ ! "x$LAUNCHER_NPHI" == "x" ]
1837
then

launcher

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ while [ $COMPLETE == "false" ]; do
8080

8181
START_TIME=`date +"%s"`
8282
echo "Launcher: Task $LAUNCHER_TSK_ID running job $LAUNCHER_JID on `hostname` ($CMD)"
83-
eval $CMD
83+
eval $LAUNCHER_HWLOC_CMD $CMD
8484
END_TIME=`date +"%s"`
8585
EXEC_TIME=`expr $END_TIME - $START_TIME`
8686
echo "Launcher: Job $LAUNCHER_JID completed in $EXEC_TIME seconds."

paramrun

Lines changed: 44 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,33 @@ if [ "$LAUNCHER_SCHED" == "dynamic" ]; then
168168
fi
169169
fi
170170

171+
if [ $LAUNCHER_BIND -eq 1 ]
172+
then
173+
num_socks=$(lstopo-no-graphics --only socket | wc -l | awk '{print $1}')
174+
if [ $num_socks -eq 0 ]
175+
then
176+
num_socks=1
177+
fi
178+
num_cores=$(lstopo-no-graphics --only core | wc -l | awk '{print $1}')
179+
num_threads=$(lstopo-no-graphics --only pu | wc -l | awk '{print $1}')
180+
if [ $LAUNCHER_PPN -gt $num_cores ]
181+
then
182+
export LAUNCHER_BIND_HT=1
183+
if [ $LAUNCHER_PPN -gt $num_threads ]
184+
then
185+
echo "WARNING: Requested Processes per Node ($LAUNCHER_PPN) exceeds number of available threads ($num_threads). Resetting..."
186+
export LAUNCHER_PPN=$num_threads
187+
export LAUNCHER_NPROCS=$(($LAUNCHER_NHOSTS * $LAUNCHER_PPN))
188+
fi
189+
pu_per_task=$(($num_threads / $LAUNCHER_PPN))
190+
else
191+
export LAUNCHER_BIND_HT=0
192+
pu_per_task=$(($num_cores / $LAUNCHER_PPN))
193+
fi
194+
195+
export LAUNCHER_PUPT=$pu_per_task
196+
fi
197+
171198
#------------------------------
172199
# Let's finally launch the job
173200
#------------------------------
@@ -181,13 +208,29 @@ echo " Processes per host: $LAUNCHER_PPN"
181208
echo " Total processes: $LAUNCHER_NPROCS"
182209
echo " Total jobs: $LAUNCHER_NJOBS"
183210
echo " Scheduling method: $LAUNCHER_SCHED"
211+
if [ $LAUNCHER_BIND -eq 1 ]
212+
then
213+
echo
214+
echo "------ Process Binding Enabled ------"
215+
echo " Sockets per host: $num_socks"
216+
echo " Cores per host: $num_cores"
217+
echo " Threads per host: $num_threads"
218+
echo -n " Binding each task to $LAUNCHER_PUPT "
219+
if [ $LAUNCHER_BIND_HT -eq 1 ]
220+
then
221+
echo "threads (Hyperthreads in use)"
222+
else
223+
echo "cores (Hyperthreads ignored)"
224+
fi
225+
fi
226+
227+
184228
if [ $LAUNCHER_USE_PHI -eq 1 ]
185229
then
186230
echo
187231
echo "--- Intel Xeon Phi Support Enabled ---"
188232
echo " Cards per host: $LAUNCHER_NPHI"
189233
echo " Processes per card: $LAUNCHER_PHI_PPN"
190-
echo
191234
fi
192235
echo
193236
echo "-------------------------------------"

pass_env

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
#!/bin/bash
22

3-
#PASS_PREFIX is a list of variable prefixes that should be passed to launcher tasks on remote hosts
4-
PASS_PREFIX="LAUNCHER TACC ICC GCC LMOD MV2 IMPI PATH LD_LIBRARY_PATH OMP KMP MIC PYTHON"
3+
#EXCLUDE is a list of variable-name prefixes; environment variables whose names start with one of them are dropped before the environment is passed to launcher tasks on remote hosts
4+
EXCLUDE="BASH_FUNC ModuleTable LS_ LESS SSH_ PE_MPICH MINICOM SLURM_NODELIST"
55

66
GREP_ARGS=`
7-
for prefix in $PASS_PREFIX
7+
for prefix in $EXCLUDE
88
do
99
echo -n "-e ^$prefix "
1010
done
1111
`
1212

13-
env | grep $GREP_ARGS | tr '\n' ' '
13+
env | grep -v -e "}$" -e "^ " -e "=$" -e "^_" -e "(" $GREP_ARGS | grep -v -e " " | tr '\n' ' '

0 commit comments

Comments
 (0)