Skip to content

Commit 6cf7f1e

Browse files
authored
Merge pull request #5671 from tensor-tang/autocpu
auto set cpu env when mkldnn or mklml enabled
2 parents a76b614 + 6337007 commit 6cf7f1e

File tree

3 files changed

+78
-5
lines changed

3 files changed

+78
-5
lines changed

benchmark/paddle/image/run_mkldnn.sh

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
set -e
22

33
function train() {
4-
unset OMP_NUM_THREADS MKL_NUM_THREADS
5-
export OMP_DYNAMIC="FALSE"
6-
export KMP_AFFINITY="granularity=fine,compact,0,0"
4+
unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
75
topology=$1
86
layer_num=$2
97
bs=$3
@@ -14,8 +12,6 @@ function train() {
1412
elif [ $4 == "False" ]; then
1513
thread=`nproc`
1614
# each trainer_count use only 1 core to avoid conflict
17-
export OMP_NUM_THREADS=1
18-
export MKL_NUM_THREADS=1
1915
log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log"
2016
else
2117
echo "Wrong input $3, use True or False."

paddle/scripts/submit_local.sh.in

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,54 @@ function ver2num() {
4343
set +e
4444
}
4545

46+
function cpu_config() {
  # Auto-set KMP_AFFINITY and OMP_DYNAMIC from the Hyper-Threading status
  # reported by lscpu ("Thread(s) per core").
  #
  # Globals:   KMP_AFFINITY, OMP_DYNAMIC - exported only when currently
  #            empty or unset; values supplied by the user are kept.
  # Arguments: none
  # Returns:   0
  #
  # Nothing is touched when both MKLDNN and MKLML are disabled. The @...@
  # placeholders are presumably substituted when this template is
  # configured by the build system.
  # NOTE: the closing "]" must be its own word; writing "OFF"] makes the
  # test fail with "missing ]" so the early return would never trigger.
  if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF" ]; then
    return 0
  fi

  local ht affinity dynamic
  ht=$(lscpu | grep "per core" | awk -F':' '{print $2}' | xargs)

  # String comparison on a quoted value: an empty result (lscpu missing or
  # no matching line) simply selects the "HT is ON" defaults instead of
  # raising a "unary operator expected" error.
  if [ "$ht" == "1" ]; then # HT is OFF
    affinity="granularity=fine,compact,0,0"
    dynamic="FALSE"
  else # HT is ON (or could not be determined)
    affinity="granularity=fine,compact,1,0"
    dynamic="True"
  fi

  if [ -z "$KMP_AFFINITY" ]; then
    export KMP_AFFINITY="$affinity"
  fi
  if [ -z "$OMP_DYNAMIC" ]; then
    export OMP_DYNAMIC="$dynamic"
  fi
  return 0
}
69+
70+
function threads_config() {
  # Auto-set OMP_NUM_THREADS and MKL_NUM_THREADS to
  # (number of processors / trainer_count), with a minimum of 1.
  #
  # Globals:   OMP_NUM_THREADS, MKL_NUM_THREADS - exported only when
  #            currently empty or unset; user-supplied values are kept.
  # Arguments: the command line of the script; it is scanned for
  #            "trainer_count<sep><digits>" (e.g. --trainer_count=4).
  # Returns:   0
  #
  # Nothing is touched when both MKLDNN and MKLML are disabled. The @...@
  # placeholders are presumably substituted when this template is
  # configured by the build system.
  # NOTE: the closing "]" must be its own word; writing "OFF"] makes the
  # test fail with "missing ]" so the early return would never trigger.
  if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF" ]; then
    return 0
  fi

  local processors trainers threads

  # One "processor : N" line exists per logical CPU; anchoring the pattern
  # avoids counting other lines that merely contain the word.
  processors=$(grep -c "^processor" /proc/cpuinfo)
  processors=${processors:-0}

  # If the flag is repeated, use the last occurrence - assuming the
  # trainer's flag parser also honours the final value (TODO confirm).
  trainers=$(grep -Eo 'trainer_count.[0-9]+' <<< "$*" \
    | grep -Eo '[0-9]+' \
    | tail -n 1)
  # Force base 10 so a value such as "08" is not rejected as invalid octal;
  # a missing or zero trainer count is treated as a single trainer to avoid
  # dividing by zero.
  trainers=$((10#${trainers:-1}))
  if [ "$trainers" -lt 1 ]; then
    trainers=1
  fi

  threads=$((processors / trainers))
  if [ "$threads" -lt 1 ]; then
    threads=1
  fi

  if [ -z "$OMP_NUM_THREADS" ]; then
    export OMP_NUM_THREADS=$threads
  fi
  if [ -z "$MKL_NUM_THREADS" ]; then
    export MKL_NUM_THREADS=$threads
  fi
  return 0
}
93+
4694
# Per-user configuration directory; created up front if it does not exist.
PADDLE_CONF_HOME="$HOME/.config/paddle"
# Quoted so a $HOME containing whitespace is not split into several paths.
mkdir -p "${PADDLE_CONF_HOME}"
4896

@@ -92,9 +140,13 @@ else:
92140
sys.exit(0)
93141
EOF
94142

143+
cpu_config
144+
# echo $KMP_AFFINITY $OMP_DYNAMIC
95145

96146
case "$1" in
97147
"train")
148+
threads_config $@
149+
# echo $OMP_NUM_THREADS $MKL_NUM_THREADS
98150
${DEBUGGER} $PADDLE_BIN_PATH/paddle_trainer ${@:2}
99151
;;
100152
"merge_model")

python/paddle/v2/__init__.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,31 @@ def init(**kwargs):
7676
for key in args_dict.keys():
7777
args.append('--%s=%s' % (key, str(args_dict[key])))
7878

79+
# auto set cpu environment
80+
def set_env(key, value):
    '''Set environment variable `key` to `value` unless it is already set.

    A variable that already exists in the environment (even with an empty
    value) is left untouched. Both arguments must be of type str.
    '''
    assert isinstance(key, str)
    assert isinstance(value, str)
    # setdefault writes only when the key is absent, which is exactly the
    # "get, check for None, then assign" sequence in a single call.
    os.environ.setdefault(key, value)
87+
88+
ht = os.popen("lscpu |grep \"per core\"|awk -F':' '{print $2}'|xargs")
89+
ht = int(ht.read())
90+
if ht == 1: # ht is off
91+
set_env("OMP_DYNAMIC", "false")
92+
set_env("KMP_AFFINITY", "granularity=fine,compact,0,0")
93+
else:
94+
set_env("OMP_DYNAMIC", "true")
95+
set_env("KMP_AFFINITY", "granularity=fine,compact,1,0")
96+
processors = os.popen("grep \"processor\" /proc/cpuinfo|sort -u|wc -l")
97+
processors = int(processors.read())
98+
trainers = kwargs.get('trainer_count', 1)
99+
threads = processors / trainers
100+
threads = '1' if threads < 1 else str(threads)
101+
set_env("OMP_NUM_THREADS", threads)
102+
set_env("MKL_NUM_THREADS", threads)
103+
79104
if 'use_gpu' in kwargs:
80105
cp.g_command_config_args['use_gpu'] = kwargs['use_gpu']
81106
if 'use_mkldnn' in kwargs:

0 commit comments

Comments
 (0)