Skip to content

Commit c096130

Browse files
authored
Merge pull request #6291 from tensor-tang/check_env
add script to check the cpu env
2 parents 3628695 + 32cc0db commit c096130

File tree

1 file changed

+261
-0
lines changed

1 file changed

+261
-0
lines changed

paddle/scripts/check_env.sh

Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
#!/bin/bash
2+
3+
# Stop right away (with a success status) on any kernel other than Linux.
case "$(uname -s)" in
  Linux)
    ;;
  *)
    echo "Current scenario only support in Linux yet!"
    exit 0
    ;;
esac
7+
8+
echo "========================= Hardware Information ========================="

# Run lscpu a single time. The C locale keeps its field labels in English so
# the label lookups below still match on systems with a translated lscpu.
lscpu_out=$(LC_ALL=C lscpu)

#######################################
# Print the value of one lscpu field.
# Globals:   lscpu_out (read) - captured lscpu output
# Arguments: $1 - literal text that appears in the field label
# Outputs:   the whitespace-trimmed value of the first matching line,
#            or an empty line when no label matches
#######################################
lscpu_field() {
  printf '%s\n' "$lscpu_out" \
    | grep -F -- "$1" \
    | head -n 1 \
    | awk -F':' '{print $2}' \
    | xargs
}

sockets=$(grep 'physical id' /proc/cpuinfo | sort -u | wc -l)
cores_per_socket=$(grep 'core id' /proc/cpuinfo | sort -u | wc -l)
ht=$(lscpu_field "per core")
physical_cores=$((sockets * cores_per_socket))
# Anchor the match so only "processor : N" entries are counted, not other
# fields that merely contain the word.
virtual_cores=$(grep -c '^processor' /proc/cpuinfo)
numa_nodes=$(lscpu_field "NUMA node(s)")

echo "CPU Name : $(grep -i "model name" /proc/cpuinfo | uniq | awk -F ':' '{print $2}' | xargs)"
echo "CPU Family : $(lscpu_field "CPU family")"
echo "Socket Number : $sockets"
echo "Cores Per Socket : $cores_per_socket"
echo "Total Physical Cores : $physical_cores"
echo "Total Virtual Cores : $virtual_cores"

# Only compare numerically when lscpu really gave us a number; an empty value
# used to make the test itself fail and fall through to "ON".
if [[ ! "$ht" =~ ^[0-9]+$ ]]; then
  echo "Hyper Threading : Unknown"
elif [ "$ht" -eq 1 ]; then
  echo "Hyper Threading : OFF"
  if [ "$physical_cores" -ne "$virtual_cores" ]; then
    echo "Error: HT logical error"
  fi
else
  echo "Hyper Threading : ON"
  if [ "$physical_cores" -ge "$virtual_cores" ]; then
    echo "Error: HT logical error"
  fi
fi

echo "NUMA Nodes : $numa_nodes"
if [[ "$numa_nodes" =~ ^[0-9]+$ ]] && [ "$numa_nodes" -lt "$sockets" ]; then
  echo "Warning: NUMA node is not enough for the best performance, at least $sockets"
fi
37+
38+
echo "-------------------------- Memory Information --------------------------"
# Only the major version is checked here (it must be at least 2).
# NOTE(review): the original comment says support starts from 2.11, but the
# minor version has never been checked - confirm whether it should be.
dmi_ver=$(dmidecode --version | awk -F '.' '{print $1}' | xargs)
if [[ ! "$dmi_ver" =~ ^[0-9]+$ ]] || [ "$dmi_ver" -lt 2 ]; then
  echo "Error: dmidecode unknown or version is too old"
  exit 0
fi

# Capture the table once and reuse it below instead of re-running dmidecode
# for every query. stderr is merged in because a pipe only carries stdout, so
# a "Permission denied" written to stderr would never reach the check.
dmi_out=$(dmidecode 2>&1)
if printf '%s\n' "$dmi_out" | grep -qi "Permission denied"; then
  echo "Error: need root to run dmidecode"
  exit 0
fi

max_dimms=0
num_dimms_installed=0
dimms_installed=""
dimms_uninstalled=""
# Candidate ids are the whitespace-separated words found after the colon on
# every line containing "Locator" (word splitting is intentional here).
for dimm_id in $(printf '%s\n' "$dmi_out" | grep Locator | sort -u | awk -F ':' '{print $2}'); do
  num_referred=$(printf '%s\n' "$dmi_out" | grep -Fwc -- "$dimm_id")
  # the actual dimm id should be referred only once
  if [ "$num_referred" -eq 1 ]; then
    # From the line naming this id up to the next "Manufacturer" line, print 1
    # if any line mentions "Unknown" and 0 otherwise. The id is passed with -v
    # and matched literally so characters such as "/" cannot break the program.
    num_unknown=$(printf '%s\n' "$dmi_out" | awk -v id="$dimm_id" '
      index($0, id) { s = 1; f = 0 }
      /Unknown/ { f = 1 }
      /Manufacturer/ { if (s == 1) { print f; exit 0 } }')
    if [ "$num_unknown" = "0" ]; then
      dimms_installed="$dimms_installed \n $dimm_id"
      num_dimms_installed=$((num_dimms_installed + 1))
    else
      dimms_uninstalled="$dimms_uninstalled \n $dimm_id"
    fi
    max_dimms=$((max_dimms + 1))
  fi
done

echo "Installed DIMM number : $num_dimms_installed"
num_dimms_mapped=$(printf '%s\n' "$dmi_out" | grep -c "Memory Device Mapped")
if [ "$num_dimms_installed" -ne "$num_dimms_mapped" ]; then
  echo "Error: The installed DIMMs number does not match the mapped memory device: $num_dimms_mapped"
fi

# Accept both the older "Configured Clock Speed ... MHz" wording and the
# "Configured Memory Speed ... MT/s" wording printed by newer dmidecode.
num_clock_configed=$(printf '%s\n' "$dmi_out" \
  | grep -iE "Configured (Clock|Memory) Speed" \
  | grep -icE "Hz|MT/s")
if [ "$num_dimms_installed" -ne "$num_clock_configed" ]; then
  echo "Error: The installed DIMMs number does not match configured clocks: $num_clock_configed"
fi

echo -e "Installed DIMMs Locator: $dimms_installed"
echo -e "Not installed DIMMs : $dimms_uninstalled"
max_dimm_slots=$(printf '%s\n' "$dmi_out" | grep -c "Bank Locator")
echo "DIMMs max slots : $max_dimm_slots"
if [ "$max_dimms" -ne "$max_dimm_slots" ]; then
  echo "Error: The max dimm slots do not match the max dimms: $max_dimms"
fi
83+
#######################################
# Decide whether a procps version is at least 3.3, the threshold this script
# uses before relying on the human-readable "free -h" output.
# Arguments: $1 - version string such as "3.3.10"
# Returns:   0 when the version is 3.3 or newer, 1 otherwise (including when
#            the major component is not a number)
#######################################
free_supports_human() {
  local major minor _rest
  IFS=. read -r major minor _rest <<<"$1"
  [[ "$major" =~ ^[0-9]+$ ]] || return 1
  [[ "$minor" =~ ^[0-9]+$ ]] || minor=0
  # Compare major first; the minor only matters within major version 3.
  # (Testing both components independently rejected versions such as 4.0.)
  ((10#$major > 3 || (10#$major == 3 && 10#$minor >= 3)))
}

#######################################
# Convert a size in KiB to a "<n.n> GB" string.
# Arguments: $1 - size in KiB
# Outputs:   the size divided by 1024*1024 with one decimal, followed by " GB"
#######################################
kb_to_gb() {
  awk -v kb="$1" 'BEGIN { printf "%.1f GB\n", kb / 1024 / 1024 }'
}

free_ver=$(free -V | awk '{print $NF}')
if free_supports_human "$free_ver"; then
  mem_sz=$(free -h | grep -i mem | awk '{print $2}' | xargs)
  swap_sz=$(free -h | grep -i swap | awk '{print $2}' | xargs)
  total_sz=$(free -th | grep -i total | tail -n 1 | awk '{print $2}' | xargs)
else
  # Older free: take the raw KiB figures and scale them ourselves.
  mem_sz=$(kb_to_gb "$(free | grep -i mem | awk '{print $2}' | xargs)")
  swap_sz=$(kb_to_gb "$(free | grep -i swap | awk '{print $2}' | xargs)")
  total_sz=$(kb_to_gb "$(free -t | grep -i total | tail -n 1 | awk '{print $2}' | xargs)")
fi
echo "Memory Size : $mem_sz"
echo "Swap Memory Size : $swap_sz"
echo "Total Memory Size : $total_sz"
echo "Max Memory Capacity : $(dmidecode | grep -i "maximum capacity" | sort -u | awk -F':' '{print $2}' | xargs)"
101+
# DIMMs frequency

#######################################
# Extract the configured DIMM speeds from dmidecode output.
# Accepts both the "Configured Clock Speed ... MHz" wording and the
# "Configured Memory Speed ... MT/s" wording; entries without a unit
# (e.g. "Unknown") are ignored.
# Inputs:  dmidecode output on stdin
# Outputs: one trimmed speed value per distinct matching line
#######################################
distinct_clock_speeds() {
  grep -iE "Configured (Clock|Memory) Speed" \
    | grep -iE "Hz|MT/s" \
    | sort -u \
    | awk -F':' '{ v = $2; gsub(/^[ \t]+|[ \t]+$/, "", v); print v }'
}

# Query dmidecode once and derive both the list and the count from it.
speed_list=$(dmidecode | distinct_clock_speeds)
clock_speeds=$(printf '%s\n' "$speed_list" | xargs)
echo "Configed Clock Speed : $clock_speeds"
num_clock_type=$(printf '%s\n' "$speed_list" | grep -c .)
if [ "$num_clock_type" -gt 1 ]; then
  echo "Warning: Have more than 1 speed type, all DIMMs should have same frequency: $clock_speeds"
elif [ "$num_clock_type" -eq 0 ]; then
  # Previously this case printed the "more than 1" warning, which was wrong.
  echo "Warning: No configured DIMM clock speed was reported"
fi
108+
109+
echo "-------------------------- Turbo Information --------------------------"

#######################################
# Convert a frequency in kHz to GHz.
# Arguments: $1 - frequency in kHz
# Outputs:   the value divided by 1,000,000 with two decimals (no newline)
#######################################
khz_to_ghz() {
  awk -v khz="$1" 'BEGIN { printf "%.2f", khz / 1000000 }'
}

scaling_drive=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_driver)
echo "Scaling Driver : $scaling_drive"
# Operands are quoted so that an empty value (no cpufreq support) is a plain
# mismatch instead of a malformed test expression.
if [ "$scaling_drive" = "intel_pstate" ] && [ -e /sys/devices/system/cpu/intel_pstate/no_turbo ]; then
  turbo=$(cat /sys/devices/system/cpu/intel_pstate/no_turbo)
  if [ "$turbo" = "1" ]; then
    echo "Turbo Status : OFF"
  else
    echo "Turbo Status : ON"
  fi
else
  echo "Warning: Scaling driver is not intel_pstate, maybe should enable it in BIOS"
  echo "Turbo Status : Unknown"
fi

# cpu frequency
# Distinct per-CPU limits in kHz, sorted ascending.
max_freq_khz=$(sort -un /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq)
min_freq_khz=$(sort -un /sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq)
num_max_freq=$(printf '%s\n' "$max_freq_khz" | grep -c .)
num_min_freq=$(printf '%s\n' "$min_freq_khz" | grep -c .)
if [ "$num_max_freq" -ne 1 ]; then
  echo "Error: the max_frequency of all CPU should be equal"
fi
if [ "$num_min_freq" -ne 1 ]; then
  echo "Error: the min_frequency of all CPU should be equal"
fi
# When CPUs disagree, report the highest maximum and the lowest minimum so a
# single number always reaches the conversion.
max_khz=$(printf '%s\n' "$max_freq_khz" | tail -n 1)
min_khz=$(printf '%s\n' "$min_freq_khz" | head -n 1)
if [ -n "$max_khz" ]; then
  max_freq=$(khz_to_ghz "$max_khz") # GHz
  echo "CPU Max Frequency : $max_freq GHz"
else
  max_freq=""
  echo "CPU Max Frequency : Unknown"
fi
if [ -n "$min_khz" ]; then
  min_freq=$(khz_to_ghz "$min_khz") # GHz
  echo "CPU Min Frequency : $min_freq GHz"
else
  min_freq=""
  echo "CPU Min Frequency : Unknown"
fi

# cpu governor
governor=$(sort -u /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor)
num_governor=$(printf '%s\n' "$governor" | grep -c .)
if [ "$num_governor" -ne 1 ]; then
  echo "Error: the governor of all CPU should be the same"
fi
echo "CPU Freq Governor : $governor"
145+
146+
147+
echo "========================= Software Information ========================="

#######################################
# Print a one-line description of the running distribution.
# Prefers /etc/redhat-release; otherwise falls back to PRETTY_NAME from
# /etc/os-release; prints "Unknown" when neither file is readable.
#######################################
os_version() {
  if [ -r /etc/redhat-release ]; then
    cat /etc/redhat-release
  elif [ -r /etc/os-release ]; then
    # Source in a subshell so the file's variables do not leak into the script.
    (
      . /etc/os-release
      printf '%s\n' "${PRETTY_NAME:-Unknown}"
    )
  else
    echo "Unknown"
  fi
}

#######################################
# Extract the version part of the first line of "gcc --version".
# Arguments: $1 - banner line, e.g. "gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-11)"
# Outputs:   everything after the first ")" (leading space included), so the
#            vendor tag in parentheses does not have to be "(GCC)"; the whole
#            banner prefixed with a space when it contains no ")"
#######################################
gcc_version_from_banner() {
  case "$1" in
    *\)*) printf '%s\n' "${1#*\)}" ;;
    *) printf ' %s\n' "$1" ;;
  esac
}

echo "BIOS Release Date : $(dmidecode | grep "Release Date" | awk -F ':' '{print $2}' | xargs)"
echo "OS Version : $(os_version)"
echo "Kernel Release Version : $(uname -r)"
echo "Kernel Patch Version : $(uname -v)"
if command -v gcc >/dev/null 2>&1; then
  gcc_ver=$(gcc_version_from_banner "$(gcc --version | head -n 1)")
else
  gcc_ver=" Not installed"
fi
echo "GCC Version :$gcc_ver"
if command -v cmake >/dev/null 2>&1; then
  cmake_ver=$(cmake --version | head -n 1 | awk -F 'version' '{print $2}')
else
  cmake_ver=" Not installed"
fi
echo "CMake Version :$cmake_ver"
159+
echo "------------------ Environment Variables Information -------------------"

#######################################
# Describe one variable by exact name.
# Looking the name up directly (instead of grepping the output of env) avoids
# matching other variables whose name or value merely contains the text, and
# copes with values that contain spaces.
# Arguments: $1 - variable name
# Outputs:   "NAME=value" when the variable is set (even to an empty string),
#            otherwise "unset"
#######################################
describe_env_var() {
  local name=$1
  if [ -n "${!name+set}" ]; then
    printf '%s=%s\n' "$name" "${!name}"
  else
    printf '%s\n' "unset"
  fi
}

kmp_affinity=$(describe_env_var KMP_AFFINITY)
omp_dynamic=$(describe_env_var OMP_DYNAMIC)
omp_nested=$(describe_env_var OMP_NESTED)
omp_num_threads=$(describe_env_var OMP_NUM_THREADS)
mkl_num_threads=$(describe_env_var MKL_NUM_THREADS)
mkl_dynamic=$(describe_env_var MKL_DYNAMIC)
echo "KMP_AFFINITY : $kmp_affinity"
echo "OMP_DYNAMIC : $omp_dynamic"
echo "OMP_NESTED : $omp_nested"
echo "OMP_NUM_THREADS : $omp_num_threads"
echo "MKL_NUM_THREADS : $mkl_num_threads"
echo "MKL_DYNAMIC : $mkl_dynamic"
178+
# Check if any MKL related libraries have been installed in LD_LIBRARY_PATH

#######################################
# Search every directory of a colon-separated path list for the MKL-DNN,
# MKLML and IOMP shared libraries and report each one found.
# Arguments: $1 - colon-separated directory list (may be empty)
# Outputs:   one "Found <kind> : <paths>" line per library kind per directory
#######################################
report_mkl_libs() {
  local -a dirs=()
  local -a labels=("Found MKL-DNN :" "Found MKLML :" "Found IOMP :")
  local -a names=("libmkldnn.so" "libmklml_intel.so" "libiomp5.so")
  local dir found i
  # Split on ":" only, so directories containing spaces stay in one piece.
  IFS=':' read -r -a dirs <<<"$1"
  for dir in "${dirs[@]}"; do
    # Skip the empty entries produced by "::" or a leading/trailing colon.
    [ -n "$dir" ] || continue
    for i in 0 1 2; do
      found=$(find "$dir" -name "${names[i]}")
      if [ -n "$found" ]; then
        echo "${labels[i]} $found"
      fi
    done
  done
}

report_mkl_libs "${LD_LIBRARY_PATH:-}"
193+
194+
# Save the complete output of each tool next to the script's working
# directory, as <tool>.dump, for a full manual check afterwards.
for tool in lscpu dmidecode; do
  "$tool" > "$tool.dump"
done
197+
198+
# The expected result would be like:
199+
# ========================= Hardware Information =========================
200+
# CPU Name : Intel(R) Xeon(R) Gold 6148M CPU @ 2.40GHz
201+
# CPU Family : 6
202+
# Socket Number : 2
203+
# Cores Per Socket : 20
204+
# Total Physical Cores : 40
205+
# Total Virtual Cores : 40
206+
# Hyper Threading : OFF
207+
# NUMA Nodes : 2
208+
# -------------------------- Memory Information --------------------------
209+
# Installed DIMM number : 12
210+
# Installed DIMMs Locator:
211+
# CPU1_DIMM_A1
212+
# CPU1_DIMM_B1
213+
# CPU1_DIMM_C1
214+
# CPU1_DIMM_D1
215+
# CPU1_DIMM_E1
216+
# CPU1_DIMM_F1
217+
# CPU2_DIMM_A1
218+
# CPU2_DIMM_B1
219+
# CPU2_DIMM_C1
220+
# CPU2_DIMM_D1
221+
# CPU2_DIMM_E1
222+
# CPU2_DIMM_F1
223+
# Not installed DIMMs :
224+
# CPU1_DIMM_A2
225+
# CPU1_DIMM_B2
226+
# CPU1_DIMM_C2
227+
# CPU1_DIMM_D2
228+
# CPU1_DIMM_E2
229+
# CPU1_DIMM_F2
230+
# CPU2_DIMM_A2
231+
# CPU2_DIMM_B2
232+
# CPU2_DIMM_C2
233+
# CPU2_DIMM_D2
234+
# CPU2_DIMM_E2
235+
# CPU2_DIMM_F2
236+
# DIMMs max slots : 24
237+
# Memory Size : 376G
238+
# Swap Memory Size : 4.0G
239+
# Total Memory Size : 380G
240+
# Max Memory Capacity : 2304 GB
241+
# Configed Clock Speed : 2666 MHz
242+
# -------------------------- Turbo Information --------------------------
243+
# Scaling Driver : intel_pstate
244+
# Turbo Status : ON
245+
# CPU Max Frequency : 3.70 GHz
246+
# CPU Min Frequency : 1.00 GHz
247+
# CPU Freq Governor : performance
248+
# ========================= Software Information =========================
249+
# BIOS Release Date : 03/10/2017
250+
# OS Version : CentOS Linux release 7.3.1611 (Core)
251+
# Kernel Release Version : 3.10.0-514.el7.x86_64
252+
# Kernel Patch Version : #1 SMP Tue Nov 22 16:42:41 UTC 2016
253+
# GCC Version : 4.8.5 20150623 (Red Hat 4.8.5-11)
254+
# CMake Version : 3.5.2
255+
# ------------------ Environment Variables Information -------------------
256+
# KMP_AFFINITY : unset
257+
# OMP_DYNAMIC : unset
258+
# OMP_NESTED : unset
259+
# OMP_NUM_THREADS : unset
260+
# MKL_NUM_THREADS : unset
261+
# MKL_DYNAMIC : unset

0 commit comments

Comments
 (0)