Skip to content

Commit ee2d596

Browse files
committed
vstart: Add new --crimson-balance-cpu option
Signed-off-by: Jose J Palacios-Perez <[email protected]>
1 parent 83c03c0 commit ee2d596

File tree

2 files changed

+90
-19
lines changed

2 files changed

+90
-19
lines changed

src/stop.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,3 +234,8 @@ else
234234
[ $stop_rgw -eq 1 ] && do_killall radosgw lt-radosgw apache2
235235
[ $stop_cephadm -eq 1 ] && do_killcephadm
236236
fi
237+
238+
# If the --crimson-balance-cpu option was used, vstart.sh leaves auxiliary CPU
# mapping files named /tmp/numa_args_<strategy>.out behind; remove them.
# No existence test is done on purpose: `[ -f <glob> ]` breaks as soon as the
# glob matches more than one file, whereas `rm -f` silently ignores a pattern
# that matches nothing.
if [ "$ceph_osd" == "crimson-osd" ]; then
    rm -f /tmp/numa_args_*.out
fi

src/vstart.sh

Lines changed: 85 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ declare -a block_devs
199199
declare -a bluestore_db_devs
200200
declare -a bluestore_wal_devs
201201
declare -a secondary_block_devs
202+
declare -a cpu_table
202203
secondary_block_devs_type="SSD"
203204

204205
VSTART_SEC="client.vstart.sh"
@@ -275,6 +276,7 @@ options:
275276
--crimson-smp: number of cores to use for crimson
276277
--crimson-alien-num-threads: number of alien-tp threads
277278
--crimson-alien-num-cores: number of cores to use for alien-tp
279+
--crimson-balance-cpu: distribute the Seastar reactors uniformly across OSDs (osd) or NUMA (socket)
278280
--osds-per-host: populate crush_location as each host holds the specified number of osds if set
279281
--require-osd-and-client-version: if supplied, do set-require-min-compat-client and require-osd-release to specified value
280282
--use-crush-tunables: if supplied, set tunables to specified value
@@ -346,10 +348,43 @@ parse_secondary_devs() {
346348
done
347349
}
348350

351+
# Auxiliary function to prepare the CPU cores to pin Seastar reactors.
#
# Loads the global array cpu_table with the lines of
# /tmp/numa_args_<balance_strategy>.out (one array entry per line). When that
# file does not exist yet it is generated first, by running balance_cpu.py on
# the output of `lscpu --json` and the CPU affinity list of this shell.
# Does nothing if cpu_table is already populated.
#
# Globals:   cpu_table (written), MY_CPUS (written),
#            CEPH_DIR, CEPH_NUM_OSD (read)
# Arguments: $1 - crimson_smp value, passed as -r to balance_cpu.py
#            $2 - balance strategy, passed as -b and used in the file names
# Returns:   0 when cpu_table is populated, non-zero otherwise
prep_balance_cpu() {
    local crimson_smp=$1
    local balance_strategy=$2
    local in_file_name="/tmp/numa_args_${balance_strategy}.out"
    local out_file_name="/tmp/numa_nodes.json"
    local log_file_name="/tmp/numa_bal_${balance_strategy}.log"
    local -a cmd
    local cpu_list

    # The table only needs to be built once per run
    if [ "${#cpu_table[@]}" -ne 0 ]; then
        return 0
    fi

    # Ensure the file with the CPU mappings exists, generate it otherwise
    if [ ! -f "${in_file_name}" ]; then
        debug echo "lscpu --json > ${out_file_name}"
        lscpu --json > "${out_file_name}" || return 1
        MY_CPUS=$(taskset -acp $$ | awk -F : '{print $2}')
        # taskset prints the list with leading blanks: keep the first word only
        read -r cpu_list _ <<< "${MY_CPUS}"
        cmd=(python3 "${CEPH_DIR}/../src/tools/contrib/balance_cpu.py"
             -o "${CEPH_NUM_OSD}" -r "${crimson_smp}" -b "${balance_strategy}"
             -u "${out_file_name}" -t "${cpu_list}")
        debug echo "${cmd[*]} > ${in_file_name}"
        # stdout is the mapping itself; diagnostics go to the log file
        if ! "${cmd[@]}" > "${in_file_name}" 2>> "${log_file_name}"; then
            debug echo "balance_cpu.py failed. Check ${log_file_name}"
            # Do not leave a partial mapping behind: later runs would reuse it
            rm -f -- "${in_file_name}"
            return 1
        fi
    fi

    readarray -t cpu_table < "${in_file_name}" || return 1
    # Check the table is not empty, bail out otherwise
    if [ "${#cpu_table[@]}" -eq 0 ]; then
        debug echo "CPU table empty, bailing out. Check ${log_file_name}"
        return 1
    fi
    debug echo "CPU table not empty with ${#cpu_table[@]} entries"
}
382+
349383
# Default values for the crimson options
350384
crimson_smp=1
351385
crimson_alien_num_threads=0
352386
crimson_alien_num_cores=0
387+
crimson_balance_cpu="" # "osd", "socket"
353388

354389
while [ $# -ge 1 ]; do
355390
case $1 in
@@ -589,6 +624,10 @@ case $1 in
589624
crimson_alien_num_cores=$2
590625
shift
591626
;;
627+
--crimson-balance-cpu)
628+
crimson_balance_cpu=$2
629+
shift
630+
;;
592631
--bluestore-spdk)
593632
[ -z "$2" ] && usage_exit
594633
IFS=',' read -r -a bluestore_spdk_dev <<< "$2"
@@ -1156,6 +1195,14 @@ start_cephexporter() {
11561195
--addrs "$IP"
11571196
}
11581197

1198+
# Set crimson_seastar_cpu_cores for one OSD from the precomputed cpu_table.
#
# Globals:   cpu_table, CEPH_BIN, conf_fn (read)
# Arguments: $1 - OSD id, also used as the index into cpu_table
# Returns:   exit status of `ceph config set`, or 1 when cpu_table holds no
#            entry for the given OSD
do_balance_cpu() {
    local osd=$1
    local interval=${cpu_table[${osd}]:-}

    # Refuse to push an empty CPU set into the cluster configuration
    if [ -z "$interval" ]; then
        echo "No CPU set for osd.$osd in cpu_table (${#cpu_table[@]} entries)" >&2
        return 1
    fi

    echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $interval"
    "$CEPH_BIN/ceph" -c "$conf_fn" config set "osd.$osd" crimson_seastar_cpu_cores "$interval"
}
1205+
11591206
start_osd() {
11601207
if [ $inc_osd_num -gt 0 ]; then
11611208
old_maxosd=$($CEPH_BIN/ceph osd getmaxosd | sed -e 's/max_osd = //' -e 's/ in epoch.*//')
@@ -1167,15 +1214,24 @@ start_osd() {
11671214
end=$(($CEPH_NUM_OSD-1))
11681215
fi
11691216
local osds_wait
1217+
# If the OSD type is Crimson and --crimson-balance-cpu was given, build the CPU table before starting the OSDs
1218+
if [ "$ceph_osd" == "crimson-osd" ] && [ ! -z "$crimson_balance_cpu" ]; then
1219+
debug echo "Preparing balance CPU for Crimson"
1220+
prep_balance_cpu $crimson_smp $crimson_balance_cpu
1221+
fi
11701222
for osd in `seq $start $end`
11711223
do
11721224
if [ "$ceph_osd" == "crimson-osd" ]; then
1173-
bottom_cpu=$(( osd * crimson_smp ))
1174-
top_cpu=$(( bottom_cpu + crimson_smp - 1 ))
1175-
# set exclusive CPU nodes for each osd
1176-
echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $bottom_cpu-$top_cpu"
1177-
$CEPH_BIN/ceph -c $conf_fn config set "osd.$osd" crimson_seastar_cpu_cores "$bottom_cpu-$top_cpu"
1178-
fi
1225+
if [ ! -z "$crimson_balance_cpu" ]; then
1226+
do_balance_cpu $osd
1227+
else
1228+
bottom_cpu=$(( osd * crimson_smp ))
1229+
top_cpu=$(( bottom_cpu + crimson_smp - 1 ))
1230+
# set exclusive CPU nodes for each osd
1231+
echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $bottom_cpu-$top_cpu"
1232+
$CEPH_BIN/ceph -c $conf_fn config set "osd.$osd" crimson_seastar_cpu_cores "$bottom_cpu-$top_cpu"
1233+
fi
1234+
fi
11791235
if [ "$new" -eq 1 -o $inc_osd_num -gt 0 ]; then
11801236
wconf <<EOF
11811237
[osd.$osd]
@@ -1703,21 +1759,31 @@ if [ "$ceph_osd" == "crimson-osd" ]; then
17031759
extra_seastar_args=" --trace"
17041760
fi
17051761
if [ "$objectstore" == "bluestore" ]; then
1762+
# Only set the alien thread cores when the highest logical CPU id (nproc - 1) exceeds the CEPH_NUM_OSD * crimson_smp cores taken by the reactors
17061763
if [ "$(expr $(nproc) - 1)" -gt "$(($CEPH_NUM_OSD * crimson_smp))" ]; then
1707-
if [ $crimson_alien_num_cores -gt 0 ]; then
1708-
alien_bottom_cpu=$(($CEPH_NUM_OSD * crimson_smp))
1709-
alien_top_cpu=$(( alien_bottom_cpu + crimson_alien_num_cores - 1 ))
1710-
# Ensure top value within range:
1711-
if [ "$(($alien_top_cpu))" -gt "$(expr $(nproc) - 1)" ]; then
1712-
alien_top_cpu=$(expr $(nproc) - 1)
1713-
fi
1714-
echo "crimson_alien_thread_cpu_cores: $alien_bottom_cpu-$alien_top_cpu"
1715-
# This is a (logical) processor id range, it could be refined to encompass only physical processor ids
1716-
# (equivalently, ignore hyperthreading sibling processor ids)
1717-
$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores "$alien_bottom_cpu-$alien_top_cpu"
1764+
if [ ! -z "$crimson_balance_cpu" ]; then
1765+
debug echo "Preparing balance CPU for Crimson"
1766+
prep_balance_cpu $crimson_smp $crimson_balance_cpu
1767+
available_cpus="${cpu_table[-1]}"
1768+
echo "crimson_alien_thread_cpu_cores: '$available_cpus'"
1769+
$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores "$available_cpus"
17181770
else
1719-
echo "crimson_alien_thread_cpu_cores:" $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
1720-
$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
1771+
if [ $crimson_alien_num_cores -gt 0 ]; then
1772+
alien_bottom_cpu=$(($CEPH_NUM_OSD * crimson_smp))
1773+
alien_top_cpu=$(( alien_bottom_cpu + crimson_alien_num_cores - 1 ))
1774+
# Ensure top value within range:
1775+
if [ "$(($alien_top_cpu))" -gt "$(expr $(nproc) - 1)" ]; then
1776+
alien_top_cpu=$(expr $(nproc) - 1)
1777+
fi
1778+
echo "crimson_alien_thread_cpu_cores: $alien_bottom_cpu-$alien_top_cpu"
1779+
# This is a (logical) processor id range, it could be refined to encompass only physical processor ids
1780+
# (equivalently, ignore hyperthreading sibling processor ids)
1781+
$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores "$alien_bottom_cpu-$alien_top_cpu"
1782+
else
1783+
# This is the legacy default case
1784+
echo "crimson_alien_thread_cpu_cores:" $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
1785+
$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
1786+
fi
17211787
fi
17221788
if [ $crimson_alien_num_threads -gt 0 ]; then
17231789
echo "$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads $crimson_alien_num_threads"

0 commit comments

Comments
 (0)