Skip to content

Commit 3a38b2f

Browse files
athanatos authored and Matan-B committed
vstart.sh: simplify crimson core assignment, use assign_crimson_cores.py
This commit simplifies the internal flow in a few ways:
- core assignment is entirely handled by prep_balance_cpu and do_balance_cpu. The latter simply does as the cpu_table instructs.
- assign_crimson_cores calls lscpu and taskset internally, no need for temp files.

It also changes some defaults:
- if crimson-balance-cpu is unset or set to none, crimson-osd will not pin cpus at all rather than using the simple sequential allocation scheme, which could be much less efficient on platforms where cpuids 0,1,2,3,... are on socket 0,1,2,3,... "osd" and "socket" options provide numa aware assignments when requested.

New features:
- Alienstore cores are now assigned with assign_crimson_cores using the same balance strategy using --crimson-alien-num-cores.
- --crimson-reactor-physical-only and --crimson-alienstore-physical-only will cause reactor or alienstore cpus respectively to be allocated with one cpu per physical core rather than including smt siblings.

Fixes: https://tracker.ceph.com/issues/71096
Signed-off-by: Samuel Just <[email protected]>
(cherry picked from commit 1795f46)
1 parent f823ec7 commit 3a38b2f

File tree

1 file changed

+64
-73
lines changed

1 file changed

+64
-73
lines changed

src/vstart.sh

Lines changed: 64 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,9 @@ options:
275275
--seastore-secondary-devs-type: device type of all secondary blockdevs. HDD, SSD(default), ZNS or RANDOM_BLOCK_SSD
276276
--crimson-smp: number of cores to use for crimson
277277
--crimson-alien-num-threads: number of alien-tp threads
278-
--crimson-alien-num-cores: number of cores to use for alien-tp
278+
--crimson-reactor-physical-only: use only one cpu per physical core for seastar reactors
279+
--crimson-alien-num-cores: number of cpus to use for alien threads
280+
--crimson-alienstore-physical-only: use only one cpu per physical core for alienstore
279281
--crimson-balance-cpu: distribute the Seastar reactors uniformly across OSDs (osd) or NUMA (socket)
280282
--osds-per-host: populate crush_location as each host holds the specified number of osds if set
281283
--require-osd-and-client-version: if supplied, do set-require-min-compat-client and require-osd-release to specified value
@@ -350,40 +352,38 @@ parse_secondary_devs() {
350352

351353
# Auxiliar function to prepare the CPU cores to pin Seastar reactors
352354
prep_balance_cpu() {
353-
local crimson_smp=$1
354-
local balance_strategy=$2
355-
local in_file_name="/tmp/numa_args_${balance_strategy}.out"
356-
local out_file_name="/tmp/numa_nodes.json"
357-
local log_file_name="/tmp/numa_bal_${balance_strategy}.log"
358-
local cmd
359-
360-
# Check the table is empty
361-
if [ "${#cpu_table[@]}" -eq 0 ]; then
362-
# Ensure the file with the CPU mappings exist
363-
if [ ! -f ${in_file_name} ]; then
364-
debug echo "lscpu --json > ${out_file_name}"
365-
lscpu --json > ${out_file_name}
366-
MY_CPUS=$(taskset -acp $$ | awk -F : '{print $2}')
367-
cmd="python3 ${CEPH_DIR}/../src/tools/contrib/balance_cpu.py -o ${CEPH_NUM_OSD}\
368-
-r ${crimson_smp} -b ${balance_strategy} -u ${out_file_name} -t ${MY_CPUS} > ${in_file_name}"
369-
debug echo "$cmd"
370-
eval "$cmd" >> ${log_file_name}
371-
fi
355+
if [ -z $crimson_balance_cpu ] || [ "${crimson_balance_cpu}" == "none" ] ; then
356+
echo "Not assigning cpus for crimson"
357+
return
358+
fi
372359

373-
readarray -t cpu_table < ${in_file_name}
374-
# Check the table is not empty, bail out otherwise
375-
if [ "${#cpu_table[@]}" -ne 0 ]; then
376-
debug echo "CPU table not empty with ${#cpu_table[@]} entries"
377-
else
378-
debug echo "CPU table empty, bailing out. Check ${log_file_name}"
379-
fi
360+
cmd="python3 ${CEPH_DIR}/../src/tools/contrib/assign_crimson_cores.py"
361+
cmd+=" -o ${CEPH_NUM_OSD} -r ${crimson_smp} -a ${crimson_alien_num_cores}"
362+
cmd+=" -b ${crimson_balance_cpu}"
363+
if [ ${crimson_reactor_physical_only} != 0 ]; then
364+
cmd+=" --physical-only-seastar"
365+
fi
366+
if [ ${crimson_alienstore_physical_only} != 0 ]; then
367+
cmd+=" --physical-only-alienstore"
368+
fi
369+
370+
echo $cmd
371+
readarray -t cpu_table < <($cmd)
372+
# Check the table is not empty, bail out otherwise
373+
if [ "${#cpu_table[@]}" -ne 0 ]; then
374+
debug echo "CPU table not empty with ${#cpu_table[@]} entries"
375+
else
376+
debug echo "CPU table empty, bailing out."
377+
exit 1
380378
fi
381379
}
382380

383381
# Default values for the crimson options
384382
crimson_smp=1
385383
crimson_alien_num_threads=0
384+
crimson_reactor_physical_only=0
386385
crimson_alien_num_cores=0
386+
crimson_alienstore_physical_only=0
387387
crimson_balance_cpu="" # "osd", "socket"
388388

389389
while [ $# -ge 1 ]; do
@@ -620,10 +620,16 @@ case $1 in
620620
crimson_alien_num_threads=$2
621621
shift
622622
;;
623+
--crimson-reactor-physical-only)
624+
crimson_reactor_physical_only=1
625+
;;
623626
--crimson-alien-num-cores)
624627
crimson_alien_num_cores=$2
625628
shift
626629
;;
630+
--crimson-alienstore-physical-only)
631+
crimson_alienstore_physical_only=1
632+
;;
627633
--crimson-balance-cpu)
628634
crimson_balance_cpu=$2
629635
shift
@@ -1197,10 +1203,32 @@ start_cephexporter() {
11971203

11981204
do_balance_cpu() {
11991205
local osd=$1
1206+
local alienstore_idx=$(( osd + CEPH_NUM_OSD ))
1207+
1208+
local reactor_interval=${cpu_table[${osd}]}
1209+
if ! [ "${reactor_interval}" == "" ]; then
1210+
local cmd="$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores ${reactor_interval}"
1211+
echo $cmd
1212+
$cmd
1213+
else
1214+
echo "No cpu_table entry for osd $osd, setting crimson_seastar_num_reactors"
1215+
local cmd="$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_num_threads $crimson_smp"
1216+
echo $cmd
1217+
$cmd
1218+
return
1219+
fi
1220+
1221+
1222+
local alienstore_interval=${cpu_table[${alienstore_idx}]}
1223+
if [ ! "${alienstore_interval}" == "" ]; then
1224+
local cmd="$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_alien_thread_cpu_cores ${alienstore_interval}"
1225+
echo $cmd
1226+
$cmd
1227+
else
1228+
echo "No alienstore cpu_table entry for osd $osd"
1229+
return
1230+
fi
12001231

1201-
interval=${cpu_table[${osd}]}
1202-
echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $interval"
1203-
$CEPH_BIN/ceph -c $conf_fn config set "osd.$osd" crimson_seastar_cpu_cores "$interval"
12041232
}
12051233

12061234
start_osd() {
@@ -1215,22 +1243,14 @@ start_osd() {
12151243
fi
12161244
local osds_wait
12171245
# If the type of OSD is Crimson and the option to balance the Seastar reactors is true
1218-
if [ "$ceph_osd" == "crimson-osd" ] && [ ! -z "$crimson_balance_cpu" ]; then
1246+
if [ "$ceph_osd" == "crimson-osd" ]; then
12191247
debug echo "Preparing balance CPU for Crimson"
1220-
prep_balance_cpu $crimson_smp $crimson_balance_cpu
1248+
prep_balance_cpu
12211249
fi
12221250
for osd in `seq $start $end`
12231251
do
12241252
if [ "$ceph_osd" == "crimson-osd" ]; then
1225-
if [ ! -z "$crimson_balance_cpu" ]; then
1226-
do_balance_cpu $osd
1227-
else
1228-
bottom_cpu=$(( osd * crimson_smp ))
1229-
top_cpu=$(( bottom_cpu + crimson_smp - 1 ))
1230-
# set exclusive CPU nodes for each osd
1231-
echo "$CEPH_BIN/ceph -c $conf_fn config set osd.$osd crimson_seastar_cpu_cores $bottom_cpu-$top_cpu"
1232-
$CEPH_BIN/ceph -c $conf_fn config set "osd.$osd" crimson_seastar_cpu_cores "$bottom_cpu-$top_cpu"
1233-
fi
1253+
do_balance_cpu $osd
12341254
fi
12351255
if [ "$new" -eq 1 -o $inc_osd_num -gt 0 ]; then
12361256
wconf <<EOF
@@ -1759,38 +1779,9 @@ if [ "$ceph_osd" == "crimson-osd" ]; then
17591779
extra_seastar_args=" --trace"
17601780
fi
17611781
if [ "$objectstore" == "bluestore" ]; then
1762-
# This condition verifies the number of logical CPU cores
1763-
if [ "$(expr $(nproc) - 1)" -gt "$(($CEPH_NUM_OSD * crimson_smp))" ]; then
1764-
if [ ! -z "$crimson_balance_cpu" ]; then
1765-
debug echo "Preparing balance CPU for Crimson"
1766-
prep_balance_cpu $crimson_smp $crimson_balance_cpu
1767-
available_cpus="${cpu_table[-1]}"
1768-
echo "crimson_alien_thread_cpu_cores: '$available_cpus'"
1769-
$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores "$available_cpus"
1770-
else
1771-
if [ $crimson_alien_num_cores -gt 0 ]; then
1772-
alien_bottom_cpu=$(($CEPH_NUM_OSD * crimson_smp))
1773-
alien_top_cpu=$(( alien_bottom_cpu + crimson_alien_num_cores - 1 ))
1774-
# Ensure top value within range:
1775-
if [ "$(($alien_top_cpu))" -gt "$(expr $(nproc) - 1)" ]; then
1776-
alien_top_cpu=$(expr $(nproc) - 1)
1777-
fi
1778-
echo "crimson_alien_thread_cpu_cores: $alien_bottom_cpu-$alien_top_cpu"
1779-
# This is a (logical) processor id range, it could be refined to encompass only physical processor ids
1780-
# (equivalently, ignore hyperthreading sibling processor ids)
1781-
$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores "$alien_bottom_cpu-$alien_top_cpu"
1782-
else
1783-
# This is the legacy default case
1784-
echo "crimson_alien_thread_cpu_cores:" $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
1785-
$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)"
1786-
fi
1787-
fi
1788-
if [ $crimson_alien_num_threads -gt 0 ]; then
1789-
echo "$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads $crimson_alien_num_threads"
1790-
$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads "$crimson_alien_num_threads"
1791-
fi
1792-
else
1793-
echo "No alien thread cpu core isolation"
1782+
if [ $crimson_alien_num_threads -gt 0 ]; then
1783+
echo "$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads $crimson_alien_num_threads"
1784+
$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads "$crimson_alien_num_threads"
17941785
fi
17951786
fi
17961787
fi

0 commit comments

Comments
 (0)