Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
eede80f
Dev: sbd: Check all nodes are reachable when checking SBD-related tim…
liangxin1300 Dec 23, 2025
cb0f055
Dev: sbd: check SBD_DELAY_START for non integer case
liangxin1300 Dec 23, 2025
c94bb1d
Dev: sbd: Calculate expected msgwait timeout correctly with crashdump…
liangxin1300 Dec 24, 2025
f148c5b
Dev: ui_sbd: Get minimum timeout value dynamically
liangxin1300 Dec 24, 2025
7e824a8
Dev: behave: Adjust functional test for previous commit
liangxin1300 Dec 26, 2025
6560b8e
Dev: sbd: Check and fix drop-in file which to unset SBD_DELAY_START
liangxin1300 Dec 27, 2025
ec8e941
Dev: utils: Refactor utils.check_all_nodes_reachable
liangxin1300 Dec 30, 2025
3523529
Dev: sbd: Check configurations consistency only if there are reachabl…
liangxin1300 Dec 30, 2025
8e7a0f0
Dev: sbd: Show diff output after error output when checking consistency
liangxin1300 Jan 4, 2026
d36cbcd
Dev: sbd: Enable to fix devices metadata consistency issue
liangxin1300 Jan 5, 2026
e3dc3a7
Dev: sbd: Show multi errors or warnings at once if detected
liangxin1300 Jan 5, 2026
b6145ef
Dev: sbd: Check if the drop-in file which to unset SBD_DELAY_START ex…
liangxin1300 Jan 6, 2026
6563036
Dev: sbd: Check sbd systemd start timeout on all nodes
liangxin1300 Jan 6, 2026
7e1e249
Dev: sbd: Ignore comment line and blank line when checking consistency
liangxin1300 Jan 6, 2026
4812922
Dev: ui_sbd: Do sbd timeout-related configurations check on 'crm sbd …
liangxin1300 Jan 7, 2026
fae0ef7
Dev: sbd: Check if sbd.service is enabled on all nodes
liangxin1300 Jan 7, 2026
21ef350
Dev: sbd: Check and fix fence_sbd agent
liangxin1300 Jan 7, 2026
cb7ee9d
Dev: sbd: Add debug log for sbd checking results
liangxin1300 Jan 8, 2026
f497c52
Dev: sbd: Refactor to enable checking when cluster is down
liangxin1300 Jan 9, 2026
f19f855
Dev: unittests: Adjust unit test for previous commits
liangxin1300 Dec 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 12 additions & 25 deletions crmsh/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,15 +731,8 @@ def start_pacemaker(node_list=[], enable_flag=False):
Return success node list
"""
# not _context means not in init or join process
if not _context and \
utils.package_is_installed("sbd") and \
ServiceManager().service_is_enabled(constants.SBD_SERVICE) and \
sbd.SBDTimeout.is_sbd_delay_start():
cmd1 = f"mkdir -p {sbd.SBDManager.SBD_SYSTEMD_DELAY_START_DISABLE_DIR}"
cmd2 = f"echo -e '[Service]\nUnsetEnvironment=SBD_DELAY_START' > {sbd.SBDManager.SBD_SYSTEMD_DELAY_START_DISABLE_FILE}"
cmd3 = "systemctl daemon-reload"
for cmd in [cmd1, cmd2, cmd3]:
parallax.parallax_call(node_list, cmd)
if not _context:
sbd.SBDManager.unset_sbd_delay_start(node_list)

# To avoid possible JOIN flood in corosync
service_manager = ServiceManager()
Expand Down Expand Up @@ -2075,14 +2068,9 @@ def adjust_priority_fencing_delay(is_2node_wo_qdevice):
and the current cluster is 2 nodes without qdevice,
set priority-fencing-delay=2*pcmk_delay_max
"""
out = sh.cluster_shell().get_stdout_or_raise_error("crm configure show related:stonith")
if not out:
return
pcmk_delay_max_v_list = re.findall(r"pcmk_delay_max=(\w+)", out)
if pcmk_delay_max_v_list:
max_value = max([int(utils.crm_msec(v)/1000) for v in pcmk_delay_max_v_list])
if pcmk_delay_max_v_list and is_2node_wo_qdevice:
utils.set_property("priority-fencing-delay", 2*max_value, conditional=True)
pcmk_delay_max_value = utils.get_pcmk_delay_max_configured_value()
if pcmk_delay_max_value > 0 and is_2node_wo_qdevice:
utils.set_property("priority-fencing-delay", 2*pcmk_delay_max_value, conditional=True)
else:
utils.set_property("priority-fencing-delay", 0)

Expand Down Expand Up @@ -2181,10 +2169,9 @@ def remove_node_from_cluster(node, dead_node=False):
corosync.del_node(node_ip if node_ip is not None else node)

corosync.configure_two_node(removing=True)
adjust_properties()

logger.info("Propagating configuration changes across the remaining nodes")
sync_path(corosync.conf())
adjust_properties()

sh.cluster_shell().get_stdout_or_raise_error("corosync-cfgtool -R")

Expand Down Expand Up @@ -2469,7 +2456,7 @@ def bootstrap_remove(context):
try:
utils.check_all_nodes_reachable("removing a node from the cluster")
except utils.DeadNodeError as e:
if force_flag and cluster_node in e.dead_nodes:
if force_flag and cluster_node in e.summary.dead_nodes:
remove_node_from_cluster(cluster_node, dead_node=True)
bootstrap_finished()
return
Expand Down Expand Up @@ -2768,19 +2755,19 @@ def adjust_pcmk_delay_max(is_2node_wo_qdevice):
logger.info("Delete parameter 'pcmk_delay_max' for resource '{}'".format(res))


def adjust_stonith_timeout(with_sbd: bool = False):
def adjust_stonith_timeout():
"""
Adjust stonith-timeout for sbd and other scenarios
"""
if ServiceManager().service_is_active(constants.SBD_SERVICE) or with_sbd:
sbd.SBDTimeoutChecker(quiet=True, fix=True, from_bootstrap=True).check_and_fix()
if ServiceManager().service_is_active(constants.SBD_SERVICE):
sbd.SBDTimeoutChecker(quiet=True, fix=True).check_and_fix()
else:
value = get_stonith_timeout_generally_expected()
if value:
utils.set_property("stonith-timeout", value, conditional=True)


def adjust_properties(with_sbd: bool = False):
def adjust_properties():
"""
Adjust properties for the cluster:
- pcmk_delay_max
Expand All @@ -2798,7 +2785,7 @@ def adjust_properties(with_sbd: bool = False):
return
is_2node_wo_qdevice = utils.is_2node_cluster_without_qdevice()
adjust_pcmk_delay_max(is_2node_wo_qdevice)
adjust_stonith_timeout(with_sbd=with_sbd)
adjust_stonith_timeout()
adjust_priority_in_rsc_defaults(is_2node_wo_qdevice)
adjust_priority_fencing_delay(is_2node_wo_qdevice)

Expand Down
Loading
Loading