Skip to content

Commit bd7e591

Browse files
Refresh cluster maps after node recovery
1 parent e3257cf commit bd7e591

File tree

1 file changed

+19
-1
lines changed

1 file changed

+19
-1
lines changed

simplyblock_core/storage_node_ops.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,21 @@ def _connect_to_remote_jm_devs(this_node, jm_ids=None):
987987
return new_devs
988988

989989

990+
def _refresh_cluster_maps_after_node_recovery(snode: StorageNode):
    """Resend the cluster map to a recovered node and to its cluster peers.

    The node is first re-read from the database by its id, and that stored
    record (not the caller's copy) is what gets used for the rest of the
    function. The cluster map is sent to the recovered node itself, then to
    every other node of the same cluster whose status is ONLINE or DOWN.

    Args:
        snode: The storage node that has just been restarted or resumed.
    """
    db_controller = DBController()
    recovered = db_controller.get_storage_node_by_id(snode.get_id())

    # A full map is pushed after reconnect/restart recovery so that peers are
    # not left holding stale per-device availability that was derived from
    # transient reconnect state.
    distr_controller.send_cluster_map_to_node(recovered)

    recovered_id = recovered.get_id()
    notifiable_statuses = (StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN)
    cluster_nodes = db_controller.get_storage_nodes_by_cluster_id(recovered.cluster_id)

    for peer in cluster_nodes:
        # The recovered node already received its map above; only other nodes
        # in one of the notifiable statuses are refreshed here.
        if peer.get_id() != recovered_id and peer.status in notifiable_statuses:
            distr_controller.send_cluster_map_to_node(peer)
1003+
1004+
9901005
def ifc_is_tcp(nic):
9911006
addrs = psutil.net_if_addrs().get(nic, [])
9921007
for addr in addrs:
@@ -2196,6 +2211,7 @@ def restart_storage_node(
21962211
logger.info("Cluster is not ready yet")
21972212
logger.info("Setting node status to Online")
21982213
set_node_status(node_id, StorageNode.STATUS_ONLINE, reconnect_on_online=False)
2214+
_refresh_cluster_maps_after_node_recovery(snode)
21992215
return True
22002216

22012217
else:
@@ -2266,6 +2282,7 @@ def restart_storage_node(
22662282

22672283
logger.info("Setting node status to Online")
22682284
set_node_status(snode.get_id(), StorageNode.STATUS_ONLINE)
2285+
_refresh_cluster_maps_after_node_recovery(snode)
22692286

22702287
lvol_list = db_controller.get_lvols_by_node_id(snode.get_id())
22712288
logger.info(f"Found {len(lvol_list)} lvols")
@@ -2702,6 +2719,7 @@ def resume_storage_node(node_id):
27022719
if snode.enable_ha_jm:
27032720
snode.remote_jm_devices = _connect_to_remote_jm_devs(snode)
27042721
snode.write_to_db()
2722+
_refresh_cluster_maps_after_node_recovery(snode)
27052723

27062724
fw_api = FirewallClient(snode, timeout=20, retry=1)
27072725
port_type = "tcp"
@@ -4498,4 +4516,4 @@ def lvs_dump_tree(node_id):
44984516
logger.error("Failed to dump lvstore tree")
44994517
return False
45004518

4501-
return json.dumps(ret, indent=2)
4519+
return json.dumps(ret, indent=2)

0 commit comments

Comments
 (0)