Skip to content

Commit 9c2dec2

Browse files
authored
Merge pull request ceph#60915 from Kushal-deb/fix-issue-2313279
cephadm: Add pre_remove and ensure deployment values are reset and API settings are updated when removing Prometheus or Alertmanager daemons Reviewed-by: Adam King <[email protected]>
2 parents 6a194b7 + 798b946 commit 9c2dec2

File tree

1 file changed

+74
-0
lines changed

1 file changed

+74
-0
lines changed

src/pybind/mgr/cephadm/services/monitoring.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import socket
55
from typing import List, Any, Tuple, Dict, Optional, cast, TYPE_CHECKING
66
import ipaddress
7+
import time
8+
import requests
79

810
from mgr_module import HandleCommandResult
911
from .service_registry import register_cephadm_service
@@ -445,6 +447,36 @@ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
445447
service_url
446448
)
447449

450+
def pre_remove(self, daemon: DaemonDescription) -> None:
    """
    Called before an Alertmanager daemon is removed.

    If the dashboard's Alertmanager API host currently points at the
    daemon being removed, either re-point the dashboard at the remaining
    Alertmanager daemons (via ``config_dashboard``) or, when none remain,
    reset the dashboard's Alertmanager API host and SSL-verify settings.
    If the dashboard points elsewhere, nothing is changed.

    Any exception raised while doing so is logged (with traceback) and
    not re-raised.

    :param daemon: description of the daemon that is about to be removed;
        nothing is done when its hostname is unknown.
    """
    if daemon.hostname is None:
        return
    try:
        current_api_host = self.mgr.check_mon_command(
            {"prefix": "dashboard get-alertmanager-api-host"}
        ).stdout.strip()

        # Rebuild the URL this daemon would have been registered under so
        # it can be compared with what the dashboard currently uses.
        daemon_addr = daemon.ip if daemon.ip else self.mgr.get_fqdn(daemon.hostname)
        daemon_port = daemon.ports[0] if daemon.ports else self.DEFAULT_SERVICE_PORT
        # NOTE(review): the scheme is hard-coded to http; if config_dashboard
        # can store an https URL the comparison below would never match --
        # confirm against config_dashboard.
        service_url = build_url(scheme='http', host=daemon_addr, port=daemon_port)

        if current_api_host != service_url:
            logger.info(f"Alertmanager {daemon.name()} removed; no changes to dashboard API settings")
            return

        # The dashboard points at the daemon being removed: update or
        # reset the settings.
        remaining_daemons = [
            d for d in self.mgr.cache.get_daemons_by_service(self.TYPE)
            if d.name() != daemon.name()
        ]
        if remaining_daemons:
            self.config_dashboard(remaining_daemons)
            logger.info("Updated dashboard API settings to point to a remaining Alertmanager daemon")
        else:
            self.mgr.check_mon_command({"prefix": "dashboard reset-alertmanager-api-host"})
            self.mgr.check_mon_command({"prefix": "dashboard reset-alertmanager-api-ssl-verify"})
            logger.info("Reset dashboard API settings as no Alertmanager daemons are remaining")
    except Exception as e:
        # Best effort: log with traceback and do not re-raise.
        logger.exception(f"Error in Alertmanager pre_remove: {str(e)}")
479+
448480
def ok_to_stop(self,
449481
daemon_ids: List[str],
450482
force: bool = False,
@@ -720,6 +752,48 @@ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
720752
service_url
721753
)
722754

755+
def pre_remove(self, daemon: DaemonDescription) -> None:
    """
    Called before a Prometheus daemon is removed.

    If the dashboard's Prometheus API host currently points at the daemon
    being removed, either re-point the dashboard at the remaining
    Prometheus daemons (via ``config_dashboard``) and poll the
    ``/api/v1/rules`` endpoint until it answers with HTTP 200, or, when no
    daemons remain, reset the dashboard's Prometheus API host and
    SSL-verify settings. If the dashboard points elsewhere, nothing is
    changed.

    Any exception raised while doing so is logged (with traceback) and
    not re-raised.

    :param daemon: description of the daemon that is about to be removed;
        nothing is done when its hostname is unknown.
    """
    MAX_RETRIES = 5
    RETRY_INTERVAL = 5  # seconds between readiness probes
    if daemon.hostname is None:
        return
    try:
        current_api_host = self.mgr.check_mon_command(
            {"prefix": "dashboard get-prometheus-api-host"}
        ).stdout.strip()

        # Rebuild the URL this daemon would have been registered under so
        # it can be compared with what the dashboard currently uses.
        daemon_addr = daemon.ip if daemon.ip else self.mgr.get_fqdn(daemon.hostname)
        daemon_port = daemon.ports[0] if daemon.ports else self.DEFAULT_SERVICE_PORT
        # NOTE(review): the scheme is hard-coded to http; if config_dashboard
        # can store an https URL the comparison below would never match --
        # confirm against config_dashboard.
        service_url = build_url(scheme="http", host=daemon_addr, port=daemon_port)

        if current_api_host != service_url:
            logger.info("Prometheus daemon removed; no changes to dashboard API settings")
            return

        remaining_daemons = [
            d for d in self.mgr.cache.get_daemons_by_service(self.TYPE)
            if d.name() != daemon.name()
        ]
        if not remaining_daemons:
            self.mgr.check_mon_command({"prefix": "dashboard reset-prometheus-api-host"})
            self.mgr.check_mon_command({"prefix": "dashboard reset-prometheus-api-ssl-verify"})
            logger.info("Reset Prometheus API settings as no daemons are remaining")
            return

        self.config_dashboard(remaining_daemons)
        logger.info("Updated Dashboard Settings to point to remaining Prometheus daemons")

        # NOTE(review): service_url is the URL of the daemon being removed,
        # not the one the dashboard was just re-pointed at -- confirm that
        # this is the intended readiness target.
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                response = requests.get(f"{service_url}/api/v1/rules", timeout=5)
                if response.status_code == 200:
                    logger.info(f"Prometheus daemon is ready at {service_url}.")
                    break
                reason = f"HTTP {response.status_code}"
            except Exception as e:
                reason = str(e)
            logger.info(f"Retry {attempt}: Waiting for Prometheus daemon at {service_url}: {reason}")
            # Back off after every failed probe (error *or* non-200 reply),
            # but do not sleep once the last attempt has been used up.
            if attempt < MAX_RETRIES:
                time.sleep(RETRY_INTERVAL)
        else:
            logger.warning("Prometheus daemon did not become ready after retries.")
    except Exception as e:
        # Best effort: log with traceback and do not re-raise.
        logger.exception(f"Error in Prometheus pre_remove: {str(e)}")
796+
723797
def ok_to_stop(self,
724798
daemon_ids: List[str],
725799
force: bool = False,

0 commit comments

Comments
 (0)