|
4 | 4 | import socket |
5 | 5 | from typing import List, Any, Tuple, Dict, Optional, cast, TYPE_CHECKING |
6 | 6 | import ipaddress |
| 7 | +import time |
| 8 | +import requests |
7 | 9 |
|
8 | 10 | from mgr_module import HandleCommandResult |
9 | 11 | from .service_registry import register_cephadm_service |
@@ -445,6 +447,36 @@ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: |
445 | 447 | service_url |
446 | 448 | ) |
447 | 449 |
|
def pre_remove(self, daemon: DaemonDescription) -> None:
    """
    Called before an Alertmanager daemon is removed.

    If the dashboard's Alertmanager API host currently points at the daemon
    being removed, re-point the dashboard at the remaining Alertmanager
    daemons (via ``config_dashboard``), or reset the dashboard's
    Alertmanager settings when no daemon is left. When the dashboard points
    elsewhere, nothing is changed.

    Any exception raised while doing so is logged (with traceback) and not
    propagated to the caller.

    :param daemon: description of the daemon that is about to be removed;
        nothing is done when its ``hostname`` is ``None``.
    """
    if daemon.hostname is None:
        return
    try:
        current_api_host = self.mgr.check_mon_command(
            {"prefix": "dashboard get-alertmanager-api-host"}
        ).stdout.strip()
        daemon_addr = daemon.ip if daemon.ip else self.mgr.get_fqdn(daemon.hostname)
        daemon_port = daemon.ports[0] if daemon.ports else self.DEFAULT_SERVICE_PORT
        # NOTE(review): the scheme is fixed to http; if config_dashboard can
        # publish an https URL, this comparison would never match -- confirm.
        service_url = build_url(scheme='http', host=daemon_addr, port=daemon_port)

        if current_api_host != service_url:
            # The dashboard does not use this daemon, so there is nothing to fix up.
            logger.info(f"Alertmanager {daemon.name()} removed; no changes to dashboard API settings")
            return

        # The dashboard points at the daemon being removed: pick a replacement
        # among the other daemons of this service, if there is one.
        remaining_daemons = [
            d for d in self.mgr.cache.get_daemons_by_service(self.TYPE)
            if d.name() != daemon.name()
        ]
        if remaining_daemons:
            self.config_dashboard(remaining_daemons)
            logger.info("Updated dashboard API settings to point to a remaining Alertmanager daemon")
            return

        self.mgr.check_mon_command({"prefix": "dashboard reset-alertmanager-api-host"})
        self.mgr.check_mon_command({"prefix": "dashboard reset-alertmanager-api-ssl-verify"})
        logger.info("Reset dashboard API settings as no Alertmanager daemons are remaining")
    except Exception as e:
        # Best effort: record the failure together with its traceback
        # instead of letting it escape to the caller.
        logger.exception(f"Error in Alertmanager pre_remove: {e}")
| 479 | + |
448 | 480 | def ok_to_stop(self, |
449 | 481 | daemon_ids: List[str], |
450 | 482 | force: bool = False, |
@@ -720,6 +752,48 @@ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: |
720 | 752 | service_url |
721 | 753 | ) |
722 | 754 |
|
def pre_remove(self, daemon: DaemonDescription) -> None:
    """
    Called before a Prometheus daemon is removed.

    If the dashboard's Prometheus API host currently points at the daemon
    being removed, re-point the dashboard at the remaining Prometheus
    daemons (via ``config_dashboard``) and then poll the newly configured
    API host until it answers ``/api/v1/rules`` with HTTP 200. When no
    Prometheus daemon is left, the dashboard's Prometheus settings are
    reset. When the dashboard points elsewhere, nothing is changed.

    The readiness poll is synchronous: with the constants below it makes at
    most ``MAX_RETRIES`` requests (5 s timeout each), sleeping
    ``RETRY_INTERVAL`` seconds between attempts.

    Any exception raised while doing so is logged (with traceback) and not
    propagated to the caller.

    :param daemon: description of the daemon that is about to be removed;
        nothing is done when its ``hostname`` is ``None``.
    """
    MAX_RETRIES = 5
    RETRY_INTERVAL = 5  # seconds between readiness probes
    if daemon.hostname is None:
        return
    try:
        current_api_host = self.mgr.check_mon_command(
            {"prefix": "dashboard get-prometheus-api-host"}
        ).stdout.strip()
        daemon_addr = daemon.ip if daemon.ip else self.mgr.get_fqdn(daemon.hostname)
        daemon_port = daemon.ports[0] if daemon.ports else self.DEFAULT_SERVICE_PORT
        # NOTE(review): the scheme is fixed to http; if config_dashboard can
        # publish an https URL, this comparison would never match -- confirm.
        service_url = build_url(scheme="http", host=daemon_addr, port=daemon_port)

        if current_api_host != service_url:
            # The dashboard does not use this daemon, so there is nothing to fix up.
            logger.info("Prometheus daemon removed; no changes to dashboard API settings")
            return

        remaining_daemons = [
            d for d in self.mgr.cache.get_daemons_by_service(self.TYPE)
            if d.name() != daemon.name()
        ]
        if not remaining_daemons:
            self.mgr.check_mon_command({"prefix": "dashboard reset-prometheus-api-host"})
            self.mgr.check_mon_command({"prefix": "dashboard reset-prometheus-api-ssl-verify"})
            logger.info("Reset Prometheus API settings as no daemons are remaining")
            return

        self.config_dashboard(remaining_daemons)
        logger.info("Updated Dashboard Settings to point to remaining Prometheus daemons")

        # Probe the endpoint the dashboard now uses. Probing service_url
        # would only test the daemon that is about to be removed.
        new_api_host = self.mgr.check_mon_command(
            {"prefix": "dashboard get-prometheus-api-host"}
        ).stdout.strip()
        if not new_api_host:
            logger.warning("Dashboard reports no Prometheus API host after reconfiguration")
            return

        for attempt in range(1, MAX_RETRIES + 1):
            try:
                response = requests.get(f"{new_api_host}/api/v1/rules", timeout=5)
                if response.status_code == 200:
                    logger.info(f"Prometheus daemon is ready at {new_api_host}.")
                    break
                logger.info(
                    f"Retry {attempt}: Prometheus daemon at {new_api_host} "
                    f"returned HTTP {response.status_code}"
                )
            except requests.RequestException as e:
                logger.info(f"Retry {attempt}: Waiting for Prometheus daemon at {new_api_host}: {e}")
            # Only wait when another attempt will follow.
            if attempt < MAX_RETRIES:
                time.sleep(RETRY_INTERVAL)
        else:
            # Reached only when the loop ran out of attempts without `break`.
            logger.warning("Prometheus daemon did not become ready after retries.")
    except Exception as e:
        # Best effort: record the failure together with its traceback
        # instead of letting it escape to the caller.
        logger.exception(f"Error in Prometheus pre_remove: {e}")
| 796 | + |
723 | 797 | def ok_to_stop(self, |
724 | 798 | daemon_ids: List[str], |
725 | 799 | force: bool = False, |
|
0 commit comments