|
4 | 4 | import socket |
5 | 5 | from typing import List, Any, Tuple, Dict, Optional, cast, TYPE_CHECKING |
6 | 6 | import ipaddress |
| 7 | +import time |
| 8 | +import requests |
7 | 9 |
|
8 | 10 | from mgr_module import HandleCommandResult |
9 | 11 |
|
@@ -442,6 +444,36 @@ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: |
442 | 444 | service_url |
443 | 445 | ) |
444 | 446 |
|
def pre_remove(self, daemon: DaemonDescription) -> None:
    """
    Called before Alertmanager is removed

    Compares the dashboard's configured Alertmanager API host with the URL
    built for ``daemon``. When they match, ``config_dashboard`` is called
    with the other Alertmanager daemons known to the cache or, if there are
    none, the dashboard's Alertmanager API host and ssl-verify settings are
    reset. When they do not match, nothing is changed.

    Any exception raised while doing so is logged and not re-raised.

    :param daemon: description of the Alertmanager daemon being removed
    """
    if daemon.hostname is None:
        return
    try:
        current_api_host = self.mgr.check_mon_command(
            {"prefix": "dashboard get-alertmanager-api-host"}
        ).stdout.strip()
        daemon_addr = daemon.ip if daemon.ip else self.mgr.get_fqdn(daemon.hostname)
        daemon_port = daemon.ports[0] if daemon.ports else self.DEFAULT_SERVICE_PORT
        # NOTE(review): the scheme is hard-coded to http; if the dashboard
        # setting can hold an https URL this comparison never matches -- confirm
        # against config_dashboard.
        service_url = build_url(scheme='http', host=daemon_addr, port=daemon_port)

        if current_api_host != service_url:
            logger.info(f"Alertmanager {daemon.name()} removed; no changes to dashboard API settings")
            return

        # The dashboard points at the daemon being removed: update or reset the settings.
        remaining_daemons = [
            d for d in self.mgr.cache.get_daemons_by_service(self.TYPE)
            if d.name() != daemon.name()
        ]
        if remaining_daemons:
            self.config_dashboard(remaining_daemons)
            logger.info("Updated dashboard API settings to point to a remaining Alertmanager daemon")
            return

        self.mgr.check_mon_command({"prefix": "dashboard reset-alertmanager-api-host"})
        self.mgr.check_mon_command({"prefix": "dashboard reset-alertmanager-api-ssl-verify"})
        logger.info("Reset dashboard API settings as no Alertmanager daemons are remaining")
    except Exception as e:
        # Best effort: log with traceback instead of propagating.
        logger.exception("Error in Alertmanager pre_remove: %s", e)
445 | 477 | def ok_to_stop(self, |
446 | 478 | daemon_ids: List[str], |
447 | 479 | force: bool = False, |
@@ -716,6 +748,48 @@ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: |
716 | 748 | service_url |
717 | 749 | ) |
718 | 750 |
|
def pre_remove(self, daemon: DaemonDescription) -> None:
    """
    Called before Prometheus daemon is removed

    Compares the dashboard's configured Prometheus API host with the URL
    built for ``daemon``. When they match, ``config_dashboard`` is called
    with the other Prometheus daemons known to the cache and the newly
    configured API host is polled (``/api/v1/rules``) until it answers with
    HTTP 200 or the retries are exhausted. If no other daemon exists, the
    dashboard's Prometheus API host and ssl-verify settings are reset.
    When the URLs do not match, nothing is changed.

    Any exception raised while doing so is logged and not re-raised.

    :param daemon: description of the Prometheus daemon being removed
    """
    MAX_RETRIES = 5
    RETRY_INTERVAL = 5  # seconds between readiness probes
    if daemon.hostname is None:
        return
    try:
        get_api_host_cmd = {"prefix": "dashboard get-prometheus-api-host"}
        current_api_host = self.mgr.check_mon_command(get_api_host_cmd).stdout.strip()
        daemon_addr = daemon.ip if daemon.ip else self.mgr.get_fqdn(daemon.hostname)
        daemon_port = daemon.ports[0] if daemon.ports else self.DEFAULT_SERVICE_PORT
        # NOTE(review): the scheme is hard-coded to http; if the dashboard
        # setting can hold an https URL this comparison never matches -- confirm
        # against config_dashboard.
        service_url = build_url(scheme="http", host=daemon_addr, port=daemon_port)

        if current_api_host != service_url:
            logger.info("Prometheus daemon removed; no changes to dashboard API settings")
            return

        remaining_daemons = [
            d for d in self.mgr.cache.get_daemons_by_service(self.TYPE)
            if d.name() != daemon.name()
        ]
        if not remaining_daemons:
            self.mgr.check_mon_command({"prefix": "dashboard reset-prometheus-api-host"})
            self.mgr.check_mon_command({"prefix": "dashboard reset-prometheus-api-ssl-verify"})
            logger.info("Reset Prometheus API settings as no daemons are remaining")
            return

        self.config_dashboard(remaining_daemons)
        logger.info("Updated Dashboard Settings to point to remaining Prometheus daemons")

        # Probe the host the dashboard now uses. service_url is the daemon
        # being removed, so probing it says nothing about the replacement.
        new_api_host = self.mgr.check_mon_command(get_api_host_cmd).stdout.strip()
        if not new_api_host:
            logger.warning("Prometheus API host is not set after update; skipping readiness check.")
            return
        rules_url = f"{new_api_host.rstrip('/')}/api/v1/rules"

        for attempt in range(MAX_RETRIES):
            try:
                response = requests.get(rules_url, timeout=5)
                if response.status_code == 200:
                    logger.info(f"Prometheus daemon is ready at {new_api_host}.")
                    break
                logger.info(
                    f"Retry {attempt + 1}: Prometheus daemon at {new_api_host} "
                    f"returned HTTP {response.status_code}"
                )
            except requests.RequestException as e:
                logger.info(f"Retry {attempt + 1}: Waiting for Prometheus daemon at {new_api_host}: {e}")
            # No point in sleeping once the last attempt has been made.
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_INTERVAL)
        else:
            logger.warning("Prometheus daemon did not become ready after retries.")
    except Exception as e:
        # Best effort: log with traceback instead of propagating.
        logger.exception("Error in Prometheus pre_remove: %s", e)
719 | 793 | def ok_to_stop(self, |
720 | 794 | daemon_ids: List[str], |
721 | 795 | force: bool = False, |
|
0 commit comments