diff --git a/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java b/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java index 969117517f6d..8d9d165c2d36 100644 --- a/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java +++ b/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java @@ -37,6 +37,7 @@ public class SetMonitorServiceCommand extends NetworkElementCommand { public static final String ROUTER_HEALTH_CHECKS_BASIC_INTERVAL = "router.health.checks.basic.interval"; public static final String ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL = "router.health.checks.advanced.interval"; public static final String ROUTER_HEALTH_CHECKS_EXCLUDED = "router.health.checks.excluded"; + public static final String ROUTER_HEALTH_CHECKS_INCLUDED_SERVICES = "router.health.checks.included.services"; private MonitorServiceTO[] services; private Map healthChecksConfig; diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java index 52d8442b5ac6..0635286dd399 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java @@ -70,6 +70,7 @@ private void setupHealthChecksRelatedInfo(MonitorService monitorService, SetMoni } monitorService.setExcludedHealthChecks(command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED)); + monitorService.setIncludedServices(command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_INCLUDED_SERVICES)); monitorService.setHealthChecksConfig(command.getHealthChecksConfig()); } diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java 
b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java index fe20476f076d..27a2701ed50d 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java @@ -27,6 +27,7 @@ public class MonitorService extends ConfigBase { public Integer healthChecksBasicRunInterval; public Integer healthChecksAdvancedRunInterval; public String excludedHealthChecks; + public String includedServices; public Map healthChecksConfig; public MonitorService() { @@ -92,6 +93,10 @@ public void setExcludedHealthChecks(String excludedHealthChecks) { this.excludedHealthChecks = excludedHealthChecks; } + public void setIncludedServices(String includedServices) { + this.includedServices = includedServices; + } + public void setHealthChecksConfig(Map healthChecksConfig) { this.healthChecksConfig = healthChecksConfig; } diff --git a/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java b/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java index 702f614d7763..4668076ff0ae 100644 --- a/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java +++ b/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java @@ -70,6 +70,7 @@ import org.apache.cloudstack.utils.CloudStackVersion; import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.cloudstack.utils.usage.UsageUtils; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; @@ -1595,10 +1596,11 @@ private SetMonitorServiceCommand createMonitorServiceCommand(DomainRouterVO rout command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ENABLED, RouterHealthChecksEnabled.value().toString()); 
command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_BASIC_INTERVAL, RouterHealthChecksBasicInterval.value().toString()); command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL, RouterHealthChecksAdvancedInterval.value().toString()); + + final List routerGuestNtwkIds = _routerDao.getRouterNetworks(router.getId()); String excludedTests = RouterHealthChecksToExclude.valueIn(router.getDataCenterId()); if (router.getIsRedundantRouter()) { // Disable gateway check if VPC has no tiers or no active VM's in it - final List routerGuestNtwkIds = _routerDao.getRouterNetworks(router.getId()); if (RedundantState.BACKUP.equals(router.getRedundantState()) || routerGuestNtwkIds == null || routerGuestNtwkIds.isEmpty()) { excludedTests = excludedTests.isEmpty() ? BACKUP_ROUTER_EXCLUDED_TESTS : excludedTests + "," + BACKUP_ROUTER_EXCLUDED_TESTS; @@ -1606,6 +1608,10 @@ private SetMonitorServiceCommand createMonitorServiceCommand(DomainRouterVO rout } command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED, excludedTests); + if (router.getVpcId() != null && CollectionUtils.isEmpty(routerGuestNtwkIds)) { + command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_INCLUDED_SERVICES, MonitoringService.Service.Ssh.toString().toLowerCase()); + } + command.setHealthChecksConfig(routerHealthCheckConfig); command.setReconfigureAfterUpdate(reconfigure); command.setDeleteFromProcessedCache(deleteFromProcessedCache); // As part of updating diff --git a/systemvm/debian/opt/cloud/bin/cs/CsMonitor.py b/systemvm/debian/opt/cloud/bin/cs/CsMonitor.py index 5a0ff5b114c4..85bc4cbe4986 100755 --- a/systemvm/debian/opt/cloud/bin/cs/CsMonitor.py +++ b/systemvm/debian/opt/cloud/bin/cs/CsMonitor.py @@ -69,6 +69,12 @@ def setupHealthChecksConfigFile(self): else: hc_data["excluded_health_checks"] = [] + if "included_services" in self.dbag: + included_services = self.dbag["included_services"] + hc_data["included_services"] = 
[ch.strip() for ch in included_services.split(",")] if len(included_services) > 0 else [] + else: + hc_data["included_services"] = [] + if "health_checks_config" in self.dbag: hc_data["health_checks_config"] = self.dbag["health_checks_config"] else: diff --git a/systemvm/debian/opt/cloud/bin/cs_monitorservice.py b/systemvm/debian/opt/cloud/bin/cs_monitorservice.py index 55c89dfb59b3..7e656c816617 100755 --- a/systemvm/debian/opt/cloud/bin/cs_monitorservice.py +++ b/systemvm/debian/opt/cloud/bin/cs_monitorservice.py @@ -33,4 +33,9 @@ def merge(dbag, data): if "health_checks_config" in data: dbag["health_checks_config"] = data["health_checks_config"] + if "included_services" in data: + dbag["included_services"] = data["included_services"] + else: + dbag["included_services"] = "" + return dbag diff --git a/systemvm/debian/root/monitorServices.py b/systemvm/debian/root/monitorServices.py index 909e419c1801..50dad01d1b6b 100755 --- a/systemvm/debian/root/monitorServices.py +++ b/systemvm/debian/root/monitorServices.py @@ -247,7 +247,7 @@ def checkProcessStatus( process ): return StatusCodes.RUNNING, True -def monitProcess( processes_info ): +def monitProcess( processes_info, included_services ): """ Monitors the processes which got from the config file """ @@ -263,7 +263,10 @@ def monitProcess( processes_info ): #time for noting process down time csec = repr(time.time()).split('.')[0] - for process,properties in processes_info.items(): + for process,properties in list(processes_info.items()): + if included_services and len(included_services) > 0 and process not in included_services: + printd ("---------------------------\nskipping the service %s\n---------------------------- " %process) + continue printd ("---------------------------\nchecking the service %s\n---------------------------- " %process) serviceName = process + ".service" processStatus, wasRestarted = checkProcessStatus(properties) @@ -325,6 +328,7 @@ def main(checkType = "basic"): ''' printd("monitoring 
started") configDict = getServicesConfig() + hc_data = getHealthChecksData() ''' Step2: Monitor services and Raise Alerts @@ -332,13 +336,12 @@ def main(checkType = "basic"): monitResult = {} failingChecks = [] if checkType == "basic": - monitResult, failingChecks = monitProcess(configDict) + included_services = hc_data["included_services"] if hc_data is not None and "included_services" in hc_data else [] + monitResult, failingChecks = monitProcess(configDict, included_services) ''' Step3: Run health check scripts as needed ''' - hc_data = getHealthChecksData() - if hc_data is not None and "health_checks_enabled" in hc_data and hc_data['health_checks_enabled']: hc_exclude = hc_data["excluded_health_checks"] if "excluded_health_checks" in hc_data else [] for f in os.listdir(Config.HEALTH_CHECKS_DIR):