Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ public class SetMonitorServiceCommand extends NetworkElementCommand {
public static final String ROUTER_HEALTH_CHECKS_BASIC_INTERVAL = "router.health.checks.basic.interval";
public static final String ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL = "router.health.checks.advanced.interval";
public static final String ROUTER_HEALTH_CHECKS_EXCLUDED = "router.health.checks.excluded";
public static final String ROUTER_HEALTH_CHECKS_INCLUDED_SERVICES = "router.health.checks.included.services";

private MonitorServiceTO[] services;
private Map<String, String> healthChecksConfig;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ private void setupHealthChecksRelatedInfo(MonitorService monitorService, SetMoni
}

monitorService.setExcludedHealthChecks(command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED));
monitorService.setIncludedServices(command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_INCLUDED_SERVICES));
monitorService.setHealthChecksConfig(command.getHealthChecksConfig());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public class MonitorService extends ConfigBase {
public Integer healthChecksBasicRunInterval;
public Integer healthChecksAdvancedRunInterval;
public String excludedHealthChecks;
public String includedServices;
public Map<String, String> healthChecksConfig;

public MonitorService() {
Expand Down Expand Up @@ -92,6 +93,10 @@ public void setExcludedHealthChecks(String excludedHealthChecks) {
this.excludedHealthChecks = excludedHealthChecks;
}

/**
 * Sets the included-services value held by this monitor service configuration.
 *
 * The argument is stored as-is; no validation, trimming, or parsing happens here.
 * NOTE(review): this looks like a comma-separated list of service names that the
 * router-side monitoring scripts split on "," -- confirm against the consumer.
 *
 * @param includedServices the value to store in the {@code includedServices} field
 */
public void setIncludedServices(String includedServices) {
this.includedServices = includedServices;
}

/**
 * Sets the health checks configuration map held by this monitor service configuration.
 *
 * The map reference is stored directly (no defensive copy), so later changes made
 * by the caller to the same map instance are visible through this object.
 *
 * @param healthChecksConfig the map to store in the {@code healthChecksConfig} field
 */
public void setHealthChecksConfig(Map<String, String> healthChecksConfig) {
this.healthChecksConfig = healthChecksConfig;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
import org.apache.cloudstack.utils.CloudStackVersion;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.cloudstack.utils.usage.UsageUtils;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
Expand Down Expand Up @@ -1595,17 +1596,22 @@ private SetMonitorServiceCommand createMonitorServiceCommand(DomainRouterVO rout
command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ENABLED, RouterHealthChecksEnabled.value().toString());
command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_BASIC_INTERVAL, RouterHealthChecksBasicInterval.value().toString());
command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL, RouterHealthChecksAdvancedInterval.value().toString());

final List<Long> routerGuestNtwkIds = _routerDao.getRouterNetworks(router.getId());
String excludedTests = RouterHealthChecksToExclude.valueIn(router.getDataCenterId());
if (router.getIsRedundantRouter()) {
// Disable gateway check if VPC has no tiers or no active VMs in it
final List<Long> routerGuestNtwkIds = _routerDao.getRouterNetworks(router.getId());
if (RedundantState.BACKUP.equals(router.getRedundantState()) ||
routerGuestNtwkIds == null || routerGuestNtwkIds.isEmpty()) {
excludedTests = excludedTests.isEmpty() ? BACKUP_ROUTER_EXCLUDED_TESTS : excludedTests + "," + BACKUP_ROUTER_EXCLUDED_TESTS;
}
}

command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED, excludedTests);
if (router.getVpcId() != null && CollectionUtils.isEmpty(routerGuestNtwkIds)) {
command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_INCLUDED_SERVICES, MonitoringService.Service.Ssh.toString().toLowerCase());
}

command.setHealthChecksConfig(routerHealthCheckConfig);
command.setReconfigureAfterUpdate(reconfigure);
command.setDeleteFromProcessedCache(deleteFromProcessedCache); // As part of updating
Expand Down
6 changes: 6 additions & 0 deletions systemvm/debian/opt/cloud/bin/cs/CsMonitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ def setupHealthChecksConfigFile(self):
else:
hc_data["excluded_health_checks"] = []

if "included_services" in self.dbag:
included_services = self.dbag["included_services"]
hc_data["included_services"] = [ch.strip() for ch in included_services.split(",")] if len(included_services) > 0 else []
else:
hc_data["included_services"] = []

if "health_checks_config" in self.dbag:
hc_data["health_checks_config"] = self.dbag["health_checks_config"]
else:
Expand Down
5 changes: 5 additions & 0 deletions systemvm/debian/opt/cloud/bin/cs_monitorservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,9 @@ def merge(dbag, data):
if "health_checks_config" in data:
dbag["health_checks_config"] = data["health_checks_config"]

if "included_services" in data:
dbag["included_services"] = data["included_services"]
else:
dbag["included_services"] = ""

return dbag
13 changes: 8 additions & 5 deletions systemvm/debian/root/monitorServices.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def checkProcessStatus( process ):
return StatusCodes.RUNNING, True


def monitProcess( processes_info ):
def monitProcess( processes_info, included_services ):
"""
Monitors the processes which got from the config file
"""
Expand All @@ -263,7 +263,10 @@ def monitProcess( processes_info ):
#time for noting process down time
csec = repr(time.time()).split('.')[0]

for process,properties in processes_info.items():
for process,properties in list(processes_info.items()):
if included_services and len(included_services) > 0 and process not in included_services:
printd ("---------------------------\nskipping the service %s\n---------------------------- " %process)
continue
printd ("---------------------------\nchecking the service %s\n---------------------------- " %process)
serviceName = process + ".service"
processStatus, wasRestarted = checkProcessStatus(properties)
Expand Down Expand Up @@ -325,20 +328,20 @@ def main(checkType = "basic"):
'''
printd("monitoring started")
configDict = getServicesConfig()
hc_data = getHealthChecksData()

'''
Step2: Monitor services and Raise Alerts
'''
monitResult = {}
failingChecks = []
if checkType == "basic":
monitResult, failingChecks = monitProcess(configDict)
included_services = hc_data["included_services"] if "included_services" in hc_data else []
monitResult, failingChecks = monitProcess(configDict, included_services)

'''
Step3: Run health check scripts as needed
'''
hc_data = getHealthChecksData()

if hc_data is not None and "health_checks_enabled" in hc_data and hc_data['health_checks_enabled']:
hc_exclude = hc_data["excluded_health_checks"] if "excluded_health_checks" in hc_data else []
for f in os.listdir(Config.HEALTH_CHECKS_DIR):
Expand Down
Loading