From cf4b305fd20b5994ef35233a39273066a4645ee3 Mon Sep 17 00:00:00 2001 From: Suresh Kumar Anaparti Date: Thu, 24 Oct 2024 21:39:17 +0530 Subject: [PATCH 01/20] Support for Management Server Maintenance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New APIs: prepareForMaintenance and cancelMaintenance, with required parameter - managementserverid. - New management server states for maintenance: PreparingForMaintenance, Maintenance. - listHosts API with optional parameter – managementserverid, to list the hosts connected to the management server. - Support management server maintenance when more than one active management server is available. - Triggers the transfer of agents to other available management servers for maintenance; the new agent command MigrateAgentConnectionCommand initiates the transfer of indirect agents. - New global config 'management.server.maintenance.timeout', to set the timeout (in mins) for the management server maintenance window, default: 60 mins. - UI changes: Prepare and Cancel Maintenance in Management Server section, Connected Agents tab, new fields for hosts and management servers. 
--- .../src/main/java/com/cloud/agent/Agent.java | 64 +- .../main/java/com/cloud/agent/AgentShell.java | 9 + .../java/com/cloud/agent/IAgentShell.java | 4 + api/src/main/java/com/cloud/host/Host.java | 2 + .../com/cloud/resource/ResourceService.java | 8 +- .../server/ManagementServerHostStats.java | 5 + .../apache/cloudstack/api/ApiConstants.java | 5 +- ...Cmd.java => CancelHostMaintenanceCmd.java} | 2 +- .../api/command/admin/host/ListHostsCmd.java | 8 + ...java => PrepareForHostMaintenanceCmd.java} | 2 +- .../api/response/AsyncJobResponse.java | 14 +- .../cloudstack/api/response/HostResponse.java | 24 + .../api/response/LoginCmdResponse.java | 12 + .../response/ManagementServerResponse.java | 12 + .../management/ManagementServerHost.java | 2 +- client/pom.xml | 2 +- .../api/MigrateAgentConnectionAnswer.java | 38 ++ .../api/MigrateAgentConnectionCommand.java | 61 ++ .../com/cloud/agent/api/StartupCommand.java | 9 + .../agent/test/CheckOnHostCommandTest.java | 5 + .../java/com/cloud/agent/AgentManager.java | 7 + engine/orchestration/pom.xml | 2 +- .../cloud/agent/manager/AgentManagerImpl.java | 69 ++- .../manager/ClusteredAgentManagerImpl.java | 177 +++++- .../entity/api/db/EngineHostVO.java | 12 + .../orchestration/NetworkOrchestrator.java | 2 +- .../src/main/java/com/cloud/host/HostVO.java | 12 + .../main/java/com/cloud/host/dao/HostDao.java | 15 +- .../java/com/cloud/host/dao/HostDaoImpl.java | 48 +- .../META-INF/db/schema-41910to42000.sql | 9 + .../cloudstack/agent/lb/IndirectAgentLB.java | 22 + .../com/cloud/cluster/ClusterManagerImpl.java | 24 +- .../cluster/dao/ManagementServerHostDao.java | 6 +- .../dao/ManagementServerHostDaoImpl.java | 30 +- .../dao/ManagementServerHostPeerDao.java | 3 + .../dao/ManagementServerHostPeerDaoImpl.java | 33 + .../jobs/impl/AsyncJobManagerImpl.java | 10 +- plugins/{shutdown => maintenance}/pom.xml | 4 +- .../command/BaseMSMaintenanceActionCmd.java} | 8 +- .../api/command/CancelMaintenanceCmd.java | 60 ++ 
.../api/command/CancelShutdownCmd.java | 10 +- .../api/command/PrepareForMaintenanceCmd.java | 72 +++ .../api/command/PrepareForShutdownCmd.java | 9 +- .../api/command/ReadyForShutdownCmd.java | 44 +- .../api/command/TriggerShutdownCmd.java | 10 +- .../ManagementServerMaintenanceResponse.java} | 80 ++- .../ManagementServerMaintenanceListener.java | 24 + .../ManagementServerMaintenanceManager.java | 106 ++++ ...anagementServerMaintenanceManagerImpl.java | 584 ++++++++++++++++++ ...seShutdownManagementServerHostCommand.java | 2 +- ...aintenanceManagementServerHostCommand.java | 26 + ...elShutdownManagementServerHostCommand.java | 2 +- ...aintenanceManagementServerHostCommand.java | 36 ++ ...orShutdownManagementServerHostCommand.java | 2 +- ...erShutdownManagementServerHostCommand.java | 2 +- .../cloudstack/maintenance}/module.properties | 2 +- .../spring-maintenance-context.xml} | 4 +- ...mentServerMaintenanceManagerImplTest.java} | 21 +- .../cloudstack/api/MetricConstants.java | 2 + .../metrics/MetricsServiceImpl.java | 2 + .../ManagementServerMetricsResponse.java | 17 + plugins/pom.xml | 2 +- .../cloudstack/shutdown/ShutdownManager.java | 60 -- .../shutdown/ShutdownManagerImpl.java | 265 -------- .../java/com/cloud/api/ApiDispatcher.java | 2 +- .../main/java/com/cloud/api/ApiServer.java | 15 + .../com/cloud/api/query/QueryManagerImpl.java | 10 + .../api/query/dao/AsyncJobJoinDaoImpl.java | 15 +- .../cloud/api/query/dao/HostJoinDaoImpl.java | 11 + .../cloud/network/SshKeysDistriMonitor.java | 13 +- .../security/SecurityGroupListener.java | 33 +- .../cloud/resource/ResourceManagerImpl.java | 8 +- .../RollingMaintenanceManagerImpl.java | 4 +- .../ManagementServerHostStatsEntry.java | 21 + .../cloud/server/ManagementServerImpl.java | 8 +- .../java/com/cloud/server/StatsCollector.java | 3 + .../storage/listener/StoragePoolMonitor.java | 49 +- .../agent/lb/IndirectAgentLBServiceImpl.java | 151 ++++- .../spring-server-core-managers-context.xml | 4 +- 
.../resource/MockResourceManagerImpl.java | 8 +- .../test/resources/createNetworkOffering.xml | 159 ++--- ... test_ms_maintenance_and_safe_shutdown.py} | 44 +- tools/apidoc/gen_toc.py | 3 +- ui/public/locales/en.json | 17 +- ui/src/components/page/GlobalLayout.vue | 20 +- ui/src/components/view/ListView.vue | 2 +- ui/src/config/section/infra/hosts.js | 3 +- .../config/section/infra/managementServers.js | 41 +- ui/src/store/getters.js | 2 + ui/src/store/modules/app.js | 6 + ui/src/store/modules/user.js | 15 + ui/src/views/AutogenView.vue | 4 +- ui/src/views/infra/Confirmation.vue | 36 ++ ui/src/views/infra/ConnectedAgentsTab.vue | 88 +++ 94 files changed, 2367 insertions(+), 653 deletions(-) rename api/src/main/java/org/apache/cloudstack/api/command/admin/host/{CancelMaintenanceCmd.java => CancelHostMaintenanceCmd.java} (98%) rename api/src/main/java/org/apache/cloudstack/api/command/admin/host/{PrepareForMaintenanceCmd.java => PrepareForHostMaintenanceCmd.java} (98%) create mode 100644 core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java create mode 100644 core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java rename plugins/{shutdown => maintenance}/pom.xml (92%) rename plugins/{shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java => maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java} (85%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java rename plugins/{shutdown => maintenance}/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java (83%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java rename plugins/{shutdown => maintenance}/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java (85%) rename plugins/{shutdown => 
maintenance}/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java (66%) rename plugins/{shutdown => maintenance}/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java (85%) rename plugins/{shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java => maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java} (52%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/BaseShutdownManagementServerHostCommand.java (95%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/CancelShutdownManagementServerHostCommand.java (95%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/PrepareForShutdownManagementServerHostCommand.java (95%) rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/TriggerShutdownManagementServerHostCommand.java (95%) rename plugins/{shutdown/src/main/resources/META-INF/cloudstack/shutdown => 
maintenance/src/main/resources/META-INF/cloudstack/maintenance}/module.properties (97%) rename plugins/{shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml => maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml} (83%) rename plugins/{shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java => maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java} (84%) delete mode 100644 plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java delete mode 100644 plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java rename test/integration/smoke/{test_safe_shutdown.py => test_ms_maintenance_and_safe_shutdown.py} (70%) create mode 100644 ui/src/views/infra/ConnectedAgentsTab.vue diff --git a/agent/src/main/java/com/cloud/agent/Agent.java b/agent/src/main/java/com/cloud/agent/Agent.java index c84179d66609..575e3424a13f 100644 --- a/agent/src/main/java/com/cloud/agent/Agent.java +++ b/agent/src/main/java/com/cloud/agent/Agent.java @@ -27,6 +27,7 @@ import java.nio.channels.ClosedChannelException; import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -40,6 +41,8 @@ import javax.naming.ConfigurationException; +import com.cloud.agent.api.MigrateAgentConnectionAnswer; +import com.cloud.agent.api.MigrateAgentConnectionCommand; import com.cloud.resource.AgentStatusUpdater; import com.cloud.resource.ResourceStatusUpdater; import com.cloud.agent.api.PingAnswer; @@ -313,7 +316,6 @@ public void start() { } _shell.updateConnectedHost(); scavengeOldAgentObjects(); - } public void stop(final String reason, final String detail) { @@ -477,6 +479,10 @@ public synchronized void lockStartupTask(final Link link) { } public void sendStartup(final Link link) { + sendStartup(link, false); + } 
+ + public void sendStartup(final Link link, boolean transfer) { final StartupCommand[] startup = _resource.initialize(); if (startup != null) { final String msHostList = _shell.getPersistentProperty(null, "host"); @@ -484,6 +490,7 @@ public void sendStartup(final Link link) { for (int i = 0; i < startup.length; i++) { setupStartupCommand(startup[i]); startup[i].setMSHostList(msHostList); + startup[i].setConnectionTransferred(transfer); commands[i] = startup[i]; } final Request request = new Request(_id != null ? _id : -1, -1, commands, false, false); @@ -541,6 +548,10 @@ public Task create(final Task.Type type, final Link link, final byte[] data) { } protected void reconnect(final Link link) { + reconnect(link, null); + } + + protected void reconnect(final Link link, String host) { if (!_reconnectAllowed) { return; } @@ -576,7 +587,9 @@ protected void reconnect(final Link link) { } do { - final String host = _shell.getNextHost(); + if (StringUtils.isEmpty(host)) { + host = _shell.getNextHost(); + } _connection = new NioClient("Agent", host, _shell.getPort(), _shell.getWorkers(), this); logger.info("Reconnecting to host:{}", host); try { @@ -703,6 +716,8 @@ protected void processRequest(final Request request, final Link link) { } } else if (cmd instanceof SetupMSListCommand) { answer = setupManagementServerList((SetupMSListCommand) cmd); + } else if (cmd instanceof MigrateAgentConnectionCommand) { + answer = migrateAgentToOtherMS((MigrateAgentConnectionCommand) cmd); } else { if (cmd instanceof ReadyCommand) { processReadyCommand(cmd); @@ -858,6 +873,47 @@ private Answer setupManagementServerList(final SetupMSListCommand cmd) { return new SetupMSListAnswer(true); } + private Answer migrateAgentToOtherMS(final MigrateAgentConnectionCommand cmd) { + try { + if (CollectionUtils.isNotEmpty(cmd.getMsList())) { + processManagementServerList(cmd.getMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval()); + } + migrateAgentConnection(cmd.getAvoidMsList()); + } catch 
(Exception e) { + String errMsg = "Migrate agent connection failed, due to " + e.getMessage(); + logger.debug(errMsg, e); + return new MigrateAgentConnectionAnswer(errMsg); + } + return new MigrateAgentConnectionAnswer(true); + } + + private void migrateAgentConnection(List avoidMsList) { + final String[] msHosts = _shell.getHosts(); + if (msHosts == null || msHosts.length < 1) { + throw new CloudRuntimeException("Management Server hosts empty, not properly configured in agent"); + } + + List msHostsList = new ArrayList<>(Arrays.asList(msHosts)); + msHostsList.removeAll(avoidMsList); + if (msHostsList.isEmpty() || StringUtils.isEmpty(msHostsList.get(0))) { + throw new CloudRuntimeException("No other Management Server hosts to migrate"); + } + + final String preferredHost = msHostsList.get(0); + + try (final Socket socket = new Socket()) { + socket.connect(new InetSocketAddress(preferredHost, _shell.getPort()), 5000); + } catch (final IOException e) { + throw new CloudRuntimeException("Preferred management server host: " + preferredHost + " is not reachable, to migrate"); + } + + logger.debug("Preferred management server host " + preferredHost + " is found to be reachable, trying to reconnect"); + _reconnectAllowed = true; + _shell.resetHostCounter(); + _shell.setConnectionTransfer(true); + reconnect(_link, preferredHost); + } + public void processResponse(final Response response, final Link link) { final Answer answer = response.getAnswer(); logger.debug("Received response: {}", response.toString()); @@ -1153,7 +1209,8 @@ public void doTask(final Task task) throws TaskExecutionException { if (task.getType() == Task.Type.CONNECT) { _shell.getBackoffAlgorithm().reset(); setLink(task.getLink()); - sendStartup(task.getLink()); + sendStartup(task.getLink(), _shell.isConnectionTransfer()); + _shell.setConnectionTransfer(false); } else if (task.getType() == Task.Type.DATA) { Request request; try { @@ -1178,6 +1235,7 @@ public void doTask(final Task task) throws 
TaskExecutionException { Thread.sleep(5000); } catch (InterruptedException e) { } + _shell.setConnectionTransfer(false); reconnect(task.getLink()); return; } else if (task.getType() == Task.Type.OTHER) { diff --git a/agent/src/main/java/com/cloud/agent/AgentShell.java b/agent/src/main/java/com/cloud/agent/AgentShell.java index 0699e00250bb..d76e5551b453 100644 --- a/agent/src/main/java/com/cloud/agent/AgentShell.java +++ b/agent/src/main/java/com/cloud/agent/AgentShell.java @@ -77,6 +77,7 @@ public class AgentShell implements IAgentShell, Daemon { private String hostToConnect; private String connectedHost; private Long preferredHostCheckInterval; + private boolean connectionTransfer = false; protected AgentProperties agentProperties = new AgentProperties(); public AgentShell() { @@ -215,6 +216,14 @@ public void setPersistentProperty(String prefix, String name, String value) { _storage.persist(name, value); } + public boolean isConnectionTransfer() { + return connectionTransfer; + } + + public void setConnectionTransfer(boolean connectionTransfer) { + this.connectionTransfer = connectionTransfer; + } + void loadProperties() throws ConfigurationException { final File file = PropertiesUtil.findConfigFile("agent.properties"); diff --git a/agent/src/main/java/com/cloud/agent/IAgentShell.java b/agent/src/main/java/com/cloud/agent/IAgentShell.java index 2dd08fffd459..0b9d9e81e95c 100644 --- a/agent/src/main/java/com/cloud/agent/IAgentShell.java +++ b/agent/src/main/java/com/cloud/agent/IAgentShell.java @@ -70,4 +70,8 @@ public interface IAgentShell { String getConnectedHost(); void launchNewAgent(ServerResource resource) throws ConfigurationException; + + boolean isConnectionTransfer(); + + void setConnectionTransfer(boolean connectionTransfer); } diff --git a/api/src/main/java/com/cloud/host/Host.java b/api/src/main/java/com/cloud/host/Host.java index 56b4ed75a311..afac6df56312 100644 --- a/api/src/main/java/com/cloud/host/Host.java +++ 
b/api/src/main/java/com/cloud/host/Host.java @@ -177,6 +177,8 @@ public static String[] toStrings(Host.Type... types) { */ Long getManagementServerId(); + Long getLastManagementServerId(); + /* *@return removal date */ diff --git a/api/src/main/java/com/cloud/resource/ResourceService.java b/api/src/main/java/com/cloud/resource/ResourceService.java index 2757c918ed65..562c3c418df1 100644 --- a/api/src/main/java/com/cloud/resource/ResourceService.java +++ b/api/src/main/java/com/cloud/resource/ResourceService.java @@ -23,11 +23,11 @@ import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd; import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; @@ -51,7 +51,7 @@ public interface ResourceService { Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException; - Host cancelMaintenance(CancelMaintenanceCmd cmd); + Host cancelMaintenance(CancelHostMaintenanceCmd cmd); Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException; @@ -69,7 +69,7 @@ public interface ResourceService { List discoverHosts(AddSecondaryStorageCmd cmd) throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException; - Host 
maintain(PrepareForMaintenanceCmd cmd); + Host maintain(PrepareForHostMaintenanceCmd cmd); Host declareHostAsDegraded(DeclareHostAsDegradedCmd cmd) throws NoTransitionException; diff --git a/api/src/main/java/com/cloud/server/ManagementServerHostStats.java b/api/src/main/java/com/cloud/server/ManagementServerHostStats.java index 1eea7addba38..6eb275031e80 100644 --- a/api/src/main/java/com/cloud/server/ManagementServerHostStats.java +++ b/api/src/main/java/com/cloud/server/ManagementServerHostStats.java @@ -19,6 +19,7 @@ package com.cloud.server; import java.util.Date; +import java.util.List; /** * management server related stats @@ -70,6 +71,10 @@ public interface ManagementServerHostStats { String getOsDistribution(); + List getLastAgents(); + + List getAgents(); + int getAgentCount(); long getHeapMemoryUsed(); diff --git a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java index a406e2d7a722..03de07c37da0 100644 --- a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java +++ b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java @@ -1136,9 +1136,12 @@ public class ApiConstants { public static final String LOGOUT = "logout"; public static final String LIST_IDPS = "listIdps"; - public static final String READY_FOR_SHUTDOWN = "readyforshutdown"; + public static final String MAINTENANCE_INITIATED = "maintenanceinitiated"; public static final String SHUTDOWN_TRIGGERED = "shutdowntriggered"; + public static final String READY_FOR_SHUTDOWN = "readyforshutdown"; public static final String PENDING_JOBS_COUNT = "pendingjobscount"; + public static final String AGENTS_COUNT = "agentscount"; + public static final String AGENTS = "agents"; public static final String PUBLIC_MTU = "publicmtu"; public static final String PRIVATE_MTU = "privatemtu"; diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelMaintenanceCmd.java 
b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelHostMaintenanceCmd.java similarity index 98% rename from api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelMaintenanceCmd.java rename to api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelHostMaintenanceCmd.java index a514a61b8a41..55fe8ec23cec 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelMaintenanceCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelHostMaintenanceCmd.java @@ -33,7 +33,7 @@ @APICommand(name = "cancelHostMaintenance", description = "Cancels host maintenance.", responseObject = HostResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false) -public class CancelMaintenanceCmd extends BaseAsyncCmd { +public class CancelHostMaintenanceCmd extends BaseAsyncCmd { ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java index af87bbf33bb0..5e229521efe8 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java @@ -31,6 +31,7 @@ import org.apache.cloudstack.api.response.ClusterResponse; import org.apache.cloudstack.api.response.HostResponse; import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.api.response.ManagementServerResponse; import org.apache.cloudstack.api.response.PodResponse; import org.apache.cloudstack.api.response.UserVmResponse; import org.apache.cloudstack.api.response.ZoneResponse; @@ -105,6 +106,9 @@ public class ListHostsCmd extends BaseListCmd { @Parameter(name = ApiConstants.HYPERVISOR, type = CommandType.STRING, description = "hypervisor type of host: XenServer,KVM,VMware,Hyperv,BareMetal,Simulator") private String 
hypervisor; + @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the id of the management server", since="4.21.0") + private Long managementServerId; + ///////////////////////////////////////////////////// /////////////////// Accessors /////////////////////// ///////////////////////////////////////////////////// @@ -189,6 +193,10 @@ public String getHostOutOfBandManagementPowerState() { return outOfBandManagementPowerState; } + public Long getManagementServerId() { + return managementServerId; + } + ///////////////////////////////////////////////////// /////////////// API Implementation/////////////////// ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForHostMaintenanceCmd.java similarity index 98% rename from api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java rename to api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForHostMaintenanceCmd.java index 2641c54364ee..5c2b50c87239 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForHostMaintenanceCmd.java @@ -33,7 +33,7 @@ @APICommand(name = "prepareHostForMaintenance", description = "Prepares a host for maintenance.", responseObject = HostResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false) -public class PrepareForMaintenanceCmd extends BaseAsyncCmd { +public class PrepareForHostMaintenanceCmd extends BaseAsyncCmd { ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java 
b/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java index 3eeaaef2afac..6717e63723fa 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java @@ -85,7 +85,11 @@ public class AsyncJobResponse extends BaseResponse { @SerializedName("managementserverid") @Param(description = "the msid of the management server on which the job is running", since = "4.19") - private Long msid; + private String managementServerId; + + @SerializedName("managementservername") + @Param(description = "the management server name of the host", since = "4.21.0") + private String managementServerName; @SerializedName(ApiConstants.CREATED) @Param(description = " the created date of the job") @@ -156,7 +160,11 @@ public void setRemoved(final Date removed) { this.removed = removed; } - public void setMsid(Long msid) { - this.msid = msid; + public void setManagementServerId(String managementServerId) { + this.managementServerId = managementServerId; + } + + public void setManagementServerName(String managementServerName) { + this.managementServerName = managementServerName; } } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java index 62bcc07b16d9..8019633d5365 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java @@ -186,10 +186,18 @@ public class HostResponse extends BaseResponseWithAnnotations { @Param(description = "the date and time the host was last pinged") private Date lastPinged; + @SerializedName(ApiConstants.VIRTUAL_MACHINE_ID) + @Param(description = "the virtual machine id for host type ConsoleProxy and SecondaryStorageVM", since = "4.21.0") + private String virtualMachineId; + @SerializedName("managementserverid") @Param(description = "the 
management server ID of the host") private String managementServerId; + @SerializedName("managementservername") + @Param(description = "the management server name of the host", since = "4.21.0") + private String managementServerName; + @SerializedName("clusterid") @Param(description = "the cluster ID of the host") private String clusterId; @@ -435,10 +443,18 @@ public void setLastPinged(Date lastPinged) { this.lastPinged = lastPinged; } + public void setVirtualMachineId(String virtualMachineId) { + this.virtualMachineId = virtualMachineId; + } + public void setManagementServerId(String managementServerId) { this.managementServerId = managementServerId; } + public void setManagementServerName(String managementServerName) { + this.managementServerName = managementServerName; + } + public void setClusterId(String clusterId) { this.clusterId = clusterId; } @@ -723,10 +739,18 @@ public Date getLastPinged() { return lastPinged; } + public String getVirtualMachineId() { + return virtualMachineId; + } + public String getManagementServerId() { return managementServerId; } + public String getManagementServerName() { + return managementServerName; + } + public String getClusterId() { return clusterId; } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java index 84c79d323218..43f92db84cb5 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java @@ -86,6 +86,10 @@ public class LoginCmdResponse extends AuthenticationCmdResponse { @Param(description = "Two factor authentication issuer", since = "4.18.0.0") private String issuerFor2FA; + @SerializedName(value = ApiConstants.MANAGEMENT_SERVER_ID) + @Param(description = "Management Server ID that the user logged to", since = "4.21.0.0") + private String managementServerId; + public String getUsername() { return 
username; } @@ -211,4 +215,12 @@ public String getIssuerFor2FA() { public void setIssuerFor2FA(String issuerFor2FA) { this.issuerFor2FA = issuerFor2FA; } + + public String getManagementServerId() { + return managementServerId; + } + + public void setManagementServerId(String managementServerId) { + this.managementServerId = managementServerId; + } } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java index fc7d3b722abc..d35d34a10bbb 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java @@ -82,6 +82,10 @@ public class ManagementServerResponse extends BaseResponse { @Param(description = "the Management Server Peers") private List peers; + @SerializedName(ApiConstants.AGENTS_COUNT) + @Param(description = "the number of host agents this Management Server is responsible for", since = "4.21.0.0") + private Long agentsCount; + public String getId() { return this.id; } @@ -126,6 +130,10 @@ public String getServiceIp() { return serviceIp; } + public Long getAgentsCount() { + return this.agentsCount; + } + public void setId(String id) { this.id = id; } @@ -174,6 +182,10 @@ public void setServiceIp(String serviceIp) { this.serviceIp = serviceIp; } + public void setAgentsCount(Long agentsCount) { + this.agentsCount = agentsCount; + } + public String getKernelVersion() { return kernelVersion; } diff --git a/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java b/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java index 54a53f39578d..7f81523dab7d 100644 --- a/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java +++ b/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java @@ -22,7 +22,7 @@ public interface ManagementServerHost 
extends InternalIdentity, Identity, ControlledEntity { enum State { - Up, Down, PreparingToShutDown, ReadyToShutDown, ShuttingDown + Up, Down, PreparingForMaintenance, Maintenance, PreparingForShutDown, ReadyToShutDown, ShuttingDown } long getMsid(); diff --git a/client/pom.xml b/client/pom.xml index 2ef6c9105096..e12e03954828 100644 --- a/client/pom.xml +++ b/client/pom.xml @@ -624,7 +624,7 @@ org.apache.cloudstack - cloud-plugin-shutdown + cloud-plugin-maintenance ${project.version} diff --git a/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java new file mode 100644 index 000000000000..33d32c7f6ccb --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java @@ -0,0 +1,38 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// + +package com.cloud.agent.api; + +public class MigrateAgentConnectionAnswer extends Answer { + public MigrateAgentConnectionAnswer() { + } + + public MigrateAgentConnectionAnswer(boolean result) { + this.result = result; + } + + public MigrateAgentConnectionAnswer(String details) { + this.result = false; + this.details = details; + } + + public MigrateAgentConnectionAnswer(MigrateAgentConnectionCommand cmd, boolean result) { + super(cmd, result, null); + } +} diff --git a/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java new file mode 100644 index 000000000000..9471a68669fe --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java @@ -0,0 +1,61 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// + +package com.cloud.agent.api; + +import java.util.List; + +public class MigrateAgentConnectionCommand extends Command { + private List msList; + private List avoidMsList; + private String lbAlgorithm; + private Long lbCheckInterval; + + public MigrateAgentConnectionCommand() { + } + + public MigrateAgentConnectionCommand(final List msList, final List avoidMsList, final String lbAlgorithm, final Long lbCheckInterval) { + super(); + this.msList = msList; + this.avoidMsList = avoidMsList; + this.lbAlgorithm = lbAlgorithm; + this.lbCheckInterval = lbCheckInterval; + } + + public List getMsList() { + return msList; + } + + public List getAvoidMsList() { + return avoidMsList; + } + + public String getLbAlgorithm() { + return lbAlgorithm; + } + + public Long getLbCheckInterval() { + return lbCheckInterval; + } + + @Override + public boolean executeInSequence() { + return false; + } +} diff --git a/core/src/main/java/com/cloud/agent/api/StartupCommand.java b/core/src/main/java/com/cloud/agent/api/StartupCommand.java index cca5e16b5854..7a18ba2dccc1 100644 --- a/core/src/main/java/com/cloud/agent/api/StartupCommand.java +++ b/core/src/main/java/com/cloud/agent/api/StartupCommand.java @@ -47,6 +47,7 @@ public class StartupCommand extends Command { String resourceName; String gatewayIpAddress; String msHostList; + boolean connectionTransferred; String arch; public StartupCommand(Host.Type type) { @@ -291,6 +292,14 @@ public void setMSHostList(String msHostList) { this.msHostList = msHostList; } + public boolean isConnectionTransferred() { + return connectionTransferred; + } + + public void setConnectionTransferred(boolean connectionTransferred) { + this.connectionTransferred = connectionTransferred; + } + public String getArch() { return arch; } diff --git a/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java b/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java index 287769d6a76d..be7563be045a 100644 --- 
a/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java +++ b/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java @@ -189,6 +189,11 @@ public Long getManagementServerId() { return 2L; }; + @Override + public Long getLastManagementServerId() { + return null; + }; + @Override public Date getRemoved() { Date date = null; diff --git a/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java b/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java index 81525ca13f1b..82e2d29f407c 100644 --- a/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java +++ b/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java @@ -16,6 +16,7 @@ // under the License. package com.cloud.agent; +import java.util.List; import java.util.Map; import org.apache.cloudstack.framework.config.ConfigKey; @@ -170,4 +171,10 @@ public enum TapAgentsAction { void notifyMonitorsOfRemovedHost(long hostId, long clusterId); void propagateChangeToAgents(Map params); + + boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs); + + List getLastAgents(); + + void setLastAgents(List lastAgents); } diff --git a/engine/orchestration/pom.xml b/engine/orchestration/pom.xml index bf8ab14c9520..437c98dac877 100755 --- a/engine/orchestration/pom.xml +++ b/engine/orchestration/pom.xml @@ -70,7 +70,7 @@ org.apache.cloudstack - cloud-plugin-shutdown + cloud-plugin-maintenance ${project.version} diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 09fb211fedfb..95c91c089dc2 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -38,6 +38,8 @@ import javax.inject.Inject; import javax.naming.ConfigurationException; 
+import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; import com.cloud.configuration.Config; import com.cloud.org.Cluster; import com.cloud.utils.NumbersUtil; @@ -50,7 +52,10 @@ import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.framework.jobs.AsyncJob; import org.apache.cloudstack.framework.jobs.AsyncJobExecutionContext; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceListener; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager; import org.apache.cloudstack.managed.context.ManagedContextRunnable; +import org.apache.cloudstack.management.ManagementServerHost; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.commons.collections.MapUtils; @@ -130,7 +135,7 @@ /** * Implementation of the Agent Manager. This class controls the connection to the agents. **/ -public class AgentManagerImpl extends ManagerBase implements AgentManager, HandlerFactory, Configurable { +public class AgentManagerImpl extends ManagerBase implements AgentManager, HandlerFactory, ManagementServerMaintenanceListener, Configurable { /** * _agents is a ConcurrentHashMap, but it is used from within a synchronized block. This will be reported by findbugs as JLM_JSR166_UTILCONCURRENT_MONITORENTER. 
Maybe a @@ -154,6 +159,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl @Inject protected HostDao _hostDao = null; @Inject + private ManagementServerHostDao _mshostDao; + @Inject protected OutOfBandManagementDao outOfBandManagementDao; @Inject protected DataCenterDao _dcDao = null; @@ -175,6 +182,9 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl @Inject protected IndirectAgentLB indirectAgentLB; + @Inject + private ManagementServerMaintenanceManager managementServerMaintenanceManager; + protected int _retry = 2; protected long _nodeId = -1; @@ -187,6 +197,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl private int _directAgentThreadCap; + private List lastAgents = null; + protected StateMachine2 _statusStateMachine = Status.getStateMachine(); private final ConcurrentHashMap _pingMap = new ConcurrentHashMap(10007); @@ -226,6 +238,8 @@ public boolean configure(final String name, final Map params) th registerForHostEvents(new SetHostParamsListener(), true, true, false); + managementServerMaintenanceManager.registerListener(this); + _executor = new ThreadPoolExecutor(threads, threads, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue(), new NamedThreadFactory("AgentTaskPool")); _connectExecutor = new ThreadPoolExecutor(100, 500, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue(), new NamedThreadFactory("AgentConnectTaskPool")); @@ -296,6 +310,18 @@ public void unregisterForHostEvents(final int id) { _hostMonitors.remove(id); } + @Override + public void onManagementServerMaintenance() { + _monitorExecutor.shutdownNow(); + } + + @Override + public void onManagementServerCancelMaintenance() { + if (_monitorExecutor.isShutdown()) { + _monitorExecutor.scheduleWithFixedDelay(new MonitorTask(), mgmtServiceConf.getPingInterval(), mgmtServiceConf.getPingInterval(), TimeUnit.SECONDS); + } + } + private AgentControlAnswer handleControlCommand(final AgentAttache attache, final 
AgentControlCommand cmd) { AgentControlAnswer answer = null; @@ -332,6 +358,16 @@ public AgentAttache findAttache(final long hostId) { return attache; } + @Override + public List getLastAgents() { + return lastAgents; + } + + @Override + public void setLastAgents(List lastAgents) { + this.lastAgents = lastAgents; + } + @Override public Answer sendTo(final Long dcId, final HypervisorType type, final Command cmd) { final List clusters = _clusterDao.listByDcHyType(dcId, type.toString()); @@ -616,10 +652,10 @@ protected AgentAttache notifyMonitorsOfConnection(final AgentAttache attache, fi final long hostId = attache.getId(); final HostVO host = _hostDao.findById(hostId); for (final Pair monitor : _hostMonitors) { - logger.debug("Sending Connect to listener: {}", monitor.second().getClass().getSimpleName()); + logger.debug("Sending Connect to listener: {}, for rebalance: {}", monitor.second().getClass().getSimpleName(), forRebalance); for (int i = 0; i < cmd.length; i++) { try { - logger.debug("process connection to issue {} forRebalance == {}", ReflectionToStringBuilderUtils.reflectCollection(cmd[i]), forRebalance); + logger.debug("process connection to issue: {} for host: {}, forRebalance: {}, connection transferred: {}", ReflectionToStringBuilderUtils.reflectCollection(cmd[i]), hostId, forRebalance, cmd[i].isConnectionTransferred()); monitor.second().processConnect(host, cmd[i], forRebalance); } catch (final ConnectionException ce) { if (ce.isSetupError()) { @@ -675,6 +711,11 @@ protected AgentAttache notifyMonitorsOfConnection(final AgentAttache attache, fi @Override public boolean start() { + ManagementServerHostVO msHost = _mshostDao.findByMsid(_nodeId); + if (msHost != null && (ManagementServerHost.State.Maintenance.equals(msHost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(msHost.getState()))) { + return true; + } + startDirectlyConnectedHosts(); if (_connection != null) { @@ -1918,12 +1959,15 @@ public void processHostAdded(long 
hostId) { @Override public void processConnect(final Host host, final StartupCommand cmd, final boolean forRebalance) { - if (cmd instanceof StartupRoutingCommand) { - if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) { - Map params = new HashMap(); - params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString())); - params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString())); - params.put(NetworkOrchestrationService.TUNGSTEN_ENABLED.key(), String.valueOf(NetworkOrchestrationService.TUNGSTEN_ENABLED.valueIn(host.getDataCenterId()))); + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) { + Map params = new HashMap(); + params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString())); + params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString())); + params.put(NetworkOrchestrationService.TUNGSTEN_ENABLED.key(), String.valueOf(NetworkOrchestrationService.TUNGSTEN_ENABLED.valueIn(host.getDataCenterId()))); try { SetHostParamsCommand cmds = new SetHostParamsCommand(params); @@ -1935,8 +1979,6 @@ public void processConnect(final Host host, final StartupCommand cmd, final bool } } - } - @Override public boolean processDisconnect(final long agentId, final Status state) { return true; @@ -2004,6 +2046,11 @@ public void propagateChangeToAgents(Map params) { } } + @Override + public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) { + return true; + } + private GlobalLock getHostJoinLock(Long hostId) { return 
GlobalLock.getInternLock(String.format("%s-%s", "Host-Join", hostId)); } diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java index be327418205b..bf99ecf9105e 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java @@ -47,14 +47,17 @@ import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.ha.dao.HAConfigDao; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceListener; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager; +import org.apache.cloudstack.maintenance.command.BaseShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.CancelShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.PrepareForMaintenanceManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.PrepareForShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.TriggerShutdownManagementServerHostCommand; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.managed.context.ManagedContextTimerTask; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; -import org.apache.cloudstack.shutdown.ShutdownManager; -import org.apache.cloudstack.shutdown.command.CancelShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.PrepareForShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.BaseShutdownManagementServerHostCommand; -import 
org.apache.cloudstack.shutdown.command.TriggerShutdownManagementServerHostCommand; import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.cloudstack.utils.security.SSLUtils; @@ -74,12 +77,17 @@ import com.cloud.cluster.ClusterServicePdu; import com.cloud.cluster.ClusteredAgentRebalanceService; import org.apache.cloudstack.management.ManagementServerHost; +import org.apache.commons.collections.CollectionUtils; + import com.cloud.cluster.ManagementServerHostVO; import com.cloud.cluster.agentlb.AgentLoadBalancerPlanner; import com.cloud.cluster.agentlb.HostTransferMapVO; import com.cloud.cluster.agentlb.HostTransferMapVO.HostTransferState; import com.cloud.cluster.agentlb.dao.HostTransferMapDao; import com.cloud.cluster.dao.ManagementServerHostDao; +import com.cloud.cluster.dao.ManagementServerHostPeerDao; +import com.cloud.dc.DataCenterVO; +import com.cloud.dc.dao.DataCenterDao; import com.cloud.exception.AgentUnavailableException; import com.cloud.exception.OperationTimedoutException; import com.cloud.exception.UnsupportedVersionException; @@ -100,7 +108,7 @@ import com.cloud.utils.nio.Task; import com.google.gson.Gson; -public class ClusteredAgentManagerImpl extends AgentManagerImpl implements ClusterManagerListener, ClusteredAgentRebalanceService { +public class ClusteredAgentManagerImpl extends AgentManagerImpl implements ClusterManagerListener, ManagementServerMaintenanceListener, ClusteredAgentRebalanceService { private static final ScheduledExecutorService s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor")); private final long rebalanceTimeOut = 300000; // 5 mins - after this time remove the agent from the transfer list @@ -113,12 +121,15 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust protected HashMap _sslEngines; private final Timer _timer = new Timer("ClusteredAgentManager Timer"); boolean _agentLbHappened = false; + private int 
_mshostCounter = 0; @Inject protected ClusterManager _clusterMgr = null; @Inject protected ManagementServerHostDao _mshostDao; @Inject + protected ManagementServerHostPeerDao _mshostPeerDao; + @Inject protected HostTransferMapDao _hostTransferDao; @Inject protected List _lbPlanners; @@ -133,7 +144,9 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust @Inject private CAManager caService; @Inject - private ShutdownManager shutdownManager; + private ManagementServerMaintenanceManager managementServerMaintenanceManager; + @Inject + private DataCenterDao dcDao; protected ClusteredAgentManagerImpl() { super(); @@ -159,6 +172,8 @@ public boolean configure(final String name, final Map xmlParams) _clusterMgr.registerListener(this); _clusterMgr.registerDispatcher(new ClusterDispatcher()); + managementServerMaintenanceManager.registerListener(this); + _gson = GsonHelper.getGson(); return super.configure(name, xmlParams); @@ -1320,10 +1335,28 @@ public String dispatch(final ClusterServicePdu pdu) { } private String handleShutdownManagementServerHostCommand(BaseShutdownManagementServerHostCommand cmd) { + if (cmd instanceof PrepareForMaintenanceManagementServerHostCommand) { + logger.debug("Received PrepareForMaintenanceManagementServerHostCommand - preparing for maintenance"); + try { + managementServerMaintenanceManager.prepareForMaintenance(((PrepareForMaintenanceManagementServerHostCommand) cmd).getLbAlgorithm()); + return "Successfully prepared for maintenance"; + } catch(CloudRuntimeException e) { + return e.getMessage(); + } + } + if (cmd instanceof CancelMaintenanceManagementServerHostCommand) { + logger.debug("Received CancelMaintenanceManagementServerHostCommand - cancelling maintenance"); + try { + managementServerMaintenanceManager.cancelMaintenance(); + return "Successfully cancelled maintenance"; + } catch(CloudRuntimeException e) { + return e.getMessage(); + } + } if (cmd instanceof PrepareForShutdownManagementServerHostCommand) { 
- logger.debug("Received BaseShutdownManagementServerHostCommand - preparing to shut down"); + logger.debug("Received PrepareForShutdownManagementServerHostCommand - preparing to shut down"); try { - shutdownManager.prepareForShutdown(); + managementServerMaintenanceManager.prepareForShutdown(); return "Successfully prepared for shutdown"; } catch(CloudRuntimeException e) { return e.getMessage(); @@ -1332,7 +1365,7 @@ private String handleShutdownManagementServerHostCommand(BaseShutdownManagementS if (cmd instanceof TriggerShutdownManagementServerHostCommand) { logger.debug("Received TriggerShutdownManagementServerHostCommand - triggering a shut down"); try { - shutdownManager.triggerShutdown(); + managementServerMaintenanceManager.triggerShutdown(); return "Successfully triggered shutdown"; } catch(CloudRuntimeException e) { return e.getMessage(); @@ -1341,8 +1374,8 @@ private String handleShutdownManagementServerHostCommand(BaseShutdownManagementS if (cmd instanceof CancelShutdownManagementServerHostCommand) { logger.debug("Received CancelShutdownManagementServerHostCommand - cancelling shut down"); try { - shutdownManager.cancelShutdown(); - return "Successfully prepared for shutdown"; + managementServerMaintenanceManager.cancelShutdown(); + return "Successfully cancelled shutdown"; } catch(CloudRuntimeException e) { return e.getMessage(); } @@ -1351,6 +1384,126 @@ private String handleShutdownManagementServerHostCommand(BaseShutdownManagementS } } + @Override + public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) { + if (timeoutDurationInMs <= 0) { + logger.debug(String.format("Not transferring direct agents from management server node %d (id: %s) to other nodes, invalid timeout duration", fromMsId, fromMsUuid)); + return false; + } + + long transferStartTime = System.currentTimeMillis(); + if (CollectionUtils.isEmpty(getDirectAgentHosts(fromMsId))) { + logger.info(String.format("No direct agent hosts available 
on management server node %d (id: %s), to transfer", fromMsId, fromMsUuid)); + return true; + } + + List msHosts = getUpMsHostsExcludingMs(fromMsId); + if (msHosts.isEmpty()) { + logger.warn(String.format("No management server nodes available to transfer agents from management server node %d (id: %s)", fromMsId, fromMsUuid)); + return false; + } + + logger.debug(String.format("Transferring direct agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid)); + int agentTransferFailedCount = 0; + List dataCenterList = dcDao.listAll(); + for (DataCenterVO dc : dataCenterList) { + List directAgentHostsInDc = getDirectAgentHostsInDc(fromMsId, dc.getId()); + if (CollectionUtils.isEmpty(directAgentHostsInDc)) { + continue; + } + logger.debug(String.format("Transferring %d direct agents from management server node %d (id: %s) of zone %s", directAgentHostsInDc.size(), fromMsId, fromMsUuid, dc.toString())); + for (HostVO host : directAgentHostsInDc) { + long transferElapsedTimeInMs = System.currentTimeMillis() - transferStartTime; + if (transferElapsedTimeInMs >= timeoutDurationInMs) { + logger.debug(String.format("Stop transferring remaining direct agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid)); + return false; + } + + try { + if (_mshostCounter >= msHosts.size()) { + _mshostCounter = 0; + } + ManagementServerHostVO msHost = msHosts.get(_mshostCounter % msHosts.size()); + _mshostCounter++; + + if (!rebalanceAgent(host.getId(), Event.StartAgentRebalance, fromMsId, msHost.getMsid())) { + agentTransferFailedCount++; + } else { + updateLastManagementServer(host.getId(), fromMsId); + } + } catch (Exception e) { + logger.warn(String.format("Failed to transfer direct agent of the host %s from management server node %d (id: %s), due to %s", host, fromMsId, fromMsUuid, e.getMessage())); + } + } + } + + return (agentTransferFailedCount == 0); + } + + private List getDirectAgentHosts(long msId) { + List directAgentHosts = 
new ArrayList<>(); + List hosts = _hostDao.listHostsByMs(msId); + for (HostVO host : hosts) { + AgentAttache agent = findAttache(host.getId()); + if (agent != null && agent instanceof DirectAgentAttache) { + directAgentHosts.add(host); + } + } + + return directAgentHosts; + } + + private List getDirectAgentHostsInDc(long msId, long dcId) { + List directAgentHosts = new ArrayList<>(); + List hosts = _hostDao.listHostsByMsAndDc(msId, dcId); + for (HostVO host : hosts) { + AgentAttache agent = findAttache(host.getId()); + if (agent != null && agent instanceof DirectAgentAttache) { + directAgentHosts.add(host); + } + } + + return directAgentHosts; + } + + private List getUpMsHostsExcludingMs(long avoidMsId) { + final List msHosts = _mshostDao.listBy(ManagementServerHost.State.Up); + Iterator iterator = msHosts.iterator(); + while (iterator.hasNext()) { + ManagementServerHostVO ms = iterator.next(); + if (ms.getMsid() == avoidMsId || _mshostPeerDao.findByPeerMsAndState(ms.getId(), ManagementServerHost.State.Up) == null) { + iterator.remove(); + } + } + + return msHosts; + } + + private void updateLastManagementServer(long hostId, long msId) { + HostVO hostVO = _hostDao.findById(hostId); + if (hostVO != null) { + hostVO.setLastManagementServerId(msId); + _hostDao.update(hostId, hostVO); + } + } + + @Override + public void onManagementServerMaintenance() { + s_transferExecutor.shutdownNow(); + cleanupTransferMap(_nodeId); + } + + @Override + public void onManagementServerCancelMaintenance() { + if (isAgentRebalanceEnabled()) { + cleanupTransferMap(_nodeId); + if (s_transferExecutor.isShutdown()) { + s_transferExecutor.scheduleAtFixedRate(getAgentRebalanceScanTask(), 60000, 60000, TimeUnit.MILLISECONDS); + s_transferExecutor.scheduleAtFixedRate(getTransferScanTask(), 60000, ClusteredAgentRebalanceService.DEFAULT_TRANSFER_CHECK_INTERVAL, TimeUnit.MILLISECONDS); + } + } + } + public boolean executeAgentUserRequest(final long agentId, final Event event) throws 
AgentUnavailableException { return executeUserRequest(agentId, event); } diff --git a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java index 053d9ac218ee..8ef2de3f74da 100644 --- a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java +++ b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java @@ -372,6 +372,9 @@ public void setHostTags(List hostTags) { @Column(name = "mgmt_server_id") private Long managementServerId; + @Column(name = "last_mgmt_server_id") + private Long lastManagementServerId; + @Column(name = "dom0_memory") private long dom0MinMemory; @@ -556,6 +559,10 @@ public void setManagementServerId(Long managementServerId) { this.managementServerId = managementServerId; } + public void setLastManagementServerId(Long lastManagementServerId) { + this.lastManagementServerId = lastManagementServerId; + } + @Override public long getLastPinged() { return lastPinged; @@ -625,6 +632,11 @@ public Long getManagementServerId() { return managementServerId; } + @Override + public Long getLastManagementServerId() { + return lastManagementServerId; + } + @Override public Date getDisconnectedOn() { return disconnectedOn; diff --git a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java index 7efc29b02a63..64eb2ac024b6 100644 --- a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java +++ b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java @@ -4263,7 +4263,7 @@ public void processHostAdded(long hostId) { @Override public void processConnect(final Host 
host, final StartupCommand cmd, final boolean forRebalance) throws ConnectionException { - if (!(cmd instanceof StartupRoutingCommand)) { + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { return; } final long hostId = host.getId(); diff --git a/engine/schema/src/main/java/com/cloud/host/HostVO.java b/engine/schema/src/main/java/com/cloud/host/HostVO.java index a449eb450cf9..bd6768fa0ddb 100644 --- a/engine/schema/src/main/java/com/cloud/host/HostVO.java +++ b/engine/schema/src/main/java/com/cloud/host/HostVO.java @@ -404,6 +404,9 @@ public void setGpuGroups(HashMap> groupDe @Column(name = "mgmt_server_id") private Long managementServerId; + @Column(name = "last_mgmt_server_id") + private Long lastManagementServerId; + @Column(name = "dom0_memory") private long dom0MinMemory; @@ -570,6 +573,10 @@ public void setManagementServerId(Long managementServerId) { this.managementServerId = managementServerId; } + public void setLastManagementServerId(Long lastManagementServerId) { + this.lastManagementServerId = lastManagementServerId; + } + @Override public long getLastPinged() { return lastPinged; @@ -639,6 +646,11 @@ public Long getManagementServerId() { return managementServerId; } + @Override + public Long getLastManagementServerId() { + return lastManagementServerId; + } + @Override public Date getDisconnectedOn() { return disconnectedOn; diff --git a/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java b/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java index a2df6db44e51..abdf50ab399a 100644 --- a/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java +++ b/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java @@ -151,12 +151,23 @@ public interface HostDao extends GenericDao, StateDao listHostsWithActiveVMs(long offeringId); + List listHostsByMsAndDc(long msId, long dcId); + + List listHostsByMs(long msId); + /** * Retrieves the number of hosts/agents this {@see ManagementServer} has responsibility over. 
- * @param msid the id of the {@see ManagementServer} + * @param msId the id of the {@see ManagementServer} * @return the number of hosts/agents this {@see ManagementServer} has responsibility over */ - int countByMs(long msid); + int countByMs(long msId); + + /** + * Retrieves the host ids/agents this {@see ManagementServer} has responsibility over. + * @param msId the id of the {@see ManagementServer} + * @return the host ids/agents this {@see ManagementServer} has responsibility over + */ + List listByMs(long msId); /** * Retrieves the hypervisor versions of the hosts in the datacenter which are in Up state in ascending order diff --git a/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java b/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java index 63950294654c..4e1be3ae0fb6 100644 --- a/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java @@ -124,7 +124,9 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao protected SearchBuilder UnmanagedApplianceSearch; protected SearchBuilder MaintenanceCountSearch; protected SearchBuilder HostTypeCountSearch; - protected SearchBuilder ResponsibleMsCountSearch; + protected SearchBuilder ResponsibleMsSearch; + protected SearchBuilder ResponsibleMsDcSearch; + protected GenericSearchBuilder ResponsibleMsIdSearch; protected SearchBuilder HostTypeZoneCountSearch; protected SearchBuilder ClusterStatusSearch; protected SearchBuilder TypeNameZoneSearch; @@ -189,9 +191,19 @@ public void init() { HostTypeCountSearch.and("type", HostTypeCountSearch.entity().getType(), SearchCriteria.Op.EQ); HostTypeCountSearch.done(); - ResponsibleMsCountSearch = createSearchBuilder(); - ResponsibleMsCountSearch.and("managementServerId", ResponsibleMsCountSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); - ResponsibleMsCountSearch.done(); + ResponsibleMsSearch = createSearchBuilder(); + 
ResponsibleMsSearch.and("managementServerId", ResponsibleMsSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); + ResponsibleMsSearch.done(); + + ResponsibleMsDcSearch = createSearchBuilder(); + ResponsibleMsDcSearch.and("managementServerId", ResponsibleMsDcSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); + ResponsibleMsDcSearch.and("dcId", ResponsibleMsDcSearch.entity().getDataCenterId(), SearchCriteria.Op.EQ); + ResponsibleMsDcSearch.done(); + + ResponsibleMsIdSearch = createSearchBuilder(String.class); + ResponsibleMsIdSearch.selectFields(ResponsibleMsIdSearch.entity().getUuid()); + ResponsibleMsIdSearch.and("managementServerId", ResponsibleMsIdSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); + ResponsibleMsIdSearch.done(); HostTypeZoneCountSearch = createSearchBuilder(); HostTypeZoneCountSearch.and("type", HostTypeZoneCountSearch.entity().getType(), SearchCriteria.Op.EQ); @@ -1424,12 +1436,34 @@ public List listHostsWithActiveVMs(long offeringId) { } @Override - public int countByMs(long msid) { - SearchCriteria sc = ResponsibleMsCountSearch.create(); - sc.setParameters("managementServerId", msid); + public List listHostsByMsAndDc(long msId, long dcId) { + SearchCriteria sc = ResponsibleMsDcSearch.create(); + sc.setParameters("managementServerId", msId); + sc.setParameters("dcId", dcId); + return listBy(sc); + } + + @Override + public List listHostsByMs(long msId) { + SearchCriteria sc = ResponsibleMsSearch.create(); + sc.setParameters("managementServerId", msId); + return listBy(sc); + } + + @Override + public int countByMs(long msId) { + SearchCriteria sc = ResponsibleMsSearch.create(); + sc.setParameters("managementServerId", msId); return getCount(sc); } + @Override + public List listByMs(long msId) { + SearchCriteria sc = ResponsibleMsIdSearch.create(); + sc.addAnd("managementServerId", SearchCriteria.Op.EQ, msId); + return customSearch(sc, null); + } + @Override public List 
listOrderedHostsHypervisorVersionsInDatacenter(long datacenterId, HypervisorType hypervisorType) { PreparedStatement pstmt = null; diff --git a/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql b/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql index c36b71c2f250..d56701c8aead 100644 --- a/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql +++ b/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql @@ -425,3 +425,12 @@ INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid, hypervisor_type, hypervi CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.vm_instance', 'delete_protection', 'boolean DEFAULT FALSE COMMENT "delete protection for vm" '); CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.volumes', 'delete_protection', 'boolean DEFAULT FALSE COMMENT "delete protection for volumes" '); + +-- Modify index for mshost_peer +DELETE FROM `cloud`.`mshost_peer`; +CALL `cloud`.`IDEMPOTENT_DROP_FOREIGN_KEY`('cloud.mshost_peer','fk_mshost_peer__owner_mshost'); +CALL `cloud`.`IDEMPOTENT_DROP_INDEX`('i_mshost_peer__owner_peer_runid','mshost_peer'); +CALL `cloud`.`IDEMPOTENT_ADD_UNIQUE_KEY`('cloud.mshost_peer', 'i_mshost_peer__owner_peer', '(owner_mshost, peer_mshost)'); +CALL `cloud`.`IDEMPOTENT_ADD_FOREIGN_KEY`('cloud.mshost_peer', 'fk_mshost_peer__owner_mshost', '(owner_mshost)', '`mshost`(`id`)'); + +CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.host', 'last_mgmt_server_id', 'bigint unsigned DEFAULT NULL COMMENT "last management server this host is connected to" AFTER `mgmt_server_id`'); diff --git a/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java b/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java index 9dfb9e1654e2..b136b8e842b8 100644 --- a/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java +++ b/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java @@ -20,6 +20,12 @@ public 
interface IndirectAgentLB { + /** + * Return list of management server addresses from host setting + * @return management servers string list + */ + List getManagementServerList(); + /** * Return list of management server addresses after applying configured lb algorithm * for a host in a zone. @@ -30,6 +36,17 @@ public interface IndirectAgentLB { */ List getManagementServerList(Long hostId, Long dcId, List orderedHostIdList); + /** + * Return list of management server addresses after applying the lb algorithm + * for a host in a zone. + * @param hostId host id (if present) + * @param dcId zone id + * @param orderedHostIdList (optional) list of ordered host id list + * @param lbAlgorithm lb algorithm + * @return management servers string list + */ + List getManagementServerList(Long hostId, Long dcId, List orderedHostIdList, String lbAlgorithm); + /** * Compares received management server list against expected list for a host in a zone. * @param hostId host id @@ -45,6 +62,8 @@ public interface IndirectAgentLB { */ String getLBAlgorithmName(); + void checkLBAlgorithmName(String lbAlgorithm); + /** * Returns the configured LB preferred host check interval (if applicable at cluster scope) * @return returns interval in seconds @@ -53,4 +72,7 @@ public interface IndirectAgentLB { void propagateMSListToAgents(); + boolean haveAgentBasedHosts(long msId); + + boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs); } diff --git a/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java b/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java index e26e32e7b2ef..1b45910b88a4 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java +++ b/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java @@ -941,7 +941,7 @@ private void processInvalidatedNodes(List invalidatedNod try { JmxUtil.unregisterMBean("ClusterManager", "Node " + mshost.getId()); } 
catch (final Exception e) { - logger.warn("Unable to deregiester cluster node from JMX monitoring due to exception " + e.toString()); + logger.warn("Unable to deregister cluster node from JMX monitoring due to exception " + e.toString()); } } @@ -1063,8 +1063,12 @@ public ManagementServerHostVO doInTransaction(final TransactionStatus status) { logger.info("New instance of management server {}, runId {} is being started", mshost, _runId); } } else { + ManagementServerHost.State msHostState = ManagementServerHost.State.Up; + if (ManagementServerHost.State.Maintenance.equals(mshost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(mshost.getState())) { + msHostState = ManagementServerHost.State.Maintenance; + } _mshostDao.update(mshost.getId(), _runId, NetUtils.getCanonicalHostName(), version, _clusterNodeIP, _currentServiceAdapter.getServicePort(), - DateUtil.currentGMTTime()); + DateUtil.currentGMTTime(), msHostState); if (logger.isInfoEnabled()) { logger.info("Management server {}, runId {} is being started", mshost, _runId); } @@ -1102,11 +1106,17 @@ public boolean stop() { if (_mshostId != null) { final ManagementServerHostVO mshost = _mshostDao.findByMsid(_msId); - final ManagementServerStatusVO mshostStatus = mshostStatusDao.findByMsId(mshost.getUuid()); - mshost.setState(ManagementServerHost.State.Down); - mshostStatus.setLastJvmStop(new Date()); - _mshostDao.update(_mshostId, mshost); - mshostStatusDao.update(mshostStatus.getId(), mshostStatus); + if (mshost != null) { + final ManagementServerStatusVO mshostStatus = mshostStatusDao.findByMsId(mshost.getUuid()); + mshostStatus.setLastJvmStop(new Date()); + mshostStatusDao.update(mshostStatus.getId(), mshostStatus); + + ManagementServerHost.State msHostState = ManagementServerHost.State.Down; + if (ManagementServerHost.State.Maintenance.equals(mshost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(mshost.getState())) { + msHostState = 
ManagementServerHost.State.Maintenance; + } + _mshostDao.updateState(mshost.getId(), msHostState); + } } _heartbeatScheduler.shutdownNow(); diff --git a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java index 96d57ee04258..6c8ffcac78b7 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java +++ b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java @@ -33,10 +33,12 @@ public interface ManagementServerHostDao extends GenericDao getActiveList(Date cutTime); List getInactiveList(Date cutTime); @@ -47,6 +49,8 @@ public interface ManagementServerHostDao extends GenericDao listBy(ManagementServerHost.State... states); + List listNonUpStateMsIPs(); + /** * Lists msids for which hosts are orphaned, i.e. msids that hosts refer as their owning ms whilst no mshost entry exists with those msids * diff --git a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java index 27b6d52f61b0..ffee71daa0c4 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java +++ b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java @@ -35,6 +35,7 @@ import com.cloud.utils.db.DB; import com.cloud.utils.db.Filter; import com.cloud.utils.db.GenericDaoBase; +import com.cloud.utils.db.GenericSearchBuilder; import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.TransactionLegacy; @@ -46,6 +47,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase ActiveSearch; private final SearchBuilder InactiveSearch; private final SearchBuilder StateSearch; + protected GenericSearchBuilder NonUpStateMsSearch; @Override public void invalidateRunSession(long id, long runid) { 
@@ -77,7 +79,7 @@ public ManagementServerHostVO findByMsid(long msid) { @Override @DB - public void update(long id, long runid, String name, String version, String serviceIP, int servicePort, Date lastUpdate) { + public void update(long id, long runid, String name, String version, String serviceIP, int servicePort, Date lastUpdate, ManagementServerHost.State state) { TransactionLegacy txn = TransactionLegacy.currentTxn(); PreparedStatement pstmt = null; try { @@ -91,7 +93,7 @@ public void update(long id, long runid, String name, String version, String serv pstmt.setInt(4, servicePort); pstmt.setString(5, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), lastUpdate)); pstmt.setLong(6, runid); - pstmt.setString(7, ManagementServerHost.State.Up.toString()); + pstmt.setString(7, state.toString()); pstmt.setLong(8, id); pstmt.executeUpdate(); @@ -148,6 +150,18 @@ public void update(long id, long runid, Date lastUpdate) { } } + @Override + public boolean updateState(long id, ManagementServerHost.State newState) { + ManagementServerHostVO msHost = findById(id); + if (msHost == null) { + return false; + } + + msHost.setState(newState); + msHost.setLastUpdateTime(DateUtil.currentGMTTime()); + return update(id, msHost); + } + @Override public List getActiveList(Date cutTime) { SearchCriteria sc = ActiveSearch.create(); @@ -205,6 +219,11 @@ protected ManagementServerHostDaoImpl() { StateSearch.and("state", StateSearch.entity().getState(), SearchCriteria.Op.IN); StateSearch.and("runid", StateSearch.entity().getRunid(), SearchCriteria.Op.GT); StateSearch.done(); + + NonUpStateMsSearch = createSearchBuilder(String.class); + NonUpStateMsSearch.selectFields(NonUpStateMsSearch.entity().getServiceIP()); + NonUpStateMsSearch.and("state", NonUpStateMsSearch.entity().getState(), SearchCriteria.Op.NLIKE); + NonUpStateMsSearch.done(); } @Override @@ -238,6 +257,13 @@ public List listBy(ManagementServerHost.State... 
states) return listBy(sc); } + @Override + public List listNonUpStateMsIPs() { + SearchCriteria sc = NonUpStateMsSearch.create(); + sc.addAnd("state", SearchCriteria.Op.NLIKE, State.Up); + return customSearch(sc, null); + } + @Override public List listOrphanMsids() { List orphanList = new ArrayList(); diff --git a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java index 55559946cf04..37601e8ce783 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java +++ b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java @@ -33,4 +33,7 @@ public interface ManagementServerHostPeerDao extends GenericDao FindForUpdateSearch; private final SearchBuilder CountSearch; private final SearchBuilder ActiveSearch; + private final SearchBuilder FindByOwnerAndPeerMsSearch; + private final SearchBuilder FindByPeerMsAndStateSearch; + public ManagementServerHostPeerDaoImpl() { ClearPeerSearch = createSearchBuilder(); @@ -59,6 +62,17 @@ public ManagementServerHostPeerDaoImpl() { ActiveSearch.and("peerState", ActiveSearch.entity().getPeerState(), SearchCriteria.Op.EQ); ActiveSearch.and("lastUpdateTime", ActiveSearch.entity().getLastUpdateTime(), SearchCriteria.Op.GT); ActiveSearch.done(); + + FindByOwnerAndPeerMsSearch = createSearchBuilder(); + FindByOwnerAndPeerMsSearch.and("ownerMshost", FindByOwnerAndPeerMsSearch.entity().getOwnerMshost(), SearchCriteria.Op.EQ); + FindByOwnerAndPeerMsSearch.and("peerMshost", FindByOwnerAndPeerMsSearch.entity().getPeerMshost(), SearchCriteria.Op.EQ); + FindByOwnerAndPeerMsSearch.and("peerState", FindByOwnerAndPeerMsSearch.entity().getPeerState(), SearchCriteria.Op.EQ); + FindByOwnerAndPeerMsSearch.done(); + + FindByPeerMsAndStateSearch = createSearchBuilder(); + FindByPeerMsAndStateSearch.and("peerMshost", FindByPeerMsAndStateSearch.entity().getPeerMshost(), 
SearchCriteria.Op.EQ); + FindByPeerMsAndStateSearch.and("peerState", FindByPeerMsAndStateSearch.entity().getPeerState(), SearchCriteria.Op.EQ); + FindByPeerMsAndStateSearch.done(); } @Override @@ -133,4 +147,23 @@ public boolean isPeerUpState(long ownerMshost, long peerMshost, Date cutTime) { return listBy(sc).size() > 0; } + + @Override + public ManagementServerHostPeerVO findByOwnerAndPeerMsHost(long ownerMshost, long peerMshost, ManagementServerHost.State peerState) { + SearchCriteria sc = FindByOwnerAndPeerMsSearch.create(); + sc.setParameters("ownerMshost", ownerMshost); + sc.setParameters("peerMshost", peerMshost); + sc.setParameters("peerState", peerState); + + return findOneBy(sc); + } + + @Override + public ManagementServerHostPeerVO findByPeerMsAndState(long peerMshost, ManagementServerHost.State peerState) { + SearchCriteria sc = FindByPeerMsAndStateSearch.create(); + sc.setParameters("peerMshost", peerMshost); + sc.setParameters("peerState", peerState); + + return findOneBy(sc); + } } diff --git a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java index 47bf27bd6c4b..19eeff37c1f6 100644 --- a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java +++ b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java @@ -218,16 +218,16 @@ public long submitAsyncJob(AsyncJob job) { return submitAsyncJob(job, false); } - private void checkShutdown() { + private void checkAsyncJobsAllowed() { if (!isAsyncJobsEnabled()) { - throw new CloudRuntimeException("A shutdown has been triggered. Can not accept new jobs"); + throw new CloudRuntimeException("Maintenance or Shutdown has been initiated on this management server. 
Can not accept new jobs"); } } @SuppressWarnings("unchecked") @DB public long submitAsyncJob(AsyncJob job, boolean scheduleJobExecutionInContext) { - checkShutdown(); + checkAsyncJobsAllowed(); @SuppressWarnings("rawtypes") GenericDao dao = GenericDaoBase.getDao(job.getClass()); @@ -248,7 +248,7 @@ public long submitAsyncJob(AsyncJob job, boolean scheduleJobExecutionInContext) @Override @DB public long submitAsyncJob(final AsyncJob job, final String syncObjType, final long syncObjId) { - checkShutdown(); + checkAsyncJobsAllowed(); try { @SuppressWarnings("rawtypes") @@ -860,7 +860,7 @@ protected void runInContext() { protected void reallyRun() { try { if (!isAsyncJobsEnabled()) { - logger.info("A shutdown has been triggered. Not executing any async job"); + logger.info("Maintenance or Shutdown has been initiated on this management server. Not executing any async jobs"); return; } diff --git a/plugins/shutdown/pom.xml b/plugins/maintenance/pom.xml similarity index 92% rename from plugins/shutdown/pom.xml rename to plugins/maintenance/pom.xml index 372095c55c83..fb8f64cd16c8 100644 --- a/plugins/shutdown/pom.xml +++ b/plugins/maintenance/pom.xml @@ -21,8 +21,8 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - cloud-plugin-shutdown - Apache CloudStack Plugin - Safe Shutdown + cloud-plugin-maintenance + Apache CloudStack Plugin - MS Maintenance and Safe Shutdown org.apache.cloudstack cloudstack-plugins diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java similarity index 85% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java index 
d7f4953291b6..da9263ed4fd3 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java @@ -25,18 +25,18 @@ import org.apache.cloudstack.api.response.ManagementServerResponse; -import org.apache.cloudstack.shutdown.ShutdownManager; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager; -public abstract class BaseShutdownActionCmd extends BaseCmd { +public abstract class BaseMSMaintenanceActionCmd extends BaseCmd { @Inject - protected ShutdownManager shutdownManager; + protected ManagementServerMaintenanceManager managementServerMaintenanceManager; ///////////////////////////////////////////////////// //////////////// API parameters ///////////////////// ///////////////////////////////////////////////////// - @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the uuid of the management server", required = true) + @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the id of the management server", required = true) private Long managementServerId; ///////////////////////////////////////////////////// diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java new file mode 100644 index 000000000000..a0f091ef1e4a --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command; + +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.BaseCmd; + +import com.cloud.user.Account; + +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; +import org.apache.cloudstack.acl.RoleType; + +@APICommand(name = CancelMaintenanceCmd.APINAME, + description = "Cancels maintenance of the management server", + since = "4.21.0", + responseObject = ManagementServerMaintenanceResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + authorized = {RoleType.Admin}) + +public class CancelMaintenanceCmd extends BaseMSMaintenanceActionCmd { + + public static final String APINAME = "cancelMaintenance"; + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public void execute() { + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.cancelMaintenance(this); + response.setResponseName(getCommandName()); + response.setObjectName("cancelmaintenance"); + setResponseObject(response); + } +} diff --git 
a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java similarity index 83% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java index aa90d7fcbdc9..35bfa4f42646 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java @@ -15,24 +15,24 @@ // specific language governing permissions and limitations // under the License. - package org.apache.cloudstack.api.command; +package org.apache.cloudstack.api.command; import org.apache.cloudstack.api.APICommand; import org.apache.cloudstack.api.BaseCmd; import com.cloud.user.Account; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import org.apache.cloudstack.acl.RoleType; @APICommand(name = CancelShutdownCmd.APINAME, description = "Cancels a triggered shutdown", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, authorized = {RoleType.Admin}) -public class CancelShutdownCmd extends BaseShutdownActionCmd { +public class CancelShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "cancelShutdown"; @@ -52,7 +52,7 @@ public long getEntityOwnerId() { @Override public void execute() { - final ReadyForShutdownResponse response = shutdownManager.cancelShutdown(this); + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.cancelShutdown(this); response.setResponseName(getCommandName()); 
response.setObjectName("cancelshutdown"); setResponseObject(response); diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java new file mode 100644 index 000000000000..3c036c4c35f2 --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.command; + +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; + +import com.cloud.user.Account; + +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; +import org.apache.cloudstack.acl.RoleType; + +@APICommand(name = PrepareForMaintenanceCmd.APINAME, + description = "Prepares management server for maintenance by preventing new jobs from being accepted after completion of active jobs and migrating the agents", + since = "4.21.0", + responseObject = ManagementServerMaintenanceResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + authorized = {RoleType.Admin}) +public class PrepareForMaintenanceCmd extends BaseMSMaintenanceActionCmd { + public static final String APINAME = "prepareForMaintenance"; + + @Parameter(name = ApiConstants.ALGORITHM, type = CommandType.STRING, description = "indirect agents load balancer algorithm (static, roundrobin, shuffle);" + + " when this is not set, already configured algorithm from setting 'indirect.agent.lb.algorithm' is considered") + private String algorithm; + + public String getAlgorithm() { + return algorithm; + } + + public void setAlgorithm(String algorithm) { + this.algorithm = algorithm; + } + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public void execute() { + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.prepareForMaintenance(this); + response.setResponseName(getCommandName()); + 
response.setObjectName("prepareformaintenance"); + setResponseObject(response); + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java similarity index 85% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java index c86d28560470..273cc2743ad3 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java @@ -17,22 +17,21 @@ package org.apache.cloudstack.api.command; - import org.apache.cloudstack.api.APICommand; import org.apache.cloudstack.api.BaseCmd; import com.cloud.user.Account; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import org.apache.cloudstack.acl.RoleType; @APICommand(name = PrepareForShutdownCmd.APINAME, description = "Prepares CloudStack for a safe manual shutdown by preventing new jobs from being accepted", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, authorized = {RoleType.Admin}) -public class PrepareForShutdownCmd extends BaseShutdownActionCmd { +public class PrepareForShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "prepareForShutdown"; @Override @@ -51,7 +50,7 @@ public long getEntityOwnerId() { @Override public void execute() { - final ReadyForShutdownResponse response = shutdownManager.prepareForShutdown(this); + final ManagementServerMaintenanceResponse response = 
managementServerMaintenanceManager.prepareForShutdown(this); response.setResponseName(getCommandName()); response.setObjectName("prepareforshutdown"); setResponseObject(response); diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java similarity index 66% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java index de4db9c04284..782b23a04222 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java @@ -17,41 +17,31 @@ package org.apache.cloudstack.api.command; -import javax.inject.Inject; - import org.apache.cloudstack.api.APICommand; -import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseCmd; -import org.apache.cloudstack.api.Parameter; -import org.apache.cloudstack.api.response.ManagementServerResponse; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; -import org.apache.cloudstack.shutdown.ShutdownManager; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import com.cloud.user.Account; @APICommand(name = ReadyForShutdownCmd.APINAME, description = "Returns the status of CloudStack, whether a shutdown has been triggered and if ready to shutdown", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false) -public class ReadyForShutdownCmd extends BaseCmd { +public class ReadyForShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "readyForShutdown"; - @Inject - private ShutdownManager 
shutdownManager; - - ///////////////////////////////////////////////////// - //////////////// API parameters ///////////////////// - ///////////////////////////////////////////////////// - - @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the uuid of the management server") - private Long managementServerId; - ///////////////////////////////////////////////////// /////////////////// Accessors /////////////////////// ///////////////////////////////////////////////////// - public Long getManagementServerId() { - return managementServerId; + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; } ///////////////////////////////////////////////////// @@ -60,19 +50,9 @@ public Long getManagementServerId() { @Override public void execute() { - final ReadyForShutdownResponse response = shutdownManager.readyForShutdown(this); + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.readyForShutdown(this); response.setResponseName(getCommandName()); response.setObjectName("readyforshutdown"); setResponseObject(response); } - - @Override - public String getCommandName() { - return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; - } - - @Override - public long getEntityOwnerId() { - return Account.ACCOUNT_ID_SYSTEM; - } } diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java similarity index 85% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java index b4ef7c1f67a6..dc729593b0db 100644 --- 
a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java @@ -22,16 +22,16 @@ import com.cloud.user.Account; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import org.apache.cloudstack.acl.RoleType; @APICommand(name = TriggerShutdownCmd.APINAME, - description = "Triggers an automatic safe shutdown of CloudStack by not accepting new jobs and shutting down when all pending jobbs have been completed. Triggers an immediate shutdown if forced", + description = "Triggers an automatic safe shutdown of CloudStack by not accepting new jobs and shutting down when all pending jobs have been completed.", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, authorized = {RoleType.Admin}) -public class TriggerShutdownCmd extends BaseShutdownActionCmd { +public class TriggerShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "triggerShutdown"; ///////////////////////////////////////////////////// @@ -54,7 +54,7 @@ public long getEntityOwnerId() { @Override public void execute() { - final ReadyForShutdownResponse response = shutdownManager.triggerShutdown(this); + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.triggerShutdown(this); response.setResponseName(getCommandName()); response.setObjectName("triggershutdown"); setResponseObject(response); diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java similarity index 52% rename from 
plugins/shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java index d1b2353d2a33..52bd8ab3fb89 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java @@ -16,35 +16,81 @@ // under the License. package org.apache.cloudstack.api.response; +import java.util.List; import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseResponse; +import org.apache.cloudstack.management.ManagementServerHost.State; import com.cloud.serializer.Param; import com.google.gson.annotations.SerializedName; -public class ReadyForShutdownResponse extends BaseResponse { - @SerializedName(ApiConstants.READY_FOR_SHUTDOWN) - @Param(description = "Indicates whether CloudStack is ready to shutdown") - private Boolean readyForShutdown; +public class ManagementServerMaintenanceResponse extends BaseResponse { + @SerializedName(ApiConstants.MANAGEMENT_SERVER_ID) + @Param(description = "The id of the management server") + private String managementServerId; + + @SerializedName(ApiConstants.STATE) + @Param(description = "the state of the management server") + private State state; + + @SerializedName(ApiConstants.MAINTENANCE_INITIATED) + @Param(description = "Indicates whether maintenance has been initiated") + private Boolean maintenanceInitiated; @SerializedName(ApiConstants.SHUTDOWN_TRIGGERED) @Param(description = "Indicates whether a shutdown has been triggered") private Boolean shutdownTriggered; + @SerializedName(ApiConstants.READY_FOR_SHUTDOWN) + @Param(description = "Indicates whether CloudStack is ready to shutdown") + private Boolean readyForShutdown; + @SerializedName(ApiConstants.PENDING_JOBS_COUNT) @Param(description = "The number of jobs in progress") 
private Long pendingJobsCount; - @SerializedName(ApiConstants.MANAGEMENT_SERVER_ID) - @Param(description = "The id of the management server") - private Long msId; + @SerializedName(ApiConstants.AGENTS_COUNT) + @Param(description = "The number of host agents this management server is responsible for") + private Long agentsCount; + + @SerializedName(ApiConstants.AGENTS) + @Param(description = "The host agents this management server is responsible for") + private List agents; - public ReadyForShutdownResponse(Long msId, Boolean shutdownTriggered, Boolean readyForShutdown, long pendingJobsCount) { - this.msId = msId; + public ManagementServerMaintenanceResponse(String managementServerId, State state, Boolean maintenanceInitiated, Boolean shutdownTriggered, Boolean readyForShutdown, long pendingJobsCount, long agentsCount, List agents) { + this.managementServerId = managementServerId; + this.state = state; + this.maintenanceInitiated = maintenanceInitiated; this.shutdownTriggered = shutdownTriggered; this.readyForShutdown = readyForShutdown; this.pendingJobsCount = pendingJobsCount; + this.agentsCount = agentsCount; + this.agents = agents; + } + + public String getManagementServerId() { + return managementServerId; + } + + public void setManagementServerId(String managementServerId) { + this.managementServerId = managementServerId; + } + + public State getState() { + return state; + } + + public void setState(State state) { + this.state = state; + } + + public Boolean getMaintenanceInitiated() { + return this.maintenanceInitiated; + } + + public void setMaintenanceInitiated(Boolean maintenanceInitiated) { + this.maintenanceInitiated = maintenanceInitiated; } public Boolean getShutdownTriggered() { @@ -71,11 +117,19 @@ public void setPendingJobsCount(Long pendingJobsCount) { this.pendingJobsCount = pendingJobsCount; } - public Long getMsId() { - return msId; + public Long getAgentsCount() { + return this.agentsCount; + } + + public void setAgentsCount(Long agentsCount) { 
+ this.agentsCount = agentsCount; + } + + public List getAgents() { + return agents; } - public void setMsId(Long msId) { - this.msId = msId; + public void setAgents(List agents) { + this.agents = agents; } } diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java new file mode 100644 index 000000000000..bd82d1b257df --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.maintenance; + +public interface ManagementServerMaintenanceListener { + void onManagementServerMaintenance(); + + void onManagementServerCancelMaintenance(); +} diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java new file mode 100644 index 000000000000..4bb07be0de78 --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.maintenance; + +import org.apache.cloudstack.api.command.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.CancelShutdownCmd; +import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.PrepareForShutdownCmd; +import org.apache.cloudstack.api.command.ReadyForShutdownCmd; +import org.apache.cloudstack.api.command.TriggerShutdownCmd; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; +import org.apache.cloudstack.framework.config.ConfigKey; + +import com.cloud.cluster.ManagementServerHostVO; + +public interface ManagementServerMaintenanceManager { + int DEFAULT_MS_MAINTENANCE_TIMEOUT_IN_MINS = 60; + + ConfigKey ManagementServerMaintenanceTimeoutInMins = new ConfigKey<>(Integer.class, + "management.server.maintenance.timeout", + "Advanced", + String.valueOf(DEFAULT_MS_MAINTENANCE_TIMEOUT_IN_MINS), + "Timeout (in mins) for the maintenance window for the management server, default: 60 mins.", + true, + ConfigKey.Scope.Global, + null); + + void registerListener(ManagementServerMaintenanceListener listener); + + void unregisterListener(ManagementServerMaintenanceListener listener); + + void onMaintenance(); + + void onCancelMaintenance(); + + // Returns the number of pending jobs for the given management server msids. + // NOTE: This is the msid and NOT the id + long countPendingJobs(Long... 
msIds); + + boolean isAsyncJobsEnabled(); + + // Indicates whether a shutdown has been triggered on the current management server + boolean isShutdownTriggered(); + + // Indicates whether the current management server is preparing for shutdown + boolean isPreparingForShutdown(); + + // Triggers a shutdown on the current management server by not accepting any more async jobs and shutting down when there are no pending jobs + void triggerShutdown(); + + // Prepares the current management server for shutdown by not accepting any more async jobs + void prepareForShutdown(); + + // Cancels the shutdown on the current management server + void cancelShutdown(); + + // Indicates whether the current management server is preparing for maintenance + boolean isPreparingForMaintenance(); + + void resetPreparingForMaintenance(); + + long getMaintenanceStartTime(); + + String getLbAlgorithm(); + + // Prepares the current management server for maintenance by migrating the agents and not accepting any more async jobs + void prepareForMaintenance(String lbAlorithm); + + // Cancels maintenance of the current management server + void cancelMaintenance(); + + void cancelPreparingForMaintenance(ManagementServerHostVO msHost); + + // Returns whether the specified management server (or the current one when none is specified) is ready to shut down and whether a shutdown has been triggered on it + ManagementServerMaintenanceResponse readyForShutdown(ReadyForShutdownCmd cmd); + + // Prepares the specified management server for shutdown by not accepting any more async jobs + ManagementServerMaintenanceResponse prepareForShutdown(PrepareForShutdownCmd cmd); + + // Cancels the shutdown on the specified management server + ManagementServerMaintenanceResponse cancelShutdown(CancelShutdownCmd cmd); + + // Triggers a shutdown on the specified management server by not accepting any more async jobs and shutting down when there are no pending jobs + ManagementServerMaintenanceResponse triggerShutdown(TriggerShutdownCmd cmd); + + // Prepares the specified
management server to maintenance by migrating the agents and not accepting any more async jobs + ManagementServerMaintenanceResponse prepareForMaintenance(PrepareForMaintenanceCmd cmd); + + // Cancels maintenance of the specified management server + ManagementServerMaintenanceResponse cancelMaintenance(CancelMaintenanceCmd cmd); +} diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java new file mode 100644 index 000000000000..206173e6ecea --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java @@ -0,0 +1,584 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.maintenance; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Timer; +import java.util.TimerTask; + +import javax.inject.Inject; + +import org.apache.cloudstack.agent.lb.IndirectAgentLB; +import org.apache.cloudstack.api.command.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.CancelShutdownCmd; +import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.PrepareForShutdownCmd; +import org.apache.cloudstack.api.command.ReadyForShutdownCmd; +import org.apache.cloudstack.api.command.TriggerShutdownCmd; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; +import org.apache.cloudstack.config.ApiServiceConfiguration; +import org.apache.cloudstack.framework.config.ConfigKey; +import org.apache.cloudstack.framework.config.Configurable; +import org.apache.cloudstack.framework.jobs.AsyncJobManager; +import org.apache.cloudstack.management.ManagementServerHost.State; +import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.CancelShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.PrepareForMaintenanceManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.PrepareForShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.TriggerShutdownManagementServerHostCommand; +import org.apache.cloudstack.utils.identity.ManagementServerNode; +import org.apache.commons.collections.CollectionUtils; + +import com.cloud.agent.AgentManager; +import com.cloud.agent.api.Command; +import com.cloud.cluster.ClusterManager; +import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; +import com.cloud.host.dao.HostDao; +import com.cloud.serializer.GsonHelper; +import com.cloud.utils.StringUtils; +import 
com.cloud.utils.component.ManagerBase; +import com.cloud.utils.component.PluggableService; +import com.cloud.utils.exception.CloudRuntimeException; +import com.google.gson.Gson; + +public class ManagementServerMaintenanceManagerImpl extends ManagerBase implements ManagementServerMaintenanceManager, PluggableService, Configurable { + + Gson gson; + + @Inject + private AsyncJobManager jobManager; + @Inject + private ClusterManager clusterManager; + @Inject + private AgentManager agentMgr; + @Inject + private IndirectAgentLB indirectAgentLB; + @Inject + private ManagementServerHostDao msHostDao; + @Inject + private HostDao hostDao; + + private final List _listeners = new ArrayList<>(); + + private boolean shutdownTriggered = false; + private boolean preparingForShutdown = false; + private boolean preparingForMaintenance = false; + private long maintenanceStartTime = 0; + private String lbAlgorithm; + + private Timer timer = new Timer(); + private TimerTask pendingJobsTask; + + protected ManagementServerMaintenanceManagerImpl() { + super(); + gson = GsonHelper.getGson(); + } + + @Override + public boolean start() { + ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + if (msHost != null) { + State[] maintenanceStates = {State.PreparingForMaintenance, State.Maintenance}; + if (Arrays.asList(maintenanceStates).contains(msHost.getState())) { + this.preparingForMaintenance = true; + jobManager.disableAsyncJobs(); + msHostDao.updateState(msHost.getId(), State.Maintenance); + } + } + return true; + } + + @Override + public void registerListener(ManagementServerMaintenanceListener listener) { + synchronized (_listeners) { + logger.info("Register management server maintenance listener " + listener.getClass()); + _listeners.add(listener); + } + } + + @Override + public void unregisterListener(ManagementServerMaintenanceListener listener) { + synchronized (_listeners) { + logger.info("Unregister management server maintenance 
listener " + listener.getClass()); + _listeners.remove(listener); + } + } + + @Override + public void onMaintenance() { + synchronized (_listeners) { + for (final ManagementServerMaintenanceListener listener : _listeners) { + logger.info("Invoke, on maintenance for listener " + listener.getClass()); + listener.onManagementServerMaintenance(); + } + } + } + + @Override + public void onCancelMaintenance() { + synchronized (_listeners) { + for (final ManagementServerMaintenanceListener listener : _listeners) { + logger.info("Invoke, on cancel maintenance for listener " + listener.getClass()); + listener.onManagementServerCancelMaintenance(); + } + } + } + + @Override + public boolean isShutdownTriggered() { + return shutdownTriggered; + } + + @Override + public boolean isPreparingForShutdown() { + return preparingForShutdown; + } + + @Override + public boolean isPreparingForMaintenance() { + return preparingForMaintenance; + } + + @Override + public void resetPreparingForMaintenance() { + preparingForMaintenance = false; + maintenanceStartTime = 0; + lbAlgorithm = null; + } + + @Override + public long getMaintenanceStartTime() { + return maintenanceStartTime; + } + + @Override + public String getLbAlgorithm() { + return lbAlgorithm; + } + + @Override + public long countPendingJobs(Long... 
msIds) { + return jobManager.countPendingNonPseudoJobs(msIds); + } + + @Override + public boolean isAsyncJobsEnabled() { + return jobManager.isAsyncJobsEnabled(); + } + + @Override + public void triggerShutdown() { + if (this.shutdownTriggered) { + throw new CloudRuntimeException("Shutdown has already been triggered"); + } + this.shutdownTriggered = true; + prepareForShutdown(true); + } + + private void prepareForShutdown(boolean postTrigger) { + if (this.preparingForMaintenance) { + throw new CloudRuntimeException("Maintenance has already been initiated, cancel maintenance and try again"); + } + + // Ensure we don't throw an error if triggering a shutdown after just preparing for it + if (!postTrigger && this.preparingForShutdown) { + throw new CloudRuntimeException("Shutdown has already been triggered"); + } + this.preparingForShutdown = true; + jobManager.disableAsyncJobs(); + waitForPendingJobs(); + } + + @Override + public void prepareForShutdown() { + prepareForShutdown(false); + } + + @Override + public void cancelShutdown() { + if (!this.preparingForShutdown) { + throw new CloudRuntimeException("Shutdown has not been triggered"); + } + + this.preparingForShutdown = false; + this.shutdownTriggered = false; + jobManager.enableAsyncJobs(); + cancelWaitForPendingJobs(); + } + + @Override + public void prepareForMaintenance(String lbAlorithm) { + if (this.preparingForShutdown) { + throw new CloudRuntimeException("Shutdown has already been triggered, cancel shutdown and try again"); + } + + if (this.preparingForMaintenance) { + throw new CloudRuntimeException("Maintenance has already been initiated"); + } + this.preparingForMaintenance = true; + this.maintenanceStartTime = System.currentTimeMillis(); + this.lbAlgorithm = lbAlorithm; + jobManager.disableAsyncJobs(); + waitForPendingJobs(); + } + + @Override + public void cancelMaintenance() { + if (!this.preparingForMaintenance) { + throw new CloudRuntimeException("Maintenance has not been initiated"); + } + 
resetPreparingForMaintenance(); + jobManager.enableAsyncJobs(); + cancelWaitForPendingJobs(); + } + + private void waitForPendingJobs() { + if (this.pendingJobsTask != null) { + this.pendingJobsTask.cancel(); + this.pendingJobsTask = null; + } + this.pendingJobsTask = new CheckPendingJobsTask(this); + long pendingJobsCheckDelayInMs = 1000L; // 1 sec + long pendingJobsCheckPeriodInMs = 15L * 1000; // every 15 secs + timer.scheduleAtFixedRate(pendingJobsTask, pendingJobsCheckDelayInMs, pendingJobsCheckPeriodInMs); + } + + private void cancelWaitForPendingJobs() { + if (this.pendingJobsTask != null) { + this.pendingJobsTask.cancel(); + this.pendingJobsTask = null; + } + } + + @Override + public ManagementServerMaintenanceResponse readyForShutdown(ReadyForShutdownCmd cmd) { + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public ManagementServerMaintenanceResponse prepareForShutdown(PrepareForShutdownCmd cmd) { + ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); + if (msHost == null) { + throw new CloudRuntimeException("Unable to find the management server, cannot prepare for shutdown"); + } + + if (!State.Up.equals(msHost.getState())) { + throw new CloudRuntimeException("Management server is not in the right state to prepare for shutdown"); + } + + final Command[] cmds = new Command[1]; + cmds[0] = new PrepareForShutdownManagementServerHostCommand(msHost.getMsid()); + String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); + logger.info("PrepareForShutdownCmd result : " + result); + if (!result.startsWith("Success")) { + throw new CloudRuntimeException(result); + } + + msHostDao.updateState(msHost.getId(), State.PreparingForShutDown); + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public ManagementServerMaintenanceResponse triggerShutdown(TriggerShutdownCmd cmd) { + ManagementServerHostVO msHost = 
msHostDao.findById(cmd.getManagementServerId()); + if (msHost == null) { + throw new CloudRuntimeException("Unable to find the management server, cannot trigger shutdown"); + } + + if (!(State.Up.equals(msHost.getState()) || State.Maintenance.equals(msHost.getState()) || State.PreparingForShutDown.equals(msHost.getState()) || + State.ReadyToShutDown.equals(msHost.getState()))) { + throw new CloudRuntimeException("Management server is not in the right state to trigger shutdown"); + } + + if (State.Up.equals(msHost.getState())) { + msHostDao.updateState(msHost.getId(), State.PreparingForShutDown); + } + + final Command[] cmds = new Command[1]; + cmds[0] = new TriggerShutdownManagementServerHostCommand(msHost.getMsid()); + String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); + logger.info("TriggerShutdownCmd result : " + result); + if (!result.startsWith("Success")) { + throw new CloudRuntimeException(result); + } + + msHostDao.updateState(msHost.getId(), State.ShuttingDown); + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public ManagementServerMaintenanceResponse cancelShutdown(CancelShutdownCmd cmd) { + ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); + if (msHost == null) { + throw new CloudRuntimeException("Unable to find the management server, cannot cancel shutdown"); + } + + if (!(State.PreparingForShutDown.equals(msHost.getState()) || State.ReadyToShutDown.equals(msHost.getState()))) { + throw new CloudRuntimeException("Management server is not in the right state to cancel shutdown"); + } + + final Command[] cmds = new Command[1]; + cmds[0] = new CancelShutdownManagementServerHostCommand(msHost.getMsid()); + String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); + logger.info("CancelShutdownCmd result : " + result); + if (!result.startsWith("Success")) { + throw new CloudRuntimeException(result); 
+ } + + msHostDao.updateState(msHost.getId(), State.Up); + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public ManagementServerMaintenanceResponse prepareForMaintenance(PrepareForMaintenanceCmd cmd) { + if (StringUtils.isNotBlank(cmd.getAlgorithm())) { + indirectAgentLB.checkLBAlgorithmName(cmd.getAlgorithm()); + } + + final List activeMsList = msHostDao.listBy(State.Up); + if (CollectionUtils.isEmpty(activeMsList)) { + throw new CloudRuntimeException("Cannot prepare for maintenance, no active management servers found"); + } + + if (activeMsList.size() == 1) { + throw new CloudRuntimeException("Prepare for maintenance not supported, there is only one active management server"); + } + + ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); + if (msHost == null) { + throw new CloudRuntimeException("Cannot prepare for maintenance, unable to find the management server"); + } + + if (!State.Up.equals(msHost.getState())) { + throw new CloudRuntimeException("Management server is not in the right state to prepare for maintenance"); + } + + final List preparingForMaintenanceMsList = msHostDao.listBy(State.PreparingForMaintenance); + if (CollectionUtils.isNotEmpty(preparingForMaintenanceMsList)) { + throw new CloudRuntimeException("Cannot prepare for maintenance, there are other management servers preparing for maintenance"); + } + + if (indirectAgentLB.haveAgentBasedHosts(msHost.getMsid())) { + List indirectAgentMsList = indirectAgentLB.getManagementServerList(); + indirectAgentMsList.remove(msHost.getServiceIP()); + List nonUpMsList = msHostDao.listNonUpStateMsIPs(); + indirectAgentMsList.removeAll(nonUpMsList); + if (CollectionUtils.isEmpty(indirectAgentMsList)) { + throw new CloudRuntimeException(String.format("Cannot prepare for maintenance, no other active management servers found from '%s' setting", ApiServiceConfiguration.ManagementServerAddresses.key())); + } + } + + List lastAgents = 
hostDao.listByMs(cmd.getManagementServerId()); + agentMgr.setLastAgents(lastAgents); + + final Command[] cmds = new Command[1]; + cmds[0] = new PrepareForMaintenanceManagementServerHostCommand(msHost.getMsid(), cmd.getAlgorithm()); + String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); + logger.info("PrepareForMaintenanceCmd result : " + result); + if (!result.startsWith("Success")) { + agentMgr.setLastAgents(null); + throw new CloudRuntimeException(result); + } + + msHostDao.updateState(msHost.getId(), State.PreparingForMaintenance); + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public ManagementServerMaintenanceResponse cancelMaintenance(CancelMaintenanceCmd cmd) { + ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); + if (msHost == null) { + throw new CloudRuntimeException("Unable to find the management server, cannot cancel maintenance"); + } + + if (!(State.Maintenance.equals(msHost.getState()) || State.PreparingForMaintenance.equals(msHost.getState()))) { + throw new CloudRuntimeException("Management server is not in the right state to cancel maintenance"); + } + + final Command[] cmds = new Command[1]; + cmds[0] = new CancelMaintenanceManagementServerHostCommand(msHost.getMsid()); + String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); + logger.info("CancelMaintenanceCmd result : " + result); + if (!result.startsWith("Success")) { + throw new CloudRuntimeException(result); + } + + msHostDao.updateState(msHost.getId(), State.Up); + agentMgr.setLastAgents(null); + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public void cancelPreparingForMaintenance(ManagementServerHostVO msHost) { + resetPreparingForMaintenance(); + jobManager.enableAsyncJobs(); + if (msHost == null) { + msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + } + 
msHostDao.updateState(msHost.getId(), State.Up); + } + + private ManagementServerMaintenanceResponse prepareMaintenanceResponse(Long managementServerId) { + ManagementServerHostVO msHost; + Long[] msIds; + if (managementServerId == null) { + msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + } else { + msHost = msHostDao.findById(managementServerId); + } + if (msHost == null) { + throw new CloudRuntimeException("Unable to find the management server"); + } + + State[] maintenanceStates = {State.PreparingForMaintenance, State.Maintenance}; + State[] shutdownStates = {State.ShuttingDown, State.PreparingForShutDown, State.ReadyToShutDown}; + boolean maintenanceInitiatedForMS = Arrays.asList(maintenanceStates).contains(msHost.getState()); + boolean shutdownTriggeredForMS = Arrays.asList(shutdownStates).contains(msHost.getState()); + msIds = new Long[]{msHost.getMsid()}; + List agents = hostDao.listByMs(msHost.getMsid()); // use the resolved host's msid: managementServerId may be null here, and CheckPendingJobsTask also counts agents by msid + long agentsCount = hostDao.countByMs(msHost.getMsid()); // same key as the agents list above so count and list agree + long pendingJobCount = countPendingJobs(msIds); + return new ManagementServerMaintenanceResponse(msHost.getUuid(), msHost.getState(), maintenanceInitiatedForMS, shutdownTriggeredForMS, pendingJobCount == 0, pendingJobCount, agentsCount, agents); + } + + @Override + public List> getCommands() { + final List> cmdList = new ArrayList<>(); + cmdList.add(PrepareForMaintenanceCmd.class); + cmdList.add(CancelMaintenanceCmd.class); + cmdList.add(PrepareForShutdownCmd.class); + cmdList.add(CancelShutdownCmd.class); + cmdList.add(ReadyForShutdownCmd.class); + cmdList.add(TriggerShutdownCmd.class); + return cmdList; + } + + @Override + public String getConfigComponentName() { + return ManagementServerMaintenanceManager.class.getSimpleName(); + } + + @Override + public ConfigKey[] getConfigKeys() { + return new ConfigKey[]{ + ManagementServerMaintenanceTimeoutInMins + }; + } + + private final class CheckPendingJobsTask extends TimerTask { + + private
ManagementServerMaintenanceManager managementServerMaintenanceManager; + private boolean agentsTransferTriggered = false; + + public CheckPendingJobsTask(ManagementServerMaintenanceManager managementServerMaintenanceManager) { + this.managementServerMaintenanceManager = managementServerMaintenanceManager; + } + + @Override + public void run() { + try { + // If the maintenance or shutdown has been cancelled + if (!(managementServerMaintenanceManager.isPreparingForMaintenance() || managementServerMaintenanceManager.isPreparingForShutdown())) { + logger.info("Maintenance/Shutdown cancelled, terminating the pending jobs check timer task"); + this.cancel(); + return; + } + + if (managementServerMaintenanceManager.isPreparingForMaintenance() && isMaintenanceWindowExpired()) { + logger.debug("Maintenance window timeout, terminating the pending jobs check timer task"); + managementServerMaintenanceManager.cancelPreparingForMaintenance(null); + this.cancel(); + return; + } + + long totalPendingJobs = managementServerMaintenanceManager.countPendingJobs(ManagementServerNode.getManagementServerId()); + int totalAgents = hostDao.countByMs(ManagementServerNode.getManagementServerId()); + String msg = String.format("Checking for triggered maintenance or shutdown... shutdownTriggered [%b] AllowAsyncJobs [%b] PendingJobCount [%d] AgentsCount [%d]", + managementServerMaintenanceManager.isShutdownTriggered(), managementServerMaintenanceManager.isAsyncJobsEnabled(), totalPendingJobs, totalAgents); + logger.debug(msg); + + if (totalPendingJobs > 0) { + logger.info(String.format("There are %d pending jobs, trying again later", totalPendingJobs)); + return; + } + + // No more pending jobs. Good to terminate + if (managementServerMaintenanceManager.isShutdownTriggered()) { + logger.info("Shutting down now"); + // update state to down ? 
+ System.exit(0); + } + if (managementServerMaintenanceManager.isPreparingForMaintenance()) { + ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + if (totalAgents == 0) { + msHostDao.updateState(msHost.getId(), State.Maintenance); + this.cancel(); + return; + } + + if (agentsTransferTriggered) { + logger.info(String.format("There are %d agents, trying again later", totalAgents)); + return; + } + + agentsTransferTriggered = true; + logger.info(String.format("Preparing for maintenance - migrating agents from management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid())); + boolean agentsMigrated = indirectAgentLB.migrateAgents(msHost.getUuid(), ManagementServerNode.getManagementServerId(), managementServerMaintenanceManager.getLbAlgorithm(), remainingMaintenanceWindowInMs()); + if (!agentsMigrated) { + logger.warn(String.format("Unable to prepare for maintenance, cannot migrate indirect agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid())); + managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost); + this.cancel(); + return; + } + + if(!agentMgr.transferDirectAgentsFromMS(msHost.getUuid(), ManagementServerNode.getManagementServerId(), remainingMaintenanceWindowInMs())) { + logger.warn(String.format("Unable to prepare for maintenance, cannot transfer direct agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid())); + managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost); + this.cancel(); + return; + } + } else if (managementServerMaintenanceManager.isPreparingForShutdown()) { + logger.info("Ready to shutdown"); + ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + msHostDao.updateState(msHost.getId(), State.ReadyToShutDown); + this.cancel(); + return; + } + } catch (final 
Exception e) { + logger.error("Error trying to check/run pending jobs task", e); + } + } + + private boolean isMaintenanceWindowExpired() { + long maintenanceElapsedTimeInMs = System.currentTimeMillis() - managementServerMaintenanceManager.getMaintenanceStartTime(); + if (maintenanceElapsedTimeInMs >= (ManagementServerMaintenanceTimeoutInMins.value().longValue() * 60 * 1000)) { + return true; + } + return false; + } + + private long remainingMaintenanceWindowInMs() { + long maintenanceElapsedTimeInMs = System.currentTimeMillis() - managementServerMaintenanceManager.getMaintenanceStartTime(); + long remainingMaintenanceWindowTimeInMs = (ManagementServerMaintenanceTimeoutInMins.value().longValue() * 60 * 1000) - maintenanceElapsedTimeInMs; + return (remainingMaintenanceWindowTimeInMs > 0) ? remainingMaintenanceWindowTimeInMs : 0; + } + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/BaseShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/BaseShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/BaseShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/BaseShutdownManagementServerHostCommand.java index 8fe33317bc0c..093a5d35eba3 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/BaseShutdownManagementServerHostCommand.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/BaseShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License. 
-package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; import com.cloud.agent.api.Command; diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java new file mode 100644 index 000000000000..50eb73b7bca2 --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ + +package org.apache.cloudstack.maintenance.command; + +public class CancelMaintenanceManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { + + public CancelMaintenanceManagementServerHostCommand(long msId) { + super(msId); + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/CancelShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/CancelShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelShutdownManagementServerHostCommand.java index eef44446aa14..2cbdbd2f07a5 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/CancelShutdownManagementServerHostCommand.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License. -package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; public class CancelShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java new file mode 100644 index 000000000000..8f2a4e62b32d --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +package org.apache.cloudstack.maintenance.command; + +public class PrepareForMaintenanceManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { + String lbAlgorithm; + + public PrepareForMaintenanceManagementServerHostCommand(long msId) { + super(msId); + } + + public PrepareForMaintenanceManagementServerHostCommand(long msId, String lbAlgorithm) { + super(msId); + this.lbAlgorithm = lbAlgorithm; + } + + public String getLbAlgorithm() { + return lbAlgorithm; + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/PrepareForShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/PrepareForShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForShutdownManagementServerHostCommand.java index 32a9201d5516..15f04ae11e6f 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/PrepareForShutdownManagementServerHostCommand.java +++ 
b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License. -package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; public class PrepareForShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/TriggerShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/TriggerShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/TriggerShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/TriggerShutdownManagementServerHostCommand.java index e0d1879fa358..41e2e7e86a02 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/TriggerShutdownManagementServerHostCommand.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/TriggerShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License. 
-package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; public class TriggerShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { diff --git a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/module.properties b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/module.properties similarity index 97% rename from plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/module.properties rename to plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/module.properties index fd85c3085ca1..547afff867f7 100644 --- a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/module.properties +++ b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/module.properties @@ -14,5 +14,5 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -name=shutdown +name=maintenance parent=api diff --git a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml similarity index 83% rename from plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml rename to plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml index 5318b3bf4462..bc5504634cee 100644 --- a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml +++ b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml @@ -22,8 +22,8 @@ http://www.springframework.org/schema/beans/spring-beans.xsd" > - - + + diff --git a/plugins/shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java 
b/plugins/maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java similarity index 84% rename from plugins/shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java rename to plugins/maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java index 9f75251c93f9..8e1c09bf9959 100644 --- a/plugins/shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java +++ b/plugins/maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java @@ -15,18 +15,15 @@ // specific language governing permissions and limitations // under the License. -package org.apache.cloudstack.shutdown; +package org.apache.cloudstack.maintenance; import org.apache.cloudstack.framework.jobs.AsyncJobManager; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.Mockito; -import org.mockito.MockitoAnnotations; import org.mockito.Spy; import org.mockito.junit.MockitoJUnitRunner; @@ -34,20 +31,14 @@ @RunWith(MockitoJUnitRunner.class) -public class ShutdownManagerImplTest { +public class ManagementServerMaintenanceManagerImplTest { @Spy @InjectMocks - ShutdownManagerImpl spy; + ManagementServerMaintenanceManagerImpl spy; @Mock AsyncJobManager jobManagerMock; - private AutoCloseable closeable; - - @Before - public void setUp() throws Exception { - closeable = MockitoAnnotations.openMocks(this); - } private long prepareCountPendingJobs() { long expectedCount = 1L; @@ -79,14 +70,8 @@ public void prepareForShutdown() { spy.prepareForShutdown(); }); - Mockito.doNothing().when(jobManagerMock).enableAsyncJobs(); spy.cancelShutdown(); Mockito.verify(jobManagerMock).enableAsyncJobs(); } - - @After - public void tearDown() throws Exception { - closeable.close(); - } } diff 
--git a/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java b/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java index 8c93f2e1f446..ba4822fa8528 100644 --- a/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java +++ b/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java @@ -20,6 +20,8 @@ * metric local api constants */ public interface MetricConstants { + String LAST_AGENTS = "lastagents"; + String AGENTS = "agents"; String AGENT_COUNT = "agentcount"; String AVAILABLE_PROCESSORS = "availableprocessors"; String CONNECTIONS = "connections"; diff --git a/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java b/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java index 51524c129121..9fca23dcee3f 100644 --- a/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java +++ b/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java @@ -895,6 +895,8 @@ private void copyManagementServerStatusToResponse(ManagementServerMetricsRespons metricsResponse.setDbLocal(status.isDbLocal()); metricsResponse.setUsageLocal(status.isUsageLocal()); metricsResponse.setAvailableProcessors(status.getAvailableProcessors()); + metricsResponse.setLastAgents(status.getLastAgents()); + metricsResponse.setAgents(status.getAgents()); metricsResponse.setAgentCount(status.getAgentCount()); metricsResponse.setCollectionTime(status.getCollectionTime()); metricsResponse.setSessions(status.getSessions()); diff --git a/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java b/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java index 95c3fd09c072..d96f5b14f0da 100644 --- a/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java +++ 
b/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java @@ -22,6 +22,7 @@ import org.apache.cloudstack.api.response.ManagementServerResponse; import java.util.Date; +import java.util.List; public class ManagementServerMetricsResponse extends ManagementServerResponse { @@ -29,6 +30,14 @@ public class ManagementServerMetricsResponse extends ManagementServerResponse { @Param(description = "the number of processors available to the JVM") private Integer availableProcessors; + @SerializedName(MetricConstants.LAST_AGENTS) + @Param(description = "the last agents this Management Server is responsible for, before preparing for maintenance", since = "4.18.1") + private List lastAgents; + + @SerializedName(MetricConstants.AGENTS) + @Param(description = "the agents this Management Server is responsible for", since = "4.18.1") + private List agents; + @SerializedName(MetricConstants.AGENT_COUNT) @Param(description = "the number of agents this Management Server is responsible for") private Integer agentCount; @@ -121,6 +130,14 @@ public void setAvailableProcessors(int availableProcessors) { this.availableProcessors = availableProcessors; } + public void setLastAgents(List lastAgents) { + this.lastAgents = lastAgents; + } + + public void setAgents(List agents) { + this.agents = agents; + } + public void setAgentCount(int agentCount) { this.agentCount = agentCount; } diff --git a/plugins/pom.xml b/plugins/pom.xml index 3d5da1d59ac1..1667e151cfc5 100755 --- a/plugins/pom.xml +++ b/plugins/pom.xml @@ -118,7 +118,7 @@ outofbandmanagement-drivers/nested-cloudstack outofbandmanagement-drivers/redfish - shutdown + maintenance storage/sharedfs/storagevm storage/image/default diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java b/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java deleted file mode 100644 index 22f43cb4f626..000000000000 --- 
a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java +++ /dev/null @@ -1,60 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.cloudstack.shutdown; - -import org.apache.cloudstack.api.command.CancelShutdownCmd; -import org.apache.cloudstack.api.command.PrepareForShutdownCmd; -import org.apache.cloudstack.api.command.ReadyForShutdownCmd; -import org.apache.cloudstack.api.command.TriggerShutdownCmd; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; - -public interface ShutdownManager { - // Returns the number of pending jobs for the given Management server msids. - // NOTE: This is the msid and NOT the id - long countPendingJobs(Long... 
msIds); - - // Indicates whether a shutdown has been triggered on the current management server - boolean isShutdownTriggered(); - - // Indicates whether the current management server is preparing to shutdown - boolean isPreparingForShutdown(); - - // Triggers a shutdown on the current management server by not accepting any more async jobs and shutting down when there are no pending jobs - void triggerShutdown(); - - // Prepares the current management server to shutdown by not accepting any more async jobs - void prepareForShutdown(); - - // Cancels the shutdown on the current management server - void cancelShutdown(); - - // Returns whether the given ms can be shut down - ReadyForShutdownResponse readyForShutdown(Long managementserverid); - - // Returns whether the any of the ms can be shut down and if a shutdown has been triggered on any running ms - ReadyForShutdownResponse readyForShutdown(ReadyForShutdownCmd cmd); - - // Prepares the specified management server to shutdown by not accepting any more async jobs - ReadyForShutdownResponse prepareForShutdown(PrepareForShutdownCmd cmd); - - // Cancels the shutdown on the specified management server - ReadyForShutdownResponse cancelShutdown(CancelShutdownCmd cmd); - - // Triggers a shutdown on the specified management server by not accepting any more async jobs and shutting down when there are no pending jobs - ReadyForShutdownResponse triggerShutdown(TriggerShutdownCmd cmd); -} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java b/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java deleted file mode 100644 index c33243357fcd..000000000000 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java +++ /dev/null @@ -1,265 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.cloudstack.shutdown; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Timer; -import java.util.TimerTask; - -import javax.inject.Inject; - -import org.apache.cloudstack.api.command.CancelShutdownCmd; -import org.apache.cloudstack.api.command.PrepareForShutdownCmd; -import org.apache.cloudstack.api.command.ReadyForShutdownCmd; -import org.apache.cloudstack.api.command.TriggerShutdownCmd; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; -import org.apache.cloudstack.framework.jobs.AsyncJobManager; -import org.apache.cloudstack.management.ManagementServerHost.State; -import org.apache.cloudstack.shutdown.command.CancelShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.PrepareForShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.TriggerShutdownManagementServerHostCommand; -import org.apache.cloudstack.utils.identity.ManagementServerNode; - -import com.cloud.agent.api.Command; -import com.cloud.cluster.ClusterManager; -import com.cloud.cluster.ManagementServerHostVO; -import com.cloud.cluster.dao.ManagementServerHostDao; -import com.cloud.serializer.GsonHelper; -import com.cloud.utils.component.ManagerBase; -import 
com.cloud.utils.component.PluggableService; -import com.cloud.utils.exception.CloudRuntimeException; -import com.google.gson.Gson; - -public class ShutdownManagerImpl extends ManagerBase implements ShutdownManager, PluggableService{ - Gson gson; - - @Inject - private AsyncJobManager jobManager; - @Inject - private ManagementServerHostDao msHostDao; - @Inject - private ClusterManager clusterManager; - - private boolean shutdownTriggered = false; - private boolean preparingForShutdown = false; - - private Timer timer = new Timer(); - private TimerTask shutdownTask; - - protected ShutdownManagerImpl() { - super(); - gson = GsonHelper.getGson(); - } - - @Override - public boolean isShutdownTriggered() { - return shutdownTriggered; - } - - @Override - public boolean isPreparingForShutdown() { - return preparingForShutdown; - } - - @Override - public long countPendingJobs(Long... msIds) { - return jobManager.countPendingNonPseudoJobs(msIds); - } - - @Override - public void triggerShutdown() { - if (this.shutdownTriggered) { - throw new CloudRuntimeException("A shutdown has already been triggered"); - } - this.shutdownTriggered = true; - prepareForShutdown(true); - } - - private void prepareForShutdown(boolean postTrigger) { - // Ensure we don't throw an error if triggering a shutdown after just preparing for it - if (!postTrigger && this.preparingForShutdown) { - throw new CloudRuntimeException("A shutdown has already been triggered"); - } - this.preparingForShutdown = true; - jobManager.disableAsyncJobs(); - if (this.shutdownTask != null) { - this.shutdownTask.cancel(); - this.shutdownTask = null; - } - this.shutdownTask = new ShutdownTask(this); - long period = 30L * 1000; - long delay = period / 2; - logger.debug(String.format("Scheduling shutdown task with delay: %d and period: %d", delay, period)); - timer.scheduleAtFixedRate(shutdownTask, delay, period); - } - - @Override - public void prepareForShutdown() { - prepareForShutdown(false); - } - - @Override - public 
void cancelShutdown() { - if (!this.preparingForShutdown) { - throw new CloudRuntimeException("A shutdown has not been triggered"); - } - - this.preparingForShutdown = false; - this.shutdownTriggered = false; - jobManager.enableAsyncJobs(); - if (shutdownTask != null) { - shutdownTask.cancel(); - } - shutdownTask = null; - } - - @Override - public ReadyForShutdownResponse readyForShutdown(Long managementserverid) { - Long[] msIds = null; - boolean shutdownTriggeredAnywhere = false; - State[] shutdownTriggeredStates = {State.ShuttingDown, State.PreparingToShutDown, State.ReadyToShutDown}; - if (managementserverid == null) { - List msHosts = msHostDao.listBy(shutdownTriggeredStates); - if (msHosts != null && !msHosts.isEmpty()) { - msIds = new Long[msHosts.size()]; - for (int i = 0; i < msHosts.size(); i++) { - msIds[i] = msHosts.get(i).getMsid(); - } - shutdownTriggeredAnywhere = !msHosts.isEmpty(); - } - } else { - ManagementServerHostVO msHost = msHostDao.findById(managementserverid); - msIds = new Long[]{msHost.getMsid()}; - shutdownTriggeredAnywhere = Arrays.asList(shutdownTriggeredStates).contains(msHost.getState()); - } - long pendingJobCount = countPendingJobs(msIds); - return new ReadyForShutdownResponse(managementserverid, shutdownTriggeredAnywhere, pendingJobCount == 0, pendingJobCount); - } - - @Override - public ReadyForShutdownResponse readyForShutdown(ReadyForShutdownCmd cmd) { - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public ReadyForShutdownResponse prepareForShutdown(PrepareForShutdownCmd cmd) { - ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); - final Command[] cmds = new Command[1]; - cmds[0] = new PrepareForShutdownManagementServerHostCommand(msHost.getMsid()); - String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); - logger.info("PrepareForShutdownCmd result : " + result); - if (!result.contains("Success")) { - throw new 
CloudRuntimeException(result); - } - - msHost.setState(State.PreparingToShutDown); - msHostDao.persist(msHost); - - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public ReadyForShutdownResponse triggerShutdown(TriggerShutdownCmd cmd) { - ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); - final Command[] cmds = new Command[1]; - cmds[0] = new TriggerShutdownManagementServerHostCommand(msHost.getMsid()); - String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); - logger.info("TriggerShutdownCmd result : " + result); - if (!result.contains("Success")) { - throw new CloudRuntimeException(result); - } - - msHost.setState(State.ShuttingDown); - msHostDao.persist(msHost); - - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public ReadyForShutdownResponse cancelShutdown(CancelShutdownCmd cmd) { - ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); - final Command[] cmds = new Command[1]; - cmds[0] = new CancelShutdownManagementServerHostCommand(msHost.getMsid()); - String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); - logger.info("CancelShutdownCmd result : " + result); - if (!result.contains("Success")) { - throw new CloudRuntimeException(result); - } - - msHost.setState(State.Up); - msHostDao.persist(msHost); - - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public List> getCommands() { - final List> cmdList = new ArrayList<>(); - cmdList.add(CancelShutdownCmd.class); - cmdList.add(PrepareForShutdownCmd.class); - cmdList.add(ReadyForShutdownCmd.class); - cmdList.add(TriggerShutdownCmd.class); - return cmdList; - } - - private final class ShutdownTask extends TimerTask { - - private ShutdownManager shutdownManager; - - public ShutdownTask(ShutdownManager shutdownManager) { - this.shutdownManager = shutdownManager; - } - - @Override - 
public void run() { - try { - Long totalPendingJobs = shutdownManager.countPendingJobs(ManagementServerNode.getManagementServerId()); - String msg = String.format("Checking for triggered shutdown... shutdownTriggered [%b] AllowAsyncJobs [%b] PendingJobCount [%d]", - shutdownManager.isShutdownTriggered(), shutdownManager.isPreparingForShutdown(), totalPendingJobs); - logger.info(msg); - - // If the shutdown has been cancelled - if (!shutdownManager.isPreparingForShutdown()) { - logger.info("Shutdown cancelled. Terminating the shutdown timer task"); - this.cancel(); - return; - } - - // No more pending jobs. Good to terminate - if (totalPendingJobs == 0) { - if (shutdownManager.isShutdownTriggered()) { - logger.info("Shutting down now"); - System.exit(0); - } - if (shutdownManager.isPreparingForShutdown()) { - logger.info("Ready to shutdown"); - ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); - msHost.setState(State.ReadyToShutDown); - msHostDao.persist(msHost); - } - } - - logger.info("Pending jobs. 
Trying again later"); - } catch (final Exception e) { - logger.error("Error trying to run shutdown task", e); - } - } - } -} diff --git a/server/src/main/java/com/cloud/api/ApiDispatcher.java b/server/src/main/java/com/cloud/api/ApiDispatcher.java index d8eb26ea0a71..6a43ff10f31a 100644 --- a/server/src/main/java/com/cloud/api/ApiDispatcher.java +++ b/server/src/main/java/com/cloud/api/ApiDispatcher.java @@ -94,7 +94,7 @@ public void dispatchCreateCmd(final BaseAsyncCreateCmd cmd, final Map, Integer> searchForServerIdsAndCount(ListHostsCmd cmd) { Long startIndex = cmd.getStartIndex(); Long pageSize = cmd.getPageSizeVal(); Hypervisor.HypervisorType hypervisorType = cmd.getHypervisor(); + Long msId = cmd.getManagementServerId(); Filter searchFilter = new Filter(HostVO.class, "id", Boolean.TRUE, startIndex, pageSize); @@ -2368,6 +2369,7 @@ public Pair, Integer> searchForServerIdsAndCount(ListHostsCmd cmd) { hostSearchBuilder.and("clusterId", hostSearchBuilder.entity().getClusterId(), SearchCriteria.Op.EQ); hostSearchBuilder.and("resourceState", hostSearchBuilder.entity().getResourceState(), SearchCriteria.Op.EQ); hostSearchBuilder.and("hypervisor_type", hostSearchBuilder.entity().getHypervisorType(), SearchCriteria.Op.EQ); + hostSearchBuilder.and("mgmt_server_id", hostSearchBuilder.entity().getManagementServerId(), SearchCriteria.Op.EQ); if (keyword != null) { hostSearchBuilder.and().op("keywordName", hostSearchBuilder.entity().getName(), SearchCriteria.Op.LIKE); @@ -2448,6 +2450,13 @@ public Pair, Integer> searchForServerIdsAndCount(ListHostsCmd cmd) { sc.setParameters("hypervisor_type", hypervisorType); } + if (msId != null) { + ManagementServerHostVO msHost = msHostDao.findById(msId); + if (msHost != null) { + sc.setParameters("mgmt_server_id", msHost.getMsid()); + } + } + Pair, Integer> uniqueHostPair = hostDao.searchAndCount(sc, searchFilter); Integer count = uniqueHostPair.second(); List hostIds = 
uniqueHostPair.first().stream().map(HostVO::getId).collect(Collectors.toList()); @@ -5426,6 +5435,7 @@ protected ManagementServerResponse createManagementServerResponse(ManagementServ mgmtResponse.addPeer(createPeerManagementServerNodeResponse(peer)); } } + mgmtResponse.setAgentsCount((long) hostDao.countByMs(mgmt.getMsid())); mgmtResponse.setObjectName("managementserver"); return mgmtResponse; } diff --git a/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java index 319e08deb39d..08b896edb17a 100644 --- a/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java @@ -20,6 +20,8 @@ import java.util.List; +import javax.inject.Inject; + import org.springframework.stereotype.Component; import org.apache.cloudstack.api.ResponseObject; @@ -29,6 +31,8 @@ import com.cloud.api.ApiSerializerHelper; import com.cloud.api.SerializationContext; import com.cloud.api.query.vo.AsyncJobJoinVO; +import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; import com.cloud.utils.db.GenericDaoBase; import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; @@ -36,6 +40,9 @@ @Component public class AsyncJobJoinDaoImpl extends GenericDaoBase implements AsyncJobJoinDao { + @Inject + private ManagementServerHostDao managementServerHostDao; + private final SearchBuilder jobIdSearch; protected AsyncJobJoinDaoImpl() { @@ -63,7 +70,13 @@ public AsyncJobResponse newAsyncJobResponse(final AsyncJobJoinVO job) { jobResponse.setJobId(job.getUuid()); jobResponse.setJobStatus(job.getStatus()); jobResponse.setJobProcStatus(job.getProcessStatus()); - jobResponse.setMsid(job.getExecutingMsid()); + if (job.getExecutingMsid() != null) { + ManagementServerHostVO managementServer = managementServerHostDao.findByMsid(job.getExecutingMsid()); + if (managementServer != 
null) { + jobResponse.setManagementServerId(managementServer.getUuid()); + jobResponse.setManagementServerName(managementServer.getName()); + } + } if (job.getInstanceType() != null && job.getInstanceId() != null) { jobResponse.setJobInstanceType(job.getInstanceType().toString()); diff --git a/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java index 42966435d4a8..feee12dcb205 100644 --- a/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java @@ -58,6 +58,8 @@ import com.cloud.utils.db.GenericDaoBase; import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.dao.VMInstanceDao; @Component public class HostJoinDaoImpl extends GenericDaoBase implements HostJoinDao { @@ -73,6 +75,8 @@ public class HostJoinDaoImpl extends GenericDaoBase implements @Inject private ManagementServerHostDao managementServerHostDao; @Inject + private VMInstanceDao virtualMachineDao; + @Inject private AnnotationDao annotationDao; @Inject private AccountManager accountManager; @@ -126,12 +130,19 @@ private void setNewHostResponseBase(HostJoinVO host, EnumSet detail hostResponse.setHypervisor(hypervisorType); } hostResponse.setHostType(host.getType()); + if (host.getType().equals(Host.Type.ConsoleProxy) || host.getType().equals(Host.Type.SecondaryStorageVM)) { + VMInstanceVO vm = virtualMachineDao.findVMByInstanceNameIncludingRemoved(host.getName()); + if (vm != null) { + hostResponse.setVirtualMachineId(vm.getUuid()); + } + } hostResponse.setLastPinged(new Date(host.getLastPinged())); Long mshostId = host.getManagementServerId(); if (mshostId != null) { ManagementServerHostVO managementServer = managementServerHostDao.findByMsid(host.getManagementServerId()); if (managementServer != null) { 
hostResponse.setManagementServerId(managementServer.getUuid()); + hostResponse.setManagementServerName(managementServer.getName()); } } hostResponse.setName(host.getName()); diff --git a/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java b/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java index 06ccc1a63f7a..f99063c7feca 100644 --- a/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java +++ b/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java @@ -85,13 +85,16 @@ public void processHostAdded(long hostId) { @Override public void processConnect(Host host, StartupCommand cmd, boolean forRebalance) throws ConnectionException { - if (cmd instanceof StartupRoutingCommand) { - if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.XenServer || + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.XenServer || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) { - /*TODO: Get the private/public keys here*/ + /*TODO: Get the private/public keys here*/ - String pubKey = _configDao.getValue("ssh.publickey"); - String prvKey = _configDao.getValue("ssh.privatekey"); + String pubKey = _configDao.getValue("ssh.publickey"); + String prvKey = _configDao.getValue("ssh.privatekey"); try { ModifySshKeysCommand cmds = new ModifySshKeysCommand(pubKey, prvKey); diff --git a/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java b/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java index 067f2fbdbb26..0c37336c09bc 100644 --- a/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java +++ b/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java @@ -164,22 +164,23 
@@ public void processConnect(Host host, StartupCommand cmd, boolean forRebalance) if (logger.isInfoEnabled()) logger.info("Received a host startup notification"); - if (cmd instanceof StartupRoutingCommand) { - //if (Boolean.toString(true).equals(host.getDetail("can_bridge_firewall"))) { - try { - int interval = MIN_TIME_BETWEEN_CLEANUPS + _cleanupRandom.nextInt(MIN_TIME_BETWEEN_CLEANUPS / 2); - CleanupNetworkRulesCmd cleanupCmd = new CleanupNetworkRulesCmd(interval); - Commands c = new Commands(cleanupCmd); - _agentMgr.send(host.getId(), c, this); - if (logger.isInfoEnabled()) - logger.info("Scheduled network rules cleanup, interval=" + cleanupCmd.getInterval()); - } catch (AgentUnavailableException e) { - //usually hypervisors that do not understand sec group rules. - logger.debug("Unable to schedule network rules cleanup for host {}", host, e); - } - if (_workTracker != null) { - _workTracker.processConnect(host.getId()); - } + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + try { + int interval = MIN_TIME_BETWEEN_CLEANUPS + _cleanupRandom.nextInt(MIN_TIME_BETWEEN_CLEANUPS / 2); + CleanupNetworkRulesCmd cleanupCmd = new CleanupNetworkRulesCmd(interval); + Commands c = new Commands(cleanupCmd); + _agentMgr.send(host.getId(), c, this); + if (logger.isInfoEnabled()) + logger.info("Scheduled network rules cleanup, interval=" + cleanupCmd.getInterval()); + } catch (AgentUnavailableException e) { + //usually hypervisors that do not understand sec group rules. 
+ logger.debug("Unable to schedule network rules cleanup for host {}", host, e); + } + if (_workTracker != null) { + _workTracker.processConnect(host.getId()); } } diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 50116905bfe2..41e767a54a77 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -57,9 +57,9 @@ import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd; @@ -1284,7 +1284,7 @@ public Cluster updateCluster(UpdateClusterCmd cmd) { } @Override - public Host cancelMaintenance(final CancelMaintenanceCmd cmd) { + public Host cancelMaintenance(final CancelHostMaintenanceCmd cmd) { final Long hostId = cmd.getId(); // verify input parameters @@ -1501,7 +1501,7 @@ public boolean maintain(final long hostId) throws AgentUnavailableException { } @Override - public Host maintain(final PrepareForMaintenanceCmd cmd) { + public Host maintain(final PrepareForHostMaintenanceCmd cmd) { final Long hostId = cmd.getId(); final HostVO host = _hostDao.findById(hostId); diff --git 
a/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java index 72c289530215..b0f11e4fcbae 100644 --- a/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java @@ -32,7 +32,7 @@ import org.apache.cloudstack.affinity.AffinityGroupProcessor; import org.apache.cloudstack.api.ApiCommandResourceType; import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.resource.StartRollingMaintenanceCmd; import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.framework.config.ConfigKey; @@ -405,7 +405,7 @@ private Ternary performMaintenanceStageOnHost(Host hos */ private void putHostIntoMaintenance(Host host) throws InterruptedException, AgentUnavailableException { logger.debug(String.format("Trying to set %s into maintenance", host)); - PrepareForMaintenanceCmd cmd = new PrepareForMaintenanceCmd(); + PrepareForHostMaintenanceCmd cmd = new PrepareForHostMaintenanceCmd(); cmd.setId(host.getId()); resourceManager.maintain(cmd); waitForHostInMaintenance(host.getId()); diff --git a/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java b/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java index 172ab1e83eb5..c23e8ed2c9d1 100644 --- a/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java +++ b/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java @@ -19,6 +19,7 @@ package com.cloud.server; import java.util.Date; +import java.util.List; public class ManagementServerHostStatsEntry implements ManagementServerHostStats { @@ -45,6 +46,8 @@ public class 
ManagementServerHostStatsEntry implements ManagementServerHostStats private String jvmVendor; private String jvmVersion; private String osDistribution; + private List lastAgents; + private List agents; private int agentCount; private long heapMemoryUsed; @@ -199,6 +202,16 @@ public String getOsDistribution() { return osDistribution; } + @Override + public List getLastAgents() { + return lastAgents; + } + + @Override + public List getAgents() { + return agents; + } + @Override public int getAgentCount() { return agentCount; @@ -290,6 +303,14 @@ public void setOsDistribution(String osDistribution) { this.osDistribution = osDistribution; } + public void setLastAgents(List lastAgents) { + this.lastAgents = lastAgents; + } + + public void setAgents(List agents) { + this.agents = agents; + } + public void setAgentCount(int agentCount) { this.agentCount = agentCount; } diff --git a/server/src/main/java/com/cloud/server/ManagementServerImpl.java b/server/src/main/java/com/cloud/server/ManagementServerImpl.java index 76d2943e18c8..790e4bbbd381 100644 --- a/server/src/main/java/com/cloud/server/ManagementServerImpl.java +++ b/server/src/main/java/com/cloud/server/ManagementServerImpl.java @@ -101,13 +101,13 @@ import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; import org.apache.cloudstack.api.command.admin.host.DeleteHostCmd; import org.apache.cloudstack.api.command.admin.host.FindHostsForMigrationCmd; import org.apache.cloudstack.api.command.admin.host.ListHostTagsCmd; import org.apache.cloudstack.api.command.admin.host.ListHostsCmd; -import 
org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.ReleaseHostReservationCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; @@ -3508,14 +3508,14 @@ public List> getCommands() { cmdList.add(MoveDomainCmd.class); cmdList.add(AddHostCmd.class); cmdList.add(AddSecondaryStorageCmd.class); - cmdList.add(CancelMaintenanceCmd.class); + cmdList.add(CancelHostMaintenanceCmd.class); cmdList.add(CancelHostAsDegradedCmd.class); cmdList.add(DeclareHostAsDegradedCmd.class); cmdList.add(DeleteHostCmd.class); cmdList.add(ListHostsCmd.class); cmdList.add(ListHostTagsCmd.class); cmdList.add(FindHostsForMigrationCmd.class); - cmdList.add(PrepareForMaintenanceCmd.class); + cmdList.add(PrepareForHostMaintenanceCmd.class); cmdList.add(ReconnectHostCmd.class); cmdList.add(UpdateHostCmd.class); cmdList.add(UpdateHostPasswordCmd.class); diff --git a/server/src/main/java/com/cloud/server/StatsCollector.java b/server/src/main/java/com/cloud/server/StatsCollector.java index 2bdc008ca1a6..c70b36b4091e 100644 --- a/server/src/main/java/com/cloud/server/StatsCollector.java +++ b/server/src/main/java/com/cloud/server/StatsCollector.java @@ -829,6 +829,9 @@ private void retrieveSession(ManagementServerHostStatsEntry newEntry) { } private void getDataBaseStatistics(ManagementServerHostStatsEntry newEntry, long msid) { + newEntry.setLastAgents(_agentMgr.getLastAgents()); + List agents = _hostDao.listByMs(msid); + newEntry.setAgents(agents); int count = _hostDao.countByMs(msid); newEntry.setAgentCount(count); } diff --git a/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java b/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java index a0e10c646b58..9e83a58220d7 100644 --- 
a/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java +++ b/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java @@ -95,34 +95,37 @@ public void processHostAdded(long hostId) { @Override public void processConnect(Host host, StartupCommand cmd, boolean forRebalance) throws ConnectionException { - if (cmd instanceof StartupRoutingCommand) { - StartupRoutingCommand scCmd = (StartupRoutingCommand)cmd; - if (scCmd.getHypervisorType() == HypervisorType.XenServer || scCmd.getHypervisorType() == HypervisorType.KVM || + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + StartupRoutingCommand scCmd = (StartupRoutingCommand)cmd; + if (scCmd.getHypervisorType() == HypervisorType.XenServer || scCmd.getHypervisorType() == HypervisorType.KVM || scCmd.getHypervisorType() == HypervisorType.VMware || scCmd.getHypervisorType() == HypervisorType.Simulator || scCmd.getHypervisorType() == HypervisorType.Ovm || scCmd.getHypervisorType() == HypervisorType.Hyperv || scCmd.getHypervisorType() == HypervisorType.LXC || scCmd.getHypervisorType() == HypervisorType.Ovm3) { - List pools = _poolDao.listBy(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER); - List zoneStoragePoolsByTags = _poolDao.findZoneWideStoragePoolsByTags(host.getDataCenterId(), null, false); - List zoneStoragePoolsByHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), scCmd.getHypervisorType()); - zoneStoragePoolsByTags.retainAll(zoneStoragePoolsByHypervisor); - pools.addAll(zoneStoragePoolsByTags); - List zoneStoragePoolsByAnyHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), HypervisorType.Any); - pools.addAll(zoneStoragePoolsByAnyHypervisor); - - // get the zone wide disabled pools list if global setting is true. 
- if (StorageManager.MountDisabledStoragePool.value()) { - pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), null, null, ScopeType.ZONE)); - } + List pools = _poolDao.listBy(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER); + List zoneStoragePoolsByTags = _poolDao.findZoneWideStoragePoolsByTags(host.getDataCenterId(), null, false); + List zoneStoragePoolsByHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), scCmd.getHypervisorType()); + zoneStoragePoolsByTags.retainAll(zoneStoragePoolsByHypervisor); + pools.addAll(zoneStoragePoolsByTags); + List zoneStoragePoolsByAnyHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), HypervisorType.Any); + pools.addAll(zoneStoragePoolsByAnyHypervisor); + + // get the zone wide disabled pools list if global setting is true. + if (StorageManager.MountDisabledStoragePool.value()) { + pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), null, null, ScopeType.ZONE)); + } - // get the cluster wide disabled pool list - if (StorageManager.MountDisabledStoragePool.valueIn(host.getClusterId())) { - pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER)); - } + // get the cluster wide disabled pool list + if (StorageManager.MountDisabledStoragePool.valueIn(host.getClusterId())) { + pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER)); + } - for (StoragePoolVO pool : pools) { - if (!pool.isShared()) { - continue; - } + for (StoragePoolVO pool : pools) { + if (!pool.isShared()) { + continue; + } if (pool.getPoolType() == StoragePoolType.OCFS2 && !_ocfs2Mgr.prepareNodes(pool.getClusterId())) { throw new ConnectionException(true, String.format("Unable to prepare OCFS2 nodes for pool %s", pool)); diff --git 
a/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java b/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java index 97e503974cf2..23dc5c6028e2 100644 --- a/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java +++ b/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java @@ -37,6 +37,11 @@ import com.cloud.agent.AgentManager; import com.cloud.agent.api.Answer; +import com.cloud.agent.api.MigrateAgentConnectionCommand; +import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; +import com.cloud.dc.DataCenterVO; +import com.cloud.dc.dao.DataCenterDao; import com.cloud.host.Host; import com.cloud.host.HostVO; import com.cloud.host.dao.HostDao; @@ -44,6 +49,8 @@ import com.cloud.resource.ResourceState; import com.cloud.utils.component.ComponentLifecycleBase; import com.cloud.utils.exception.CloudRuntimeException; + +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implements IndirectAgentLB, Configurable { @@ -63,14 +70,35 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement @Inject private HostDao hostDao; @Inject + private DataCenterDao dcDao; + @Inject + private ManagementServerHostDao mshostDao; + @Inject private AgentManager agentManager; ////////////////////////////////////////////////////// /////////////// Agent MSLB Methods /////////////////// ////////////////////////////////////////////////////// + @Override + public List getManagementServerList() { + final String msServerAddresses = ApiServiceConfiguration.ManagementServerAddresses.value(); + if (StringUtils.isEmpty(msServerAddresses)) { + throw new CloudRuntimeException(String.format("No management server addresses are defined in '%s' setting", + 
ApiServiceConfiguration.ManagementServerAddresses.key())); + } + + List msList = new ArrayList<>(Arrays.asList(msServerAddresses.replace(" ", "").split(","))); + return msList; + } + @Override public List getManagementServerList(final Long hostId, final Long dcId, final List orderedHostIdList) { + return getManagementServerList(hostId, dcId, orderedHostIdList, null); + } + + @Override + public List getManagementServerList(final Long hostId, final Long dcId, final List orderedHostIdList, String lbAlgorithm) { final String msServerAddresses = ApiServiceConfiguration.ManagementServerAddresses.value(); if (StringUtils.isEmpty(msServerAddresses)) { throw new CloudRuntimeException(String.format("No management server addresses are defined in '%s' setting", @@ -90,7 +118,7 @@ public List getManagementServerList(final Long hostId, final Long dcId, hostIdList.add(hostId); } - final org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm algorithm = getAgentMSLBAlgorithm(); + final org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm algorithm = getAgentMSLBAlgorithm(lbAlgorithm); final List msList = Arrays.asList(msServerAddresses.replace(" ", "").split(",")); return algorithm.sort(msList, hostIdList, hostId); } @@ -146,6 +174,30 @@ private List getAllAgentBasedHosts() { return agentBasedHosts; } + private List getAllAgentBasedHosts(long msId) { + final List allHosts = hostDao.listHostsByMs(msId); + if (allHosts == null) { + return new ArrayList<>(); + } + final List agentBasedHosts = new ArrayList<>(); + for (final Host host : allHosts) { + conditionallyAddHost(agentBasedHosts, host); + } + return agentBasedHosts; + } + + private List getAllAgentBasedHostsInDc(long msId, long dcId) { + final List allHosts = hostDao.listHostsByMsAndDc(msId, dcId); + if (allHosts == null) { + return new ArrayList<>(); + } + final List agentBasedHosts = new ArrayList<>(); + for (final Host host : allHosts) { + conditionallyAddHost(agentBasedHosts, host); + } + return agentBasedHosts; + } + 
private void conditionallyAddHost(List agentBasedHosts, Host host) { if (host == null) { if (logger.isTraceEnabled()) { @@ -191,13 +243,33 @@ private void conditionallyAddHost(List agentBasedHosts, Host host) { agentBasedHosts.add(host); } + @Override + public boolean haveAgentBasedHosts(long msId) { + return CollectionUtils.isNotEmpty(getAllAgentBasedHosts(msId)); + } + private org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm getAgentMSLBAlgorithm() { - final String algorithm = getLBAlgorithmName(); - if (algorithmMap.containsKey(algorithm)) { - return algorithmMap.get(algorithm); + return getAgentMSLBAlgorithm(null); + } + + private org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm getAgentMSLBAlgorithm(String lbAlgorithm) { + boolean algorithmNameFromConfig = false; + if (StringUtils.isEmpty(lbAlgorithm)) { + lbAlgorithm = getLBAlgorithmName(); + algorithmNameFromConfig = true; + } + if (algorithmMap.containsKey(lbAlgorithm)) { + return algorithmMap.get(lbAlgorithm); + } + throw new CloudRuntimeException(String.format("Algorithm %s%s not found, valid values are: %s", + lbAlgorithm, algorithmNameFromConfig? 
" configured for '" + IndirectAgentLBAlgorithm.key() + "'" : "", algorithmMap.keySet())); + } + + @Override + public void checkLBAlgorithmName(String lbAlgorithm) { + if (!algorithmMap.containsKey(lbAlgorithm)) { + throw new CloudRuntimeException(String.format("Invalid algorithm %s, valid values are: %s", lbAlgorithm, algorithmMap.keySet())); } - throw new CloudRuntimeException(String.format("Algorithm configured for '%s' not found, valid values are: %s", - IndirectAgentLBAlgorithm.key(), algorithmMap.keySet())); } //////////////////////////////////////////////////////////// @@ -224,6 +296,73 @@ public void propagateMSListToAgents() { } } + @Override + public boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs) { + if (timeoutDurationInMs <= 0) { + logger.debug(String.format("Not migrating indirect agents from management server node %d (id: %s) to other nodes, invalid timeout duration", fromMsId, fromMsUuid)); + return false; + } + + logger.debug(String.format("Migrating indirect agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid)); + long migrationStartTime = System.currentTimeMillis(); + if (!haveAgentBasedHosts(fromMsId)) { + logger.info(String.format("No indirect agents available on management server node %d (id: %s), to migrate", fromMsId, fromMsUuid)); + return true; + } + + boolean lbAlgorithmChanged = false; + if (StringUtils.isNotBlank(lbAlgorithm) && !lbAlgorithm.equalsIgnoreCase(getLBAlgorithmName())) { + logger.debug(String.format("Indirect agent lb algorithm changed to %s", lbAlgorithm)); + lbAlgorithmChanged = true; + } + + final List avoidMsList = mshostDao.listNonUpStateMsIPs(); + ManagementServerHostVO ms = mshostDao.findByMsid(fromMsId); + if (ms != null && !avoidMsList.contains(ms.getServiceIP())) { + avoidMsList.add(ms.getServiceIP()); + } + + List dataCenterList = dcDao.listAll(); + for (DataCenterVO dc : dataCenterList) { + Long dcId = dc.getId(); + List 
orderedHostIdList = getOrderedHostIdList(dcId); + List agentBasedHostsOfMsInDc = getAllAgentBasedHostsInDc(fromMsId, dcId); + if (CollectionUtils.isEmpty(agentBasedHostsOfMsInDc)) { + continue; + } + logger.debug(String.format("Migrating %d indirect agents from management server node %d (id: %s) of zone %s", agentBasedHostsOfMsInDc.size(), fromMsId, fromMsUuid, dc.toString())); + for (final Host host : agentBasedHostsOfMsInDc) { + long migrationElapsedTimeInMs = System.currentTimeMillis() - migrationStartTime; + if (migrationElapsedTimeInMs >= timeoutDurationInMs) { + logger.debug(String.format("Stop migrating remaining indirect agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid)); + return false; + } + + List msList = null; + Long lbCheckInterval = 0L; + if (lbAlgorithmChanged) { + // send new MS list, ordered by the requested (not the configured) algorithm, when there is change in lb algorithm + msList = getManagementServerList(host.getId(), dcId, orderedHostIdList, lbAlgorithm); + lbCheckInterval = getLBPreferredHostCheckInterval(host.getClusterId()); + } + + final MigrateAgentConnectionCommand cmd = new MigrateAgentConnectionCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval); + agentManager.easySend(host.getId(), cmd); //answer not received as the agent disconnects and reconnects to other ms + updateLastManagementServer(host.getId(), fromMsId); + } + } + + return true; + } + + private void updateLastManagementServer(long hostId, long msId) { + HostVO hostVO = hostDao.findById(hostId); + if (hostVO != null) { + hostVO.setLastManagementServerId(msId); + hostDao.update(hostId, hostVO); + } + } + private void configureAlgorithmMap() { final List algorithms = new ArrayList<>(); algorithms.add(new IndirectAgentLBStaticAlgorithm()); diff --git a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml index 68abe7a16f16..60c2095d5f41 100644 --- 
a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml +++ b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml @@ -268,8 +268,8 @@ - - + + diff --git a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java index e8b297ff188c..9954dc312f04 100755 --- a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java +++ b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java @@ -46,9 +46,9 @@ import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd; @@ -79,7 +79,7 @@ public Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resou * @see com.cloud.resource.ResourceService#cancelMaintenance(com.cloud.api.commands.CancelMaintenanceCmd) */ @Override - public Host cancelMaintenance(final CancelMaintenanceCmd cmd) { + public Host cancelMaintenance(final CancelHostMaintenanceCmd cmd) { // TODO Auto-generated method stub return null; } @@ -142,7 +142,7 @@ public List discoverHosts(final AddSecondaryStorageCmd cmd) thro * @see com.cloud.resource.ResourceService#maintain(com.cloud.api.commands.PrepareForMaintenanceCmd) */ 
@Override - public Host maintain(final PrepareForMaintenanceCmd cmd) { + public Host maintain(final PrepareForHostMaintenanceCmd cmd) { // TODO Auto-generated method stub return null; } diff --git a/server/src/test/resources/createNetworkOffering.xml b/server/src/test/resources/createNetworkOffering.xml index 99418467e0a1..a3f43407c61c 100644 --- a/server/src/test/resources/createNetworkOffering.xml +++ b/server/src/test/resources/createNetworkOffering.xml @@ -1,79 +1,80 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/integration/smoke/test_safe_shutdown.py b/test/integration/smoke/test_ms_maintenance_and_safe_shutdown.py similarity index 70% rename from test/integration/smoke/test_safe_shutdown.py rename to test/integration/smoke/test_ms_maintenance_and_safe_shutdown.py index d757bb6d0689..1198863ec593 100644 --- a/test/integration/smoke/test_safe_shutdown.py +++ b/test/integration/smoke/test_ms_maintenance_and_safe_shutdown.py @@ -22,9 +22,9 @@ from marvin.lib.base import * from marvin.lib.common import * -class TestSafeShutdown(cloudstackTestCase): +class TestMSMaintenanceAndSafeShutdown(cloudstackTestCase): """ - Tests safely shutting down the Management Server + Tests MS maintenance and safe shutting down the Management Server """ def setUp(self): @@ -33,8 +33,7 @@ def setUp(self): self.cleanup = [] def tearDown(self): - self.startServer() - super(TestSafeShutdown, self).tearDown() + super(TestMSMaintenanceAndSafeShutdown, self).tearDown() def isServerShutdown(self): sshClient = SshClient( @@ -88,11 +87,44 @@ def run_async_cmd(self) : ) @attr(tags=["advanced", "smoke"]) - def test_01_prepare_and_cancel_shutdown(self): + def test_01_prepare_and_cancel_maintenance(self): + try : + prepare_for_maintenance_cmd = 
prepareForMaintenance.prepareForMaintenanceCmd() + prepare_for_maintenance_cmd.managementserverid = 1 + response = self.apiclient.prepareForMaintenance(prepare_for_maintenance_cmd) + self.assertEqual( + response.maintenanceinitiated, + True, + "Failed to prepare for maintenance" + ) + try : + self.run_async_cmd() + except Exception as e: + self.debug("Prepare for maintenance check successful, API failure: %s" % e) + finally : + cancel_maintenance_cmd = cancelMaintenance.cancelMaintenanceCmd() + cancel_maintenance_cmd.managementserverid = 1 + self.apiclient.cancelMaintenance(cancel_maintenance_cmd) + # self.assertEqual( + # response.maintenanceinitiated, + # False, + # "Failed to cancel maintenance" + # ) + ## Just to be sure, run another async command + project = self.run_async_cmd() + self.cleanup.append(project) + + @attr(tags=["advanced", "smoke"]) + def test_02_prepare_and_cancel_shutdown(self): try : prepare_for_shutdown_cmd = prepareForShutdown.prepareForShutdownCmd() prepare_for_shutdown_cmd.managementserverid = 1 self.apiclient.prepareForShutdown(prepare_for_shutdown_cmd) + # self.assertEqual( + # response.maintenanceinitiated, + # True, + # "Failed to prepare for maintenance" + # ) try : self.run_async_cmd() except Exception as e: @@ -111,7 +143,7 @@ def test_01_prepare_and_cancel_shutdown(self): self.cleanup.append(project) @attr(tags=["advanced", "smoke"]) - def test_02_trigger_shutdown(self): + def test_03_trigger_shutdown(self): try : cmd = triggerShutdown.triggerShutdownCmd() cmd.managementserverid = 1 diff --git a/tools/apidoc/gen_toc.py b/tools/apidoc/gen_toc.py index 8d28749a637b..c05b8fe27987 100644 --- a/tools/apidoc/gen_toc.py +++ b/tools/apidoc/gen_toc.py @@ -233,7 +233,8 @@ 'listQuarantinedIp': 'IP Quarantine', 'updateQuarantinedIp': 'IP Quarantine', 'removeQuarantinedIp': 'IP Quarantine', - 'Shutdown': 'Management', + 'Shutdown': 'Maintenance', + 'Maintenance': 'Maintenance', 'addObjectStoragePool': 'Object Store', 'listObjectStoragePools': 
'Object Store', 'deleteObjectStoragePool': 'Object Store', diff --git a/ui/public/locales/en.json b/ui/public/locales/en.json index 820f08b8cb4f..20602e56ae0c 100644 --- a/ui/public/locales/en.json +++ b/ui/public/locales/en.json @@ -349,6 +349,7 @@ "label.agent.username": "Agent username", "label.agentport": "Agent port", "label.agentstate": "Agent state", +"label.agentscount": "Number of connected agents", "label.agree": "Agree", "label.alert": "Alert", "label.alert.details": "Alert details", @@ -470,7 +471,7 @@ "label.cachemode": "Write-cache type", "label.cancel": "Cancel", "label.cancel.shutdown": "Cancel Shutdown", -"label.cancelmaintenance": "Cancel maintenance", +"label.cancel.maintenance": "Cancel Maintenance", "label.cancel.host.as.degraded": "Cancel host as degraded", "label.capacity": "Capacity", "label.capacitybytes": "Capacity bytes", @@ -569,6 +570,7 @@ "label.confirmdeclineinvitation": "Are you sure you want to decline this project invitation?", "label.confirmpassword": "Confirm password", "label.confirmpassword.description": "Please type the same password again.", +"label.connected.agents": "Connected Agents", "label.connect": "Connect", "label.connectiontimeout": "Connection timeout", "label.conservemode": "Conserve mode", @@ -1385,6 +1387,7 @@ "label.management.server": "Management server", "label.management.servers": "Management servers", "label.management.server.peers": "Peers", +"label.managementservername": "Management Server", "label.managementservers": "Number of management servers", "label.matchall": "Match all", "label.max": "Max.", @@ -1723,7 +1726,7 @@ "label.prefix": "Prefix", "label.prefix.type": "Prefix type", "label.prepare.for.shutdown": "Prepare for Shutdown", -"label.prepareformaintenance": "Prepare for Maintenance", +"label.prepare.for.maintenance": "Prepare for Maintenance", "label.presetup": "PreSetup", "label.prev": "Prev", "label.previous": "Previous", @@ -2052,6 +2055,7 @@ "label.sequence": "Sequence", "label.server": 
"Server", "label.server.certificate": "Server certificate", +"label.serviceip": "Service IP", "label.service.connectivity.distributedroutercapabilitycheckbox": "Distributed router", "label.service.connectivity.regionlevelvpccapabilitycheckbox": "Region level VPC", "label.service.group": "Service group", @@ -2825,7 +2829,8 @@ "message.backup.create": "Are you sure you want create an Instance backup?", "message.backup.offering.remove": "Are you sure you want to remove Instance from backup offering and delete the backup chain?", "message.backup.restore": "Please confirm that you want to restore the Instance backup?", -"message.cancel.shutdown": "Please confirm that you would like to cancel the shutdown on this Management server. It will resume accepting any new Async Jobs.", +"message.cancel.shutdown": "Please confirm that you would like to cancel the shutdown on this Management Server. It will resume accepting any new Async Jobs.", +"message.cancel.maintenance": "Please confirm that you would like to cancel the maintenance on this Management Server. It will resume accepting any new Async Jobs.", "message.certificate.upload.processing": "Certificate upload in progress", "message.change.disk.offering.sharedfs.failed": "Failed to change disk offering for the Shared FileSystem.", "message.change.disk.offering.sharedfs.processing": "Changing disk offering for the Shared FileSystem.", @@ -3341,7 +3346,8 @@ "message.please.wait.while.zone.is.being.created": "Please wait while your zone is being created; this may take a while...", "message.pod.dedicated": "Pod dedicated.", "message.pod.dedication.released": "Pod dedication released.", -"message.prepare.for.shutdown": "Please confirm that you would like to prep this Management server for shutdown. It will not accept any new Async Jobs but will NOT terminate after there are no pending jobs.", +"message.prepare.for.shutdown": "Please confirm that you would like to prepare this Management Server for shutdown. 
It will not accept any new Async Jobs but will NOT terminate after there are no pending jobs.", +"message.prepare.for.maintenance": "Please confirm that you would like to prepare this Management Server for maintenance. It will not accept any new Async Jobs.", "message.primary.storage.invalid.state": "Primary storage is not in Up state", "message.processing.complete": "Processing complete!", "message.protocol.description": "For XenServer, choose NFS, iSCSI, or PreSetup. For KVM, choose NFS, SharedMountPoint, RDB, CLVM or Gluster. For vSphere, choose NFS, PreSetup (VMFS or iSCSI or FiberChannel or vSAN or vVols) or DatastoreCluster. For Hyper-V, choose SMB/CIFS. For LXC, choose NFS or SharedMountPoint. For OVM, choose NFS or OCFS2.", @@ -3432,6 +3438,7 @@ "message.shared.network.offering.warning": "Domain admins and regular Users can only create shared Networks from Network offering with the setting specifyvlan=false. Please contact an administrator to create a Network offering if this list is empty.", "message.shared.network.unsupported.for.nsx": "Shared networks aren't supported for NSX enabled zones", "message.shutdown.triggered": "A shutdown has been triggered. CloudStack will not accept new jobs", +"message.maintenance.initiated": "Maintenance has been initiated. The Management Server will not accept new jobs", "message.snapshot.additional.zones": "Snapshots will always be created in its native zone - %x, here you can select additional zone(s) where it will be copied to at creation time", "message.sourcenatip.change.warning": "WARNING: Changing the sourcenat IP address of the network will cause connectivity downtime for the Instances with NICs in the Network.", "message.sourcenatip.change.inhibited": "Changing the sourcenat to this IP of the Network to this address is inhibited as firewall rules are defined for it.
This can include port forwarding or load balancing rules.\n - If this is an Isolated Network, please use updateNetwork/click the edit button.\n - If this is a VPC, first clear all other rules for this address.", @@ -3595,7 +3602,7 @@ "message.tooltip.reserved.system.netmask": "The Network prefix that defines the pod subnet. Uses CIDR notation.", "message.traffic.type.deleted": "Successfully deleted traffic type", "message.traffic.type.to.basic.zone": "traffic type to basic zone", -"message.trigger.shutdown": "Please confirm that you would like to trigger a shutdown on this Management server. It will not accept any new Async Jobs and will terminate after there are no pending jobs.", +"message.trigger.shutdown": "Please confirm that you would like to trigger a shutdown on this Management Server. It will not accept any new Async Jobs and will terminate after there are no pending jobs.", "message.type.values.to.add": "Please add additional values by typing them in", "message.update.autoscale.policy.failed": "Failed to update autoscale policy", "message.update.autoscale.vmgroup.failed": "Failed to update autoscale group", diff --git a/ui/src/components/page/GlobalLayout.vue b/ui/src/components/page/GlobalLayout.vue index 6dd5c530fa5b..66568d2482f6 100644 --- a/ui/src/components/page/GlobalLayout.vue +++ b/ui/src/components/page/GlobalLayout.vue @@ -17,11 +17,14 @@