Skip to content

Commit 95f254a

Browse files
fixes & code improvements
1 parent 4f82dbb commit 95f254a

File tree

10 files changed

+76
-28
lines changed

10 files changed

+76
-28
lines changed

agent/src/main/java/com/cloud/agent/Agent.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -526,13 +526,14 @@ public Task create(final Task.Type type, final Link link, final byte[] data) {
526526
}
527527

528528
protected void reconnect(final Link link) {
529-
reconnect(link, null);
529+
reconnect(link, null, false);
530530
}
531531

532-
protected void reconnect(final Link link, String host) {
533-
if (!_reconnectAllowed) {
532+
protected void reconnect(final Link link, String host, boolean forTransfer) {
533+
if (!(forTransfer || _reconnectAllowed)) {
534534
return;
535535
}
536+
536537
synchronized (this) {
537538
if (_startup != null) {
538539
_startup.cancel();
@@ -885,7 +886,7 @@ private void migrateAgentConnection(List<String> avoidMsList) {
885886
_reconnectAllowed = true;
886887
_shell.resetHostCounter();
887888
_shell.setConnectionTransfer(true);
888-
reconnect(_link, preferredHost);
889+
reconnect(_link, preferredHost, true);
889890
}
890891

891892
public void processResponse(final Response response, final Link link) {

api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,11 @@ public class AsyncJobResponse extends BaseResponse {
8383
@Param(description = "the unique ID of the instance/entity object related to the job")
8484
private String jobInstanceId;
8585

86-
@SerializedName("managementserverid")
86+
@SerializedName(ApiConstants.MANAGEMENT_SERVER_ID)
8787
@Param(description = "the msid of the management server on which the job is running", since = "4.19")
8888
private String managementServerId;
8989

90-
@SerializedName("managementservername")
90+
@SerializedName(ApiConstants.MANAGEMENT_SERVER_NAME)
9191
@Param(description = "the management server name of the host", since = "4.21.0")
9292
private String managementServerName;
9393

api/src/main/java/org/apache/cloudstack/api/response/HostForMigrationResponse.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ public class HostForMigrationResponse extends BaseResponse {
172172
@Param(description = "the date and time the host was last pinged")
173173
private Date lastPinged;
174174

175-
@SerializedName("managementserverid")
175+
@SerializedName(ApiConstants.MANAGEMENT_SERVER_ID)
176176
@Param(description = "the management server ID of the host")
177177
private Long managementServerId;
178178

api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,11 +190,11 @@ public class HostResponse extends BaseResponseWithAnnotations {
190190
@Param(description = "the virtual machine id for host type ConsoleProxy and SecondaryStorageVM", since = "4.21.0")
191191
private String virtualMachineId;
192192

193-
@SerializedName("managementserverid")
193+
@SerializedName(ApiConstants.MANAGEMENT_SERVER_ID)
194194
@Param(description = "the management server ID of the host")
195195
private String managementServerId;
196196

197-
@SerializedName("managementservername")
197+
@SerializedName(ApiConstants.MANAGEMENT_SERVER_NAME)
198198
@Param(description = "the management server name of the host", since = "4.21.0")
199199
private String managementServerName;
200200

engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717
package com.cloud.agent.manager;
1818

19+
import java.io.IOException;
1920
import java.lang.reflect.Constructor;
2021
import java.lang.reflect.InvocationTargetException;
2122
import java.nio.channels.ClosedChannelException;
@@ -307,12 +308,39 @@ public void unregisterForHostEvents(final int id) {
307308

308309
@Override
309310
public void onManagementServerMaintenance() {
311+
logger.debug("Management server maintenance enabled");
310312
_monitorExecutor.shutdownNow();
313+
if (_connection != null) {
314+
_connection.stop();
315+
316+
try {
317+
_connection.cleanUp();
318+
} catch (final IOException e) {
319+
logger.warn("Fail to clean up old connection", e);
320+
}
321+
}
322+
_connectExecutor.shutdownNow();
311323
}
312324

313325
@Override
314326
public void onManagementServerCancelMaintenance() {
327+
logger.debug("Management server maintenance disabled");
328+
if (_connectExecutor.isShutdown()) {
329+
_connectExecutor = new ThreadPoolExecutor(100, 500, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("AgentConnectTaskPool"));
330+
_connectExecutor.allowCoreThreadTimeOut(true);
331+
}
332+
333+
startDirectlyConnectedHosts(true);
334+
if (_connection != null) {
335+
try {
336+
_connection.start();
337+
} catch (final NioConnectionException e) {
338+
logger.error("Error when connecting to the NioServer!", e);
339+
}
340+
}
341+
315342
if (_monitorExecutor.isShutdown()) {
343+
_monitorExecutor = new ScheduledThreadPoolExecutor(1, new NamedThreadFactory("AgentMonitor"));
316344
_monitorExecutor.scheduleWithFixedDelay(new MonitorTask(), mgmtServiceConf.getPingInterval(), mgmtServiceConf.getPingInterval(), TimeUnit.SECONDS);
317345
}
318346
}
@@ -650,7 +678,7 @@ public boolean start() {
650678
return true;
651679
}
652680

653-
startDirectlyConnectedHosts();
681+
startDirectlyConnectedHosts(false);
654682

655683
if (_connection != null) {
656684
try {
@@ -665,10 +693,10 @@ public boolean start() {
665693
return true;
666694
}
667695

668-
public void startDirectlyConnectedHosts() {
696+
public void startDirectlyConnectedHosts(final boolean forRebalance) {
669697
final List<HostVO> hosts = _resourceMgr.findDirectlyConnectedHosts();
670698
for (final HostVO host : hosts) {
671-
loadDirectlyConnectedHost(host, false);
699+
loadDirectlyConnectedHost(host, forRebalance);
672700
}
673701
}
674702

engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@
4747
import org.apache.cloudstack.framework.config.ConfigKey;
4848
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
4949
import org.apache.cloudstack.ha.dao.HAConfigDao;
50-
import org.apache.cloudstack.maintenance.ManagementServerMaintenanceListener;
5150
import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager;
5251
import org.apache.cloudstack.maintenance.command.BaseShutdownManagementServerHostCommand;
5352
import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand;
@@ -108,8 +107,8 @@
108107
import com.cloud.utils.nio.Task;
109108
import com.google.gson.Gson;
110109

111-
public class ClusteredAgentManagerImpl extends AgentManagerImpl implements ClusterManagerListener, ManagementServerMaintenanceListener, ClusteredAgentRebalanceService {
112-
private static final ScheduledExecutorService s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor"));
110+
public class ClusteredAgentManagerImpl extends AgentManagerImpl implements ClusterManagerListener, ClusteredAgentRebalanceService {
111+
private static ScheduledExecutorService s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor"));
113112
private final long rebalanceTimeOut = 300000; // 5 mins - after this time remove the agent from the transfer list
114113

115114
public final static long STARTUP_DELAY = 5000;
@@ -172,8 +171,6 @@ public boolean configure(final String name, final Map<String, Object> xmlParams)
172171
_clusterMgr.registerListener(this);
173172
_clusterMgr.registerDispatcher(new ClusterDispatcher());
174173

175-
managementServerMaintenanceManager.registerListener(this);
176-
177174
_gson = GsonHelper.getGson();
178175

179176
return super.configure(name, xmlParams);
@@ -595,7 +592,7 @@ public boolean stop() {
595592
}
596593

597594
@Override
598-
public void startDirectlyConnectedHosts() {
595+
public void startDirectlyConnectedHosts(final boolean forRebalance) {
599596
// override and let it be dummy for purpose, we will scan and load direct agents periodically.
600597
// We may also pickup agents that have been left over from other crashed management server
601598
}
@@ -1477,15 +1474,20 @@ private void updateLastManagementServer(long hostId, long msId) {
14771474

14781475
@Override
14791476
public void onManagementServerMaintenance() {
1477+
logger.debug("Management server maintenance enabled");
14801478
s_transferExecutor.shutdownNow();
14811479
cleanupTransferMap(_nodeId);
1480+
super.onManagementServerMaintenance();
14821481
}
14831482

14841483
@Override
14851484
public void onManagementServerCancelMaintenance() {
1485+
logger.debug("Management server maintenance disabled");
1486+
super.onManagementServerCancelMaintenance();
14861487
if (isAgentRebalanceEnabled()) {
14871488
cleanupTransferMap(_nodeId);
14881489
if (s_transferExecutor.isShutdown()) {
1490+
s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor"));
14891491
s_transferExecutor.scheduleAtFixedRate(getAgentRebalanceScanTask(), 60000, 60000, TimeUnit.MILLISECONDS);
14901492
s_transferExecutor.scheduleAtFixedRate(getTransferScanTask(), 60000, ClusteredAgentRebalanceService.DEFAULT_TRANSFER_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
14911493
}

plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -194,14 +194,17 @@ public void triggerShutdown() {
194194
}
195195

196196
private void prepareForShutdown(boolean postTrigger) {
197-
if (this.preparingForMaintenance) {
198-
throw new CloudRuntimeException("Maintenance has already been initiated, cancel maintenance and try again");
199-
}
197+
if (!postTrigger) {
198+
if (this.preparingForMaintenance) {
199+
throw new CloudRuntimeException("Maintenance has already been initiated, cancel maintenance and try again");
200+
}
200201

201-
// Ensure we don't throw an error if triggering a shutdown after just preparing for it
202-
if (!postTrigger && this.preparingForShutdown) {
203-
throw new CloudRuntimeException("Shutdown has already been triggered");
202+
// Ensure we don't throw an error if triggering a shutdown after just preparing for it
203+
if (this.preparingForShutdown) {
204+
throw new CloudRuntimeException("Shutdown has already been triggered");
205+
}
204206
}
207+
205208
this.preparingForShutdown = true;
206209
jobManager.disableAsyncJobs();
207210
waitForPendingJobs();
@@ -220,6 +223,7 @@ public void cancelShutdown() {
220223

221224
this.preparingForShutdown = false;
222225
this.shutdownTriggered = false;
226+
resetPreparingForMaintenance();
223227
jobManager.enableAsyncJobs();
224228
cancelWaitForPendingJobs();
225229
}
@@ -246,8 +250,14 @@ public void cancelMaintenance() {
246250
throw new CloudRuntimeException("Maintenance has not been initiated");
247251
}
248252
resetPreparingForMaintenance();
253+
this.preparingForShutdown = false;
254+
this.shutdownTriggered = false;
249255
jobManager.enableAsyncJobs();
250256
cancelWaitForPendingJobs();
257+
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
258+
if (msHost != null && State.Maintenance.equals(msHost.getState())) {
259+
onCancelMaintenance();
260+
}
251261
}
252262

253263
private void waitForPendingJobs() {
@@ -257,7 +267,7 @@ private void waitForPendingJobs() {
257267
}
258268
this.pendingJobsTask = new CheckPendingJobsTask(this);
259269
long pendingJobsCheckDelayInMs = 1000L; // 1 sec
260-
long pendingJobsCheckPeriodInMs = 15L * 1000; // every 15 secs
270+
long pendingJobsCheckPeriodInMs = 3L * 1000; // every 3 secs, check more frequently for pending jobs
261271
timer.scheduleAtFixedRate(pendingJobsTask, pendingJobsCheckDelayInMs, pendingJobsCheckPeriodInMs);
262272
}
263273

@@ -429,6 +439,8 @@ public ManagementServerMaintenanceResponse cancelMaintenance(CancelMaintenanceCm
429439
@Override
430440
public void cancelPreparingForMaintenance(ManagementServerHostVO msHost) {
431441
resetPreparingForMaintenance();
442+
this.preparingForShutdown = false;
443+
this.shutdownTriggered = false;
432444
jobManager.enableAsyncJobs();
433445
if (msHost == null) {
434446
msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
@@ -530,6 +542,7 @@ public void run() {
530542
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
531543
if (totalAgents == 0) {
532544
msHostDao.updateState(msHost.getId(), State.Maintenance);
545+
managementServerMaintenanceManager.onMaintenance();
533546
this.cancel();
534547
return;
535548
}

server/src/main/java/com/cloud/api/ApiServer.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,6 @@ public boolean start() {
475475
s_apiNameCmdClassMap.put(apiName, apiCmdList);
476476
}
477477
apiCmdList.add(cmdClass);
478-
479478
}
480479

481480
setEncodeApiResponse(EncodeApiResponse.value());

ui/public/locales/en.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3411,8 +3411,8 @@
34113411
"message.setup.physical.network.during.zone.creation.basic": "When adding a basic zone, you can set up one physical Network, which corresponds to a NIC on the hypervisor. The Network carries several types of traffic.<br/><br/>You may also <strong>add</strong> other traffic types onto the physical Network.",
34123412
"message.shared.network.offering.warning": "Domain admins and regular Users can only create shared Networks from Network offering with the setting specifyvlan=false. Please contact an administrator to create a Network offering if this list is empty.",
34133413
"message.shared.network.unsupported.for.nsx": "Shared networks aren't supported for NSX enabled zones",
3414-
"message.shutdown.triggered": "A shutdown has been triggered. CloudStack will not accept new jobs",
3415-
"message.maintenance.initiated": "A maintenance has been initiated. Management Server will not accept new jobs",
3414+
"message.shutdown.triggered": "Shutdown has been triggered. This Management Server will not accept new jobs",
3415+
"message.maintenance.initiated": "Maintenance has been initiated. This Management Server will not accept new jobs",
34163416
"message.snapshot.additional.zones": "Snapshots will always be created in its native zone - %x, here you can select additional zone(s) where it will be copied to at creation time",
34173417
"message.sourcenatip.change.warning": "WARNING: Changing the sourcenat IP address of the network will cause connectivity downtime for the Instances with NICs in the Network.",
34183418
"message.sourcenatip.change.inhibited": "Changing the sourcenat to this IP of the Network to this address is inhibited as firewall rules are defined for it. This can include port forwarding or load balancing rules.\n - If this is an Isolated Network, please use updateNetwork/click the edit button.\n - If this is a VPC, first clear all other rules for this address.",

utils/src/main/java/com/cloud/utils/nio/NioConnection.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ public abstract class NioConnection implements Callable<Boolean> {
6868
protected boolean _isRunning;
6969
protected boolean _isStartup;
7070
protected int _port;
71+
protected int _workers;
7172
protected List<ChangeRequest> _todos;
7273
protected HandlerFactory _factory;
7374
protected String _name;
@@ -80,6 +81,7 @@ public NioConnection(final String name, final int port, final int workers, final
8081
_isRunning = false;
8182
_selector = null;
8283
_port = port;
84+
_workers = workers;
8385
_factory = factory;
8486
_executor = new ThreadPoolExecutor(workers, 5 * workers, 1, TimeUnit.DAYS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory(name + "-Handler"));
8587
_sslHandshakeExecutor = Executors.newCachedThreadPool(new NamedThreadFactory(name + "-SSLHandshakeHandler"));
@@ -106,6 +108,9 @@ public void start() throws NioConnectionException {
106108
}
107109
_isStartup = true;
108110

111+
if (_executor.isShutdown()) {
112+
_executor = new ThreadPoolExecutor(_workers, 5 * _workers, 1, TimeUnit.DAYS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory(_name + "-Handler"));
113+
}
109114
_threadExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory(this._name + "-NioConnectionHandler"));
110115
_isRunning = true;
111116
_futureTask = _threadExecutor.submit(this);

0 commit comments

Comments
 (0)