Skip to content

Commit c1e473f

Browse files
committed
address comments
Signed-off-by: Abhishek Kumar <[email protected]>
1 parent a9c389a commit c1e473f

File tree

10 files changed

+114
-100
lines changed

10 files changed

+114
-100
lines changed

agent/src/main/java/com/cloud/agent/Agent.java

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,7 @@ protected String getAgentName() {
182182
}
183183

184184
protected void setupShutdownHookAndInitExecutors() {
185-
if (logger.isTraceEnabled()) {
186-
logger.trace("Adding shutdown hook");
187-
}
185+
logger.trace("Adding shutdown hook");
188186
Runtime.getRuntime().addShutdownHook(shutdownThread);
189187
selfTaskExecutor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("Agent-SelfTask"));
190188
outRequestHandler = new ThreadPoolExecutor(shell.getPingRetries(), 2 * shell.getPingRetries(), 10, TimeUnit.MINUTES,
@@ -193,12 +191,26 @@ protected void setupShutdownHookAndInitExecutors() {
193191
new LinkedBlockingQueue<>(), new NamedThreadFactory("AgentRequest-Handler"));
194192
}
195193

196-
// for simulator use only
194+
/**
195+
* Constructor for the {@code Agent} class, intended for simulator use only.
196+
*
197+
* <p>This constructor initializes the agent with a provided {@link IAgentShell}.
198+
* It sets up the necessary NIO client connection, establishes a shutdown hook,
199+
* and initializes the thread executors.
200+
*
201+
* @param shell the {@link IAgentShell} instance that provides agent configuration and runtime information.
202+
*/
197203
public Agent(final IAgentShell shell) {
198204
this.shell = shell;
199-
link = null;
200-
connection = new NioClient(getAgentName(), this.shell.getNextHost(), this.shell.getPort(),
201-
this.shell.getWorkers(), this.shell.getSslHandshakeTimeout(), this);
205+
this.link = null;
206+
this.connection = new NioClient(
207+
getAgentName(),
208+
this.shell.getNextHost(),
209+
this.shell.getPort(),
210+
this.shell.getWorkers(),
211+
this.shell.getSslHandshakeTimeout(),
212+
this
213+
);
202214
setupShutdownHookAndInitExecutors();
203215
}
204216

@@ -223,10 +235,19 @@ public Agent(final IAgentShell shell, final int localAgentId, final ServerResour
223235
connection = new NioClient(getAgentName(), host, this.shell.getPort(), this.shell.getWorkers(),
224236
this.shell.getSslHandshakeTimeout(), this);
225237
setupShutdownHookAndInitExecutors();
226-
logger.info("Agent [id = {}, type = {}, zone = {}, pod = {}, workers = {}, host = {}, " +
227-
"port = {}, local id = {}]",
228-
(id != null ? String.valueOf(id) : "new"), getResourceName(), this.shell.getZone(),
229-
this.shell.getPod(), this.shell.getWorkers(), host, this.shell.getPort(), localAgentId);
238+
logger.info("{} with host = {}, local id = {}", this, host, localAgentId);
239+
}
240+
241+
@Override
242+
public String toString() {
243+
return String.format("Agent [id = %s, type = %s, zone = %s, pod = %s, workers = %d, host = %s, port = %d]",
244+
(id != null ? String.valueOf(id) : "new"),
245+
getResourceName(),
246+
this.shell.getZone(),
247+
this.shell.getPod(),
248+
this.shell.getWorkers(),
249+
this.shell.getNextHost(),
250+
this.shell.getPort());
230251
}
231252

232253
public String getVersion() {
@@ -426,10 +447,7 @@ private void scheduleHostLBCheckerTask(final long checkInterval) {
426447
}
427448

428449
public void scheduleWatch(final Link link, final Request request, final long delay, final long period) {
429-
430-
if (logger.isDebugEnabled()) {
431-
logger.debug("Adding a watch list");
432-
}
450+
logger.debug("Adding a watch list");
433451
final WatchTask task = new WatchTask(link, request, this);
434452
final ScheduledFuture<?> future = selfTaskExecutor.scheduleAtFixedRate(task, delay, period, TimeUnit.MILLISECONDS);
435453
watchList.add(future);
@@ -452,9 +470,7 @@ protected void cancelTasks() {
452470
for (final ScheduledFuture<?> task : watchList) {
453471
task.cancel(true);
454472
}
455-
if (logger.isDebugEnabled()) {
456-
logger.debug("Clearing watch list: " + watchList.size());
457-
}
473+
logger.debug("Clearing watch list: {}", () -> watchList.size());
458474
watchList.clear();
459475
}
460476

@@ -472,7 +488,7 @@ protected void cleanupAgentZoneProperties() {
472488
}
473489

474490
public void lockStartupTask(final Link link) {
475-
logger.debug("Creating startup task for link: {}", getLinkLog(link));
491+
logger.debug("Creating startup task for link: {}", () -> getLinkLog(link));
476492
StartupTask currentTask = startupTask.get();
477493
if (currentTask != null) {
478494
logger.warn("A Startup task is already locked or in progress, cannot create for link {}",
@@ -525,9 +541,7 @@ public void sendStartup(final Link link) {
525541
}
526542

527543
protected String retrieveHostname() {
528-
if (logger.isTraceEnabled()) {
529-
logger.trace("Retrieving hostname with resource={}", serverResource.getClass().getSimpleName());
530-
}
544+
logger.trace("Retrieving hostname with resource={}", () -> serverResource.getClass().getSimpleName());
531545
final String result = Script.runSimpleBashScript(Script.getExecutableAbsolutePath("hostname"), 500);
532546
if (StringUtils.isNotBlank(result)) {
533547
return result;
@@ -596,7 +610,7 @@ protected void stopAndCleanupConnection(boolean waitForStop) {
596610

597611
protected void reconnect(final Link link) {
598612
if (!reconnectAllowed) {
599-
logger.debug("Reconnect requested but it is not allowed {}", getLinkLog(link));
613+
logger.debug("Reconnect requested but it is not allowed {}", () -> getLinkLog(link));
600614
return;
601615
}
602616
cancelStartupTask();
@@ -1128,9 +1142,7 @@ public void run() {
11281142
logger.info("The running startup command is now invalid. Attempting reconnect");
11291143
startupTask.set(null);
11301144
startupWait = DEFAULT_STARTUP_WAIT * 2;
1131-
if (logger.isDebugEnabled()) {
1132-
logger.debug("Executing reconnect from task - {}", getLinkLog(_link));
1133-
}
1145+
logger.debug("Executing reconnect from task - {}", () -> getLinkLog(_link));
11341146
reconnect(_link);
11351147
}
11361148
}
@@ -1183,7 +1195,7 @@ public void doTask(final Task task) throws TaskExecutionException {
11831195
logger.error("Error parsing task", e);
11841196
}
11851197
} else if (task.getType() == Task.Type.DISCONNECT) {
1186-
logger.debug("Executing disconnect task - {}", getLinkLog(task.getLink()));
1198+
logger.debug("Executing disconnect task - {}", () -> getLinkLog(task.getLink()));
11871199
reconnect(task.getLink());
11881200
} else if (task.getType() == Task.Type.OTHER) {
11891201
processOtherTask(task);
@@ -1223,6 +1235,7 @@ protected void runInContext() {
12231235
agent.stop(ShutdownCommand.Requested, "Restarting due to new X509 certificates");
12241236

12251237
// Nullify references for GC
1238+
agent.shell = null;
12261239
agent.watchList = null;
12271240
agent.shutdownThread = null;
12281241
agent.controlListeners = null;

engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4964,30 +4964,39 @@ private void handlePowerOffReportWithNoPendingJobsOnVM(final VMInstanceVO vm) {
49644964
}
49654965
}
49664966

4967+
/**
4968+
* Scans stalled VMs in transition states on an UP host and processes them accordingly.
4969+
*
4970+
* <p>This method is executed only when the {@code syncTransitioningVmPowerState} flag is enabled. It identifies
4971+
* VMs stuck in specific states (e.g., Starting, Stopping, Migrating) on a host that is UP, except for those
4972+
* in the Expunging state, which require special handling.</p>
4973+
*
4974+
* <p>The following conditions are checked during the scan:
4975+
* <ul>
4976+
* <li>No pending {@code VmWork} job exists for the VM.</li>
4977+
* <li>The VM is associated with the given {@code hostId}, and the host is UP.</li>
4978+
* </ul>
4979+
* </p>
4980+
*
4981+
* <p>When a host is UP, a state report for the VMs will typically be received. However, certain scenarios
4982+
* (e.g., out-of-band changes or behavior specific to hypervisors like XenServer or KVM) might result in
4983+
* missing reports, preventing the state-sync logic from running. To address this, the method scans VMs
4984+
* based on their last update timestamp. If a VM remains stalled without a status update while its host is UP,
4985+
* it is assumed to be powered off, which is generally a safe assumption.</p>
4986+
*
4987+
* @param hostId the ID of the host to scan for stalled VMs in transition states.
4988+
*/
49674989
private void scanStalledVMInTransitionStateOnUpHost(final long hostId) {
49684990
if (!syncTransitioningVmPowerState) {
49694991
return;
49704992
}
4971-
// Check VM that is stuck in Starting, Stopping, Migrating states, we won't check
4972-
// VMs in expunging state (this need to be handled specially)
4973-
//
4974-
// checking condition
4975-
// 1) no pending VmWork job
4976-
// 2) on hostId host and host is UP
4977-
//
4978-
// When host is UP, sooner or later we will get a report from the host about the VM,
4979-
// however, if VM is missing from the host report (it may happen in out of band changes
4980-
// or from behaviour of XS/KVM by design), the VM may not get a chance to run the state-sync logic
4981-
//
4982-
// Therefore, we will scan those VMs on UP host based on last update timestamp, if the host is UP
4983-
// and a VM stalls for status update, we will consider them to be powered off
4984-
// (which is relatively safe to do so)
4985-
final long stallThresholdInMs = VmJobStateReportInterval.value() * 2;
4986-
final long cutTime = new Date(DateUtil.currentGMTTime().getTime() - stallThresholdInMs).getTime();
49874993
if (!_hostDao.isHostUp(hostId)) {
49884994
return;
49894995
}
4996+
final long stallThresholdInMs = VmJobStateReportInterval.value() * 2;
4997+
final long cutTime = new Date(DateUtil.currentGMTTime().getTime() - stallThresholdInMs).getTime();
49904998
final List<VMInstanceVO> hostTransitionVms = _vmDao.listByHostAndState(hostId, State.Starting, State.Stopping, State.Migrating);
4999+
49915000
final List<VMInstanceVO> mostLikelyStoppedVMs = listStalledVMInTransitionStateOnUpHost(hostTransitionVms, cutTime);
49925001
for (final VMInstanceVO vm : mostLikelyStoppedVMs) {
49935002
handlePowerOffReportWithNoPendingJobsOnVM(vm);
@@ -5003,6 +5012,7 @@ private void scanStalledVMInTransitionStateOnUpHost(final long hostId) {
50035012
}
50045013
}
50055014

5015+
50065016
private void scanStalledVMInTransitionStateOnDisconnectedHosts() {
50075017
final Date cutTime = new Date(DateUtil.currentGMTTime().getTime() - VmOpWaitInterval.value() * 1000);
50085018
final List<Long> stuckAndUncontrollableVMs = listStalledVMInTransitionStateOnDisconnectedHosts(cutTime);

engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -76,17 +76,18 @@ private void updateAndPublishVmPowerStates(long hostId, Map<Long, VirtualMachine
7676
Set<Long> vmIds = instancePowerStates.keySet();
7777
Map<Long, VirtualMachine.PowerState> notUpdated = _instanceDao.updatePowerState(instancePowerStates, hostId,
7878
updateTime);
79-
if (notUpdated.size() <= vmIds.size()) {
80-
for (Long vmId : vmIds) {
81-
if (!notUpdated.isEmpty() && !notUpdated.containsKey(vmId)) {
82-
logger.debug("VM state report is updated. host: {}, vm id: {}}, power state: {}}",
83-
hostId, vmId, instancePowerStates.get(vmId));
84-
_messageBus.publish(null, VirtualMachineManager.Topics.VM_POWER_STATE,
85-
PublishScope.GLOBAL, vmId);
86-
continue;
87-
}
88-
logger.trace("VM power state does not change, skip DB writing. vm id: {}", vmId);
79+
if (notUpdated.size() > vmIds.size()) {
80+
return;
81+
}
82+
for (Long vmId : vmIds) {
83+
if (!notUpdated.isEmpty() && !notUpdated.containsKey(vmId)) {
84+
logger.debug("VM state report is updated. host: {}, vm id: {}}, power state: {}}",
85+
hostId, vmId, instancePowerStates.get(vmId));
86+
_messageBus.publish(null, VirtualMachineManager.Topics.VM_POWER_STATE,
87+
PublishScope.GLOBAL, vmId);
88+
continue;
8989
}
90+
logger.trace("VM power state does not change, skip DB writing. vm id: {}", vmId);
9091
}
9192
}
9293

@@ -121,22 +122,21 @@ private void processMissingVmReport(long hostId, Set<Long> vmIds, boolean force)
121122
for (VMInstanceVO instance : vmsThatAreMissingReport) {
122123
Date vmStateUpdateTime = instance.getPowerStateUpdateTime();
123124
if (vmStateUpdateTime == null) {
124-
logger.warn("VM power state update time is null, falling back to update time for vm id: " + instance.getId());
125+
logger.warn("VM power state update time is null, falling back to update time for vm id: {}",
126+
instance.getId());
125127
vmStateUpdateTime = instance.getUpdateTime();
126128
if (vmStateUpdateTime == null) {
127-
logger.warn("VM update time is null, falling back to creation time for vm id: " + instance.getId());
129+
logger.warn("VM update time is null, falling back to creation time for vm id: {}",
130+
instance.getId());
128131
vmStateUpdateTime = instance.getCreated();
129132
}
130133
}
131-
if (logger.isDebugEnabled()) {
132-
logger.debug(
133-
String.format("Detected missing VM. host: %d, vm id: %d(%s), power state: %s, last state update: %s"
134-
, hostId
135-
, instance.getId()
136-
, instance.getUuid()
137-
, VirtualMachine.PowerState.PowerReportMissing
138-
, DateUtil.getOutputString(vmStateUpdateTime)));
139-
}
134+
logger.debug("Detected missing VM. host: {}, vm id: {}({}), power state: {}, last state update: {}",
135+
hostId,
136+
instance.getId(),
137+
instance.getUuid(),
138+
VirtualMachine.PowerState.PowerReportMissing,
139+
DateUtil.getOutputString(vmStateUpdateTime));
140140
long milliSecondsSinceLastStateUpdate = currentTime.getTime() - vmStateUpdateTime.getTime();
141141
if (force || (milliSecondsSinceLastStateUpdate > milliSecondsGracefulPeriod)) {
142142
logger.debug("vm id: {} - time since last state update({} ms) has passed graceful period",
@@ -156,9 +156,9 @@ private void processMissingVmReport(long hostId, Set<Long> vmIds, boolean force)
156156
private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> translatedInfo, boolean force) {
157157
if (logger.isDebugEnabled()) {
158158
logger.debug("Process VM state report. Host: {}, number of records in report: {}. VMs: [{}]",
159-
hostId,
160-
translatedInfo.size(),
161-
translatedInfo.entrySet().stream().map(entry -> entry.getKey() + ":" + entry.getValue())
159+
() -> hostId,
160+
translatedInfo::size,
161+
() -> translatedInfo.entrySet().stream().map(entry -> entry.getKey() + ":" + entry.getValue())
162162
.collect(Collectors.joining(", ")) + "]");
163163
}
164164
updateAndPublishVmPowerStates(hostId, translatedInfo, DateUtil.currentGMTTime());

engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,8 +1042,8 @@ public Map<Long, VirtualMachine.PowerState> updatePowerState(
10421042
pstmt.setLong(1, powerHostId);
10431043
pstmt.executeUpdate();
10441044
} catch (SQLException e) {
1045-
logger.error(String.format("Unable to execute update power states SQL from VMs %s due to: %s",
1046-
idList, e.getMessage()), e);
1045+
logger.error("Unable to execute update power states SQL from VMs {} due to: {}",
1046+
idList, e.getMessage(), e);
10471047
return instancePowerStates;
10481048
}
10491049
return notUpdated;

framework/db/src/main/java/com/cloud/utils/db/GenericDaoBase.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1220,7 +1220,7 @@ private Object getIdObject() {
12201220
Method m = _entityBeanType.getMethod("getId");
12211221
return m.invoke(entity);
12221222
} catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException ignored) {
1223-
logger.warn(String.format("Unable to get ID object for entity: %s", _entityBeanType.getSimpleName()));
1223+
logger.warn("Unable to get ID object for entity: {}", _entityBeanType.getSimpleName());
12241224
}
12251225
return null;
12261226
}

server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -419,14 +419,12 @@ public boolean checkIfHostHasCpuCapability(Host host, Integer cpuNum, Integer cp
419419
// Check host can support the Cpu Number and Speed.
420420
boolean isCpuNumGood = host.getCpus().intValue() >= cpuNum;
421421
boolean isCpuSpeedGood = host.getSpeed().intValue() >= cpuSpeed;
422-
if (isCpuNumGood && isCpuSpeedGood) {
423-
logger.debug("{} has cpu capability (cpu: {}, speed: {} ) to support requested CPU: {} and requested speed: {}",
424-
host, host.getCpus(), host.getSpeed(), cpuNum, cpuSpeed);
425-
return true;
426-
} else {logger.debug("{} doesn't have cpu capability (cpu: {}, speed: {} ) to support requested CPU: {} and requested speed: {}",
427-
host, host.getCpus(), host.getSpeed(), cpuNum, cpuSpeed);
428-
return false;
429-
}
422+
boolean hasCpuCapability = isCpuNumGood && isCpuSpeedGood;
423+
424+
logger.debug("{} {} cpu capability (cpu: {}, speed: {} ) to support requested CPU: {} and requested speed: {}",
425+
host, hasCpuCapability ? "has" : "doesn't have" ,host.getCpus(), host.getSpeed(), cpuNum, cpuSpeed);
426+
427+
return hasCpuCapability;
430428
}
431429

432430
@Override

server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ protected boolean isAdminVmDeployableInDisabledResources() {
743743
protected void avoidDisabledHosts(DataCenter dc, ExcludeList avoids) {
744744

745745
List<Long> disabledHostIds = _hostDao.listDisabledIdsByDataCenterId(dc.getId());
746-
logger.debug("Adding hosts %s of datacenter [%s] to the avoid set, because these hosts are in the Disabled state.",
746+
logger.debug("Adding hosts {} of datacenter [{}] to the avoid set, because these hosts are in the Disabled state.",
747747
StringUtils.join(disabledHostIds), dc.getUuid());
748748
disabledHostIds.forEach(avoids::addHost);
749749
}

server/src/main/java/com/cloud/hypervisor/kvm/discoverer/LibvirtServerDiscoverer.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import com.cloud.exception.DiscoveryException;
5656
import com.cloud.exception.OperationTimedoutException;
5757
import com.cloud.host.Host;
58+
import com.cloud.host.HostInfo;
5859
import com.cloud.host.HostVO;
5960
import com.cloud.host.Status;
6061
import com.cloud.host.dao.HostDao;
@@ -486,8 +487,8 @@ public HostVO createHostVOForConnectedAgent(HostVO host, StartupCommand[] cmd) {
486487

487488
HostVO existingHostInCluster = clusterExistingHostCache.get(clusterVO.getId());
488489
if (existingHostInCluster != null) {
489-
String hostOsInCluster = existingHostInCluster.getDetail("Host.OS");
490-
String hostOs = ssCmd.getHostDetails().get("Host.OS");
490+
String hostOsInCluster = existingHostInCluster.getDetail(HostInfo.HOST_OS);
491+
String hostOs = ssCmd.getHostDetails().get(HostInfo.HOST_OS);
491492
if (!isHostOsCompatibleWithOtherHost(hostOsInCluster, hostOs)) {
492493
String msg = String.format("host: %s with hostOS, \"%s\"into a cluster, in which there are \"%s\" hosts added", firstCmd.getPrivateIpAddress(), hostOs, hostOsInCluster);
493494
if (hostOs != null && hostOs.startsWith(hostOsInCluster)) {

utils/src/main/java/com/cloud/utils/nio/NioClient.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ protected void init() throws IOException {
8888
throw new IOException("Failed to initialise security", e);
8989
} catch (final IOException e) {
9090
closeChannel();
91-
logger.error(String.format("IOException while connecting to %s", hostLog), e);
91+
logger.error("IOException while connecting to {}", hostLog, e);
9292
throw e;
9393
}
9494
if (task != null) {

0 commit comments

Comments
 (0)