Skip to content

Commit 04875f1

Browse files
authored
Improve logs for VM migrations (#12332)
1 parent e25cf43 commit 04875f1

File tree

4 files changed

+35
-9
lines changed

4 files changed

+35
-9
lines changed

engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3053,7 +3053,7 @@ private void orchestrateMigrate(final String vmUuid, final long srcHostId, final
30533053
}
30543054

30553055
protected void migrate(final VMInstanceVO vm, final long srcHostId, final DeployDestination dest) throws ResourceUnavailableException, ConcurrentOperationException {
3056-
logger.info("Migrating {} to {}", vm, dest);
3056+
logger.info("Start preparing migration of the VM: {} to {}", vm, dest);
30573057
final long dstHostId = dest.getHost().getId();
30583058
final Host fromHost = _hostDao.findById(srcHostId);
30593059
if (fromHost == null) {
@@ -3118,9 +3118,11 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
31183118
if (pfma == null || !pfma.getResult()) {
31193119
final String details = pfma != null ? pfma.getDetails() : "null answer returned";
31203120
final String msg = "Unable to prepare for migration due to " + details;
3121+
logger.error("Failed to prepare destination host {} for migration of VM {} : {}", dstHostId, vm.getInstanceName(), details);
31213122
pfma = null;
31223123
throw new AgentUnavailableException(msg, dstHostId);
31233124
}
3125+
logger.debug("Successfully prepared destination host {} for migration of VM {} ", dstHostId, vm.getInstanceName());
31243126
} catch (final OperationTimedoutException e1) {
31253127
throw new AgentUnavailableException("Operation timed out", dstHostId);
31263128
} finally {
@@ -3141,18 +3143,23 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
31413143
volumeMgr.release(vm.getId(), dstHostId);
31423144
}
31433145

3144-
logger.info("Migration cancelled because state has changed: {}", vm);
3145-
throw new ConcurrentOperationException("Migration cancelled because state has changed: " + vm);
3146+
String msg = "Migration cancelled because state has changed: " + vm;
3147+
logger.warn(msg);
3148+
throw new ConcurrentOperationException(msg);
31463149
}
31473150
} catch (final NoTransitionException e1) {
31483151
_networkMgr.rollbackNicForMigration(vmSrc, profile);
31493152
volumeMgr.release(vm.getId(), dstHostId);
3150-
logger.info("Migration cancelled because {}", e1.getMessage());
3153+
String msg = String.format("Migration cancelled for VM %s due to state transition failure: %s",
3154+
vm.getInstanceName(), e1.getMessage());
3155+
logger.warn(msg, e1);
31513156
throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage());
31523157
} catch (final CloudRuntimeException e2) {
31533158
_networkMgr.rollbackNicForMigration(vmSrc, profile);
31543159
volumeMgr.release(vm.getId(), dstHostId);
3155-
logger.info("Migration cancelled because {}", e2.getMessage());
3160+
String msg = String.format("Migration cancelled for VM %s due to runtime exception: %s",
3161+
vm.getInstanceName(), e2.getMessage());
3162+
logger.error(msg, e2);
31563163
work.setStep(Step.Done);
31573164
_workDao.update(work.getId(), work);
31583165
try {
@@ -3172,8 +3179,12 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
31723179
final Answer ma = _agentMgr.send(vm.getLastHostId(), mc);
31733180
if (ma == null || !ma.getResult()) {
31743181
final String details = ma != null ? ma.getDetails() : "null answer returned";
3182+
String msg = String.format("Migration command failed for VM %s on source host id=%s to destination host %s: %s",
3183+
vm.getInstanceName(), vm.getLastHostId(), dstHostId, details);
3184+
logger.error(msg);
31753185
throw new CloudRuntimeException(details);
31763186
}
3187+
logger.info("Migration command successful for VM {}", vm.getInstanceName());
31773188
} catch (final OperationTimedoutException e) {
31783189
boolean success = false;
31793190
if (HypervisorType.KVM.equals(vm.getHypervisorType())) {
@@ -3210,7 +3221,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
32103221

32113222
try {
32123223
if (!checkVmOnHost(vm, dstHostId)) {
3213-
logger.error("Unable to complete migration for {}", vm);
3224+
logger.error("Migration verification failed for VM {} : VM not found on destination host {} ", vm.getInstanceName(), dstHostId);
32143225
try {
32153226
_agentMgr.send(srcHostId, new Commands(cleanup(vm, dpdkInterfaceMapping)), null);
32163227
} catch (final AgentUnavailableException e) {
@@ -3225,7 +3236,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
32253236
migrated = true;
32263237
} finally {
32273238
if (!migrated) {
3228-
logger.info("Migration was unsuccessful. Cleaning up: {}", vm);
3239+
logger.info("Migration was unsuccessful. Cleaning up: {}", vm);
32293240
_networkMgr.rollbackNicForMigration(vmSrc, profile);
32303241
volumeMgr.release(vm.getId(), dstHostId);
32313242
// deallocate GPU devices for the VM on the destination host
@@ -3237,7 +3248,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
32373248
try {
32383249
_agentMgr.send(dstHostId, new Commands(cleanup(vm, dpdkInterfaceMapping)), null);
32393250
} catch (final AgentUnavailableException ae) {
3240-
logger.warn("Looks like the destination Host is unavailable for cleanup", ae);
3251+
logger.warn("Destination host {} unavailable for cleanup after failed migration of VM {}", dstHostId, vm.getInstanceName(), ae);
32413252
}
32423253
_networkMgr.setHypervisorHostname(profile, dest, false);
32433254
try {
@@ -3246,6 +3257,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
32463257
logger.warn(e.getMessage());
32473258
}
32483259
} else {
3260+
logger.info("Migration completed successfully for VM %s" + vm);
32493261
_networkMgr.commitNicForMigration(vmSrc, profile);
32503262
volumeMgr.release(vm.getId(), srcHostId);
32513263
// deallocate GPU devices for the VM on the src host after migration is complete
@@ -3276,6 +3288,7 @@ protected MigrateCommand buildMigrateCommand(VMInstanceVO vmInstance, VirtualMac
32763288
migrateCommand.setVlanToPersistenceMap(vlanToPersistenceMap);
32773289
}
32783290

3291+
logger.debug("Setting auto convergence to: {}", StorageManager.KvmAutoConvergence.value());
32793292
migrateCommand.setAutoConvergence(StorageManager.KvmAutoConvergence.value());
32803293
migrateCommand.setHostGuid(destination.getHost().getGuid());
32813294

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,17 +278,20 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0.
278278

279279
// abort the vm migration if the job is executed more than vm.migrate.wait
280280
final int migrateWait = libvirtComputingResource.getMigrateWait();
281+
logger.info("vm.migrate.wait value set to: {}for VM: {}", migrateWait, vmName);
281282
if (migrateWait > 0 && sleeptime > migrateWait * 1000) {
282283
DomainState state = null;
283284
try {
284285
state = dm.getInfo().state;
286+
logger.info("VM domain state when trying to abort migration : {}", state);
285287
} catch (final LibvirtException e) {
286288
logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage());
287289
}
288290
if (state != null && state == DomainState.VIR_DOMAIN_RUNNING) {
289291
try {
290292
DomainJobInfo job = dm.getJobInfo();
291-
logger.info(String.format("Aborting migration of VM [%s] with domain job [%s] due to time out after %d seconds.", vmName, job, migrateWait));
293+
logger.warn("Aborting migration of VM {} with domain job [{}] due to timeout after {} seconds. " +
294+
"Job stats: data processed={} bytes, data remaining={} bytes", vmName, job, migrateWait, job.getDataProcessed(), job.getDataRemaining());
292295
dm.abortJob();
293296
result = String.format("Migration of VM [%s] was cancelled by CloudStack due to time out after %d seconds.", vmName, migrateWait);
294297
commandState = Command.State.FAILED;
@@ -303,10 +306,12 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0.
303306

304307
// pause vm if we meet the vm.migrate.pauseafter threshold and not already paused
305308
final int migratePauseAfter = libvirtComputingResource.getMigratePauseAfter();
309+
logger.info("vm.migrate.pauseafter value set to: {} for VM: {}", migratePauseAfter, vmName);
306310
if (migratePauseAfter > 0 && sleeptime > migratePauseAfter) {
307311
DomainState state = null;
308312
try {
309313
state = dm.getInfo().state;
314+
logger.info("VM domain state when trying to pause VM for migration: {}", state);
310315
} catch (final LibvirtException e) {
311316
logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage());
312317
}
@@ -381,6 +386,7 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0.
381386
}
382387

383388
if (result == null) {
389+
logger.info("Post-migration cleanup for VM {}: ", vmName);
384390
libvirtComputingResource.destroyNetworkRulesForVM(conn, vmName);
385391
for (final InterfaceDef iface : ifaces) {
386392
String vlanId = libvirtComputingResource.getVlanIdFromBridgeName(iface.getBrName());
@@ -394,6 +400,7 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0.
394400
commandState = Command.State.COMPLETED;
395401
libvirtComputingResource.createOrUpdateLogFileForCommand(command, commandState);
396402
} else if (commandState == null) {
403+
logger.error("Migration of VM {} failed with result: {}", vmName, result);
397404
commandState = Command.State.FAILED;
398405
libvirtComputingResource.createOrUpdateLogFileForCommand(command, commandState);
399406
}

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtPrepareForMigrationCommandWrapper.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ public Answer execute(final PrepareForMigrationCommand command, final LibvirtCom
5656
final VirtualMachineTO vm = command.getVirtualMachine();
5757

5858
if (command.isRollback()) {
59+
logger.info("Handling rollback for PrepareForMigration of VM {}", vm.getName());
5960
return handleRollback(command, libvirtComputingResource);
6061
}
6162

@@ -83,6 +84,7 @@ public Answer execute(final PrepareForMigrationCommand command, final LibvirtCom
8384
if (interfaceDef != null && interfaceDef.getNetType() == GuestNetType.VHOSTUSER) {
8485
DpdkTO to = new DpdkTO(interfaceDef.getDpdkOvsPath(), interfaceDef.getDpdkSourcePort(), interfaceDef.getInterfaceMode());
8586
dpdkInterfaceMapping.put(nic.getMac(), to);
87+
logger.debug("Configured DPDK interface for VM {}", vm.getName());
8688
}
8789
}
8890

@@ -122,6 +124,7 @@ public Answer execute(final PrepareForMigrationCommand command, final LibvirtCom
122124
return new PrepareForMigrationAnswer(command, "failed to connect physical disks to host");
123125
}
124126

127+
logger.info("Successfully prepared destination host for migration of VM {}", vm.getName());
125128
return createPrepareForMigrationAnswer(command, dpdkInterfaceMapping, libvirtComputingResource, vm);
126129
} catch (final LibvirtException | CloudRuntimeException | InternalErrorException | URISyntaxException e) {
127130
if (MapUtils.isNotEmpty(dpdkInterfaceMapping)) {
@@ -157,6 +160,7 @@ private Answer handleRollback(PrepareForMigrationCommand command, LibvirtComputi
157160
KVMStoragePoolManager storagePoolMgr = libvirtComputingResource.getStoragePoolMgr();
158161
VirtualMachineTO vmTO = command.getVirtualMachine();
159162

163+
logger.info("Rolling back PrepareForMigration for VM {}: disconnecting physical disks", vmTO.getName());
160164
if (!storagePoolMgr.disconnectPhysicalDisksViaVmSpec(vmTO)) {
161165
return new PrepareForMigrationAnswer(command, "failed to disconnect physical disks from host");
162166
}

server/src/main/java/com/cloud/vm/UserVmManagerImpl.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7189,6 +7189,7 @@ public VirtualMachine migrateVirtualMachine(Long vmId, Host destinationHost) thr
71897189
throw new CloudRuntimeException("Unable to find suitable destination to migrate VM " + vm.getInstanceName());
71907190
}
71917191

7192+
logger.info("Starting migration of VM {} from host {} to host {} ", vm.getInstanceName(), srcHostId, dest.getHost().getId());
71927193
collectVmDiskAndNetworkStatistics(vmId, State.Running);
71937194
_itMgr.migrate(vm.getUuid(), srcHostId, dest);
71947195
return findMigratedVm(vm.getId(), vm.getType());
@@ -7260,6 +7261,7 @@ protected void validateStrictHostTagCheck(VMInstanceVO vm, HostVO host) {
72607261

72617262
private DeployDestination checkVmMigrationDestination(VMInstanceVO vm, Host srcHost, Host destinationHost) throws VirtualMachineMigrationException {
72627263
if (destinationHost == null) {
7264+
logger.error("Destination host is null for migration of VM: {}", vm.getInstanceName());
72637265
return null;
72647266
}
72657267
if (destinationHost.getId() == srcHost.getId()) {

0 commit comments

Comments
 (0)