From 1a188995c015f6e76f50abadcf8553cbf672f258 Mon Sep 17 00:00:00 2001 From: Wei Zhou Date: Thu, 20 Mar 2025 10:06:11 +0100 Subject: [PATCH 1/3] HA: set correct hostId for HA work of vm migration and skip migration job if vm is running on a different host --- .../java/com/cloud/ha/HighAvailabilityManagerImpl.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java index 81ae44a97634..d05ef767fb58 100644 --- a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java +++ b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java @@ -342,7 +342,8 @@ protected void wakeupWorkers() { @Override public boolean scheduleMigration(final VMInstanceVO vm) { if (vm.getHostId() != null) { - final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.Migration, Step.Scheduled, vm.getHostId(), vm.getState(), 0, vm.getUpdated()); + Long hostId = VirtualMachine.State.Migrating.equals(vm.getState()) ? vm.getLastHostId() : vm.getHostId(); + final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.Migration, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated()); _haDao.persist(work); s_logger.info("Scheduled migration work of VM " + vm.getUuid() + " from host " + _hostDao.findById(vm.getHostId()) + " with HAWork " + work); wakeupWorkers(); @@ -716,6 +717,10 @@ public Long migrate(final HaWorkVO work) { s_logger.info("Unable to find vm: " + vmId + ", skipping migrate."); return null; } + if (VirtualMachine.State.Running.equals(vm.getState()) && srcHostId != vm.getHostId()) { + s_logger.info(String.format("VM %s is running on a different host %s, skipping migration", vm, vm.getHostId())); + return null; + } s_logger.info("Migration attempt: for VM " + vm.getUuid() + "from host id " + srcHostId + ". 
Starting attempt: " + (1 + work.getTimesTried()) + "/" + _maxRetries + " times."); try { From c6a682c492d3d1cbb78f426e322d6d9d208015cc Mon Sep 17 00:00:00 2001 From: Wei Zhou Date: Tue, 18 Mar 2025 09:11:25 +0100 Subject: [PATCH 2/3] HA: wait 10 seconds for all modules to be loaded. This fixes ``` 2025-03-04T07:58:13,306 WARN [c.c.h.HighAvailabilityManagerExtImpl] (HA-Worker-2:[ctx-3dc0c480, work-256]) (logid:431cd943) Encountered unhandled exception during HA process, reschedule work java.lang.NullPointerException: Cannot invoke "org.apache.cloudstack.framework.jobs.AsyncJob.getId()" because "job" is null ``` --- .../main/java/com/cloud/vm/VirtualMachineManagerImpl.java | 3 +-- engine/schema/src/main/java/com/cloud/vm/VMInstanceVO.java | 2 +- .../java/com/cloud/ha/HighAvailabilityManagerImpl.java | 7 +++++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java index 055c1a2d444d..f2e90decdea8 100755 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java @@ -5248,10 +5248,9 @@ public Outcome migrateVmAwayThroughJobQueue(final String vmUuid, workJob = newVmWorkJobAndInfo.first(); VmWorkMigrateAway workInfo = new VmWorkMigrateAway(newVmWorkJobAndInfo.second(), srcHostId); - workJob.setCmdInfo(VmWorkSerializer.serialize(workInfo)); + setCmdInfoAndSubmitAsyncJob(workJob, workInfo, vmId); } - _jobMgr.submitAsyncJob(workJob, VmWorkConstants.VM_WORK_QUEUE, vmId); AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(workJob.getId()); diff --git a/engine/schema/src/main/java/com/cloud/vm/VMInstanceVO.java b/engine/schema/src/main/java/com/cloud/vm/VMInstanceVO.java index f3560d68f495..320c9a30fb45 100644 --- a/engine/schema/src/main/java/com/cloud/vm/VMInstanceVO.java +++ 
b/engine/schema/src/main/java/com/cloud/vm/VMInstanceVO.java @@ -502,7 +502,7 @@ public void setRemoved(Date removed) { @Override public String toString() { - return String.format("VM instance %s", ReflectionToStringBuilderUtils.reflectOnlySelectedFields(this, "id", "instanceName", "uuid", "type")); + return String.format("VM instance %s", ReflectionToStringBuilderUtils.reflectOnlySelectedFields(this, "id", "instanceName", "uuid", "type", "state")); } @Override diff --git a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java index d05ef767fb58..4b1e4c977529 100644 --- a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java +++ b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java @@ -1027,6 +1027,13 @@ public WorkerThread(String name) { @Override public void run() { + try { + synchronized (this) { + wait(_timeToSleep); + } + } catch (final InterruptedException e) { + s_logger.info("Interrupted"); + } s_logger.info("Starting work"); while (!_stopped) { _managedContext.runWithContext(new Runnable() { From 9eb7f24f48aa3a10e936b19e87c9683c46bcaa81 Mon Sep 17 00:00:00 2001 From: Wei Zhou Date: Mon, 14 Apr 2025 17:57:05 +0200 Subject: [PATCH 3/3] server: skip migration if vm is Stopped --- .../main/java/com/cloud/ha/HighAvailabilityManagerImpl.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java index 4b1e4c977529..aa43e6b91610 100644 --- a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java +++ b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java @@ -717,6 +717,10 @@ public Long migrate(final HaWorkVO work) { s_logger.info("Unable to find vm: " + vmId + ", skipping migrate."); return null; } + if (VirtualMachine.State.Stopped.equals(vm.getState())) { + s_logger.info(String.format("vm %s is 
Stopped, skipping migrate.", vm)); + return null; + } if (VirtualMachine.State.Running.equals(vm.getState()) && srcHostId != vm.getHostId()) { s_logger.info(String.format("VM %s is running on a different host %s, skipping migration", vm, vm.getHostId())); return null;