4343import org .apache .cloudstack .managed .context .ManagedContext ;
4444import org .apache .cloudstack .managed .context .ManagedContextRunnable ;
4545import org .apache .cloudstack .management .ManagementServerHost ;
46+ import org .apache .logging .log4j .ThreadContext ;
4647
4748import com .cloud .agent .AgentManager ;
4849import com .cloud .alert .AlertManager ;
9091import com .cloud .vm .VirtualMachineManager ;
9192import com .cloud .vm .VirtualMachineProfile ;
9293import com .cloud .vm .dao .VMInstanceDao ;
93- import org .apache .logging .log4j .ThreadContext ;
9494
9595/**
9696 * HighAvailabilityManagerImpl coordinates the HA process. VMs are registered with the HA Manager for HA. The request is stored
@@ -133,6 +133,9 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur
133133 protected static ConfigKey <Boolean > VmHaAlertsEnabled = new ConfigKey <>("Advanced" , Boolean .class , "vm.ha.alerts.enabled" , "true" ,
134134 "Enable/Disable alerts for the VM HA operations, it is enabled by default." , true , Zone );
135135
/**
 * Reason types that checkAndCancelWorkIfNeeded treats as cancellable: HA work created for one of
 * these reasons is cancelled while still in the Investigating step if the host is found to be Up.
 * NOTE(review): lists built with List.of reject null lookups (contains(null) throws
 * NullPointerException), so callers must null-check the reason type before querying this list.
 */
136+ protected static final List <ReasonType > CancellableWorkReasonTypes =
137+ List .of (ReasonType .HostMaintenance , ReasonType .HostDown , ReasonType .HostDegraded );
138+
136139 WorkerThread [] _workers ;
137140 boolean _stopped ;
138141 long _timeToSleep ;
@@ -269,8 +272,7 @@ public Status investigate(final long hostId) {
269272 }
270273
271274 @ Override
272- public void scheduleRestartForVmsOnHost (final HostVO host , boolean investigate ) {
273-
275+ public void scheduleRestartForVmsOnHost (final HostVO host , boolean investigate , ReasonType reasonType ) {
274276 if (host .getType () != Host .Type .Routing ) {
275277 return ;
276278 }
@@ -337,12 +339,12 @@ public void scheduleRestartForVmsOnHost(final HostVO host, boolean investigate)
337339 logger .debug ("VM {} is not on down host {} it is on other host {} VM HA is done" , vm , host , hostId );
338340 continue ;
339341 }
340- scheduleRestart (vm , investigate );
342+ scheduleRestart (vm , investigate , reasonType );
341343 }
342344 }
343345
344346 @ Override
345- public boolean scheduleStop (VMInstanceVO vm , long hostId , WorkType type ) {
347+ public boolean scheduleStop (VMInstanceVO vm , long hostId , WorkType type , ReasonType reasonType ) {
346348 assert (type == WorkType .CheckStop || type == WorkType .ForceStop || type == WorkType .Stop );
347349
348350 if (_haDao .hasBeenScheduled (vm .getId (), type )) {
@@ -359,7 +361,7 @@ public boolean scheduleStop(VMInstanceVO vm, long hostId, WorkType type) {
359361 return false ;
360362 }
361363
362- HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), type , Step .Scheduled , hostId , vm .getState (), 0 , vm .getUpdated ());
364+ HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), type , Step .Scheduled , hostId , vm .getState (), 0 , vm .getUpdated (), reasonType );
363365 _haDao .persist (work );
364366 if (logger .isDebugEnabled ()) {
365367 logger .debug ("Scheduled " + work );
@@ -368,6 +370,11 @@ public boolean scheduleStop(VMInstanceVO vm, long hostId, WorkType type) {
368370 return true ;
369371 }
370372
373+ @ Override
374+ public boolean scheduleStop (VMInstanceVO vm , long hostId , WorkType type ) {
375+ return scheduleStop (vm , hostId , type , null );
376+ }
377+
371378 protected void wakeupWorkers () {
372379 logger .debug ("Wakeup workers HA" );
373380 for (WorkerThread worker : _workers ) {
@@ -376,7 +383,7 @@ protected void wakeupWorkers() {
376383 }
377384
378385 @ Override
379- public boolean scheduleMigration (final VMInstanceVO vm ) {
386+ public boolean scheduleMigration (final VMInstanceVO vm , ReasonType reasonType ) {
380387 if (vm .getHostId () == null ) {
381388 return false ;
382389 }
@@ -390,15 +397,20 @@ public boolean scheduleMigration(final VMInstanceVO vm) {
390397 return false ;
391398 }
392399
393- final HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), WorkType .Migration , Step .Scheduled , vm .getHostId (), vm .getState (), 0 , vm .getUpdated ());
400+ final HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), WorkType .Migration , Step .Scheduled , vm .getHostId (), vm .getState (), 0 , vm .getUpdated (), reasonType );
394401 _haDao .persist (work );
395402 logger .info ("Scheduled migration work of VM {} from host {} with HAWork {}" , vm , _hostDao .findById (vm .getHostId ()), work );
396403 wakeupWorkers ();
397404 return true ;
398405 }
399406
400407 @ Override
401- public void scheduleRestart (VMInstanceVO vm , boolean investigate ) {
408+ public boolean scheduleMigration (final VMInstanceVO vm ) {
409+ return scheduleMigration (vm , null );
410+ }
411+
412+ @ Override
413+ public void scheduleRestart (VMInstanceVO vm , boolean investigate , ReasonType reasonType ) {
402414 if (!VmHaEnabled .valueIn (vm .getDataCenterId ())) {
403415 String message = String .format ("Unable to schedule restart for the VM %s (%d), VM high availability manager is disabled." , vm .getName (), vm .getId ());
404416 if (logger .isDebugEnabled ()) {
@@ -490,7 +502,7 @@ public void scheduleRestart(VMInstanceVO vm, boolean investigate) {
490502 }
491503
492504 HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), WorkType .HA , investigate ? Step .Investigating : Step .Scheduled ,
493- hostId != null ? hostId : 0L , vm .getState (), timesTried , vm .getUpdated ());
505+ hostId != null ? hostId : 0L , vm .getState (), timesTried , vm .getUpdated (), reasonType );
494506 _haDao .persist (work );
495507
496508 if (logger .isInfoEnabled ()) {
@@ -500,6 +512,11 @@ public void scheduleRestart(VMInstanceVO vm, boolean investigate) {
500512 wakeupWorkers ();
501513 }
502514
515+ @ Override
516+ public void scheduleRestart (VMInstanceVO vm , boolean investigate ) {
517+ scheduleRestart (vm , investigate , null );
518+ }
519+
503520 private void startVm (VirtualMachine vm , Map <VirtualMachineProfile .Param , Object > params ,
504521 DeploymentPlanner planner ) throws InsufficientCapacityException , ResourceUnavailableException ,
505522 ConcurrentOperationException , OperationTimedoutException {
@@ -561,6 +578,9 @@ protected Long restart(final HaWorkVO work) {
561578 logger .info ("Unable to find vm: " + vmId );
562579 return null ;
563580 }
581+ if (checkAndCancelWorkIfNeeded (work , vm )) {
582+ return null ;
583+ }
564584
565585 logger .info ("HA on " + vm );
566586 if (vm .getState () != work .getPreviousState () || vm .getUpdated () != work .getUpdateTime ()) {
@@ -762,6 +782,23 @@ protected Long restart(final HaWorkVO work) {
762782 return (System .currentTimeMillis () >> 10 ) + _restartRetryInterval ;
763783 }
764784
785+ protected boolean checkAndCancelWorkIfNeeded (final HaWorkVO work , final VirtualMachine vm ) {
786+ if (!Step .Investigating .equals (work .getStep ())) {
787+ return false ;
788+ }
789+ if (!CancellableWorkReasonTypes .contains (work .getReasonType ())) {
790+ return false ;
791+ }
792+
793+ Status hostStatus = investigate (work .getHostId ());
794+ if (!Status .Up .equals (hostStatus )) {
795+ return false ;
796+ }
797+ logger .debug ("Cancelling {} as it is not needed anymore" , () -> work );
798+ work .setStep (Step .Cancelled );
799+ return true ;
800+ }
801+
765802 public Long migrate (final HaWorkVO work ) {
766803 long vmId = work .getInstanceId ();
767804 long srcHostId = work .getHostId ();
@@ -772,6 +809,9 @@ public Long migrate(final HaWorkVO work) {
772809 logger .info ("Unable to find vm: " + vmId + ", skipping migrate." );
773810 return null ;
774811 }
812+ if (checkAndCancelWorkIfNeeded (work , vm )) {
813+ return null ;
814+ }
775815 logger .info ("Migration attempt: for VM {}from host {}. Starting attempt: {}/{} times." , vm , srcHost , 1 + work .getTimesTried (), _maxRetries );
776816 try {
777817 work .setStep (Step .Migrating );
@@ -791,7 +831,7 @@ public Long migrate(final HaWorkVO work) {
791831 }
792832
793833 @ Override
794- public boolean scheduleDestroy (VMInstanceVO vm , long hostId ) {
834+ public boolean scheduleDestroy (VMInstanceVO vm , long hostId , ReasonType reasonType ) {
795835 if (!VmHaEnabled .valueIn (vm .getDataCenterId ())) {
796836 String message = String .format ("Unable to schedule destroy for the VM %s (%d) on host %d, VM high availability manager is disabled." , vm .getName (), vm .getId (), hostId );
797837 if (logger .isDebugEnabled ()) {
@@ -801,7 +841,7 @@ public boolean scheduleDestroy(VMInstanceVO vm, long hostId) {
801841 return false ;
802842 }
803843
804- final HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), WorkType .Destroy , Step .Scheduled , hostId , vm .getState (), 0 , vm .getUpdated ());
844+ final HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), WorkType .Destroy , Step .Scheduled , hostId , vm .getState (), 0 , vm .getUpdated (), reasonType );
805845 _haDao .persist (work );
806846 if (logger .isDebugEnabled ()) {
807847 logger .debug ("Scheduled " + work .toString ());
@@ -838,6 +878,9 @@ protected Long destroyVM(final HaWorkVO work) {
838878 logger .info ("No longer can find VM " + work .getInstanceId () + ". Throwing away " + work );
839879 return null ;
840880 }
881+ if (checkAndCancelWorkIfNeeded (work , vm )) {
882+ return null ;
883+ }
841884 boolean expunge = VirtualMachine .Type .SecondaryStorageVm .equals (vm .getType ())
842885 || VirtualMachine .Type .ConsoleProxy .equals (vm .getType ());
843886 if (!expunge && VirtualMachine .State .Destroyed .equals (work .getPreviousState ())) {
@@ -872,6 +915,9 @@ protected Long stopVM(final HaWorkVO work) throws ConcurrentOperationException {
872915 work .setStep (Step .Done );
873916 return null ;
874917 }
918+ if (checkAndCancelWorkIfNeeded (work , vm )) {
919+ return null ;
920+ }
875921 logger .info ("Stopping " + vm );
876922 try {
877923 if (work .getWorkType () == WorkType .Stop ) {
@@ -1057,6 +1103,8 @@ public boolean configure(final String name, final Map<String, Object> xmlParams)
10571103 public boolean start () {
10581104 _stopped = false ;
10591105
1106+ _haDao .markPendingWorksAsInvestigating ();
1107+
10601108 for (final WorkerThread thread : _workers ) {
10611109 thread .start ();
10621110 }
0 commit comments