1919import static org .apache .cloudstack .framework .config .ConfigKey .Scope .Zone ;
2020
2121import java .util .ArrayList ;
22+ import java .util .Arrays ;
2223import java .util .Date ;
2324import java .util .HashMap ;
2425import java .util .List ;
@@ -124,6 +125,9 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur
124125 protected static ConfigKey <Boolean > VmHaAlertsEnabled = new ConfigKey <>("Advanced" , Boolean .class , "vm.ha.alerts.enabled" , "true" ,
125126 "Enable/Disable alerts for the VM HA operations, it is enabled by default." , true , Zone );
126127
128+ protected static final List <ReasonType > CancellableWorkReasonTypes =
129+ Arrays .asList (ReasonType .HostMaintenance , ReasonType .HostDown , ReasonType .HostDegraded );
130+
127131 WorkerThread [] _workers ;
128132 boolean _stopped ;
129133 long _timeToSleep ;
@@ -252,7 +256,7 @@ public Status investigate(final long hostId) {
252256 }
253257
254258 @ Override
255- public void scheduleRestartForVmsOnHost (final HostVO host , boolean investigate ) {
259+ public void scheduleRestartForVmsOnHost (final HostVO host , boolean investigate , ReasonType reasonType ) {
256260 if (host .getType () != Host .Type .Routing ) {
257261 return ;
258262 }
@@ -319,12 +323,12 @@ public void scheduleRestartForVmsOnHost(final HostVO host, boolean investigate)
319323 s_logger .debug (String .format ("VM %s is not on down host %s it is on other host %d VM HA is done" , vm , host , hostId ));
320324 continue ;
321325 }
322- scheduleRestart (vm , investigate );
326+ scheduleRestart (vm , investigate , reasonType );
323327 }
324328 }
325329
326330 @ Override
327- public boolean scheduleStop (VMInstanceVO vm , long hostId , WorkType type ) {
331+ public boolean scheduleStop (VMInstanceVO vm , long hostId , WorkType type , ReasonType reasonType ) {
328332 assert (type == WorkType .CheckStop || type == WorkType .ForceStop || type == WorkType .Stop );
329333
330334 if (_haDao .hasBeenScheduled (vm .getId (), type )) {
@@ -341,7 +345,7 @@ public boolean scheduleStop(VMInstanceVO vm, long hostId, WorkType type) {
341345 return false ;
342346 }
343347
344- HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), type , Step .Scheduled , hostId , vm .getState (), 0 , vm .getUpdated ());
348+ HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), type , Step .Scheduled , hostId , vm .getState (), 0 , vm .getUpdated (), reasonType );
345349 _haDao .persist (work );
346350 if (s_logger .isDebugEnabled ()) {
347351 s_logger .debug ("Scheduled " + work );
@@ -350,14 +354,19 @@ public boolean scheduleStop(VMInstanceVO vm, long hostId, WorkType type) {
350354 return true ;
351355 }
352356
357+ @ Override
358+ public boolean scheduleStop (VMInstanceVO vm , long hostId , WorkType type ) {
359+ return scheduleStop (vm , hostId , type , null );
360+ }
361+
353362 protected void wakeupWorkers () {
354363 for (WorkerThread worker : _workers ) {
355364 worker .wakup ();
356365 }
357366 }
358367
359368 @ Override
360- public boolean scheduleMigration (final VMInstanceVO vm ) {
369+ public boolean scheduleMigration (final VMInstanceVO vm , ReasonType reasonType ) {
361370 if (vm .getHostId () == null ) {
362371 return false ;
363372 }
@@ -371,15 +380,20 @@ public boolean scheduleMigration(final VMInstanceVO vm) {
371380 return false ;
372381 }
373382
374- final HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), WorkType .Migration , Step .Scheduled , vm .getHostId (), vm .getState (), 0 , vm .getUpdated ());
383+ final HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), WorkType .Migration , Step .Scheduled , vm .getHostId (), vm .getState (), 0 , vm .getUpdated (), reasonType );
375384 _haDao .persist (work );
376385 s_logger .info (String .format ("Scheduled migration work of VM %s from host %s with HAWork %s" , vm , _hostDao .findById (vm .getHostId ()), work ));
377386 wakeupWorkers ();
378387 return true ;
379388 }
380389
381390 @ Override
382- public void scheduleRestart (VMInstanceVO vm , boolean investigate ) {
391+ public boolean scheduleMigration (final VMInstanceVO vm ) {
392+ return scheduleMigration (vm , null );
393+ }
394+
395+ @ Override
396+ public void scheduleRestart (VMInstanceVO vm , boolean investigate , ReasonType reasonType ) {
383397 if (!VmHaEnabled .valueIn (vm .getDataCenterId ())) {
384398 String message = String .format ("Unable to schedule restart for the VM %s (%d), VM high availability manager is disabled." , vm .getName (), vm .getId ());
385399 if (s_logger .isDebugEnabled ()) {
@@ -470,7 +484,7 @@ public void scheduleRestart(VMInstanceVO vm, boolean investigate) {
470484 }
471485
472486 HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), WorkType .HA , investigate ? Step .Investigating : Step .Scheduled ,
473- hostId != null ? hostId : 0L , vm .getState (), timesTried , vm .getUpdated ());
487+ hostId != null ? hostId : 0L , vm .getState (), timesTried , vm .getUpdated (), reasonType );
474488 _haDao .persist (work );
475489
476490 if (s_logger .isInfoEnabled ()) {
@@ -480,6 +494,11 @@ public void scheduleRestart(VMInstanceVO vm, boolean investigate) {
480494 wakeupWorkers ();
481495 }
482496
497+ @ Override
498+ public void scheduleRestart (VMInstanceVO vm , boolean investigate ) {
499+ scheduleRestart (vm , investigate , null );
500+ }
501+
483502 protected Long restart (final HaWorkVO work ) {
484503 List <HaWorkVO > items = _haDao .listFutureHaWorkForVm (work .getInstanceId (), work .getId ());
485504 if (items .size () > 0 ) {
@@ -510,6 +529,9 @@ protected Long restart(final HaWorkVO work) {
510529 s_logger .info ("Unable to find vm: " + vmId );
511530 return null ;
512531 }
532+ if (checkAndCancelWorkIfNeeded (work )) {
533+ return null ;
534+ }
513535
514536 s_logger .info ("HA on " + vm );
515537 if (vm .getState () != work .getPreviousState () || vm .getUpdated () != work .getUpdateTime ()) {
@@ -690,6 +712,24 @@ protected Long restart(final HaWorkVO work) {
690712 return (System .currentTimeMillis () >> 10 ) + _restartRetryInterval ;
691713 }
692714
715+ protected boolean checkAndCancelWorkIfNeeded (final HaWorkVO work ) {
716+ if (!Step .Investigating .equals (work .getStep ())) {
717+ return false ;
718+ }
719+ if (!CancellableWorkReasonTypes .contains (work .getReasonType ())) {
720+ return false ;
721+ }
722+ Status hostStatus = investigate (work .getHostId ());
723+ if (!Status .Up .equals (hostStatus )) {
724+ return false ;
725+ }
726+ if (s_logger .isDebugEnabled ()) {
727+ s_logger .debug (String .format ("Cancelling %s as it is not needed anymore" , work ));
728+ }
729+ work .setStep (Step .Cancelled );
730+ return true ;
731+ }
732+
693733 public Long migrate (final HaWorkVO work ) {
694734 long vmId = work .getInstanceId ();
695735 long srcHostId = work .getHostId ();
@@ -700,6 +740,9 @@ public Long migrate(final HaWorkVO work) {
700740 s_logger .info ("Unable to find vm: " + vmId + ", skipping migrate." );
701741 return null ;
702742 }
743+ if (checkAndCancelWorkIfNeeded (work )) {
744+ return null ;
745+ }
703746 s_logger .info (String .format ("Migration attempt: for VM %s from host %s. Starting attempt: %d/%d times." , vm , srcHost , 1 + work .getTimesTried (), _maxRetries ));
704747 try {
705748 work .setStep (Step .Migrating );
@@ -719,7 +762,7 @@ public Long migrate(final HaWorkVO work) {
719762 }
720763
721764 @ Override
722- public boolean scheduleDestroy (VMInstanceVO vm , long hostId ) {
765+ public boolean scheduleDestroy (VMInstanceVO vm , long hostId , ReasonType reasonType ) {
723766 if (!VmHaEnabled .valueIn (vm .getDataCenterId ())) {
724767 String message = String .format ("Unable to schedule destroy for the VM %s (%d) on host %d, VM high availability manager is disabled." , vm .getName (), vm .getId (), hostId );
725768 if (s_logger .isDebugEnabled ()) {
@@ -729,7 +772,7 @@ public boolean scheduleDestroy(VMInstanceVO vm, long hostId) {
729772 return false ;
730773 }
731774
732- final HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), WorkType .Destroy , Step .Scheduled , hostId , vm .getState (), 0 , vm .getUpdated ());
775+ final HaWorkVO work = new HaWorkVO (vm .getId (), vm .getType (), WorkType .Destroy , Step .Scheduled , hostId , vm .getState (), 0 , vm .getUpdated (), reasonType );
733776 _haDao .persist (work );
734777 if (s_logger .isDebugEnabled ()) {
735778 s_logger .debug ("Scheduled " + work .toString ());
@@ -766,6 +809,9 @@ protected Long destroyVM(final HaWorkVO work) {
766809 s_logger .info ("No longer can find VM " + work .getInstanceId () + ". Throwing away " + work );
767810 return null ;
768811 }
812+ if (checkAndCancelWorkIfNeeded (work )) {
813+ return null ;
814+ }
769815 boolean expunge = VirtualMachine .Type .SecondaryStorageVm .equals (vm .getType ())
770816 || VirtualMachine .Type .ConsoleProxy .equals (vm .getType ());
771817 if (!expunge && VirtualMachine .State .Destroyed .equals (work .getPreviousState ())) {
@@ -800,6 +846,9 @@ protected Long stopVM(final HaWorkVO work) throws ConcurrentOperationException {
800846 work .setStep (Step .Done );
801847 return null ;
802848 }
849+ if (checkAndCancelWorkIfNeeded (work )) {
850+ return null ;
851+ }
803852 s_logger .info ("Stopping " + vm );
804853 try {
805854 if (work .getWorkType () == WorkType .Stop ) {
@@ -987,6 +1036,8 @@ public boolean configure(final String name, final Map<String, Object> xmlParams)
9871036 public boolean start () {
9881037 _stopped = false ;
9891038
1039+ _haDao .markPendingWorksAsInvestigating ();
1040+
9901041 for (final WorkerThread thread : _workers ) {
9911042 thread .start ();
9921043 }
@@ -1004,6 +1055,8 @@ public boolean stop() {
10041055
10051056 _executor .shutdown ();
10061057
1058+ _haDao .markServerPendingWorksAsInvestigating (_msServer .getId ());
1059+
10071060 return true ;
10081061 }
10091062
0 commit comments