4040
4141import com .cloud .configuration .Config ;
4242import com .cloud .utils .NumbersUtil ;
43+ import com .cloud .utils .db .GlobalLock ;
4344import org .apache .cloudstack .agent .lb .IndirectAgentLB ;
4445import org .apache .cloudstack .ca .CAManager ;
4546import org .apache .cloudstack .engine .orchestration .service .NetworkOrchestrationService ;
@@ -799,49 +800,65 @@ public boolean stop() {
799800 return true ;
800801 }
801802
803+ protected Status getNextStatusOnDisconnection (Host host , final Status .Event event ) {
804+ final Status currentStatus = host .getStatus ();
805+ Status nextStatus ;
806+ if (currentStatus == Status .Down || currentStatus == Status .Alert || currentStatus == Status .Removed ) {
807+ if (s_logger .isDebugEnabled ()) {
808+ s_logger .debug (String .format ("Host %s is already %s" , host .getUuid (), currentStatus ));
809+ }
810+ nextStatus = currentStatus ;
811+ } else {
812+ try {
813+ nextStatus = currentStatus .getNextStatus (event );
814+ } catch (final NoTransitionException e ) {
815+ final String err = String .format ("Cannot find next status for %s as current status is %s for agent %s" , event , currentStatus , host .getUuid ());
816+ s_logger .debug (err );
817+ throw new CloudRuntimeException (err );
818+ }
819+
820+ if (s_logger .isDebugEnabled ()) {
821+ s_logger .debug (String .format ("The next status of agent %s is %s, current status is %s" , host .getUuid (), nextStatus , currentStatus ));
822+ }
823+ }
824+ return nextStatus ;
825+ }
826+
802827 protected boolean handleDisconnectWithoutInvestigation (final AgentAttache attache , final Status .Event event , final boolean transitState , final boolean removeAgent ) {
803828 final long hostId = attache .getId ();
804829
805- s_logger .info ("Host " + hostId + " is disconnecting with event " + event );
806- Status nextStatus = null ;
807- final HostVO host = _hostDao .findById (hostId );
808- if (host == null ) {
809- s_logger .warn ("Can't find host with " + hostId );
810- nextStatus = Status .Removed ;
811- } else {
812- final Status currentStatus = host .getStatus ();
813- if (currentStatus == Status .Down || currentStatus == Status .Alert || currentStatus == Status .Removed ) {
814- if (s_logger .isDebugEnabled ()) {
815- s_logger .debug ("Host " + hostId + " is already " + currentStatus );
816- }
817- nextStatus = currentStatus ;
818- } else {
819- try {
820- nextStatus = currentStatus .getNextStatus (event );
821- } catch (final NoTransitionException e ) {
822- final String err = "Cannot find next status for " + event + " as current status is " + currentStatus + " for agent " + hostId ;
823- s_logger .debug (err );
824- throw new CloudRuntimeException (err );
830+ boolean result = false ;
831+ GlobalLock joinLock = getHostJoinLock (hostId );
832+ if (joinLock .lock (60 )) {
833+ try {
834+ s_logger .info (String .format ("Host %d is disconnecting with event %s" , hostId , event ));
835+ Status nextStatus = null ;
836+ final HostVO host = _hostDao .findById (hostId );
837+ if (host == null ) {
838+ s_logger .warn (String .format ("Can't find host with %d" , hostId ));
839+ nextStatus = Status .Removed ;
840+ } else {
841+ nextStatus = getNextStatusOnDisconnection (host , event );
842+ caService .purgeHostCertificate (host );
825843 }
826844
827845 if (s_logger .isDebugEnabled ()) {
828- s_logger .debug ("The next status of agent " + hostId + "is " + nextStatus + ", current status is " + currentStatus );
846+ s_logger .debug (String . format ( "Deregistering link for %d with state %s" , hostId , nextStatus ) );
829847 }
830- }
831- caService .purgeHostCertificate (host );
832- }
833848
834- if (s_logger .isDebugEnabled ()) {
835- s_logger .debug ("Deregistering link for " + hostId + " with state " + nextStatus );
836- }
849+ removeAgent (attache , nextStatus );
837850
838- removeAgent (attache , nextStatus );
839- // update the DB
840- if (host != null && transitState ) {
841- disconnectAgent (host , event , _nodeId );
851+ if (host != null && transitState ) {
852+ // update the state for host in DB as per the event
853+ disconnectAgent (host , event , _nodeId );
854+ }
855+ } finally {
856+ joinLock .unlock ();
857+ }
858+ result = true ;
842859 }
843-
844- return true ;
860+ joinLock . releaseRef ();
861+ return result ;
845862 }
846863
847864 protected boolean handleDisconnectWithInvestigation (final AgentAttache attache , Status .Event event ) {
@@ -1102,26 +1119,23 @@ protected AgentAttache createAttacheForConnect(final HostVO host, final Link lin
11021119 return attache ;
11031120 }
11041121
1105- private AgentAttache handleConnectedAgent (final Link link , final StartupCommand [] startup , final Request request ) {
1106- AgentAttache attache = null ;
1107- ReadyCommand ready = null ;
1108- try {
1109- final List <String > agentMSHostList = new ArrayList <>();
1110- String lbAlgorithm = null ;
1111- if (startup != null && startup .length > 0 ) {
1112- final String agentMSHosts = startup [0 ].getMsHostList ();
1113- if (StringUtils .isNotEmpty (agentMSHosts )) {
1114- String [] msHosts = agentMSHosts .split ("@" );
1115- if (msHosts .length > 1 ) {
1116- lbAlgorithm = msHosts [1 ];
1117- }
1118- agentMSHostList .addAll (Arrays .asList (msHosts [0 ].split ("," )));
1122+ private AgentAttache sendReadyAndGetAttache (HostVO host , ReadyCommand ready , Link link , StartupCommand [] startup ) throws ConnectionException {
1123+ final List <String > agentMSHostList = new ArrayList <>();
1124+ String lbAlgorithm = null ;
1125+ if (startup != null && startup .length > 0 ) {
1126+ final String agentMSHosts = startup [0 ].getMsHostList ();
1127+ if (StringUtils .isNotEmpty (agentMSHosts )) {
1128+ String [] msHosts = agentMSHosts .split ("@" );
1129+ if (msHosts .length > 1 ) {
1130+ lbAlgorithm = msHosts [1 ];
11191131 }
1132+ agentMSHostList .addAll (Arrays .asList (msHosts [0 ].split ("," )));
11201133 }
1121-
1122- final HostVO host = _resourceMgr .createHostVOForConnectedAgent (startup );
1123- if (host != null ) {
1124- ready = new ReadyCommand (host .getDataCenterId (), host .getId (), NumbersUtil .enableHumanReadableSizes );
1134+ }
1135+ AgentAttache attache = null ;
1136+ GlobalLock joinLock = getHostJoinLock (host .getId ());
1137+ if (joinLock .lock (60 )) {
1138+ try {
11251139
11261140 if (!indirectAgentLB .compareManagementServerList (host .getId (), host .getDataCenterId (), agentMSHostList , lbAlgorithm )) {
11271141 final List <String > newMSList = indirectAgentLB .getManagementServerList (host .getId (), host .getDataCenterId (), null );
@@ -1133,6 +1147,24 @@ private AgentAttache handleConnectedAgent(final Link link, final StartupCommand[
11331147
11341148 attache = createAttacheForConnect (host , link );
11351149 attache = notifyMonitorsOfConnection (attache , startup , false );
1150+ } finally {
1151+ joinLock .unlock ();
1152+ }
1153+ } else {
1154+ throw new ConnectionException (true , "Unable to acquire lock on host " + host .getUuid ());
1155+ }
1156+ joinLock .releaseRef ();
1157+ return attache ;
1158+ }
1159+
1160+ private AgentAttache handleConnectedAgent (final Link link , final StartupCommand [] startup , final Request request ) {
1161+ AgentAttache attache = null ;
1162+ ReadyCommand ready = null ;
1163+ try {
1164+ final HostVO host = _resourceMgr .createHostVOForConnectedAgent (startup );
1165+ if (host != null ) {
1166+ ready = new ReadyCommand (host .getDataCenterId (), host .getId (), NumbersUtil .enableHumanReadableSizes );
1167+ attache = sendReadyAndGetAttache (host , ready , link , startup );
11361168 }
11371169 } catch (final Exception e ) {
11381170 s_logger .debug ("Failed to handle host connection: " , e );
@@ -1312,6 +1344,8 @@ protected void processRequest(final Link link, final Request request) {
13121344 connectAgent (link , cmds , request );
13131345 }
13141346 return ;
1347+ } else if (cmd instanceof StartupCommand ) {
1348+ connectAgent (link , cmds , request );
13151349 }
13161350
13171351 final long hostId = attache .getId ();
@@ -1366,7 +1400,10 @@ protected void processRequest(final Link link, final Request request) {
13661400 handleCommands (attache , request .getSequence (), new Command [] {cmd });
13671401 if (cmd instanceof PingCommand ) {
13681402 final long cmdHostId = ((PingCommand )cmd ).getHostId ();
1403+ boolean requestStartupCommand = false ;
13691404
1405+ final HostVO host = _hostDao .findById (Long .valueOf (cmdHostId ));
1406+ boolean gatewayAccessible = true ;
13701407 // if the router is sending a ping, verify the
13711408 // gateway was pingable
13721409 if (cmd instanceof PingRoutingCommand ) {
@@ -1391,7 +1428,10 @@ protected void processRequest(final Link link, final Request request) {
13911428 s_logger .debug ("Not processing " + PingRoutingCommand .class .getSimpleName () + " for agent id=" + cmdHostId + "; can't find the host in the DB" );
13921429 }
13931430 }
1394- answer = new PingAnswer ((PingCommand )cmd );
1431+ if (host != null && host .getStatus () != Status .Up && gatewayAccessible ) {
1432+ requestStartupCommand = true ;
1433+ }
1434+ answer = new PingAnswer ((PingCommand )cmd , requestStartupCommand );
13951435 } else if (cmd instanceof ReadyAnswer ) {
13961436 final HostVO host = _hostDao .findById (attache .getId ());
13971437 if (host == null ) {
@@ -1913,4 +1953,8 @@ public void propagateChangeToAgents(Map<String, String> params) {
19131953 sendCommandToAgents (hostsPerZone , params );
19141954 }
19151955 }
1956+
1957+ private GlobalLock getHostJoinLock (Long hostId ) {
1958+ return GlobalLock .getInternLock (String .format ("%s-%s" , "Host-Join" , hostId ));
1959+ }
19161960}
0 commit comments