4040
4141import com .cloud .configuration .Config ;
4242import com .cloud .utils .NumbersUtil ;
43+ import com .cloud .utils .db .GlobalLock ;
4344import org .apache .cloudstack .agent .lb .IndirectAgentLB ;
4445import org .apache .cloudstack .ca .CAManager ;
4546import org .apache .cloudstack .engine .orchestration .service .NetworkOrchestrationService ;
@@ -798,49 +799,65 @@ public boolean stop() {
798799 return true ;
799800 }
800801
802+ protected Status getNextStatusOnDisconnection (Host host , final Status .Event event ) {
803+ final Status currentStatus = host .getStatus ();
804+ Status nextStatus ;
805+ if (currentStatus == Status .Down || currentStatus == Status .Alert || currentStatus == Status .Removed ) {
806+ if (s_logger .isDebugEnabled ()) {
807+ s_logger .debug (String .format ("Host %s is already %s" , host .getUuid (), currentStatus ));
808+ }
809+ nextStatus = currentStatus ;
810+ } else {
811+ try {
812+ nextStatus = currentStatus .getNextStatus (event );
813+ } catch (final NoTransitionException e ) {
814+ final String err = String .format ("Cannot find next status for %s as current status is %s for agent %s" , event , currentStatus , host .getUuid ());
815+ s_logger .debug (err );
816+ throw new CloudRuntimeException (err );
817+ }
818+
819+ if (s_logger .isDebugEnabled ()) {
820+ s_logger .debug (String .format ("The next status of agent %s is %s, current status is %s" , host .getUuid (), nextStatus , currentStatus ));
821+ }
822+ }
823+ return nextStatus ;
824+ }
825+
801826 protected boolean handleDisconnectWithoutInvestigation (final AgentAttache attache , final Status .Event event , final boolean transitState , final boolean removeAgent ) {
802827 final long hostId = attache .getId ();
803828
804- s_logger .info ("Host " + hostId + " is disconnecting with event " + event );
805- Status nextStatus = null ;
806- final HostVO host = _hostDao .findById (hostId );
807- if (host == null ) {
808- s_logger .warn ("Can't find host with " + hostId );
809- nextStatus = Status .Removed ;
810- } else {
811- final Status currentStatus = host .getStatus ();
812- if (currentStatus == Status .Down || currentStatus == Status .Alert || currentStatus == Status .Removed ) {
813- if (s_logger .isDebugEnabled ()) {
814- s_logger .debug ("Host " + hostId + " is already " + currentStatus );
815- }
816- nextStatus = currentStatus ;
817- } else {
818- try {
819- nextStatus = currentStatus .getNextStatus (event );
820- } catch (final NoTransitionException e ) {
821- final String err = "Cannot find next status for " + event + " as current status is " + currentStatus + " for agent " + hostId ;
822- s_logger .debug (err );
823- throw new CloudRuntimeException (err );
829+ boolean result = false ;
830+ GlobalLock joinLock = getHostJoinLock (hostId );
831+ if (joinLock .lock (60 )) {
832+ try {
833+ s_logger .info (String .format ("Host %d is disconnecting with event %s" , hostId , event ));
834+ Status nextStatus = null ;
835+ final HostVO host = _hostDao .findById (hostId );
836+ if (host == null ) {
837+ s_logger .warn (String .format ("Can't find host with %d" , hostId ));
838+ nextStatus = Status .Removed ;
839+ } else {
840+ nextStatus = getNextStatusOnDisconnection (host , event );
841+ caService .purgeHostCertificate (host );
824842 }
825843
826844 if (s_logger .isDebugEnabled ()) {
827- s_logger .debug ("The next status of agent " + hostId + "is " + nextStatus + ", current status is " + currentStatus );
845+ s_logger .debug (String . format ( "Deregistering link for %d with state %s" , hostId , nextStatus ) );
828846 }
829- }
830- caService .purgeHostCertificate (host );
831- }
832847
833- if (s_logger .isDebugEnabled ()) {
834- s_logger .debug ("Deregistering link for " + hostId + " with state " + nextStatus );
835- }
848+ removeAgent (attache , nextStatus );
836849
837- removeAgent (attache , nextStatus );
838- // update the DB
839- if (host != null && transitState ) {
840- disconnectAgent (host , event , _nodeId );
850+ if (host != null && transitState ) {
851+ // update the state for host in DB as per the event
852+ disconnectAgent (host , event , _nodeId );
853+ }
854+ } finally {
855+ joinLock .unlock ();
856+ }
857+ result = true ;
841858 }
842-
843- return true ;
859+ joinLock . releaseRef ();
860+ return result ;
844861 }
845862
846863 protected boolean handleDisconnectWithInvestigation (final AgentAttache attache , Status .Event event ) {
@@ -1101,26 +1118,23 @@ protected AgentAttache createAttacheForConnect(final HostVO host, final Link lin
11011118 return attache ;
11021119 }
11031120
1104- private AgentAttache handleConnectedAgent (final Link link , final StartupCommand [] startup , final Request request ) {
1105- AgentAttache attache = null ;
1106- ReadyCommand ready = null ;
1107- try {
1108- final List <String > agentMSHostList = new ArrayList <>();
1109- String lbAlgorithm = null ;
1110- if (startup != null && startup .length > 0 ) {
1111- final String agentMSHosts = startup [0 ].getMsHostList ();
1112- if (StringUtils .isNotEmpty (agentMSHosts )) {
1113- String [] msHosts = agentMSHosts .split ("@" );
1114- if (msHosts .length > 1 ) {
1115- lbAlgorithm = msHosts [1 ];
1116- }
1117- agentMSHostList .addAll (Arrays .asList (msHosts [0 ].split ("," )));
1121+ private AgentAttache sendReadyAndGetAttache (HostVO host , ReadyCommand ready , Link link , StartupCommand [] startup ) throws ConnectionException {
1122+ final List <String > agentMSHostList = new ArrayList <>();
1123+ String lbAlgorithm = null ;
1124+ if (startup != null && startup .length > 0 ) {
1125+ final String agentMSHosts = startup [0 ].getMsHostList ();
1126+ if (StringUtils .isNotEmpty (agentMSHosts )) {
1127+ String [] msHosts = agentMSHosts .split ("@" );
1128+ if (msHosts .length > 1 ) {
1129+ lbAlgorithm = msHosts [1 ];
11181130 }
1131+ agentMSHostList .addAll (Arrays .asList (msHosts [0 ].split ("," )));
11191132 }
1120-
1121- final HostVO host = _resourceMgr .createHostVOForConnectedAgent (startup );
1122- if (host != null ) {
1123- ready = new ReadyCommand (host .getDataCenterId (), host .getId (), NumbersUtil .enableHumanReadableSizes );
1133+ }
1134+ AgentAttache attache = null ;
1135+ GlobalLock joinLock = getHostJoinLock (host .getId ());
1136+ if (joinLock .lock (60 )) {
1137+ try {
11241138
11251139 if (!indirectAgentLB .compareManagementServerList (host .getId (), host .getDataCenterId (), agentMSHostList , lbAlgorithm )) {
11261140 final List <String > newMSList = indirectAgentLB .getManagementServerList (host .getId (), host .getDataCenterId (), null );
@@ -1132,6 +1146,24 @@ private AgentAttache handleConnectedAgent(final Link link, final StartupCommand[
11321146
11331147 attache = createAttacheForConnect (host , link );
11341148 attache = notifyMonitorsOfConnection (attache , startup , false );
1149+ } finally {
1150+ joinLock .unlock ();
1151+ }
1152+ } else {
1153+ throw new ConnectionException (true , "Unable to acquire lock on host " + host .getUuid ());
1154+ }
1155+ joinLock .releaseRef ();
1156+ return attache ;
1157+ }
1158+
1159+ private AgentAttache handleConnectedAgent (final Link link , final StartupCommand [] startup , final Request request ) {
1160+ AgentAttache attache = null ;
1161+ ReadyCommand ready = null ;
1162+ try {
1163+ final HostVO host = _resourceMgr .createHostVOForConnectedAgent (startup );
1164+ if (host != null ) {
1165+ ready = new ReadyCommand (host .getDataCenterId (), host .getId (), NumbersUtil .enableHumanReadableSizes );
1166+ attache = sendReadyAndGetAttache (host , ready , link , startup );
11351167 }
11361168 } catch (final Exception e ) {
11371169 s_logger .debug ("Failed to handle host connection: " , e );
@@ -1265,6 +1297,8 @@ protected void processRequest(final Link link, final Request request) {
12651297 connectAgent (link , cmds , request );
12661298 }
12671299 return ;
1300+ } else if (cmd instanceof StartupCommand ) {
1301+ connectAgent (link , cmds , request );
12681302 }
12691303
12701304 final long hostId = attache .getId ();
@@ -1318,13 +1352,14 @@ protected void processRequest(final Link link, final Request request) {
13181352 handleCommands (attache , request .getSequence (), new Command [] {cmd });
13191353 if (cmd instanceof PingCommand ) {
13201354 final long cmdHostId = ((PingCommand )cmd ).getHostId ();
1355+ boolean requestStartupCommand = false ;
13211356
1357+ final HostVO host = _hostDao .findById (Long .valueOf (cmdHostId ));
1358+ boolean gatewayAccessible = true ;
13221359 // if the router is sending a ping, verify the
13231360 // gateway was pingable
13241361 if (cmd instanceof PingRoutingCommand ) {
1325- final boolean gatewayAccessible = ((PingRoutingCommand )cmd ).isGatewayAccessible ();
1326- final HostVO host = _hostDao .findById (Long .valueOf (cmdHostId ));
1327-
1362+ gatewayAccessible = ((PingRoutingCommand )cmd ).isGatewayAccessible ();
13281363 if (host != null ) {
13291364 if (!gatewayAccessible ) {
13301365 // alert that host lost connection to
@@ -1342,7 +1377,10 @@ protected void processRequest(final Link link, final Request request) {
13421377 s_logger .debug ("Not processing " + PingRoutingCommand .class .getSimpleName () + " for agent id=" + cmdHostId + "; can't find the host in the DB" );
13431378 }
13441379 }
1345- answer = new PingAnswer ((PingCommand )cmd );
1380+ if (host != null && host .getStatus () != Status .Up && gatewayAccessible ) {
1381+ requestStartupCommand = true ;
1382+ }
1383+ answer = new PingAnswer ((PingCommand )cmd , requestStartupCommand );
13461384 } else if (cmd instanceof ReadyAnswer ) {
13471385 final HostVO host = _hostDao .findById (attache .getId ());
13481386 if (host == null ) {
@@ -1864,4 +1902,8 @@ public void propagateChangeToAgents(Map<String, String> params) {
18641902 sendCommandToAgents (hostsPerZone , params );
18651903 }
18661904 }
1905+
1906+ private GlobalLock getHostJoinLock (Long hostId ) {
1907+ return GlobalLock .getInternLock (String .format ("%s-%s" , "Host-Join" , hostId ));
1908+ }
18671909}
0 commit comments