@@ -178,8 +178,10 @@ func (c *ApiConnectivityCheck) Start(ctx context.Context) error {
178178}
179179
180180// isConsideredHealthy keeps track of the number of errors reported, and when a certain number of errors occur within a certain
181- // time, ask peers if this node is healthy. Returns if the node is considered to be healthy or not.
181+ // time, ask peers if this node is healthy. Returns whether the node is considered healthy. It is usable
182+ // whether this is a control plane node or a worker node.
182183func (c * ApiConnectivityCheck ) isConsideredHealthy () bool {
184+
183185 isControlPlaneManagerNil := c .controlPlaneManager == nil
184186
185187 isWorkerNode := isControlPlaneManagerNil || ! c .controlPlaneManager .IsControlPlane ()
@@ -188,31 +190,52 @@ func (c *ApiConnectivityCheck) isConsideredHealthy() bool {
188190 "isControlPlaneManagerNil" , isControlPlaneManagerNil ,
189191 "isWorkerNode" , isWorkerNode )
190192
191- workerPeersResponse := c .getWorkerPeersResponse ( )
193+ workerPeersResponse := c .getPeersResponse ( peers . Worker )
192194
193195 if isWorkerNode {
194- c .config .Log .Info ("isConsideredHealthy: returning result from getWorkerPeersResponse" ,
195- "workerPeersResponse.IsHealthy" , workerPeersResponse .IsHealthy )
196- return workerPeersResponse .IsHealthy
197- } else {
198- canOtherControlPlanesBeReached := c .canOtherControlPlanesBeReached ()
199- isControlPlaneHealthy := c .controlPlaneManager .IsControlPlaneHealthy (workerPeersResponse , canOtherControlPlanesBeReached )
200- c .config .Log .Info ("isConsideredHealthy: returning result from IsControlPlaneHealthy" ,
201- "c.canOtherControlPlanesBeReached()" , canOtherControlPlanesBeReached ,
202- "c.controlPlaneManager.IsControlPlaneHealthy" , isControlPlaneHealthy )
203- return isControlPlaneHealthy
196+ if workerPeersResponse .IsHealthy {
197+ c .config .Log .Info ("isConsideredHealthy: I'm a worker node and my peers say I'm healthy" ,
198+ "workerPeersResponse.IsHealthy" , workerPeersResponse .IsHealthy )
199+ return true
200+ }
201+
202+ controlPlanePeersResponse := c .getPeersResponse (peers .ControlPlane )
203+
204+ c .config .Log .Info ("isConsideredHealthy: since peers think I'm unhealthy, double checking " +
205+ "by returning what the control plane nodes think of my state" ,
206+ "controlPlanePeersResponse.IsHealthy" , controlPlanePeersResponse .IsHealthy )
207+ return controlPlanePeersResponse .IsHealthy
208+
204209 }
205210
211+ controlPlanePeersResponse := c .getPeersResponse (peers .ControlPlane )
212+
213+ c .config .Log .Info ("isConsideredHealthy: control planes report my health status" ,
214+ "controlPlanePeersResponse.IsHealthy" , controlPlanePeersResponse .IsHealthy )
215+
216+ isControlPlaneHealthy := c .controlPlaneManager .IsControlPlaneHealthy (controlPlanePeersResponse ,
217+ c .canOtherControlPlanesBeReached ())
218+
219+ c .config .Log .Info ("isConsideredHealthy: we have checked the control plane peer responses and cross " +
220+ "checked it against the control plane diagnostics " ,
221+ "isControlPlaneHealthy" , controlPlanePeersResponse .IsHealthy )
222+
223+ return isControlPlaneHealthy
224+
206225}
207226
208- func (c * ApiConnectivityCheck ) getWorkerPeersResponse ( ) peers.Response {
227+ func (c * ApiConnectivityCheck ) getPeersResponse ( role peers. Role ) peers.Response {
209228 c .errorCount ++
210229 if c .errorCount < c .config .MaxErrorsThreshold {
211230 c .config .Log .Info ("Ignoring api-server error, error count below threshold" , "current count" , c .errorCount , "threshold" , c .config .MaxErrorsThreshold )
212231 return peers.Response {IsHealthy : true , Reason : peers .HealthyBecauseErrorsThresholdNotReached }
213232 }
233+ c .config .Log .Info ("Error count was above threshold, we will continue and attempt to get the addressess" +
234+ " for our peers, I consider myself a WORKER at the moment" )
214235
215- peersToAsk := c .config .Peers .GetPeersAddresses (peers .Worker )
236+ // MES: This gets called even if the current node is a control plane node. The peers
237+ // asked are the ones matching the role passed in by the caller (worker or control plane)
238+ peersToAsk := c .config .Peers .GetPeersAddresses (role )
216239
217240 c .config .Log .Info ("Error count exceeds threshold, trying to ask other peer nodes if I'm healthy" ,
218241 "minPeersRequired" , c .config .MinPeersForRemediation , "actualNumPeersFound" , len (peersToAsk ))
0 commit comments