@@ -168,8 +168,10 @@ func (c *ApiConnectivityCheck) Start(ctx context.Context) error {
168168}
169169
170170// isConsideredHealthy keeps track of the number of errors reported, and when a certain amount of error occur within a certain
171- // time, ask peers if this node is healthy. Returns if the node is considered to be healthy or not.
171+ // time, ask peers if this node is healthy. Returns whether the node is considered healthy. It can be used
172+ // on both control plane nodes and worker nodes.
172173func (c * ApiConnectivityCheck ) isConsideredHealthy () bool {
174+
173175 isControlPlaneManagerNil := c .controlPlaneManager == nil
174176
175177 isWorkerNode := isControlPlaneManagerNil || ! c .controlPlaneManager .IsControlPlane ()
@@ -178,31 +180,52 @@ func (c *ApiConnectivityCheck) isConsideredHealthy() bool {
178180 "isControlPlaneManagerNil" , isControlPlaneManagerNil ,
179181 "isWorkerNode" , isWorkerNode )
180182
181- workerPeersResponse := c .getWorkerPeersResponse ( )
183+ workerPeersResponse := c .getPeersResponse ( peers . Worker )
182184
183185 if isWorkerNode {
184- c .config .Log .Info ("isConsideredHealthy: returning result from getWorkerPeersResponse" ,
185- "workerPeersResponse.IsHealthy" , workerPeersResponse .IsHealthy )
186- return workerPeersResponse .IsHealthy
187- } else {
188- canOtherControlPlanesBeReached := c .canOtherControlPlanesBeReached ()
189- isControlPlaneHealthy := c .controlPlaneManager .IsControlPlaneHealthy (workerPeersResponse , canOtherControlPlanesBeReached )
190- c .config .Log .Info ("isConsideredHealthy: returning result from IsControlPlaneHealthy" ,
191- "c.canOtherControlPlanesBeReached()" , canOtherControlPlanesBeReached ,
192- "c.controlPlaneManager.IsControlPlaneHealthy" , isControlPlaneHealthy )
193- return isControlPlaneHealthy
186+ if workerPeersResponse .IsHealthy {
187+ c .config .Log .Info ("isConsideredHealthy: I'm a worker node and my peers say I'm healthy" ,
188+ "workerPeersResponse.IsHealthy" , workerPeersResponse .IsHealthy )
189+ return true
190+ }
191+
192+ controlPlanePeersResponse := c .getPeersResponse (peers .ControlPlane )
193+
194+ c .config .Log .Info ("isConsideredHealthy: since peers think I'm unhealthy, double checking " +
195+ "by returning what the control plane nodes think of my state" ,
196+ "controlPlanePeersResponse.IsHealthy" , controlPlanePeersResponse .IsHealthy )
197+ return controlPlanePeersResponse .IsHealthy
198+
194199 }
195200
201+ controlPlanePeersResponse := c .getPeersResponse (peers .ControlPlane )
202+
203+ c .config .Log .Info ("isConsideredHealthy: control planes report my health status" ,
204+ "controlPlanePeersResponse.IsHealthy" , controlPlanePeersResponse .IsHealthy )
205+
206+ isControlPlaneHealthy := c .controlPlaneManager .IsControlPlaneHealthy (controlPlanePeersResponse ,
207+ c .canOtherControlPlanesBeReached ())
208+
209+ c .config .Log .Info ("isConsideredHealthy: we have checkd the control plane peer responses and cross " +
210+ "checked it against the control plane diagnostics " ,
211+ "isControlPlaneHealthy" , controlPlanePeersResponse .IsHealthy )
212+
213+ return isControlPlaneHealthy
214+
196215}
197216
198- func (c * ApiConnectivityCheck ) getWorkerPeersResponse ( ) peers.Response {
217+ func (c * ApiConnectivityCheck ) getPeersResponse ( role peers. Role ) peers.Response {
199218 c .errorCount ++
200219 if c .errorCount < c .config .MaxErrorsThreshold {
201220 c .config .Log .Info ("Ignoring api-server error, error count below threshold" , "current count" , c .errorCount , "threshold" , c .config .MaxErrorsThreshold )
202221 return peers.Response {IsHealthy : true , Reason : peers .HealthyBecauseErrorsThresholdNotReached }
203222 }
223+ c .config .Log .Info ("Error count was above threshold, we will continue and attempt to get the addressess" +
224+ " for our peers, I consider myself a WORKER at the moment" )
204225
205- peersToAsk := c .config .Peers .GetPeersAddresses (peers .Worker )
226+ // MES: This gets called even if the current node is a control plane node. Hopefully
227+ // in an actual environment it is returning actual worker peers
228+ peersToAsk := c .config .Peers .GetPeersAddresses (role )
206229
207230 c .config .Log .Info ("Error count exceeds threshold, trying to ask other peer nodes if I'm healthy" ,
208231 "minPeersRequired" , c .config .MinPeersForRemediation , "actualNumPeersFound" , len (peersToAsk ))
0 commit comments