Skip to content

Commit 7f657cc

Browse files
(chore): cluster shows not ready when node unschedulable
Signed-off-by: abhijith-darshan <abhijith.ravindra@sap.com>

(chore): set ready false when all nodes are not ready
Signed-off-by: abhijith-darshan <abhijith.ravindra@sap.com>

(chore): e2e test node not ready
Signed-off-by: abhijith-darshan <abhijith.ravindra@sap.com>
1 parent 511db1e commit 7f657cc

File tree

4 files changed

+98
-10
lines changed

4 files changed

+98
-10
lines changed

cmd/greenhouse/controllers.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ var knownControllers = map[string]func(controllerName string, mgr ctrl.Manager)
4242
"clusterPluginDefinition": (&plugindefinitioncontroller.ClusterPluginDefinitionReconciler{}).SetupWithManager,
4343

4444
// Cluster controllers
45-
"bootStrap": (&clustercontrollers.BootstrapReconciler{}).SetupWithManager,
46-
"clusterReconciler": startClusterReconciler,
47-
"kubeconfig": (&clustercontrollers.KubeconfigReconciler{}).SetupWithManager,
45+
"bootstrap": (&clustercontrollers.BootstrapReconciler{}).SetupWithManager,
46+
"cluster": startClusterReconciler,
47+
"kubeconfig": (&clustercontrollers.KubeconfigReconciler{}).SetupWithManager,
4848
}
4949

5050
// knownControllers lists the name of known controllers.

e2e/cluster/e2e_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,4 +251,36 @@ var _ = Describe("Cluster E2E", Ordered, func() {
251251
shared.ClusterIsReady(ctx, adminClient, remoteOIDCClusterCName, env.TestNamespace)
252252
})
253253
})
254+
255+
Context("Cluster Node Not Ready / Ready 😵 🤖", Ordered, func() {
256+
It("should onboard remote cluster", func() {
257+
By("onboarding remote cluster")
258+
shared.OnboardRemoteCluster(ctx, adminClient, env.RemoteKubeConfigBytes, remoteClusterFName, env.TestNamespace, team.Name)
259+
})
260+
It("should have a cluster resource created", func() {
261+
By("verifying if the cluster resource is created")
262+
Eventually(func(g Gomega) bool {
263+
err := adminClient.Get(ctx, client.ObjectKey{Name: remoteClusterFName, Namespace: env.TestNamespace}, &greenhousev1alpha1.Cluster{})
264+
g.Expect(err).ToNot(HaveOccurred())
265+
return true
266+
}).Should(BeTrue(), "cluster resource should be created")
267+
268+
By("verifying the cluster status is ready")
269+
shared.ClusterIsReady(ctx, adminClient, remoteClusterFName, env.TestNamespace)
270+
})
271+
It("should reach not ready state when all nodes are not ready", func() {
272+
By("cordoning all nodes in the remote cluster")
273+
expect.CordonRemoteNodes(ctx, remoteClient)
274+
275+
By("verifying the cluster status is not ready")
276+
expect.ReconcileReadyNotReady(ctx, adminClient, remoteClusterFName, env.TestNamespace, false)
277+
})
278+
It("should reach ready state when nodes are ready again", func() {
279+
By("un-cordoning all nodes in the remote cluster")
280+
expect.UnCordonRemoteNodes(ctx, remoteClient)
281+
282+
By("verifying the cluster status is ready again")
283+
expect.ReconcileReadyNotReady(ctx, adminClient, remoteClusterFName, env.TestNamespace, true)
284+
})
285+
})
254286
})

e2e/cluster/expect/expect.go

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ package expect
66
import (
77
"context"
88
"fmt"
9-
"strconv"
109
"time"
1110

1211
rbacv1 "k8s.io/api/rbac/v1"
@@ -123,12 +122,12 @@ func RevokingRemoteClusterAccess(ctx context.Context, adminClient, remoteClient
123122
func ReconcileReadyNotReady(ctx context.Context, adminClient client.Client, clusterName, namespace string, readyStatus bool) {
124123
err := shared.WaitUntilResourceReadyOrNotReady(ctx, adminClient, &greenhousev1alpha1.Cluster{}, clusterName, namespace, func(resource lifecycle.RuntimeObject) error {
125124
By("triggering a reconcile of the cluster resource")
126-
resourceLabels := resource.GetLabels()
125+
resourceLabels := resource.GetAnnotations()
127126
if resourceLabels == nil {
128127
resourceLabels = make(map[string]string)
129128
}
130-
resourceLabels["greenhouse.sap/last-apply"] = strconv.FormatInt(time.Now().Unix(), 10)
131-
resource.SetLabels(resourceLabels)
129+
resourceLabels[lifecycle.ReconcileAnnotation] = metav1.Now().Format(time.DateTime)
130+
resource.SetAnnotations(resourceLabels)
132131
return adminClient.Update(ctx, resource)
133132
}, readyStatus)
134133
Expect(err).NotTo(HaveOccurred(), "cluster should be in desired status")
@@ -139,3 +138,39 @@ func GetRestConfig(restClientGetter *clientutil.RestClientGetter) *rest.Config {
139138
Expect(err).NotTo(HaveOccurred(), "there should be no error creating the remote REST config")
140139
return restConfig
141140
}
141+
142+
func CordonRemoteNodes(ctx context.Context, remoteClient client.Client) {
143+
nodes := &corev1.NodeList{}
144+
err := remoteClient.List(ctx, nodes)
145+
Expect(err).NotTo(HaveOccurred(), "there should be no error while listing nodes")
146+
for _, node := range nodes.Items {
147+
labels := node.GetLabels()
148+
_, ok := labels["node-role.kubernetes.io/control-plane"]
149+
if ok {
150+
By("skipping cordoning control plane node: " + node.Name)
151+
continue
152+
}
153+
if node.Spec.Unschedulable {
154+
continue
155+
}
156+
base := node.DeepCopy()
157+
node.Spec.Unschedulable = true
158+
err = remoteClient.Patch(ctx, &node, client.MergeFrom(base))
159+
Expect(err).NotTo(HaveOccurred(), "there should be no error while patching node")
160+
}
161+
}
162+
163+
func UnCordonRemoteNodes(ctx context.Context, remoteClient client.Client) {
164+
nodes := &corev1.NodeList{}
165+
err := remoteClient.List(ctx, nodes)
166+
Expect(err).NotTo(HaveOccurred(), "there should be no error while listing nodes")
167+
for _, node := range nodes.Items {
168+
if !node.Spec.Unschedulable {
169+
continue
170+
}
171+
base := node.DeepCopy()
172+
node.Spec.Unschedulable = false
173+
err = remoteClient.Patch(ctx, &node, client.MergeFrom(base))
174+
Expect(err).NotTo(HaveOccurred(), "there should be no error while patching node")
175+
}
176+
}

internal/controller/cluster/cluster_status.go

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ func (r *RemoteClusterReconciler) setConditions() lifecycle.Conditioner {
5151
kubeConfigValidCondition, k8sVersion = r.reconcileKubeConfigValid(restClientGetter)
5252
}
5353

54-
var allNodesReadyCondition, clusterAccessibleCondition, resourcesDeployedCondition greenhousemetav1alpha1.Condition
54+
var allNodesReadyCondition, clusterAccessibleCondition, resourcesDeployedCondition, readyCondition greenhousemetav1alpha1.Condition
5555
var nodes *greenhousev1alpha1.Nodes
5656
// Can only reconcile detailed status if kubeconfig is valid.
5757
if kubeConfigValidCondition.IsFalse() {
@@ -64,7 +64,11 @@ func (r *RemoteClusterReconciler) setConditions() lifecycle.Conditioner {
6464
resourcesDeployedCondition = r.reconcileBootstrapResources(ctx, restClientGetter, clusterSecret)
6565
}
6666

67-
readyCondition := r.reconcileReadyStatus(kubeConfigValidCondition, resourcesDeployedCondition)
67+
if allNodesReadyCondition.IsFalse() {
68+
readyCondition = greenhousemetav1alpha1.FalseCondition(greenhousemetav1alpha1.ReadyCondition, "", allNodesReadyCondition.Message)
69+
} else {
70+
readyCondition = r.reconcileReadyStatus(kubeConfigValidCondition, resourcesDeployedCondition)
71+
}
6872

6973
ownerLabelCondition := util.ComputeOwnerLabelCondition(ctx, r.Client, cluster)
7074
util.UpdateOwnedByLabelMissingMetric(cluster, ownerLabelCondition.IsFalse())
@@ -223,11 +227,14 @@ func (r *RemoteClusterReconciler) reconcileNodeStatus(
223227
return greenhousemetav1alpha1.FalseCondition(greenhousev1alpha1.AllNodesReady, "", err.Error()), nil
224228
}
225229

226-
notReadyNodes := []greenhousev1alpha1.NodeStatus{}
230+
var notReadyNodes []greenhousev1alpha1.NodeStatus
227231
var totalNodes, readyNodes int32
228232
allNodesReadyCondition = greenhousemetav1alpha1.TrueCondition(greenhousev1alpha1.AllNodesReady, "", "")
229233

230234
for _, node := range nodeList.Items {
235+
if checkIfControlPlaneNode(node.Labels) {
236+
continue
237+
}
231238
totalNodes++
232239

233240
nodeStatus := getNodeStatusIfNodeNotReady(node)
@@ -251,6 +258,9 @@ func (r *RemoteClusterReconciler) reconcileNodeStatus(
251258

252259
// getNodeStatusIfNodeNotReady returns a NodeStatus when the NodeReady condition is explicitly False; otherwise nil.
253260
func getNodeStatusIfNodeNotReady(node corev1.Node) *greenhousev1alpha1.NodeStatus {
261+
if node.Spec.Unschedulable {
262+
return &greenhousev1alpha1.NodeStatus{Name: node.Name, Message: "Node is unschedulable", LastTransitionTime: metav1.Now()}
263+
}
254264
var readyCondition *corev1.NodeCondition
255265
for i := range node.Status.Conditions {
256266
if node.Status.Conditions[i].Type == corev1.NodeReady {
@@ -265,3 +275,14 @@ func getNodeStatusIfNodeNotReady(node corev1.Node) *greenhousev1alpha1.NodeStatu
265275

266276
return &greenhousev1alpha1.NodeStatus{Name: node.Name, Message: readyCondition.Message, LastTransitionTime: readyCondition.LastTransitionTime}
267277
}
278+
279+
func checkIfControlPlaneNode(labels map[string]string) (control bool) {
280+
control = false
281+
for key, _ := range labels {
282+
if key == "node-role.kubernetes.io/control-plane" {
283+
control = true
284+
break
285+
}
286+
}
287+
return
288+
}

0 commit comments

Comments
 (0)