Skip to content

Commit 3596563

Browse files
TomerNewmank8s-ci-robot
authored andcommitted
Removing labels when node not ready
Until now nodes kept their kmod labels when became not ready. Today we are deleting them when they are not ready due to potential race condition.
1 parent fc26e4a commit 3596563

File tree

5 files changed

+161
-15
lines changed

5 files changed

+161
-15
lines changed

internal/controllers/nmc_reconciler.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,12 @@ func (r *NMCReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
127127
return ctrl.Result{}, fmt.Errorf("could not get node %s: %v", nmcObj.Name, err)
128128
}
129129

130-
// skipping handling NMC spec, labelling, events until node becomes ready
130+
// skipping handling NMC spec, events until node becomes ready
131+
// removing label of loaded kmods
131132
if !r.nodeAPI.IsNodeSchedulable(&node) {
133+
if err := r.nodeAPI.RemoveNodeReadyLabels(ctx, &node); err != nil {
134+
return ctrl.Result{}, fmt.Errorf("could remove node %s labels: %v", node.Name, err)
135+
}
132136
return ctrl.Result{}, nil
133137
}
134138

internal/controllers/nmc_reconciler_test.go

Lines changed: 84 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,15 @@ import (
3737
)
3838

3939
const (
40-
nmcName = "nmc"
41-
nsFirst = "example-ns-1"
42-
nsSecond = "example-ns-2"
43-
nameFirst = "example-name-1"
44-
nameSecond = "example-name-2"
45-
imageFirst = "example-image-1"
46-
imageSecond = "example-image-2"
40+
nmcName = "nmc"
41+
nsFirst = "example-ns-1"
42+
nsSecond = "example-ns-2"
43+
nameFirst = "example-name-1"
44+
nameSecond = "example-name-2"
45+
imageFirst = "example-image-1"
46+
imageSecond = "example-image-2"
47+
kmodName = "kmm.node.kubernetes.io/test-ns.test-module.ready"
48+
labelNotToRemove = "example.node.kubernetes.io/label-not-to-be-removed"
4749
)
4850

4951
var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() {
@@ -126,11 +128,25 @@ var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() {
126128
Expect(err).To(HaveOccurred())
127129
})
128130

129-
It("should not continue if node is not schedulable", func() {
131+
It("should remove kmod labels and not continue if node is not schedulable", func() {
130132
nmc := &kmmv1beta1.NodeModulesConfig{
131133
ObjectMeta: metav1.ObjectMeta{Name: nmcName},
132134
}
133-
node := v1.Node{}
135+
node := v1.Node{
136+
ObjectMeta: metav1.ObjectMeta{
137+
Labels: map[string]string{
138+
kmodName: "",
139+
labelNotToRemove: "",
140+
},
141+
},
142+
}
143+
expectedNode := v1.Node{
144+
ObjectMeta: metav1.ObjectMeta{
145+
Labels: map[string]string{
146+
labelNotToRemove: "",
147+
},
148+
},
149+
}
134150
gomock.InOrder(
135151
kubeClient.
136152
EXPECT().
@@ -139,12 +155,70 @@ var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() {
139155
*kubeNmc.(*kmmv1beta1.NodeModulesConfig) = *nmc
140156
}),
141157
wh.EXPECT().SyncStatus(ctx, nmc),
142-
kubeClient.EXPECT().Get(ctx, types.NamespacedName{Name: nmc.Name}, &node).Return(nil),
158+
kubeClient.EXPECT().Get(ctx, types.NamespacedName{Name: nmc.Name}, &v1.Node{}).DoAndReturn(
159+
func(_ context.Context, _ types.NamespacedName, fetchedNode *v1.Node, _ ...ctrlclient.Options) error {
160+
*fetchedNode = node
161+
return nil
162+
},
163+
),
143164
nm.EXPECT().IsNodeSchedulable(&node).Return(false),
165+
nm.EXPECT().RemoveNodeReadyLabels(ctx, &node).DoAndReturn(
166+
func(_ context.Context, obj ctrlclient.Object) error {
167+
delete(node.ObjectMeta.Labels, kmodName)
168+
return nil
169+
},
170+
),
144171
)
145172

146173
_, err := r.Reconcile(ctx, req)
147174
Expect(err).To(BeNil())
175+
Expect(node).To(Equal(expectedNode))
176+
})
177+
178+
It("should fail to remove kmod labels and not continue if node is not schedulable", func() {
179+
nmc := &kmmv1beta1.NodeModulesConfig{
180+
ObjectMeta: metav1.ObjectMeta{Name: nmcName},
181+
}
182+
node := v1.Node{
183+
ObjectMeta: metav1.ObjectMeta{
184+
Labels: map[string]string{
185+
kmodName: "",
186+
labelNotToRemove: "",
187+
},
188+
},
189+
}
190+
expectedNode := v1.Node{
191+
ObjectMeta: metav1.ObjectMeta{
192+
Labels: map[string]string{
193+
labelNotToRemove: "",
194+
},
195+
},
196+
}
197+
gomock.InOrder(
198+
kubeClient.
199+
EXPECT().
200+
Get(ctx, nmcNsn, &kmmv1beta1.NodeModulesConfig{}).
201+
Do(func(_ context.Context, _ types.NamespacedName, kubeNmc ctrlclient.Object, _ ...ctrlclient.Options) {
202+
*kubeNmc.(*kmmv1beta1.NodeModulesConfig) = *nmc
203+
}),
204+
wh.EXPECT().SyncStatus(ctx, nmc),
205+
kubeClient.EXPECT().Get(ctx, types.NamespacedName{Name: nmc.Name}, &v1.Node{}).DoAndReturn(
206+
func(_ context.Context, _ types.NamespacedName, fetchedNode *v1.Node, _ ...ctrlclient.Options) error {
207+
*fetchedNode = node
208+
return nil
209+
},
210+
),
211+
nm.EXPECT().IsNodeSchedulable(&node).Return(false),
212+
nm.EXPECT().RemoveNodeReadyLabels(ctx, &node).DoAndReturn(
213+
func(_ context.Context, obj ctrlclient.Object) error {
214+
return fmt.Errorf("some error")
215+
},
216+
),
217+
)
218+
219+
_, err := r.Reconcile(ctx, req)
220+
Expect(err).To(HaveOccurred())
221+
Expect(node).ToNot(Equal(expectedNode))
148222
})
149223

150224
It("should process spec entries and orphan statuses", func() {

internal/node/mock_node.go

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/node/node.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"github.com/kubernetes-sigs/kernel-module-management/internal/meta"
7+
"github.com/kubernetes-sigs/kernel-module-management/internal/utils"
78
v1 "k8s.io/api/core/v1"
89
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
910
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -18,6 +19,7 @@ type Node interface {
1819
GetNumTargetedNodes(ctx context.Context, selector map[string]string) (int, error)
1920
UpdateLabels(ctx context.Context, node *v1.Node, toBeAdded, toBeRemoved []string) error
2021
NodeBecomeReadyAfter(node *v1.Node, checkTime metav1.Time) bool
22+
RemoveNodeReadyLabels(ctx context.Context, node *v1.Node) error
2123
}
2224

2325
type node struct {
@@ -78,6 +80,20 @@ func (n *node) UpdateLabels(ctx context.Context, node *v1.Node, toBeAdded, toBeR
7880
return nil
7981
}
8082

83+
func (n *node) RemoveNodeReadyLabels(ctx context.Context, node *v1.Node) error {
84+
var labelsToRemove []string
85+
for label := range node.GetLabels() {
86+
if ok, _, _ := utils.IsKernelModuleReadyNodeLabel(label); ok ||
87+
utils.IsDeprecatedKernelModuleReadyNodeLabel(label) {
88+
labelsToRemove = append(labelsToRemove, label)
89+
}
90+
}
91+
if err := n.UpdateLabels(ctx, node, []string{}, labelsToRemove); err != nil {
92+
return fmt.Errorf("could update node %s labels: %v", node.Name, err)
93+
}
94+
return nil
95+
}
96+
8197
func (n *node) NodeBecomeReadyAfter(node *v1.Node, timestamp metav1.Time) bool {
8298
conds := node.Status.Conditions
8399
for i := 0; i < len(conds); i++ {

internal/node/node_test.go

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"context"
55
"fmt"
66
"github.com/kubernetes-sigs/kernel-module-management/internal/client"
7-
"github.com/kubernetes-sigs/kernel-module-management/internal/utils"
87
. "github.com/onsi/ginkgo/v2"
98
. "github.com/onsi/gomega"
109
"go.uber.org/mock/gomock"
@@ -121,9 +120,10 @@ var _ = Describe("GetNodesListBySelector", func() {
121120
})
122121
})
123122

124-
var (
125-
loadedKernelModuleReadyNodeLabel = utils.GetKernelModuleReadyNodeLabel("loaded-ns", "loaded-n")
126-
unloadedKernelModuleReadyNodeLabel = utils.GetKernelModuleReadyNodeLabel("unloaded-ns", "unloaded-n")
123+
const (
124+
loadedKernelModuleReadyNodeLabel = "kmm.node.kubernetes.io/loaded-ns.loaded-n.ready"
125+
unloadedKernelModuleReadyNodeLabel = "kmm.node.kubernetes.io/unloaded-ns.unloaded-n.ready"
126+
notKernelModuleReadyNodeLabel = "example.node.kubernetes.io/label-not-to-be-removed"
127127
)
128128

129129
var _ = Describe("UpdateLabels", func() {
@@ -279,6 +279,44 @@ var _ = Describe("NodeBecomeReadyAfter", func() {
279279
})
280280
})
281281

282+
var _ = Describe("RemoveNodeReadyLabels", func() {
283+
var (
284+
ctrl *gomock.Controller
285+
n Node
286+
node *v1.Node
287+
ctx context.Context
288+
clnt *client.MockClient
289+
)
290+
291+
BeforeEach(func() {
292+
ctrl = gomock.NewController(GinkgoT())
293+
clnt = client.NewMockClient(ctrl)
294+
ctx = context.TODO()
295+
n = NewNode(clnt)
296+
node = &v1.Node{
297+
ObjectMeta: metav1.ObjectMeta{
298+
Labels: map[string]string{
299+
loadedKernelModuleReadyNodeLabel: "",
300+
notKernelModuleReadyNodeLabel: "",
301+
},
302+
},
303+
}
304+
})
305+
306+
It("Should remove all kmod labels", func() {
307+
clnt.EXPECT().Patch(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil)
308+
err := n.RemoveNodeReadyLabels(ctx, node)
309+
Expect(err).To(BeNil())
310+
Expect(node.Labels).ToNot(HaveKey(loadedKernelModuleReadyNodeLabel))
311+
Expect(node.Labels).To(HaveKey(notKernelModuleReadyNodeLabel))
312+
})
313+
It("Should fail", func() {
314+
clnt.EXPECT().Patch(gomock.Any(), gomock.Any(), gomock.Any()).Return(fmt.Errorf("some error"))
315+
err := n.RemoveNodeReadyLabels(ctx, node)
316+
Expect(err).ToNot(BeNil())
317+
})
318+
})
319+
282320
var _ = Describe("addLabels", func() {
283321
var node v1.Node
284322

0 commit comments

Comments
 (0)