Skip to content

Commit 3511d7c

Browse files
committed
feat(taint-remover): add node taint remover
Add the core logic for removing the ProvisionerNotReadyNodeTaint from the node. The copy of the node held in the runtime config might be outdated or inconsistent, so the node is fetched directly from the Kubernetes API. If the taint removal fails, we back off and retry.
1 parent f07cbfd commit 3511d7c

File tree

2 files changed

+265
-0
lines changed

2 files changed

+265
-0
lines changed

pkg/node-taint/node_taint.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package nodetaint
18+
19+
import (
20+
"context"
21+
"math/rand"
22+
"time"
23+
24+
corev1 "k8s.io/api/core/v1"
25+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
26+
"k8s.io/klog/v2"
27+
"sigs.k8s.io/sig-storage-local-static-provisioner/pkg/common"
28+
"sigs.k8s.io/sig-storage-local-static-provisioner/pkg/util"
29+
)
30+
31+
// Retry tuning used by RemoveTaintWithBackoff.
const (
	// removeTaintRetryPeriod bounds (exclusively, in whole seconds) the random
	// delay that is added to the wait after each failed removal attempt.
	removeTaintRetryPeriod = 5 * time.Second
	// maxRemoveTaintRetries is the number of failed removal attempts after
	// which RemoveTaintWithBackoff gives up.
	maxRemoveTaintRetries = 3
)
35+
36+
// Remover is responsible for removing the node taint that indidcates the provisioner is not ready yet.
37+
type Remover struct {
38+
RuntimeConfig *common.RuntimeConfig
39+
taintRemoved bool
40+
}
41+
42+
// NewRemover creates an instances of RemoveNodeNotReadyTaint.
43+
func NewRemover(runtimeConfig *common.RuntimeConfig) *Remover {
44+
return &Remover{
45+
RuntimeConfig: runtimeConfig,
46+
taintRemoved: false,
47+
}
48+
}
49+
50+
// RemoveNodeTaint searches for the provisionerNotReadyNodeTaintKey and removes it from the node.
51+
// it only removes the taint once.
52+
func (n *Remover) RemoveNodeTaint() error {
53+
userConfig := n.RuntimeConfig.UserConfig
54+
if !userConfig.RemoveNodeNotReadyTaint || n.taintRemoved {
55+
return nil
56+
}
57+
58+
client := n.RuntimeConfig.Client.CoreV1()
59+
node := util.GetNode(client, n.RuntimeConfig.Node.Name)
60+
61+
var taintExists bool
62+
currTaints := []corev1.Taint{}
63+
for _, taint := range node.Spec.Taints {
64+
if taint.Key == userConfig.ProvisionerNotReadyNodeTaintKey {
65+
taintExists = true
66+
} else {
67+
currTaints = append(currTaints, taint)
68+
}
69+
}
70+
71+
if !taintExists {
72+
klog.Infof("ProvisionerNotReadyNodeTaintKey %s was not found on node %s", userConfig.ProvisionerNotReadyNodeTaintKey, node.Name)
73+
return nil
74+
}
75+
76+
node.Spec.Taints = currTaints
77+
_, err := client.Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
78+
if err != nil {
79+
klog.Errorf("failed to remove node taint %s from node %s: %v", userConfig.ProvisionerNotReadyNodeTaintKey, node.Name, err)
80+
return err
81+
}
82+
83+
n.taintRemoved = true
84+
klog.Infof("removed node taint %s from node %s", userConfig.ProvisionerNotReadyNodeTaintKey, node.Name)
85+
return nil
86+
}
87+
88+
// ShouldRemoveTaint returns true if the taint is not removed already and the user config is set to remove the taint.
89+
func (n *Remover) ShouldRemoveTaint() bool {
90+
return !n.taintRemoved && n.RuntimeConfig.UserConfig.RemoveNodeNotReadyTaint
91+
}
92+
93+
// RemoveTaintWithBackoff removes the taint if the taint is not removed already, it performs linear randomized backoff upon failure.
94+
func (n *Remover) RemoveTaintWithBackoff() {
95+
retries := 0
96+
retryPeriod := time.Duration(0 * time.Second)
97+
for n.ShouldRemoveTaint() && retries < maxRemoveTaintRetries {
98+
err := n.RemoveNodeTaint()
99+
if err == nil {
100+
return
101+
}
102+
retries++
103+
// randomized retry period
104+
retryPeriodInSeconds := int(removeTaintRetryPeriod / time.Second)
105+
randomSeconds := rand.Intn(retryPeriodInSeconds)
106+
retryPeriod += time.Duration(randomSeconds) * time.Second
107+
time.Sleep(retryPeriod)
108+
}
109+
if retries == maxRemoveTaintRetries {
110+
klog.Errorf("failed to remove node taint %s from node %s after %d retries", n.RuntimeConfig.UserConfig.ProvisionerNotReadyNodeTaintKey, n.RuntimeConfig.Node.Name, maxRemoveTaintRetries)
111+
}
112+
}

pkg/node-taint/node_taint_test.go

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package nodetaint
18+
19+
import (
20+
"context"
21+
"testing"
22+
23+
corev1 "k8s.io/api/core/v1"
24+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
25+
"k8s.io/client-go/kubernetes/fake"
26+
"sigs.k8s.io/sig-storage-local-static-provisioner/pkg/common"
27+
)
28+
29+
type testCase struct {
30+
name string
31+
node *corev1.Node
32+
userConfig *common.UserConfig
33+
expectedTaints []corev1.Taint
34+
expectedTaintRemoved bool
35+
}
36+
37+
func TestRemoveNodeTaint(t *testing.T) {
38+
testCases := []testCase{
39+
{
40+
name: "should remove taint when it exists",
41+
node: &corev1.Node{
42+
ObjectMeta: metav1.ObjectMeta{Name: "test-node"},
43+
Spec: corev1.NodeSpec{
44+
Taints: []corev1.Taint{
45+
{Key: "test-taint-key", Value: "test-value", Effect: corev1.TaintEffectNoSchedule},
46+
{Key: "other-taint", Value: "other-value", Effect: corev1.TaintEffectNoSchedule},
47+
},
48+
},
49+
},
50+
userConfig: &common.UserConfig{
51+
RemoveNodeNotReadyTaint: true,
52+
ProvisionerNotReadyNodeTaintKey: "test-taint-key",
53+
},
54+
expectedTaints: []corev1.Taint{
55+
{Key: "other-taint", Value: "other-value", Effect: corev1.TaintEffectNoSchedule},
56+
},
57+
expectedTaintRemoved: true,
58+
},
59+
{
60+
name: "should not remove taint when RemoveNodeNotReadyTaint is false",
61+
node: &corev1.Node{
62+
ObjectMeta: metav1.ObjectMeta{Name: "test-node"},
63+
Spec: corev1.NodeSpec{
64+
Taints: []corev1.Taint{
65+
{Key: "test-taint-key", Value: "test-value", Effect: corev1.TaintEffectNoSchedule},
66+
},
67+
},
68+
},
69+
userConfig: &common.UserConfig{
70+
RemoveNodeNotReadyTaint: false,
71+
ProvisionerNotReadyNodeTaintKey: "test-taint-key",
72+
},
73+
expectedTaints: []corev1.Taint{
74+
{Key: "test-taint-key", Value: "test-value", Effect: corev1.TaintEffectNoSchedule},
75+
},
76+
expectedTaintRemoved: false,
77+
},
78+
{
79+
name: "should not remove taint when it doesn't exist",
80+
node: &corev1.Node{
81+
ObjectMeta: metav1.ObjectMeta{Name: "test-node"},
82+
Spec: corev1.NodeSpec{
83+
Taints: []corev1.Taint{
84+
{Key: "other-taint", Value: "other-value", Effect: corev1.TaintEffectNoSchedule},
85+
},
86+
},
87+
},
88+
userConfig: &common.UserConfig{
89+
RemoveNodeNotReadyTaint: true,
90+
ProvisionerNotReadyNodeTaintKey: "test-taint-key",
91+
},
92+
expectedTaints: []corev1.Taint{
93+
{Key: "other-taint", Value: "other-value", Effect: corev1.TaintEffectNoSchedule},
94+
},
95+
expectedTaintRemoved: false,
96+
},
97+
{
98+
name: "should not remove taint when already removed",
99+
node: &corev1.Node{
100+
ObjectMeta: metav1.ObjectMeta{Name: "test-node"},
101+
Spec: corev1.NodeSpec{
102+
Taints: []corev1.Taint{
103+
{Key: "test-taint-key", Value: "test-value", Effect: corev1.TaintEffectNoSchedule},
104+
},
105+
},
106+
},
107+
userConfig: &common.UserConfig{
108+
RemoveNodeNotReadyTaint: true,
109+
ProvisionerNotReadyNodeTaintKey: "test-taint-key",
110+
},
111+
expectedTaints: []corev1.Taint{
112+
{Key: "test-taint-key", Value: "test-value", Effect: corev1.TaintEffectNoSchedule},
113+
},
114+
expectedTaintRemoved: true,
115+
},
116+
}
117+
118+
for _, tc := range testCases {
119+
t.Run(tc.name, func(t *testing.T) {
120+
fakeClient := fake.NewSimpleClientset(tc.node)
121+
122+
runtimeConfig := &common.RuntimeConfig{
123+
UserConfig: tc.userConfig,
124+
Client: fakeClient,
125+
}
126+
runtimeConfig.UserConfig.Node = tc.node
127+
128+
// For the "already removed" test case, mark taint as already removed
129+
remover := NewRemover(runtimeConfig)
130+
if tc.name == "should not remove taint when already removed" {
131+
remover.taintRemoved = true
132+
}
133+
134+
err := remover.RemoveNodeTaint()
135+
if err != nil {
136+
t.Errorf("failed to remove node taint: %v", err)
137+
}
138+
139+
updatedNode, err := fakeClient.CoreV1().Nodes().Get(context.Background(), tc.node.Name, metav1.GetOptions{})
140+
if err != nil {
141+
t.Errorf("failed to get updated node: %v", err)
142+
}
143+
if len(tc.expectedTaints) != len(updatedNode.Spec.Taints) {
144+
t.Errorf("expected %d taints, got %d", len(tc.expectedTaints), len(updatedNode.Spec.Taints))
145+
}
146+
147+
// Verify taintRemoved flag
148+
if remover.taintRemoved != tc.expectedTaintRemoved {
149+
t.Errorf("expected taintRemoved to be %v, got %v", tc.expectedTaintRemoved, remover.taintRemoved)
150+
}
151+
})
152+
}
153+
}

0 commit comments

Comments
 (0)