Skip to content

Commit 6ef167b

Browse files
committed
impl compensation mechanism for driver.removeNotReadyTaint()
1 parent bed1afa commit 6ef167b

File tree

2 files changed

+17
-5
lines changed

2 files changed

+17
-5
lines changed

pkg/driver/driver.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"net"
2222
"strings"
23+
"time"
2324

2425
"github.com/container-storage-interface/spec/lib/go/csi"
2526
"google.golang.org/grpc"
@@ -129,10 +130,7 @@ func (d *Driver) Run() error {
129130

130131
// Remove taint from node to indicate driver startup success
131132
// This is done at the last possible moment to prevent race conditions or false positive removals
132-
err = removeNotReadyTaint(cloud.DefaultKubernetesAPIClient)
133-
if err != nil {
134-
klog.ErrorS(err, "Unexpected failure when attempting to remove node taint(s)")
135-
}
133+
go tryRemoveNotReadyTaintUntilSucceed(cloud.DefaultKubernetesAPIClient, time.Second)
136134

137135
klog.Infof("Listening for connections on address: %#v", listener.Addr())
138136
return d.srv.Serve(listener)

pkg/driver/node.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"path/filepath"
2626
"strconv"
2727
"strings"
28+
"time"
2829

2930
"github.com/container-storage-interface/spec/lib/go/csi"
3031
"github.com/kubernetes-sigs/aws-efs-csi-driver/pkg/cloud"
@@ -452,7 +453,7 @@ type JSONPatch struct {
452453
Value interface{} `json:"value"`
453454
}
454455

455-
// removeNotReadyTaint removes the taint ebs.csi.aws.com/agent-not-ready from the local node
456+
// removeNotReadyTaint removes the taint efs.csi.aws.com/agent-not-ready from the local node
456457
// This taint can be optionally applied by users to prevent startup race conditions such as
457458
// https://github.com/kubernetes/kubernetes/issues/95911
458459
func removeNotReadyTaint(k8sClient cloud.KubernetesAPIClient) error {
@@ -512,3 +513,16 @@ func removeNotReadyTaint(k8sClient cloud.KubernetesAPIClient) error {
512513
klog.InfoS("Removed taint(s) from local node", "node", nodeName)
513514
return nil
514515
}
516+
517+
// tryRemoveNotReadyTaintUntilSucceed calls removeNotReadyTaint repeatedly, sleeping for interval after each failure, until it succeeds, so that the taint is eventually removed even if early attempts fail.
518+
func tryRemoveNotReadyTaintUntilSucceed(k8sClient cloud.KubernetesAPIClient, interval time.Duration) {
519+
for {
520+
err := removeNotReadyTaint(k8sClient)
521+
if err == nil {
522+
return
523+
}
524+
525+
klog.ErrorS(err, "Unexpected failure when attempting to remove node taint(s)")
526+
time.Sleep(interval)
527+
}
528+
}

0 commit comments

Comments
 (0)