Skip to content

Commit 6d8c526

Browse files
authored
Merge pull request #1287 from abbshr/startup-taint-remove-compensation
Make sure the startup taint is eventually removed after the EFS driver is ready
2 parents 3618623 + 4e270c4 commit 6d8c526

File tree

3 files changed

+49
-5
lines changed

3 files changed

+49
-5
lines changed

pkg/driver/driver.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"net"
2222
"strings"
23+
"time"
2324

2425
"github.com/container-storage-interface/spec/lib/go/csi"
2526
"google.golang.org/grpc"
@@ -129,10 +130,9 @@ func (d *Driver) Run() error {
129130

130131
// Remove taint from node to indicate driver startup success
131132
// This is done at the last possible moment to prevent race conditions or false positive removals
132-
err = removeNotReadyTaint(cloud.DefaultKubernetesAPIClient)
133-
if err != nil {
134-
klog.ErrorS(err, "Unexpected failure when attempting to remove node taint(s)")
135-
}
133+
go tryRemoveNotReadyTaintUntilSucceed(time.Second, func() error {
134+
return removeNotReadyTaint(cloud.DefaultKubernetesAPIClient)
135+
})
136136

137137
klog.Infof("Listening for connections on address: %#v", listener.Addr())
138138
return d.srv.Serve(listener)

pkg/driver/node.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"path/filepath"
2626
"strconv"
2727
"strings"
28+
"time"
2829

2930
"github.com/container-storage-interface/spec/lib/go/csi"
3031
"github.com/kubernetes-sigs/aws-efs-csi-driver/pkg/cloud"
@@ -464,7 +465,7 @@ type JSONPatch struct {
464465
Value interface{} `json:"value"`
465466
}
466467

467-
// removeNotReadyTaint removes the taint ebs.csi.aws.com/agent-not-ready from the local node
468+
// removeNotReadyTaint removes the taint efs.csi.aws.com/agent-not-ready from the local node
468469
// This taint can be optionally applied by users to prevent startup race conditions such as
469470
// https://github.com/kubernetes/kubernetes/issues/95911
470471
func removeNotReadyTaint(k8sClient cloud.KubernetesAPIClient) error {
@@ -524,3 +525,16 @@ func removeNotReadyTaint(k8sClient cloud.KubernetesAPIClient) error {
524525
klog.InfoS("Removed taint(s) from local node", "node", nodeName)
525526
return nil
526527
}
528+
529+
// remove taint may failed, this keep retring until succeed, make sure the taint will eventually being removed
530+
func tryRemoveNotReadyTaintUntilSucceed(interval time.Duration, removeFn func() error) {
531+
for {
532+
err := removeFn()
533+
if err == nil {
534+
return
535+
}
536+
537+
klog.ErrorS(err, "Unexpected failure when attempting to remove node taint(s)")
538+
time.Sleep(interval)
539+
}
540+
}

pkg/driver/node_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package driver
1818

1919
import (
2020
"context"
21+
"errors"
2122
"fmt"
2223
"os"
2324
"reflect"
@@ -1012,3 +1013,32 @@ func getNodeMock(mockCtl *gomock.Controller, nodeName string, returnNode *corev1
10121013

10131014
return mockClient, mockNode
10141015
}
1016+
1017+
func TestTryRemoveNotReadyTaintUntilSucceed(t *testing.T) {
1018+
{
1019+
i := 0
1020+
tryRemoveNotReadyTaintUntilSucceed(time.Second, func() error {
1021+
i++
1022+
if i < 3 {
1023+
return errors.New("test")
1024+
}
1025+
1026+
return nil
1027+
})
1028+
1029+
if i != 3 {
1030+
t.Fatalf("unexpected result")
1031+
}
1032+
}
1033+
{
1034+
i := 0
1035+
tryRemoveNotReadyTaintUntilSucceed(time.Second, func() error {
1036+
i++
1037+
return nil
1038+
})
1039+
1040+
if i != 1 {
1041+
t.Fatalf("unexpected result")
1042+
}
1043+
}
1044+
}

0 commit comments

Comments
 (0)