Skip to content

Commit 8bdbd4d

Browse files
committed
Fix CSINodeInfo startup
This change speeds up unit tests and adds more observability when things go wrong.
1 parent 8205f81 commit 8bdbd4d

File tree

3 files changed

+23
-18
lines changed

3 files changed

+23
-18
lines changed

pkg/volume/csi/csi_plugin.go

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,14 @@ limitations under the License.
1717
package csi
1818

1919
import (
20+
"context"
2021
"errors"
2122
"fmt"
2223
"os"
2324
"path/filepath"
2425
"strings"
2526
"time"
2627

27-
"context"
28-
2928
"k8s.io/klog"
3029

3130
api "k8s.io/api/core/v1"
@@ -256,7 +255,8 @@ func initializeCSINode(host volume.VolumeHost) error {
256255
defer utilruntime.HandleCrash()
257256

258257
// First wait indefinitely to talk to Kube APIServer
259-
err := waitForAPIServerForever(kubeClient)
258+
nodeName := host.GetNodeName()
259+
err := waitForAPIServerForever(kubeClient, nodeName)
260260
if err != nil {
261261
klog.Fatalf("Failed to initialize CSINode while waiting for API server to report ok: %v", err)
262262
}
@@ -921,20 +921,25 @@ func highestSupportedVersion(versions []string) (*utilversion.Version, error) {
921921
return highestSupportedVersion, nil
922922
}
923923

924-
// waitForAPIServerForever waits forever to get the APIServer Version as a proxy
925-
// for a healthy APIServer.
926-
func waitForAPIServerForever(client clientset.Interface) error {
924+
// waitForAPIServerForever waits forever to get a CSINode instance as a proxy
925+
// for a healthy APIServer.
926+
func waitForAPIServerForever(client clientset.Interface, nodeName types.NodeName) error {
927927
var lastErr error
928-
err := wait.PollInfinite(time.Second, func() (bool, error) {
929-
_, lastErr = client.Discovery().ServerVersion()
930-
if lastErr != nil {
931-
lastErr = fmt.Errorf("failed to get apiserver version: %v", lastErr)
932-
return false, nil
928+
err := wait.PollImmediateInfinite(time.Second, func() (bool, error) {
929+
// Get a CSINode from API server to make sure 1) kubelet can reach API server
930+
// and 2) it has enough permissions. Kubelet may have restricted permissions
931+
// when it's bootstrapping TLS.
932+
// https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet-tls-bootstrapping/
933+
_, lastErr = client.StorageV1().CSINodes().Get(context.TODO(), string(nodeName), meta.GetOptions{})
934+
if lastErr == nil || apierrors.IsNotFound(lastErr) {
935+
// API server contacted
936+
return true, nil
933937
}
934-
935-
return true, nil
938+
klog.V(2).Infof("Failed to contact API server when waiting for CSINode publishing: %s", lastErr)
939+
return false, nil
936940
})
937941
if err != nil {
942+
// In theory this is unreachable, but just in case:
938943
return fmt.Errorf("%v: %v", err, lastErr)
939944
}
940945

pkg/volume/csi/nodeinfomanager/nodeinfomanager.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -397,16 +397,16 @@ func (nim *nodeInfoManager) InitializeCSINodeWithAnnotation() error {
397397
return goerrors.New("error getting CSI client")
398398
}
399399

400-
var updateErrs []error
400+
var lastErr error
401401
err := wait.ExponentialBackoff(updateBackoff, func() (bool, error) {
402-
if err := nim.tryInitializeCSINodeWithAnnotation(csiKubeClient); err != nil {
403-
updateErrs = append(updateErrs, err)
402+
if lastErr = nim.tryInitializeCSINodeWithAnnotation(csiKubeClient); lastErr != nil {
403+
klog.V(2).Infof("Failed to publish CSINode: %v", lastErr)
404404
return false, nil
405405
}
406406
return true, nil
407407
})
408408
if err != nil {
409-
return fmt.Errorf("error updating CSINode annotation: %v; caused by: %v", err, utilerrors.NewAggregate(updateErrs))
409+
return fmt.Errorf("error updating CSINode annotation: %v; caused by: %v", err, lastErr)
410410
}
411411

412412
return nil

pkg/volume/testing/testing.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1870,7 +1870,7 @@ func (f *fakeVolumeHost) WaitForCacheSync() error {
18701870
}
18711871

18721872
func (f *fakeVolumeHost) WaitForKubeletErrNil() error {
1873-
return wait.PollImmediate(100*time.Millisecond, 10*time.Second, func() (bool, error) {
1873+
return wait.PollImmediate(10*time.Millisecond, 10*time.Second, func() (bool, error) {
18741874
f.mux.Lock()
18751875
defer f.mux.Unlock()
18761876
return f.kubeletErr == nil, nil

0 commit comments

Comments
 (0)