@@ -23,10 +23,8 @@ import (
2323 "net"
2424 "net/http"
2525 "os"
26- "sync"
2726 "time"
2827
29- "google.golang.org/grpc"
3028 "k8s.io/klog/v2"
3129
3230 "k8s.io/component-base/featuregate"
@@ -62,7 +60,7 @@ func (h *healthProbe) checkProbe(w http.ResponseWriter, req *http.Request) {
6260 ctx , cancel := context .WithTimeout (req .Context (), * probeTimeout )
6361 defer cancel ()
6462
65- conn , err := acquireConnection ( ctx , h .metricsManager )
63+ conn , err := connlib . Connect ( * csiAddress , h .metricsManager , connlib . WithTimeout ( * probeTimeout ) )
6664 if err != nil {
6765 w .WriteHeader (http .StatusInternalServerError )
6866 w .Write ([]byte (err .Error ()))
@@ -92,37 +90,6 @@ func (h *healthProbe) checkProbe(w http.ResponseWriter, req *http.Request) {
9290 klog .V (5 ).InfoS ("Health check succeeded" )
9391}
9492
95- // acquireConnection wraps the connlib.Connect but adding support to context
96- // cancelation.
97- func acquireConnection (ctx context.Context , metricsManager metrics.CSIMetricsManager ) (conn * grpc.ClientConn , err error ) {
98-
99- var m sync.Mutex
100- var canceled bool
101- ready := make (chan bool )
102- go func () {
103- conn , err = connlib .Connect (* csiAddress , metricsManager )
104-
105- m .Lock ()
106- defer m .Unlock ()
107- if err != nil && canceled && conn != nil {
108- conn .Close ()
109- }
110-
111- close (ready )
112- }()
113-
114- select {
115- case <- ctx .Done ():
116- m .Lock ()
117- defer m .Unlock ()
118- canceled = true
119- return nil , ctx .Err ()
120-
121- case <- ready :
122- return conn , err
123- }
124- }
125-
12693func main () {
12794 fg := featuregate .NewFeatureGate ()
12895 logsapi .AddFeatureGates (fg )
@@ -151,10 +118,14 @@ func main() {
151118 }
152119
153120 metricsManager := metrics .NewCSIMetricsManager ("" /* driverName */ )
154- csiConn , err := acquireConnection (context .Background (), metricsManager )
121+ // Connect to the CSI driver without any timeout to avoid crashing the probe when the driver is not ready yet.
122+ // Goal: the liveness probe never crashes; it only fails the probe when the driver is not available (yet).
123+ // Since the HTTP server for the probe is not running at this point, the Kubernetes liveness probe will fail immediately
124+ // with "connection refused", which is good enough to fail the probe.
125+ csiConn , err := connlib .Connect (* csiAddress , metricsManager , connlib .WithTimeout (0 ))
155126 if err != nil {
156127 // connlib should retry forever so a returned error should mean
157- // the grpc client is misconfigured rather than an error on the network
128+ // the gRPC client is misconfigured rather than an error on the network or in the CSI driver.
158129 klog .ErrorS (err , "Failed to establish connection to CSI driver" )
159130 klog .FlushAndExit (klog .ExitFlushTimeout , 1 )
160131 }
@@ -163,6 +134,7 @@ func main() {
163134 csiDriverName , err := rpc .GetDriverName (context .Background (), csiConn )
164135 csiConn .Close ()
165136 if err != nil {
137+ // The CSI driver does not support GetDriverName, which is serious enough to crash the probe.
166138 klog .ErrorS (err , "Failed to get CSI driver name" )
167139 klog .FlushAndExit (klog .ExitFlushTimeout , 1 )
168140 }
0 commit comments