Skip to content

Commit 4ed8c89

Browse files
authored
Merge pull request #240 from jsafrane/dont-crash
Don't exit the probe on connection issues
2 parents 9cad16c + 4335b7e commit 4ed8c89

File tree

2 files changed

+9
-37
lines changed

2 files changed

+9
-37
lines changed

cmd/livenessprobe/main.go

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,8 @@ import (
2323
"net"
2424
"net/http"
2525
"os"
26-
"sync"
2726
"time"
2827

29-
"google.golang.org/grpc"
3028
"k8s.io/klog/v2"
3129

3230
"k8s.io/component-base/featuregate"
@@ -62,7 +60,7 @@ func (h *healthProbe) checkProbe(w http.ResponseWriter, req *http.Request) {
6260
ctx, cancel := context.WithTimeout(req.Context(), *probeTimeout)
6361
defer cancel()
6462

65-
conn, err := acquireConnection(ctx, h.metricsManager)
63+
conn, err := connlib.Connect(*csiAddress, h.metricsManager, connlib.WithTimeout(*probeTimeout))
6664
if err != nil {
6765
w.WriteHeader(http.StatusInternalServerError)
6866
w.Write([]byte(err.Error()))
@@ -92,37 +90,6 @@ func (h *healthProbe) checkProbe(w http.ResponseWriter, req *http.Request) {
9290
klog.V(5).InfoS("Health check succeeded")
9391
}
9492

95-
// acquireConnection wraps connlib.Connect, adding support for context
96-
// cancelation.
97-
func acquireConnection(ctx context.Context, metricsManager metrics.CSIMetricsManager) (conn *grpc.ClientConn, err error) {
98-
99-
var m sync.Mutex
100-
var canceled bool
101-
ready := make(chan bool)
102-
go func() {
103-
conn, err = connlib.Connect(*csiAddress, metricsManager)
104-
105-
m.Lock()
106-
defer m.Unlock()
107-
if err != nil && canceled && conn != nil {
108-
conn.Close()
109-
}
110-
111-
close(ready)
112-
}()
113-
114-
select {
115-
case <-ctx.Done():
116-
m.Lock()
117-
defer m.Unlock()
118-
canceled = true
119-
return nil, ctx.Err()
120-
121-
case <-ready:
122-
return conn, err
123-
}
124-
}
125-
12693
func main() {
12794
fg := featuregate.NewFeatureGate()
12895
logsapi.AddFeatureGates(fg)
@@ -151,10 +118,14 @@ func main() {
151118
}
152119

153120
metricsManager := metrics.NewCSIMetricsManager("" /* driverName */)
154-
csiConn, err := acquireConnection(context.Background(), metricsManager)
121+
// Connect to the CSI driver without any timeout to avoid crashing the probe when the driver is not ready yet.
122+
// Goal: liveness probe never crashes, it only fails the probe when the driver is not available (yet).
123+
// Since an HTTP server for the probe is not running at this point, the Kubernetes liveness probe will fail immediately
124+
// with "connection refused", which is good enough to fail the probe.
125+
csiConn, err := connlib.Connect(*csiAddress, metricsManager, connlib.WithTimeout(0))
155126
if err != nil {
156127
// connlib should retry forever so a returned error should mean
157-
// the grpc client is misconfigured rather than an error on the network
128+
// the gRPC client is misconfigured rather than an error on the network or in the CSI driver.
158129
klog.ErrorS(err, "Failed to establish connection to CSI driver")
159130
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
160131
}
@@ -163,6 +134,7 @@ func main() {
163134
csiDriverName, err := rpc.GetDriverName(context.Background(), csiConn)
164135
csiConn.Close()
165136
if err != nil {
137+
// The CSI driver does not support GetDriverName, which is serious enough to crash the probe.
166138
klog.ErrorS(err, "Failed to get CSI driver name")
167139
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
168140
}

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ require (
77
github.com/golang/mock v1.6.0
88
github.com/kubernetes-csi/csi-lib-utils v0.17.0
99
github.com/kubernetes-csi/csi-test/v5 v5.2.0
10-
google.golang.org/grpc v1.60.1
1110
k8s.io/component-base v0.29.0
1211
k8s.io/klog/v2 v2.110.1
1312
)
@@ -46,6 +45,7 @@ require (
4645
golang.org/x/sys v0.14.0 // indirect
4746
golang.org/x/text v0.14.0 // indirect
4847
google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 // indirect
48+
google.golang.org/grpc v1.60.1 // indirect
4949
google.golang.org/protobuf v1.31.0 // indirect
5050
gopkg.in/inf.v0 v0.9.1 // indirect
5151
gopkg.in/yaml.v2 v2.4.0 // indirect

0 commit comments

Comments
 (0)