Skip to content

Commit 7d62a09

Browse files
authored
feat: support smooth upgrade with checking csi sock (#10)
When the model CSI driver is deployed as a DaemonSet pod, a newly created pod may delete the sock file that the old pod was listening on. If the new pod hasn't started properly yet, this causes a period of service unavailability. This change adds a check that verifies whether the sock file exists and is being listened on; if it is, the new pod exits with an error. Signed-off-by: imeoer <[email protected]>
1 parent f862f46 commit 7d62a09

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

pkg/server/server.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,20 @@ func ensureSockNotExists(ctx context.Context, sockPath string) error {
5656
return nil
5757
}
5858

59+
// isSockListening reports whether something is accepting connections on the
// Unix domain socket at sockPath.
//
// It returns false when sockPath cannot be stat'ed or when a connection cannot
// be established within five seconds; the underlying error is not reported.
func isSockListening(sockPath string) bool {
	// Skip the dial entirely when the path is missing or inaccessible.
	if _, err := os.Stat(sockPath); err != nil {
		return false
	}

	conn, err := net.DialTimeout("unix", sockPath, 5*time.Second)
	if err != nil {
		// Covers, e.g., a socket file with no listener behind it.
		return false
	}
	// The connection is only a probe; a close error does not change the answer.
	_ = conn.Close()

	return true
}
72+
5973
type Server struct {
6074
cfg *config.Config
6175
svc *service.Service
@@ -110,6 +124,15 @@ func (server *Server) Run(ctx context.Context) error {
110124
}
111125
}
112126

127+
if server.cfg.Get().IsNodeMode() {
128+
if endpoint, err := url.Parse(server.cfg.Get().CSIEndpoint); err == nil {
129+
if endpoint.Path != "" && isSockListening(endpoint.Path) {
130+
err := errors.Errorf("CSI socket %s is already listening", endpoint.Path)
131+
return err
132+
}
133+
}
134+
}
135+
113136
if server.cfg.Get().PprofAddr != "" {
114137
eg.Go(withFatalError(func() error {
115138
endpoint, err := url.Parse(server.cfg.Get().PprofAddr)

0 commit comments

Comments
 (0)