@@ -18,6 +18,7 @@ package proxy
18
18
19
19
import (
20
20
"context"
21
+ "time"
21
22
22
23
api "github.com/containerd/containerd/api/services/sandbox/v1"
23
24
"github.com/containerd/containerd/api/types"
@@ -119,9 +120,31 @@ func (s *remoteSandboxController) Shutdown(ctx context.Context, sandboxID string
119
120
}
120
121
121
122
func (s * remoteSandboxController ) Wait (ctx context.Context , sandboxID string ) (sandbox.ExitStatus , error ) {
122
- resp , err := s .client .Wait (ctx , & api.ControllerWaitRequest {SandboxID : sandboxID })
123
- if err != nil {
124
- return sandbox.ExitStatus {}, errdefs .FromGRPC (err )
123
+ // For remote sandbox controllers, the controller process may restart,
124
+ // so we have to retry when the error indicates a gRPC disconnection.
125
+ var (
126
+ resp * api.ControllerWaitResponse
127
+ err error
128
+ retryInterval time.Duration = 128
129
+ )
130
+ for {
131
+ resp , err = s .client .Wait (ctx , & api.ControllerWaitRequest {SandboxID : sandboxID })
132
+ if err != nil {
133
+ grpcErr := errdefs .FromGRPC (err )
134
+ if ! errdefs .IsUnavailable (grpcErr ) {
135
+ return sandbox.ExitStatus {}, grpcErr
136
+ }
137
+ select {
138
+ case <- time .After (retryInterval * time .Millisecond ):
139
+ if retryInterval < 4096 {
140
+ retryInterval = retryInterval << 1
141
+ }
142
+ continue
143
+ case <- ctx .Done ():
144
+ return sandbox.ExitStatus {}, grpcErr
145
+ }
146
+ }
147
+ break
125
148
}
126
149
127
150
return sandbox.ExitStatus {
0 commit comments