Skip to content

Commit df86bdd

Browse files
dcantah authored and dims committed
CRI Sbserver: Make PodSandboxStatus friendlier to shim crashes
Currently, if you're using the shim-mode sandbox server support and the shim hosting the Sandbox API dies for any unintentional reason (segfault, OOM, etc.), PodSandboxStatus is effectively wedged. If we didn't go through the usual k8s flow of Stop -> Remove and we still have an entry in our sandbox store, then no longer having a shim mapping means the exit was likely unintentional. We can use that fact to report the sandbox as NOTREADY instead of returning an error.

Signed-off-by: Danny Canter <[email protected]>
1 parent 923bb1f commit df86bdd

File tree

1 file changed

+24
-3
lines changed

1 file changed

+24
-3
lines changed

pkg/cri/sbserver/sandbox_status.go

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"time"
2323

2424
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
25+
"github.com/containerd/errdefs"
2526
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
2627
)
2728

@@ -42,12 +43,32 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox
4243
return nil, fmt.Errorf("failed to get sandbox controller: %w", err)
4344
}
4445

46+
var (
47+
createdAt time.Time
48+
state string
49+
info map[string]string
50+
)
4551
cstatus, err := controller.Status(ctx, sandbox.ID, r.GetVerbose())
4652
if err != nil {
47-
return nil, fmt.Errorf("failed to query controller status: %w", err)
53+
// If the shim died unexpectedly (segfault etc.) let's set the state as
54+
// NOTREADY and not just error out to make k8s and clients like crictl
55+
// happy. If we get back ErrNotFound from controller.Status above while
56+
// we're using the shim-mode controller, this is a decent indicator it
57+
// exited unexpectedly. We can use the fact that we successfully retrieved
58+
// the sandbox object from the store above to tell that this is true, otherwise
59+
// if we followed the normal k8s convention of StopPodSandbox -> RemovePodSandbox,
60+
// we wouldn't have that object in the store anymore.
61+
if !errdefs.IsNotFound(err) {
62+
return nil, fmt.Errorf("failed to query controller status: %w", err)
63+
}
64+
state = runtime.PodSandboxState_SANDBOX_NOTREADY.String()
65+
} else {
66+
state = cstatus.State
67+
createdAt = cstatus.CreatedAt
68+
info = cstatus.Info
4869
}
4970

50-
status := toCRISandboxStatus(sandbox.Metadata, cstatus.State, cstatus.CreatedAt, ip, additionalIPs)
71+
status := toCRISandboxStatus(sandbox.Metadata, state, createdAt, ip, additionalIPs)
5172
if status.GetCreatedAt() == 0 {
5273
// CRI doesn't allow CreatedAt == 0.
5374
sandboxInfo, err := c.client.SandboxStore().Get(ctx, sandbox.ID)
@@ -59,7 +80,7 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox
5980

6081
return &runtime.PodSandboxStatusResponse{
6182
Status: status,
62-
Info: cstatus.Info,
83+
Info: info,
6384
}, nil
6485
}
6586

0 commit comments

Comments
 (0)