Skip to content

Commit 88512be

Browse files
authored
Merge pull request kubernetes#92817 from kmala/kubelet
Check for sandboxes before deleting the pod from apiserver
2 parents c3b888f + acac15c commit 88512be

15 files changed

+417
-28
lines changed

pkg/kubelet/BUILD

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ go_library(
2525
"kubelet_resources.go",
2626
"kubelet_volumes.go",
2727
"pod_container_deletor.go",
28+
"pod_sandbox_deleter.go",
2829
"pod_workers.go",
2930
"reason_cache.go",
3031
"runonce.go",
@@ -177,6 +178,7 @@ go_test(
177178
"kubelet_volumes_linux_test.go",
178179
"kubelet_volumes_test.go",
179180
"pod_container_deletor_test.go",
181+
"pod_sandbox_deleter_test.go",
180182
"pod_workers_test.go",
181183
"reason_cache_test.go",
182184
"runonce_test.go",
@@ -253,21 +255,14 @@ go_test(
253255
"//staging/src/k8s.io/client-go/util/testing:go_default_library",
254256
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
255257
"//staging/src/k8s.io/component-base/version:go_default_library",
258+
"//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library",
256259
"//vendor/github.com/golang/groupcache/lru:go_default_library",
257260
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
258261
"//vendor/github.com/google/cadvisor/info/v2:go_default_library",
259262
"//vendor/github.com/stretchr/testify/assert:go_default_library",
260263
"//vendor/github.com/stretchr/testify/require:go_default_library",
261264
"//vendor/k8s.io/utils/mount:go_default_library",
262-
] + select({
263-
"@io_bazel_rules_go//go/platform:android": [
264-
"//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library",
265-
],
266-
"@io_bazel_rules_go//go/platform:linux": [
267-
"//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library",
268-
],
269-
"//conditions:default": [],
270-
}),
265+
],
271266
)
272267

273268
filegroup(

pkg/kubelet/container/runtime.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ type Runtime interface {
114114
GetContainerLogs(ctx context.Context, pod *v1.Pod, containerID ContainerID, logOptions *v1.PodLogOptions, stdout, stderr io.Writer) (err error)
115115
// Delete a container. If the container is still running, an error is returned.
116116
DeleteContainer(containerID ContainerID) error
117+
// DeleteSandbox deletes a sandbox.
118+
DeleteSandbox(sandboxID string) error
117119
// ImageService provides methods to image-related methods.
118120
ImageService
119121
// UpdatePodCIDR sends a new podCIDR to the runtime.
@@ -299,7 +301,6 @@ type PodStatus struct {
299301
// Status of containers in the pod.
300302
ContainerStatuses []*Status
301303
// Status of the pod sandbox.
302-
// Only for kuberuntime now, other runtime may keep it nil.
303304
SandboxStatuses []*runtimeapi.PodSandboxStatus
304305
}
305306

@@ -309,6 +310,8 @@ type Status struct {
309310
ID ContainerID
310311
// Name of the container.
311312
Name string
313+
// ID of the sandbox to which this container belongs.
314+
PodSandboxID string
312315
// Status of the container.
313316
State State
314317
// Creation time of the container.

pkg/kubelet/container/testing/fake_runtime.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,14 @@ func (f *FakeRuntime) DeleteContainer(containerID kubecontainer.ContainerID) err
364364
return f.Err
365365
}
366366

367+
func (f *FakeRuntime) DeleteSandbox(sandboxID string) error {
368+
f.Lock()
369+
defer f.Unlock()
370+
371+
f.CalledFunctions = append(f.CalledFunctions, "DeleteSandbox")
372+
return f.Err
373+
}
374+
367375
func (f *FakeRuntime) ImageStats() (*kubecontainer.ImageStats, error) {
368376
f.Lock()
369377
defer f.Unlock()

pkg/kubelet/container/testing/runtime_mock.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,11 @@ func (r *Mock) DeleteContainer(containerID kubecontainer.ContainerID) error {
147147
return args.Error(0)
148148
}
149149

150+
func (r *Mock) DeleteSandbox(sandboxID string) error {
151+
args := r.Called(sandboxID)
152+
return args.Error(0)
153+
}
154+
150155
func (r *Mock) ImageStats() (*kubecontainer.ImageStats, error) {
151156
args := r.Called()
152157
return args.Get(0).(*kubecontainer.ImageStats), args.Error(1)

pkg/kubelet/cri/remote/fake/fake_runtime.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ func (f *RemoteRuntime) StopPodSandbox(ctx context.Context, req *kubeapi.StopPod
112112
// This call is idempotent, and must not return an error if the sandbox has
113113
// already been removed.
114114
func (f *RemoteRuntime) RemovePodSandbox(ctx context.Context, req *kubeapi.RemovePodSandboxRequest) (*kubeapi.RemovePodSandboxResponse, error) {
115-
err := f.RuntimeService.StopPodSandbox(req.PodSandboxId)
115+
err := f.RuntimeService.RemovePodSandbox(req.PodSandboxId)
116116
if err != nil {
117117
return nil, err
118118
}

pkg/kubelet/kubelet.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
671671
}
672672
klet.containerGC = containerGC
673673
klet.containerDeletor = newPodContainerDeletor(klet.containerRuntime, integer.IntMax(containerGCPolicy.MaxPerPodContainer, minDeadContainerInPod))
674+
klet.sandboxDeleter = newPodSandboxDeleter(klet.containerRuntime)
674675

675676
// setup imageManager
676677
imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, kubeDeps.Recorder, nodeRef, imageGCPolicy, crOptions.PodSandboxImage)
@@ -1098,6 +1099,9 @@ type Kubelet struct {
10981099
// trigger deleting containers in a pod
10991100
containerDeletor *podContainerDeletor
11001101

1102+
// trigger deleting sandboxes in a pod
1103+
sandboxDeleter *podSandboxDeleter
1104+
11011105
// config iptables util rules
11021106
makeIPTablesUtilChains bool
11031107

@@ -1870,6 +1874,9 @@ func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handle
18701874
klog.V(4).Infof("SyncLoop (PLEG): ignore irrelevant event: %#v", e)
18711875
}
18721876
}
1877+
if e.Type == pleg.ContainerRemoved {
1878+
kl.deletePodSandbox(e.ID)
1879+
}
18731880

18741881
if e.Type == pleg.ContainerDied {
18751882
if containerID, ok := e.Data.(string); ok {
@@ -2197,6 +2204,16 @@ func (kl *Kubelet) fastStatusUpdateOnce() {
21972204
}
21982205
}
21992206

2207+
func (kl *Kubelet) deletePodSandbox(podID types.UID) {
2208+
if podStatus, err := kl.podCache.Get(podID); err == nil {
2209+
toKeep := 1
2210+
if kl.IsPodDeleted(podID) {
2211+
toKeep = 0
2212+
}
2213+
kl.sandboxDeleter.deleteSandboxesInPod(podStatus, toKeep)
2214+
}
2215+
}
2216+
22002217
// isSyncPodWorthy filters out events that are not worthy of pod syncing
22012218
func isSyncPodWorthy(event *pleg.PodLifecycleEvent) bool {
22022219
// ContainerRemoved doesn't affect pod state

pkg/kubelet/kubelet_pods.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -967,6 +967,16 @@ func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bo
967967
klog.V(3).Infof("Pod %q is terminated, but some containers have not been cleaned up: %s", format.Pod(pod), statusStr)
968968
return false
969969
}
970+
// pod's sandboxes should be deleted
971+
if len(runtimeStatus.SandboxStatuses) > 0 {
972+
var sandboxStr string
973+
for _, sandbox := range runtimeStatus.SandboxStatuses {
974+
sandboxStr += fmt.Sprintf("%+v ", *sandbox)
975+
}
976+
klog.V(3).Infof("Pod %q is terminated, but some pod sandboxes have not been cleaned up: %s", format.Pod(pod), sandboxStr)
977+
return false
978+
}
979+
970980
if kl.podVolumesExist(pod.UID) && !kl.keepTerminatedPodVolumes {
971981
// We shouldn't delete pods whose volumes have not been cleaned up if we are not keeping terminated pod volumes
972982
klog.V(3).Infof("Pod %q is terminated, but some volumes have not been cleaned up", format.Pod(pod))

pkg/kubelet/kubelet_pods_test.go

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import (
4242
// api.Registry.GroupOrDie(v1.GroupName).GroupVersions[0].String() is changed
4343
// to "v1"?
4444

45+
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
4546
_ "k8s.io/kubernetes/pkg/apis/core/install"
4647
"k8s.io/kubernetes/pkg/features"
4748
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
@@ -2421,3 +2422,70 @@ func TestTruncatePodHostname(t *testing.T) {
24212422
assert.Equal(t, test.output, output)
24222423
}
24232424
}
2425+
2426+
func TestPodResourcesAreReclaimed(t *testing.T) {
2427+
2428+
type args struct {
2429+
pod *v1.Pod
2430+
status v1.PodStatus
2431+
runtimeStatus kubecontainer.PodStatus
2432+
}
2433+
tests := []struct {
2434+
name string
2435+
args args
2436+
want bool
2437+
}{
2438+
{
2439+
"pod with running containers",
2440+
args{
2441+
pod: &v1.Pod{},
2442+
status: v1.PodStatus{
2443+
ContainerStatuses: []v1.ContainerStatus{
2444+
runningState("containerA"),
2445+
runningState("containerB"),
2446+
},
2447+
},
2448+
},
2449+
false,
2450+
},
2451+
{
2452+
"pod with containers in runtime cache",
2453+
args{
2454+
pod: &v1.Pod{},
2455+
status: v1.PodStatus{},
2456+
runtimeStatus: kubecontainer.PodStatus{
2457+
ContainerStatuses: []*kubecontainer.Status{
2458+
{},
2459+
},
2460+
},
2461+
},
2462+
false,
2463+
},
2464+
{
2465+
"pod with sandbox present",
2466+
args{
2467+
pod: &v1.Pod{},
2468+
status: v1.PodStatus{},
2469+
runtimeStatus: kubecontainer.PodStatus{
2470+
SandboxStatuses: []*runtimeapi.PodSandboxStatus{
2471+
{},
2472+
},
2473+
},
2474+
},
2475+
false,
2476+
},
2477+
}
2478+
2479+
testKubelet := newTestKubelet(t, false)
2480+
defer testKubelet.Cleanup()
2481+
kl := testKubelet.kubelet
2482+
2483+
for _, tt := range tests {
2484+
t.Run(tt.name, func(t *testing.T) {
2485+
testKubelet.fakeRuntime.PodStatus = tt.args.runtimeStatus
2486+
if got := kl.PodResourcesAreReclaimed(tt.args.pod, tt.args.status); got != tt.want {
2487+
t.Errorf("PodResourcesAreReclaimed() = %v, want %v", got, tt.want)
2488+
}
2489+
})
2490+
}
2491+
}

pkg/kubelet/kuberuntime/kuberuntime_container.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ func (m *kubeGenericRuntimeManager) getPodContainerStatuses(uid kubetypes.UID, n
474474
cStatus.Message += tMessage
475475
}
476476
}
477+
cStatus.PodSandboxID = c.PodSandboxId
477478
statuses[i] = cStatus
478479
}
479480

pkg/kubelet/kuberuntime/kuberuntime_gc.go

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -161,26 +161,13 @@ func (cgc *containerGC) removeOldestNSandboxes(sandboxes []sandboxGCInfo, toRemo
161161
// Remove from oldest to newest (last to first).
162162
for i := len(sandboxes) - 1; i >= numToKeep; i-- {
163163
if !sandboxes[i].active {
164-
cgc.removeSandbox(sandboxes[i].id)
164+
if err := cgc.manager.DeleteSandbox(sandboxes[i].id); err != nil {
165+
klog.Errorf("Failed to remove sandbox %q: %v", sandboxes[i].id, err)
166+
}
165167
}
166168
}
167169
}
168170

169-
// removeSandbox removes the sandbox by sandboxID.
170-
func (cgc *containerGC) removeSandbox(sandboxID string) {
171-
klog.V(4).Infof("Removing sandbox %q", sandboxID)
172-
// In normal cases, kubelet should've already called StopPodSandbox before
173-
// GC kicks in. To guard against the rare cases where this is not true, try
174-
// stopping the sandbox before removing it.
175-
if err := cgc.client.StopPodSandbox(sandboxID); err != nil {
176-
klog.Errorf("Failed to stop sandbox %q before removing: %v", sandboxID, err)
177-
return
178-
}
179-
if err := cgc.client.RemovePodSandbox(sandboxID); err != nil {
180-
klog.Errorf("Failed to remove sandbox %q: %v", sandboxID, err)
181-
}
182-
}
183-
184171
// evictableContainers gets all containers that are evictable. Evictable containers are: not running
185172
// and created more than MinAge ago.
186173
func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByEvictUnit, error) {

0 commit comments

Comments
 (0)