Skip to content

Commit 8722c83

Browse files
kubelet: Never restart containers in deleting pods
When constructing the API status of a pod, if the pod is marked for deletion, no containers should be started. Previously, if a container inside a terminating pod failed to start due to a container runtime error (one that populates the reasonCache), the reasonCache entry would remain populated (it is only updated by syncPod for non-terminating pods), and the delete action on the pod would be delayed until the reasonCache entry expired due to activity from other pods. This change dramatically reduces the amount of time the Kubelet waits to delete a terminating pod that encountered a container runtime error.
1 parent 2364c10 commit 8722c83

File tree

2 files changed

+28
-0
lines changed

2 files changed

+28
-0
lines changed

pkg/kubelet/container/helpers.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ type RuntimeHelper interface {
6161
// ShouldContainerBeRestarted checks whether a container needs to be restarted.
6262
// TODO(yifan): Think about how to refactor this.
6363
func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus *PodStatus) bool {
64+
// Once a pod has been marked deleted, it should not be restarted
65+
if pod.DeletionTimestamp != nil {
66+
return false
67+
}
6468
// Get latest container status.
6569
status := podStatus.FindContainerStatusByName(container.Name)
6670
// If the container was never started before, we should start it.

pkg/kubelet/container/helpers_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package container
1919
import (
2020
"reflect"
2121
"testing"
22+
"time"
2223

2324
"github.com/google/go-cmp/cmp"
2425
"github.com/stretchr/testify/assert"
@@ -449,6 +450,8 @@ func TestShouldContainerBeRestarted(t *testing.T) {
449450
v1.RestartPolicyOnFailure,
450451
v1.RestartPolicyAlways,
451452
}
453+
454+
// test policies
452455
expected := map[string][]bool{
453456
"no-history": {true, true, true},
454457
"alive": {false, false, false},
@@ -467,6 +470,27 @@ func TestShouldContainerBeRestarted(t *testing.T) {
467470
}
468471
}
469472
}
473+
474+
// test deleted pod
475+
pod.DeletionTimestamp = &metav1.Time{Time: time.Now()}
476+
expected = map[string][]bool{
477+
"no-history": {false, false, false},
478+
"alive": {false, false, false},
479+
"succeed": {false, false, false},
480+
"failed": {false, false, false},
481+
"unknown": {false, false, false},
482+
}
483+
for _, c := range pod.Spec.Containers {
484+
for i, policy := range policies {
485+
pod.Spec.RestartPolicy = policy
486+
e := expected[c.Name][i]
487+
r := ShouldContainerBeRestarted(&c, pod, podStatus)
488+
if r != e {
489+
t.Errorf("Restart for container %q with restart policy %q expected %t, got %t",
490+
c.Name, policy, e, r)
491+
}
492+
}
493+
}
470494
}
471495

472496
func TestHasPrivilegedContainer(t *testing.T) {

0 commit comments

Comments (0)