Skip to content

Commit ae52684

Browse files
committed
kuberuntime_manager: fix container success check.
When evaluating whether a container ran to completion, we only check whether the CRI container status `ExitCode` is 0. However, the `ExitCode` is only meaningful if the container has actually run and exited. There are other states, e.g. `Created`, where the container runtime never set an `ExitCode`, so we shouldn't read it in that case.
1 parent a2106b5 commit ae52684

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed

pkg/kubelet/kuberuntime/kuberuntime_manager.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -534,10 +534,11 @@ func shouldRestartOnFailure(pod *v1.Pod) bool {
534534

535535
func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) bool {
536536
cStatus := podStatus.FindContainerStatusByName(c.Name)
537-
if cStatus == nil || cStatus.State == kubecontainer.ContainerStateRunning {
537+
if cStatus == nil {
538538
return false
539539
}
540-
return cStatus.ExitCode == 0
540+
// Container has exited, with an exit code of 0.
541+
return cStatus.State == kubecontainer.ContainerStateExited && cStatus.ExitCode == 0
541542
}
542543

543544
func isInPlacePodVerticalScalingAllowed(pod *v1.Pod) bool {

pkg/kubelet/kuberuntime/kuberuntime_manager_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,22 @@ func TestComputePodActions(t *testing.T) {
994994
ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
995995
},
996996
},
997+
"restart created but not started containers if RestartPolicy == OnFailure": {
998+
mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure },
999+
mutateStatusFn: func(status *kubecontainer.PodStatus) {
1000+
// The first container completed, don't restart it.
1001+
status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited
1002+
status.ContainerStatuses[0].ExitCode = 0
1003+
1004+
// The second container was created, but never started.
1005+
status.ContainerStatuses[1].State = kubecontainer.ContainerStateCreated
1006+
},
1007+
actions: podActions{
1008+
SandboxID: baseStatus.SandboxStatuses[0].Id,
1009+
ContainersToStart: []int{1},
1010+
ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
1011+
},
1012+
},
9971013
"don't restart containers if RestartPolicy == Never": {
9981014
mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever },
9991015
mutateStatusFn: func(status *kubecontainer.PodStatus) {

0 commit comments

Comments
 (0)