Skip to content

Commit 2ad48d3

Browse files
authored
Merge pull request kubernetes#95364 from deads2k/pending-pods
set lastterminationstate for container status even when CRI fails to return termination (or any) data
2 parents e99df0e + e0516a3 commit 2ad48d3

File tree

1 file changed

+70
-0
lines changed

1 file changed

+70
-0
lines changed

pkg/kubelet/kubelet_pods.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,6 +1642,12 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
16421642
ContainerID: cid,
16431643
}
16441644
default:
1645+
// this collapses any unknown state to container waiting. If any container is waiting, then the pod status moves to pending even if it is running.
1646+
// if I'm reading this correctly, then any failure to read status on any container results in the entire pod going pending even if the containers
1647+
// are actually running.
1648+
// see https://github.com/kubernetes/kubernetes/blob/5d1b3e26af73dde33ecb6a3e69fb5876ceab192f/pkg/kubelet/kuberuntime/kuberuntime_container.go#L497 to
1649+
// https://github.com/kubernetes/kubernetes/blob/8976e3620f8963e72084971d9d4decbd026bf49f/pkg/kubelet/kuberuntime/helpers.go#L58-L71
1650+
// and interpreted here https://github.com/kubernetes/kubernetes/blob/b27e78f590a0d43e4a23ca3b2bf1739ca4c6e109/pkg/kubelet/kubelet_pods.go#L1434-L1439
16451651
status.State.Waiting = &v1.ContainerStateWaiting{}
16461652
}
16471653
return status
@@ -1681,6 +1687,70 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
16811687
statuses[container.Name] = status
16821688
}
16831689

1690+
for _, container := range containers {
1691+
found := false
1692+
for _, cStatus := range podStatus.ContainerStatuses {
1693+
if container.Name == cStatus.Name {
1694+
found = true
1695+
break
1696+
}
1697+
}
1698+
if found {
1699+
continue
1700+
}
1701+
// if no container status is found, then assuming it should be waiting seems plausible, but the status code requires
1702+
// that a previous termination be present. If we're offline long enough (or something removed the container?), then
1703+
// the previous termination may not be present. This next code block ensures that if the container was previously running
1704+
// then when that container status disappears, we can infer that it terminated even if we don't know the status code.
1705+
// By setting the lasttermination state we are able to leave the container status waiting and present more accurate
1706+
// data via the API.
1707+
1708+
oldStatus, ok := oldStatuses[container.Name]
1709+
if !ok {
1710+
continue
1711+
}
1712+
if oldStatus.State.Terminated != nil {
1713+
// if the old container status was terminated, the lasttermination status is correct
1714+
continue
1715+
}
1716+
if oldStatus.State.Running == nil {
1717+
// if the old container status isn't running, then waiting is an appropriate status and we have nothing to do
1718+
continue
1719+
}
1720+
1721+
if pod.DeletionTimestamp == nil {
1722+
continue
1723+
}
1724+
1725+
// and if the pod itself is being deleted, then the CRI may have removed the container already and for whatever reason the kubelet missed the exit code
1726+
// (this seems not awesome). We know at this point that we will not be restarting the container.
1727+
status := statuses[container.Name]
1728+
// if the status we're about to write indicates the default, the Waiting status will force this pod back into Pending.
1729+
// That isn't true, we know the pod is going away.
1730+
isDefaultWaitingStatus := status.State.Waiting != nil && status.State.Waiting.Reason == "ContainerCreating"
1731+
if hasInitContainers {
1732+
isDefaultWaitingStatus = status.State.Waiting != nil && status.State.Waiting.Reason == "PodInitializing"
1733+
}
1734+
if !isDefaultWaitingStatus {
1735+
// a non-default status was already written, don't override it
1736+
continue
1737+
}
1738+
if status.LastTerminationState.Terminated != nil {
1739+
// if we already have a termination state, nothing to do
1740+
continue
1741+
}
1742+
1743+
// setting this value ensures that we show as stopped here, not as waiting:
1744+
// https://github.com/kubernetes/kubernetes/blob/90c9f7b3e198e82a756a68ffeac978a00d606e55/pkg/kubelet/kubelet_pods.go#L1440-L1445
1745+
// This prevents the pod from becoming pending
1746+
status.LastTerminationState.Terminated = &v1.ContainerStateTerminated{
1747+
Reason: "ContainerStatusUnknown",
1748+
Message: "The container could not be located when the pod was deleted. The container used to be Running",
1749+
ExitCode: 137,
1750+
}
1751+
statuses[container.Name] = status
1752+
}
1753+
16841754
// Make the latest container status come first.
16851755
sort.Sort(sort.Reverse(kubecontainer.SortContainerStatusesByCreationTime(podStatus.ContainerStatuses)))
16861756
// Set container statuses according to the statuses seen in pod status

0 commit comments

Comments
 (0)