@@ -71,6 +71,11 @@ import (
71
71
// Header strings for the Kubernetes-managed hosts file, with a separate
// variant for the host network case.
const (
	managedHostsHeader                = "# Kubernetes-managed hosts file.\n "
	managedHostsHeaderWithHostNetwork = "# Kubernetes-managed hosts file (host network).\n "
)

// podKillingChannelCapacity is the capacity of the channel that stores pods
// to kill. It can stay small: one goroutine is dedicated to draining the
// channel and does not block on anything else.
const podKillingChannelCapacity = 50
75
80
76
81
// Get a list of pods that have data directories.
@@ -1020,6 +1025,23 @@ func (kl *Kubelet) removeOrphanedPodStatuses(pods []*v1.Pod, mirrorPods []*v1.Po
1020
1025
kl .statusManager .RemoveOrphanedStatuses (podUIDs )
1021
1026
}
1022
1027
1028
+ // deleteOrphanedMirrorPods checks whether pod killer has done with orphaned mirror pod.
1029
+ // If pod killing is done, podManager.DeleteMirrorPod() is called to delete mirror pod
1030
+ // from the API server
1031
+ func (kl * Kubelet ) deleteOrphanedMirrorPods () {
1032
+ podFullNames := kl .podManager .GetOrphanedMirrorPodNames ()
1033
+ for _ , podFullname := range podFullNames {
1034
+ if ! kl .podKiller .IsMirrorPodPendingTerminationByPodName (podFullname ) {
1035
+ _ , err := kl .podManager .DeleteMirrorPod (podFullname , nil )
1036
+ if err != nil {
1037
+ klog .Errorf ("encountered error when deleting mirror pod %q : %v" , podFullname , err )
1038
+ } else {
1039
+ klog .V (3 ).Infof ("deleted pod %q" , podFullname )
1040
+ }
1041
+ }
1042
+ }
1043
+ }
1044
+
1023
1045
// HandlePodCleanups performs a series of cleanup work, including terminating
1024
1046
// pod workers, killing unwanted pods, and removing orphaned volumes/pod
1025
1047
// directories.
@@ -1071,7 +1093,7 @@ func (kl *Kubelet) HandlePodCleanups() error {
1071
1093
}
1072
1094
for _ , pod := range runningPods {
1073
1095
if _ , found := desiredPods [pod .ID ]; ! found {
1074
- kl .podKillingCh <- & kubecontainer.PodPair {APIPod : nil , RunningPod : pod }
1096
+ kl .podKiller . KillPod ( & kubecontainer.PodPair {APIPod : nil , RunningPod : pod })
1075
1097
}
1076
1098
}
1077
1099
@@ -1099,24 +1121,112 @@ func (kl *Kubelet) HandlePodCleanups() error {
1099
1121
}
1100
1122
1101
1123
// Remove any orphaned mirror pods.
1102
- kl .podManager . DeleteOrphanedMirrorPods ()
1124
+ kl .deleteOrphanedMirrorPods ()
1103
1125
1104
1126
// Remove any cgroups in the hierarchy for pods that are no longer running.
1105
1127
if kl .cgroupsPerQOS {
1106
- kl .cleanupOrphanedPodCgroups (cgroupPods , activePods )
1128
+ pcm := kl .containerManager .NewPodContainerManager ()
1129
+ kl .cleanupOrphanedPodCgroups (pcm , cgroupPods , activePods )
1107
1130
}
1108
1131
1109
1132
kl .backOff .GC ()
1110
1133
return nil
1111
1134
}
1112
1135
1113
- // podKiller launches a goroutine to kill a pod received from the channel if
1136
+ // PodKiller handles requests for killing pods
1137
+ type PodKiller interface {
1138
+ // KillPod receives pod speficier representing the pod to kill
1139
+ KillPod (pair * kubecontainer.PodPair )
1140
+ // PerformPodKillingWork performs the actual pod killing work via calling CRI
1141
+ // It returns after its Close() func is called and all outstanding pod killing requests are served
1142
+ PerformPodKillingWork ()
1143
+ // After Close() is called, this pod killer wouldn't accept any more pod killing requests
1144
+ Close ()
1145
+ // IsMirrorPodPendingTerminationByPodName checks whether the mirror pod for the given full pod name is pending termination
1146
+ IsMirrorPodPendingTerminationByPodName (podFullname string ) bool
1147
+ // IsMirrorPodPendingTerminationByUID checks whether the mirror pod for the given uid is pending termination
1148
+ IsMirrorPodPendingTerminationByUID (uid types.UID ) bool
1149
+ // MarkMirrorPodPendingTermination marks the mirror pod entering grace period of termination
1150
+ MarkMirrorPodPendingTermination (pod * v1.Pod )
1151
+ }
1152
+
1153
+ // podKillerWithChannel is an implementation of PodKiller which receives pod killing requests via channel
1154
+ type podKillerWithChannel struct {
1155
+ // Channel for getting pods to kill.
1156
+ podKillingCh chan * kubecontainer.PodPair
1157
+ // lock for synchronization between HandlePodCleanups and pod killer
1158
+ podKillingLock * sync.Mutex
1159
+ // mirrorPodTerminationMap keeps track of the progress of mirror pod termination
1160
+ // The key is the UID of the pod and the value is the full name of the pod
1161
+ mirrorPodTerminationMap map [string ]string
1162
+ // killPod is the func which invokes runtime to kill the pod
1163
+ killPod func (pod * v1.Pod , runningPod * kubecontainer.Pod , status * kubecontainer.PodStatus , gracePeriodOverride * int64 ) error
1164
+ }
1165
+
1166
+ // NewPodKiller returns a functional PodKiller
1167
+ func NewPodKiller (kl * Kubelet ) PodKiller {
1168
+ podKiller := & podKillerWithChannel {
1169
+ podKillingCh : make (chan * kubecontainer.PodPair , podKillingChannelCapacity ),
1170
+ podKillingLock : & sync.Mutex {},
1171
+ mirrorPodTerminationMap : make (map [string ]string ),
1172
+ killPod : kl .killPod ,
1173
+ }
1174
+ return podKiller
1175
+ }
1176
+
1177
+ // IsMirrorPodPendingTerminationByUID checks whether the pod for the given uid is pending termination
1178
+ func (pk * podKillerWithChannel ) IsMirrorPodPendingTerminationByUID (uid types.UID ) bool {
1179
+ pk .podKillingLock .Lock ()
1180
+ defer pk .podKillingLock .Unlock ()
1181
+ _ , ok := pk .mirrorPodTerminationMap [string (uid )]
1182
+ return ok
1183
+ }
1184
+
1185
+ // IsMirrorPodPendingTerminationByPodName checks whether the given pod is in grace period of termination
1186
+ func (pk * podKillerWithChannel ) IsMirrorPodPendingTerminationByPodName (podFullname string ) bool {
1187
+ pk .podKillingLock .Lock ()
1188
+ defer pk .podKillingLock .Unlock ()
1189
+ for _ , name := range pk .mirrorPodTerminationMap {
1190
+ if name == podFullname {
1191
+ return true
1192
+ }
1193
+ }
1194
+ return false
1195
+ }
1196
+
1197
+ func (pk * podKillerWithChannel ) markMirrorPodTerminated (uid string ) {
1198
+ pk .podKillingLock .Lock ()
1199
+ klog .V (4 ).Infof ("marking pod termination %q" , uid )
1200
+ delete (pk .mirrorPodTerminationMap , uid )
1201
+ pk .podKillingLock .Unlock ()
1202
+ }
1203
+
1204
+ // MarkMirrorPodPendingTermination marks the pod entering grace period of termination
1205
+ func (pk * podKillerWithChannel ) MarkMirrorPodPendingTermination (pod * v1.Pod ) {
1206
+ fullname := kubecontainer .GetPodFullName (pod )
1207
+ klog .V (3 ).Infof ("marking pod pending termination %q" , string (pod .UID ))
1208
+ pk .podKillingLock .Lock ()
1209
+ pk .mirrorPodTerminationMap [string (pod .UID )] = fullname
1210
+ pk .podKillingLock .Unlock ()
1211
+ }
1212
+
1213
+ // Close closes the channel through which requests are delivered
1214
+ func (pk * podKillerWithChannel ) Close () {
1215
+ close (pk .podKillingCh )
1216
+ }
1217
+
1218
+ // KillPod sends pod killing request to the killer
1219
+ func (pk * podKillerWithChannel ) KillPod (pair * kubecontainer.PodPair ) {
1220
+ pk .podKillingCh <- pair
1221
+ }
1222
+
1223
+ // PerformPodKillingWork launches a goroutine to kill a pod received from the channel if
1114
1224
// another goroutine isn't already in action.
1115
- func (kl * Kubelet ) podKiller () {
1225
+ func (pk * podKillerWithChannel ) PerformPodKillingWork () {
1116
1226
killing := sets .NewString ()
1117
1227
// guard for the killing set
1118
1228
lock := sync.Mutex {}
1119
- for podPair := range kl .podKillingCh {
1229
+ for podPair := range pk .podKillingCh {
1120
1230
runningPod := podPair .RunningPod
1121
1231
apiPod := podPair .APIPod
1122
1232
@@ -1130,13 +1240,14 @@ func (kl *Kubelet) podKiller() {
1130
1240
if ! exists {
1131
1241
go func (apiPod * v1.Pod , runningPod * kubecontainer.Pod ) {
1132
1242
klog .V (2 ).Infof ("Killing unwanted pod %q" , runningPod .Name )
1133
- err := kl .killPod (apiPod , runningPod , nil , nil )
1243
+ err := pk .killPod (apiPod , runningPod , nil , nil )
1134
1244
if err != nil {
1135
1245
klog .Errorf ("Failed killing the pod %q: %v" , runningPod .Name , err )
1136
1246
}
1137
1247
lock .Lock ()
1138
1248
killing .Delete (string (runningPod .ID ))
1139
1249
lock .Unlock ()
1250
+ pk .markMirrorPodTerminated (string (runningPod .ID ))
1140
1251
}(apiPod , runningPod )
1141
1252
}
1142
1253
}
@@ -1721,13 +1832,12 @@ func (kl *Kubelet) GetPortForward(podName, podNamespace string, podUID types.UID
1721
1832
1722
1833
// cleanupOrphanedPodCgroups removes cgroups that should no longer exist.
1723
1834
// it reconciles the cached state of cgroupPods with the specified list of runningPods
1724
- func (kl * Kubelet ) cleanupOrphanedPodCgroups (cgroupPods map [types.UID ]cm.CgroupName , activePods []* v1.Pod ) {
1835
+ func (kl * Kubelet ) cleanupOrphanedPodCgroups (pcm cm. PodContainerManager , cgroupPods map [types.UID ]cm.CgroupName , activePods []* v1.Pod ) {
1725
1836
// Add all running pods to the set that we want to preserve
1726
1837
podSet := sets .NewString ()
1727
1838
for _ , pod := range activePods {
1728
1839
podSet .Insert (string (pod .UID ))
1729
1840
}
1730
- pcm := kl .containerManager .NewPodContainerManager ()
1731
1841
1732
1842
// Iterate over all the found pods to verify if they should be running
1733
1843
for uid , val := range cgroupPods {
@@ -1736,6 +1846,11 @@ func (kl *Kubelet) cleanupOrphanedPodCgroups(cgroupPods map[types.UID]cm.CgroupN
1736
1846
continue
1737
1847
}
1738
1848
1849
+ // if the pod is within termination grace period, we shouldn't cleanup the underlying cgroup
1850
+ if kl .podKiller .IsMirrorPodPendingTerminationByUID (uid ) {
1851
+ klog .V (3 ).Infof ("pod %q is pending termination" , uid )
1852
+ continue
1853
+ }
1739
1854
// If volumes have not been unmounted/detached, do not delete the cgroup
1740
1855
// so any memory backed volumes don't have their charges propagated to the
1741
1856
// parent cgroup. If the volumes still exist, reduce the cpu shares for any
0 commit comments