Skip to content

Commit 2ade53e

Browse files
authored
Merge pull request kubernetes#124947 from toVersus/fix/eviction-message
[Sidecar Containers] Consider init containers in eviction message
2 parents f0036aa + 1d0777a commit 2ade53e

File tree

2 files changed

+285
-4
lines changed

2 files changed

+285
-4
lines changed

pkg/kubelet/eviction/helpers.go

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,14 +1234,22 @@ func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats stats
12341234
if quantity != nil && available != nil {
12351235
message += fmt.Sprintf(thresholdMetMessageFmt, quantity, available)
12361236
}
1237-
containers := []string{}
1237+
exceededContainers := []string{}
12381238
containerUsage := []string{}
12391239
podStats, ok := stats(pod)
12401240
if !ok {
12411241
return
12421242
}
1243+
// Since the resources field cannot be specified for ephemeral containers,
1244+
// they will always be blamed for resource overuse when an eviction occurs.
1245+
// That’s why only regular, init and restartable init containers are considered
1246+
// for the eviction message.
1247+
containers := pod.Spec.Containers
1248+
if len(pod.Spec.InitContainers) != 0 {
1249+
containers = append(containers, pod.Spec.InitContainers...)
1250+
}
12431251
for _, containerStats := range podStats.Containers {
1244-
for _, container := range pod.Spec.Containers {
1252+
for _, container := range containers {
12451253
if container.Name == containerStats.Name {
12461254
requests := container.Resources.Requests[resourceToReclaim]
12471255
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) &&
@@ -1263,13 +1271,16 @@ func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats stats
12631271
}
12641272
if usage != nil && usage.Cmp(requests) > 0 {
12651273
message += fmt.Sprintf(containerMessageFmt, container.Name, usage.String(), requests.String(), resourceToReclaim)
1266-
containers = append(containers, container.Name)
1274+
exceededContainers = append(exceededContainers, container.Name)
12671275
containerUsage = append(containerUsage, usage.String())
12681276
}
1277+
// Found the container to compare resource usage with,
1278+
// so it's safe to break out of the containers loop here.
1279+
break
12691280
}
12701281
}
12711282
}
1272-
annotations[OffendingContainersKey] = strings.Join(containers, ",")
1283+
annotations[OffendingContainersKey] = strings.Join(exceededContainers, ",")
12731284
annotations[OffendingContainersUsageKey] = strings.Join(containerUsage, ",")
12741285
annotations[StarvedResourceKey] = string(resourceToReclaim)
12751286
return

pkg/kubelet/eviction/helpers_test.go

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3299,6 +3299,13 @@ func newContainer(name string, requests v1.ResourceList, limits v1.ResourceList)
32993299
}
33003300
}
33013301

3302+
func newRestartableInitContainer(name string, requests v1.ResourceList, limits v1.ResourceList) v1.Container {
3303+
restartAlways := v1.ContainerRestartPolicyAlways
3304+
container := newContainer(name, requests, limits)
3305+
container.RestartPolicy = &restartAlways
3306+
return container
3307+
}
3308+
33023309
func newVolume(name string, volumeSource v1.VolumeSource) v1.Volume {
33033310
return v1.Volume{
33043311
Name: name,
@@ -3321,6 +3328,12 @@ func newPod(name string, priority int32, containers []v1.Container, volumes []v1
33213328
}
33223329
}
33233330

3331+
func newPodWithInitContainers(name string, priority int32, initContainers []v1.Container, containers []v1.Container, volumes []v1.Volume) *v1.Pod {
3332+
pod := newPod(name, priority, containers, volumes)
3333+
pod.Spec.InitContainers = initContainers
3334+
return pod
3335+
}
3336+
33243337
// nodeConditionList is a simple alias to support equality checking independent of order
33253338
type nodeConditionList []v1.NodeConditionType
33263339

@@ -3448,3 +3461,260 @@ func TestStatsNotFoundForPod(t *testing.T) {
34483461
})
34493462
}
34503463
}
3464+
3465+
func TestEvictionMessage(t *testing.T) {
3466+
type containerMemoryStat struct {
3467+
name string
3468+
usage string
3469+
}
3470+
type memoryExceededContainer struct {
3471+
name string
3472+
request string
3473+
usage string
3474+
}
3475+
memoryExceededEvictionMessage := func(containers []memoryExceededContainer) string {
3476+
resourceToReclaim := v1.ResourceMemory
3477+
msg := fmt.Sprintf(nodeLowMessageFmt, resourceToReclaim)
3478+
for _, container := range containers {
3479+
msg += fmt.Sprintf(containerMessageFmt, container.name, container.usage, container.request, resourceToReclaim)
3480+
}
3481+
return msg
3482+
}
3483+
3484+
const (
3485+
init1 = "init1"
3486+
init2 = "init2"
3487+
init3 = "init3"
3488+
restartableInit1 = "restartable-init1"
3489+
restartableInit2 = "restartable-init2"
3490+
restartableInit3 = "restartable-init3"
3491+
regular1 = "regular1"
3492+
regular2 = "regular2"
3493+
regular3 = "regular3"
3494+
ephemeral1 = "ephemeral1"
3495+
)
3496+
3497+
testcase := []struct {
3498+
name string
3499+
initContainers []v1.Container
3500+
containers []v1.Container
3501+
ephemeralContainers []v1.EphemeralContainer
3502+
containerMemoryStats []containerMemoryStat
3503+
expectedContainerMessage string
3504+
expectedAnnotations map[string]string
3505+
}{
3506+
{
3507+
name: "No container exceeds memory usage",
3508+
initContainers: []v1.Container{
3509+
newContainer(init1, newResourceList("", "100Mi", ""), newResourceList("", "", "")),
3510+
newRestartableInitContainer(restartableInit1, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3511+
},
3512+
containers: []v1.Container{
3513+
newContainer(regular1, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3514+
},
3515+
// Terminated init containers aren't included in the containerMemoryStats
3516+
containerMemoryStats: []containerMemoryStat{
3517+
{name: restartableInit1, usage: "150Mi"},
3518+
{name: regular1, usage: "100Mi"},
3519+
},
3520+
expectedContainerMessage: memoryExceededEvictionMessage(nil),
3521+
},
3522+
{
3523+
name: "Init container exceeds memory usage",
3524+
initContainers: []v1.Container{
3525+
newContainer(init1, newResourceList("", "100Mi", ""), newResourceList("", "", "")),
3526+
newRestartableInitContainer(restartableInit1, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3527+
newContainer(init2, newResourceList("", "150Mi", ""), newResourceList("", "", "")),
3528+
},
3529+
containers: []v1.Container{
3530+
newContainer(regular1, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3531+
},
3532+
// An eviction occurred while the init container was running
3533+
containerMemoryStats: []containerMemoryStat{
3534+
{name: init1, usage: "150Mi"},
3535+
},
3536+
expectedContainerMessage: memoryExceededEvictionMessage([]memoryExceededContainer{
3537+
{name: init1, request: "100Mi", usage: "150Mi"},
3538+
}),
3539+
expectedAnnotations: map[string]string{
3540+
OffendingContainersKey: init1,
3541+
OffendingContainersUsageKey: "150Mi",
3542+
},
3543+
},
3544+
{
3545+
name: "Restartable init container exceeds memory usage",
3546+
initContainers: []v1.Container{
3547+
newContainer(init1, newResourceList("", "100Mi", ""), newResourceList("", "", "")),
3548+
newRestartableInitContainer(restartableInit1, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3549+
newRestartableInitContainer(restartableInit2, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3550+
},
3551+
containers: []v1.Container{
3552+
newContainer(regular1, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3553+
},
3554+
// Terminated init containers aren't included in the containerMemoryStats
3555+
containerMemoryStats: []containerMemoryStat{
3556+
{name: restartableInit1, usage: "250Mi"},
3557+
{name: restartableInit2, usage: "150Mi"},
3558+
{name: regular1, usage: "200Mi"},
3559+
},
3560+
expectedContainerMessage: memoryExceededEvictionMessage([]memoryExceededContainer{
3561+
{name: restartableInit1, request: "200Mi", usage: "250Mi"},
3562+
}),
3563+
expectedAnnotations: map[string]string{
3564+
OffendingContainersKey: restartableInit1,
3565+
OffendingContainersUsageKey: "250Mi",
3566+
},
3567+
},
3568+
{
3569+
name: "Regular container exceeds memory usage",
3570+
initContainers: []v1.Container{
3571+
newContainer(init1, newResourceList("", "100Mi", ""), newResourceList("", "", "")),
3572+
newRestartableInitContainer(restartableInit1, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3573+
},
3574+
containers: []v1.Container{
3575+
newContainer(regular1, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3576+
newContainer(regular2, newResourceList("", "300Mi", ""), newResourceList("", "", "")),
3577+
},
3578+
// Terminated init containers aren't included in the containerMemoryStats
3579+
containerMemoryStats: []containerMemoryStat{
3580+
{name: restartableInit1, usage: "200Mi"},
3581+
{name: regular1, usage: "250Mi"},
3582+
{name: regular2, usage: "250Mi"},
3583+
},
3584+
expectedContainerMessage: memoryExceededEvictionMessage([]memoryExceededContainer{
3585+
{name: regular1, request: "200Mi", usage: "250Mi"},
3586+
}),
3587+
expectedAnnotations: map[string]string{
3588+
OffendingContainersKey: regular1,
3589+
OffendingContainersUsageKey: "250Mi",
3590+
},
3591+
},
3592+
{
3593+
name: "Ephemeral container exceeds memory usage",
3594+
initContainers: []v1.Container{
3595+
newContainer(init1, newResourceList("", "100Mi", ""), newResourceList("", "", "")),
3596+
newRestartableInitContainer(restartableInit1, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3597+
},
3598+
containers: []v1.Container{
3599+
newContainer(regular1, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3600+
},
3601+
ephemeralContainers: []v1.EphemeralContainer{
3602+
{TargetContainerName: regular1, EphemeralContainerCommon: v1.EphemeralContainerCommon{Name: ephemeral1}},
3603+
},
3604+
// Terminated init containers aren't included in the containerMemoryStats
3605+
containerMemoryStats: []containerMemoryStat{
3606+
{name: restartableInit1, usage: "200Mi"},
3607+
{name: regular1, usage: "150Mi"},
3608+
{name: ephemeral1, usage: "250Mi"},
3609+
},
3610+
expectedContainerMessage: memoryExceededEvictionMessage(nil),
3611+
},
3612+
{
3613+
name: "Both regular and restartable init containers exceed memory usage due to missing memory requests",
3614+
initContainers: []v1.Container{
3615+
newContainer(init1, newResourceList("", "100Mi", ""), newResourceList("", "", "")),
3616+
newRestartableInitContainer(restartableInit1, newResourceList("", "", ""), newResourceList("", "", "")),
3617+
newRestartableInitContainer(restartableInit2, newResourceList("", "300Mi", ""), newResourceList("", "", "")),
3618+
},
3619+
containers: []v1.Container{
3620+
newContainer("regular1", newResourceList("", "", ""), newResourceList("", "", "")),
3621+
},
3622+
// Terminated init containers aren't included in the containerMemoryStats
3623+
containerMemoryStats: []containerMemoryStat{
3624+
{name: restartableInit1, usage: "250Mi"},
3625+
{name: restartableInit2, usage: "250Mi"},
3626+
{name: regular1, usage: "200Mi"},
3627+
},
3628+
expectedContainerMessage: memoryExceededEvictionMessage([]memoryExceededContainer{
3629+
{name: restartableInit1, request: "0", usage: "250Mi"},
3630+
{name: regular1, request: "0", usage: "200Mi"},
3631+
}),
3632+
expectedAnnotations: map[string]string{
3633+
OffendingContainersKey: strings.Join([]string{restartableInit1, regular1}, ","),
3634+
OffendingContainersUsageKey: "250Mi,200Mi",
3635+
},
3636+
},
3637+
{
3638+
name: "Multiple regular and restartable init containers exceed memory usage",
3639+
initContainers: []v1.Container{
3640+
newContainer(init1, newResourceList("", "100Mi", ""), newResourceList("", "", "")),
3641+
newContainer(init2, newResourceList("", "100Mi", ""), newResourceList("", "", "")),
3642+
newContainer(init3, newResourceList("", "100Mi", ""), newResourceList("", "", "")),
3643+
newRestartableInitContainer(restartableInit1, newResourceList("", "100Mi", ""), newResourceList("", "", "")),
3644+
newRestartableInitContainer(restartableInit2, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3645+
newRestartableInitContainer(restartableInit3, newResourceList("", "300Mi", ""), newResourceList("", "", "")),
3646+
},
3647+
containers: []v1.Container{
3648+
newContainer(regular1, newResourceList("", "300Mi", ""), newResourceList("", "", "")),
3649+
newContainer(regular2, newResourceList("", "200Mi", ""), newResourceList("", "", "")),
3650+
newContainer(regular3, newResourceList("", "100Mi", ""), newResourceList("", "", "")),
3651+
},
3652+
// Terminated init containers aren't included in the containerMemoryStats
3653+
containerMemoryStats: []containerMemoryStat{
3654+
{name: restartableInit1, usage: "150Mi"},
3655+
{name: restartableInit2, usage: "250Mi"},
3656+
{name: restartableInit3, usage: "250Mi"},
3657+
{name: regular1, usage: "50Mi"},
3658+
{name: regular2, usage: "250Mi"},
3659+
{name: regular3, usage: "400Mi"},
3660+
},
3661+
expectedContainerMessage: memoryExceededEvictionMessage([]memoryExceededContainer{
3662+
{name: restartableInit1, request: "100Mi", usage: "150Mi"},
3663+
{name: restartableInit2, request: "200Mi", usage: "250Mi"},
3664+
{name: regular2, request: "200Mi", usage: "250Mi"},
3665+
{name: regular3, request: "100Mi", usage: "400Mi"},
3666+
}),
3667+
expectedAnnotations: map[string]string{
3668+
OffendingContainersKey: strings.Join([]string{restartableInit1, restartableInit2, regular2, regular3}, ","),
3669+
OffendingContainersUsageKey: "150Mi,250Mi,250Mi,400Mi",
3670+
},
3671+
},
3672+
}
3673+
3674+
threshold := []evictionapi.Threshold{}
3675+
observations := signalObservations{}
3676+
for _, tc := range testcase {
3677+
pod := newPodWithInitContainers("pod", 1, tc.initContainers, tc.containers, nil)
3678+
if len(tc.ephemeralContainers) > 0 {
3679+
pod.Spec.EphemeralContainers = tc.ephemeralContainers
3680+
}
3681+
// Create PodMemoryStats with dummy container memory
3682+
// and override container stats with the provided values.
3683+
dummyContainerMemory := resource.MustParse("1Mi")
3684+
podStats := newPodMemoryStats(pod, dummyContainerMemory)
3685+
podStats.Containers = make([]statsapi.ContainerStats, len(tc.containerMemoryStats))
3686+
for _, stat := range tc.containerMemoryStats {
3687+
memoryStat := resource.MustParse(stat.usage)
3688+
memoryBytes := uint64(memoryStat.Value())
3689+
podStats.Containers = append(podStats.Containers, statsapi.ContainerStats{
3690+
Name: stat.name,
3691+
Memory: &statsapi.MemoryStats{
3692+
WorkingSetBytes: &memoryBytes,
3693+
},
3694+
})
3695+
}
3696+
stats := map[*v1.Pod]statsapi.PodStats{
3697+
pod: podStats,
3698+
}
3699+
statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) {
3700+
result, found := stats[pod]
3701+
return result, found
3702+
}
3703+
3704+
t.Run(tc.name, func(t *testing.T) {
3705+
msg, annotations := evictionMessage(v1.ResourceMemory, pod, statsFn, threshold, observations)
3706+
if msg != tc.expectedContainerMessage {
3707+
t.Errorf("Unexpected memory exceeded eviction message found, got: %s, want : %s", msg, tc.expectedContainerMessage)
3708+
}
3709+
for _, key := range []string{OffendingContainersKey, OffendingContainersUsageKey} {
3710+
val, ok := annotations[key]
3711+
if !ok {
3712+
t.Errorf("Expected annotation %s not found", key)
3713+
}
3714+
if val != tc.expectedAnnotations[key] {
3715+
t.Errorf("Unexpected annotation value for %s key found, got: %s, want: %s", key, val, tc.expectedAnnotations[key])
3716+
}
3717+
}
3718+
})
3719+
}
3720+
}

0 commit comments

Comments
 (0)