Commit 9b67ab3

fix: use container status resources over desired spec resources for cpu/memory resource metrics during in-place pod vertical scaling
1 parent 45be912 commit 9b67ab3

3 files changed: +121 −4 lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions

@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+### bugfix
+- Use actual applied container resources (`Pod.Status.ContainerStatuses[i].Resources`) over desired spec resources (`Pod.Spec.Containers[i].Resources`) where present to correctly report `cpuRequestedCores`, `memoryRequestedBytes`, and related metrics during in-place pod vertical scaling (K8s 1.33+ beta, 1.35+ GA) @kondracek-nr
+
 ### enhancement
 - Support OpenShift 4.20 @jamescripter [#1401](https://github.com/newrelic/nri-kubernetes/pull/1401)
 
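For context, here is a minimal standalone sketch of the preference rule this commit applies, assuming a k8s.io/api version that includes ContainerStatus.Resources; the helper name effectiveResources is illustrative and not part of the patch:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// effectiveResources is an illustrative helper (not part of this patch): it returns
// the resources the kubelet has actually applied when the container status carries
// them, and falls back to the desired resources from the pod spec otherwise.
func effectiveResources(spec v1.ResourceRequirements, status *v1.ContainerStatus) v1.ResourceRequirements {
	if status != nil && status.Resources != nil {
		return *status.Resources
	}
	return spec
}

func main() {
	// Desired state: the spec still asks for 100m CPU.
	spec := v1.ResourceRequirements{
		Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")},
	}
	// Actual state mid-resize: the kubelet has already applied 200m.
	status := &v1.ContainerStatus{
		Name: "app",
		Resources: &v1.ResourceRequirements{
			Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m")},
		},
	}

	got := effectiveResources(spec, status)
	fmt.Println(got.Requests.Cpu().MilliValue()) // 200: the applied value, not the desired 100
}

With this rule, metrics track what the container is actually running with during a resize, and they still match the spec whenever the status carries no resource information (older kubelets, or containers that were never resized).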

src/kubelet/metric/pods.go

Lines changed: 28 additions & 4 deletions

@@ -190,6 +190,24 @@ func (podsFetcher *PodsFetcher) fetchContainersData(pod *v1.Pod) map[string]defi
 	statuses := make(map[string]definition.RawMetrics)
 	fillContainerStatuses(pod, statuses)
 
+	// Build lookup map for actual applied resources per container name.
+	// As of K8s 1.33 (beta, on by default) / 1.35 (GA), in-place pod vertical scaling means
+	// Pod.Spec.Containers[i].Resources is the desired state, not the actual state.
+	// Pod.Status.ContainerStatuses[i].Resources holds the actual applied resources.
+	containerStatusByName := make(map[string]*v1.ContainerStatus)
+	for i := range pod.Status.ContainerStatuses {
+		cs := &pod.Status.ContainerStatuses[i]
+		containerStatusByName[cs.Name] = cs
+	}
+	for idx, initContainer := range pod.Spec.InitContainers {
+		if initContainer.RestartPolicy != nil && *initContainer.RestartPolicy == v1.ContainerRestartPolicyAlways {
+			if idx < len(pod.Status.InitContainerStatuses) {
+				cs := &pod.Status.InitContainerStatuses[idx]
+				containerStatusByName[cs.Name] = cs
+			}
+		}
+	}
+
 	metrics := make(map[string]definition.RawMetrics)
 	containers := pod.Spec.Containers
 

@@ -214,19 +232,25 @@ func (podsFetcher *PodsFetcher) fetchContainersData(pod *v1.Pod) map[string]defi
 			metrics[id]["nodeIP"] = v
 		}
 
-		if v, ok := c.Resources.Requests[v1.ResourceCPU]; ok {
+		// Prefer status resources (actual applied) over spec resources (desired state).
+		resources := c.Resources
+		if cs, ok := containerStatusByName[c.Name]; ok && cs.Resources != nil {
+			resources = *cs.Resources
+		}
+
+		if v, ok := resources.Requests[v1.ResourceCPU]; ok {
 			metrics[id]["cpuRequestedCores"] = v.MilliValue()
 		}
 
-		if v, ok := c.Resources.Limits[v1.ResourceCPU]; ok {
+		if v, ok := resources.Limits[v1.ResourceCPU]; ok {
 			metrics[id]["cpuLimitCores"] = v.MilliValue()
 		}
 
-		if v, ok := c.Resources.Requests[v1.ResourceMemory]; ok {
+		if v, ok := resources.Requests[v1.ResourceMemory]; ok {
 			metrics[id]["memoryRequestedBytes"] = v.Value()
 		}
 
-		if v, ok := c.Resources.Limits[v1.ResourceMemory]; ok {
+		if v, ok := resources.Limits[v1.ResourceMemory]; ok {
 			metrics[id]["memoryLimitBytes"] = v.Value()
 		}
 
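Note on the first hunk: the second loop covers init containers declared with restartPolicy: Always (native sidecar containers), which keep running alongside the regular containers; their applied resources are reported under Pod.Status.InitContainerStatuses rather than ContainerStatuses, so they are folded into the same name-keyed lookup map here.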

src/kubelet/metric/pods_test.go

Lines changed: 90 additions & 0 deletions

@@ -14,6 +14,7 @@ import (
 	"github.com/google/go-cmp/cmp"
 	"github.com/stretchr/testify/assert"
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"github.com/newrelic/nri-kubernetes/v3/internal/config"

@@ -463,3 +464,92 @@ func TestFetchPodData_WithPriorityClassNameOnly(t *testing.T) {
 	assert.False(t, hasPriority, "priority should not be present when nil")
 	assert.Equal(t, "system-cluster-critical", result["priorityClassName"])
 }
+
+// TestFetchContainersData_InPlaceVerticalScaling verifies that when
+// ContainerStatus.Resources is populated (K8s 1.33+ in-place pod vertical scaling),
+// the actual applied resources are used instead of Spec resources (desired state).
+func TestFetchContainersData_InPlaceVerticalScaling(t *testing.T) {
+	t.Parallel()
+
+	specCPU := resource.MustParse("100m")
+	specMemory := resource.MustParse("128Mi")
+	statusCPU := resource.MustParse("200m") // resized up
+	statusMemory := resource.MustParse("256Mi")
+
+	pod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-pod",
+			Namespace: "default",
+		},
+		Spec: corev1.PodSpec{
+			NodeName: "test-node",
+			Containers: []corev1.Container{
+				{
+					Name:  "app",
+					Image: "app:latest",
+					Resources: corev1.ResourceRequirements{
+						Requests: corev1.ResourceList{
+							corev1.ResourceCPU:    specCPU,
+							corev1.ResourceMemory: specMemory,
+						},
+						Limits: corev1.ResourceList{
+							corev1.ResourceCPU:    specCPU,
+							corev1.ResourceMemory: specMemory,
+						},
+					},
+				},
+				{
+					Name:  "sidecar",
+					Image: "sidecar:latest",
+					Resources: corev1.ResourceRequirements{
+						Requests: corev1.ResourceList{
+							corev1.ResourceCPU:    specCPU,
+							corev1.ResourceMemory: specMemory,
+						},
+					},
+				},
+			},
+		},
+		Status: corev1.PodStatus{
+			HostIP: "192.168.0.1",
+			ContainerStatuses: []corev1.ContainerStatus{
+				{
+					Name: "app",
+					// Resources populated: actual applied state after resize
+					Resources: &corev1.ResourceRequirements{
+						Requests: corev1.ResourceList{
+							corev1.ResourceCPU:    statusCPU,
+							corev1.ResourceMemory: statusMemory,
+						},
+						Limits: corev1.ResourceList{
+							corev1.ResourceCPU:    statusCPU,
+							corev1.ResourceMemory: statusMemory,
+						},
+					},
+				},
+				{
+					Name:      "sidecar",
+					Resources: nil, // no status resources; fall back to spec
+				},
+			},
+		},
+	}
+
+	podFetcher := &PodsFetcher{}
+	result := podFetcher.fetchContainersData(pod)
+
+	appID := "default_test-pod_app"
+	sidecarID := "default_test-pod_sidecar"
+
+	// "app" container: status resources should take precedence
+	assert.Equal(t, statusCPU.MilliValue(), result[appID]["cpuRequestedCores"], "app: should use status CPU request")
+	assert.Equal(t, statusCPU.MilliValue(), result[appID]["cpuLimitCores"], "app: should use status CPU limit")
+	assert.Equal(t, statusMemory.Value(), result[appID]["memoryRequestedBytes"], "app: should use status memory request")
+	assert.Equal(t, statusMemory.Value(), result[appID]["memoryLimitBytes"], "app: should use status memory limit")
+
+	// "sidecar" container: status resources nil, should fall back to spec
+	assert.Equal(t, specCPU.MilliValue(), result[sidecarID]["cpuRequestedCores"], "sidecar: should fall back to spec CPU request")
+	assert.Equal(t, specMemory.Value(), result[sidecarID]["memoryRequestedBytes"], "sidecar: should fall back to spec memory request")
+	_, hasLimit := result[sidecarID]["cpuLimitCores"]
+	assert.False(t, hasLimit, "sidecar: no CPU limit in spec, should not be set")
+}
