Skip to content

Commit 77749c2

Browse files
authored
Merge pull request kubernetes#127193 from DP19/ignore-unready-pods-hpa-containermetrics
allow ContainerResource calculations to continue with missing metrics like Resource calculations
2 parents e831239 + f97abdb commit 77749c2

File tree

4 files changed

+41
-22
lines changed

4 files changed

+41
-22
lines changed

pkg/controller/podautoscaler/metrics/client.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,15 @@ func (c *resourceMetricsClient) GetResourceMetric(ctx context.Context, resource
7575
}
7676
var res PodMetricsInfo
7777
if container != "" {
78-
res, err = getContainerMetrics(metrics.Items, resource, container)
79-
if err != nil {
80-
return nil, time.Time{}, fmt.Errorf("failed to get container metrics: %v", err)
81-
}
78+
res = getContainerMetrics(ctx, metrics.Items, resource, container)
8279
} else {
8380
res = getPodMetrics(ctx, metrics.Items, resource)
8481
}
8582
timestamp := metrics.Items[0].Timestamp.Time
8683
return res, timestamp, nil
8784
}
8885

89-
func getContainerMetrics(rawMetrics []metricsapi.PodMetrics, resource v1.ResourceName, container string) (PodMetricsInfo, error) {
86+
func getContainerMetrics(ctx context.Context, rawMetrics []metricsapi.PodMetrics, resource v1.ResourceName, container string) PodMetricsInfo {
9087
res := make(PodMetricsInfo, len(rawMetrics))
9188
for _, m := range rawMetrics {
9289
containerFound := false
@@ -104,10 +101,10 @@ func getContainerMetrics(rawMetrics []metricsapi.PodMetrics, resource v1.Resourc
104101
}
105102
}
106103
if !containerFound {
107-
return nil, fmt.Errorf("container %s not present in metrics for pod %s/%s", container, m.Namespace, m.Name)
104+
klog.FromContext(ctx).V(2).Info("Missing container metric", "container", container, "pod", klog.KRef(m.Namespace, m.Name))
108105
}
109106
}
110-
return res, nil
107+
return res
111108
}
112109

113110
func getPodMetrics(ctx context.Context, rawMetrics []metricsapi.PodMetrics, resource v1.ResourceName) PodMetricsInfo {

pkg/controller/podautoscaler/metrics/client_test.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,22 @@ func TestRESTClientContainerCPUEmptyMetricsForOnePod(t *testing.T) {
429429
container: "test-1",
430430
targetTimestamp: targetTimestamp,
431431
window: window,
432-
desiredError: fmt.Errorf("failed to get container metrics"),
432+
reportedPodMetrics: []map[string]int64{{"test-1": 100}, {"test-1": 300, "test-2": 400}, {}},
433+
}
434+
tc.runTest(t)
435+
}
436+
437+
func TestRESTClientContainerCPUEmptyMetricsForOnePodReturnsOnlyFoundContainer(t *testing.T) {
438+
targetTimestamp := 1
439+
window := 30 * time.Second
440+
tc := restClientTestCase{
441+
resourceName: v1.ResourceCPU,
442+
desiredMetricValues: PodMetricsInfo{
443+
"test-pod-1": {Value: 400, Timestamp: offsetTimestampBy(targetTimestamp), Window: window},
444+
},
445+
container: "test-2",
446+
targetTimestamp: targetTimestamp,
447+
window: window,
433448
reportedPodMetrics: []map[string]int64{{"test-1": 100}, {"test-1": 300, "test-2": 400}, {}},
434449
}
435450
tc.runTest(t)

pkg/controller/podautoscaler/metrics/interfaces.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ type MetricsClient interface {
4141
// GetResourceMetric gets the given resource metric (and an associated oldest timestamp)
4242
// for the specified named container in all pods matching the specified selector in the given namespace and when
4343
// the container is an empty string it returns the sum of all the container metrics.
44+
// Missing metrics will not error and callers should rely on Pod status to filter metrics info returned from this interface.
4445
GetResourceMetric(ctx context.Context, resource v1.ResourceName, namespace string, selector labels.Selector, container string) (PodMetricsInfo, time.Time, error)
4546

4647
// GetRawMetric gets the given metric (and an associated oldest timestamp)

pkg/controller/podautoscaler/replica_calculator_test.go

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -446,20 +446,6 @@ func TestReplicaCalcDisjointResourcesMetrics(t *testing.T) {
446446
tc.runTest(t)
447447
}
448448

449-
func TestReplicaCalcMissingContainerMetricError(t *testing.T) {
450-
tc := replicaCalcTestCase{
451-
currentReplicas: 1,
452-
expectedError: fmt.Errorf("container container2 not present in metrics for pod test-namespace/test-pod-0"),
453-
resource: &resourceInfo{
454-
name: v1.ResourceCPU,
455-
requests: []resource.Quantity{resource.MustParse("1.0")},
456-
levels: [][]int64{{0}},
457-
},
458-
container: "container2",
459-
}
460-
tc.runTest(t)
461-
}
462-
463449
func TestReplicaCalcScaleUp(t *testing.T) {
464450
tc := replicaCalcTestCase{
465451
currentReplicas: 3,
@@ -606,6 +592,26 @@ func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) {
606592
tc.runTest(t)
607593
}
608594

595+
func TestReplicaCalcMissingContainerMetricScaleUpIgnoresFailedPods(t *testing.T) {
596+
tc := replicaCalcTestCase{
597+
currentReplicas: 2,
598+
expectedReplicas: 4,
599+
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
600+
podPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
601+
resource: &resourceInfo{
602+
name: v1.ResourceCPU,
603+
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
604+
levels: [][]int64{{1000, 500}, {9000, 700}, {1000}, {9000}},
605+
606+
targetUtilization: 30,
607+
expectedUtilization: 60,
608+
expectedValue: 600,
609+
},
610+
container: "container2",
611+
}
612+
tc.runTest(t)
613+
}
614+
609615
func TestReplicaCalcScaleUpContainerIgnoresFailedPods(t *testing.T) {
610616
tc := replicaCalcTestCase{
611617
currentReplicas: 2,

0 commit comments

Comments
 (0)