Skip to content

Commit 1ccaaa7

Browse files
committed
Ignore deleted pods.
When a pod is deleted, it is given a deletion timestamp. However the pod might still run for some time during graceful shutdown. During this time it might still produce CPU utilization metrics and be in a Running phase. Currently the HPA replica calculator attempts to ignore deleted pods by skipping over them. However by not adding them to the ignoredPods set, their metrics are not removed from the average utilization calculation. This allows pods in the process of shutting down to drag down the recommmended number of replicas by producing near 0% utilization metrics. In fact the ignoredPods set is misnomer. Those pods are not fully ignored. When the replica calculator recommends to scale up, 0% utilization metrics are filled in for those pods to limit the scale up. This prevents overscaling when pods take some time to startup. In fact, there should be 4 sets considered (readyPods, unreadyPods, missingPods, ignoredPods) not just 3. This change renames ignoredPods as unreadyPods and leaves the scaleup limiting semantics. Another set (actually) ignoredPods is added to which delete pods are added instead of being skipped during grouping. Both ignoredPods and unreadyPods have their metrics removed from consideration. But only unreadyPods have 0% utilization metrics filled in upon scaleup.
1 parent 6d01c5a commit 1ccaaa7

File tree

2 files changed

+190
-122
lines changed

2 files changed

+190
-122
lines changed

pkg/controller/podautoscaler/replica_calculator.go

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,9 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
7676
return 0, 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count")
7777
}
7878

79-
readyPodCount, ignoredPods, missingPods := groupPods(podList, metrics, resource, c.cpuInitializationPeriod, c.delayOfInitialReadinessStatus)
79+
readyPodCount, unreadyPods, missingPods, ignoredPods := groupPods(podList, metrics, resource, c.cpuInitializationPeriod, c.delayOfInitialReadinessStatus)
8080
removeMetricsForPods(metrics, ignoredPods)
81+
removeMetricsForPods(metrics, unreadyPods)
8182
requests, err := calculatePodRequests(podList, resource)
8283
if err != nil {
8384
return 0, 0, 0, time.Time{}, err
@@ -92,7 +93,7 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
9293
return 0, 0, 0, time.Time{}, err
9394
}
9495

95-
rebalanceIgnored := len(ignoredPods) > 0 && usageRatio > 1.0
96+
rebalanceIgnored := len(unreadyPods) > 0 && usageRatio > 1.0
9697
if !rebalanceIgnored && len(missingPods) == 0 {
9798
if math.Abs(1.0-usageRatio) <= c.tolerance {
9899
// return the current replicas if the change would be too small
@@ -119,7 +120,7 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
119120

120121
if rebalanceIgnored {
121122
// on a scale-up, treat unready pods as using 0% of the resource request
122-
for podName := range ignoredPods {
123+
for podName := range unreadyPods {
123124
metrics[podName] = metricsclient.PodMetric{Value: 0}
124125
}
125126
}
@@ -184,16 +185,17 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
184185
return 0, 0, fmt.Errorf("no pods returned by selector while calculating replica count")
185186
}
186187

187-
readyPodCount, ignoredPods, missingPods := groupPods(podList, metrics, resource, c.cpuInitializationPeriod, c.delayOfInitialReadinessStatus)
188+
readyPodCount, unreadyPods, missingPods, ignoredPods := groupPods(podList, metrics, resource, c.cpuInitializationPeriod, c.delayOfInitialReadinessStatus)
188189
removeMetricsForPods(metrics, ignoredPods)
190+
removeMetricsForPods(metrics, unreadyPods)
189191

190192
if len(metrics) == 0 {
191193
return 0, 0, fmt.Errorf("did not receive metrics for any ready pods")
192194
}
193195

194196
usageRatio, utilization := metricsclient.GetMetricUtilizationRatio(metrics, targetUtilization)
195197

196-
rebalanceIgnored := len(ignoredPods) > 0 && usageRatio > 1.0
198+
rebalanceIgnored := len(unreadyPods) > 0 && usageRatio > 1.0
197199

198200
if !rebalanceIgnored && len(missingPods) == 0 {
199201
if math.Abs(1.0-usageRatio) <= c.tolerance {
@@ -221,7 +223,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
221223

222224
if rebalanceIgnored {
223225
// on a scale-up, treat unready pods as using 0% of the resource request
224-
for podName := range ignoredPods {
226+
for podName := range unreadyPods {
225227
metrics[podName] = metricsclient.PodMetric{Value: 0}
226228
}
227229
}
@@ -366,16 +368,18 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(statusReplicas int32
366368
return replicaCount, utilization, timestamp, nil
367369
}
368370

369-
func groupPods(pods []*v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) {
371+
func groupPods(pods []*v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, unreadyPods, missingPods, ignoredPods sets.String) {
370372
missingPods = sets.NewString()
373+
unreadyPods = sets.NewString()
371374
ignoredPods = sets.NewString()
372375
for _, pod := range pods {
373376
if pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed {
377+
ignoredPods.Insert(pod.Name)
374378
continue
375379
}
376380
// Pending pods are ignored.
377381
if pod.Status.Phase == v1.PodPending {
378-
ignoredPods.Insert(pod.Name)
382+
unreadyPods.Insert(pod.Name)
379383
continue
380384
}
381385
// Pods missing metrics.
@@ -386,22 +390,22 @@ func groupPods(pods []*v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1
386390
}
387391
// Unready pods are ignored.
388392
if resource == v1.ResourceCPU {
389-
var ignorePod bool
393+
var unready bool
390394
_, condition := podutil.GetPodCondition(&pod.Status, v1.PodReady)
391395
if condition == nil || pod.Status.StartTime == nil {
392-
ignorePod = true
396+
unready = true
393397
} else {
394398
// Pod still within possible initialisation period.
395399
if pod.Status.StartTime.Add(cpuInitializationPeriod).After(time.Now()) {
396400
// Ignore sample if pod is unready or one window of metric wasn't collected since last state transition.
397-
ignorePod = condition.Status == v1.ConditionFalse || metric.Timestamp.Before(condition.LastTransitionTime.Time.Add(metric.Window))
401+
unready = condition.Status == v1.ConditionFalse || metric.Timestamp.Before(condition.LastTransitionTime.Time.Add(metric.Window))
398402
} else {
399403
// Ignore metric if pod is unready and it has never been ready.
400-
ignorePod = condition.Status == v1.ConditionFalse && pod.Status.StartTime.Add(delayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time)
404+
unready = condition.Status == v1.ConditionFalse && pod.Status.StartTime.Add(delayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time)
401405
}
402406
}
403-
if ignorePod {
404-
ignoredPods.Insert(pod.Name)
407+
if unready {
408+
unreadyPods.Insert(pod.Name)
405409
continue
406410
}
407411
}

0 commit comments

Comments
 (0)