Skip to content

Commit ed49186

Browse files
committed
Add a metric for observing recommendation stability & changes.
1 parent a00bf59 commit ed49186

File tree

5 files changed

+66
-16
lines changed

5 files changed

+66
-16
lines changed

vertical-pod-autoscaler/pkg/recommender/model/cluster.go

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ type ClusterState struct {
4646
// time we've noticed the recommendation missing or last time we logged
4747
// a warning about it.
4848
EmptyVPAs map[VpaID]time.Time
49-
// VpaPodCount contains number of live Pods matching a given VPA object.
50-
VpaPodCount map[VpaID]int
5149
// Observed VPAs. Used to check if there are updates needed.
5250
ObservedVpas []*vpa_types.VerticalPodAutoscaler
5351

@@ -99,7 +97,6 @@ func NewClusterState() *ClusterState {
9997
Pods: make(map[PodID]*PodState),
10098
Vpas: make(map[VpaID]*Vpa),
10199
EmptyVPAs: make(map[VpaID]time.Time),
102-
VpaPodCount: make(map[VpaID]int),
103100
aggregateStateMap: make(aggregateContainerStatesMap),
104101
labelSetMap: make(labelSetMap),
105102
}
@@ -147,7 +144,7 @@ func (cluster *ClusterState) AddOrUpdatePod(podID PodID, newLabels labels.Set, p
147144
func (cluster *ClusterState) addPodToItsVpa(pod *PodState) {
148145
for _, vpa := range cluster.Vpas {
149146
if vpa_utils.PodLabelsMatchVPA(pod.ID.Namespace, cluster.labelSetMap[pod.labelSetKey], vpa.ID.Namespace, vpa.PodSelector) {
150-
cluster.VpaPodCount[vpa.ID]++
147+
vpa.PodCount++
151148
}
152149
}
153150
}
@@ -156,7 +153,7 @@ func (cluster *ClusterState) addPodToItsVpa(pod *PodState) {
156153
func (cluster *ClusterState) removePodFromItsVpa(pod *PodState) {
157154
for _, vpa := range cluster.Vpas {
158155
if vpa_utils.PodLabelsMatchVPA(pod.ID.Namespace, cluster.labelSetMap[pod.labelSetKey], vpa.ID.Namespace, vpa.PodSelector) {
159-
cluster.VpaPodCount[vpa.ID]--
156+
vpa.PodCount--
160157
}
161158
}
162159
}
@@ -268,7 +265,7 @@ func (cluster *ClusterState) AddOrUpdateVpa(apiObject *vpa_types.VerticalPodAuto
268265
for aggregationKey, aggregation := range cluster.aggregateStateMap {
269266
vpa.UseAggregationIfMatching(aggregationKey, aggregation)
270267
}
271-
cluster.VpaPodCount[vpaID] = len(cluster.GetMatchingPods(vpa))
268+
vpa.PodCount = len(cluster.GetMatchingPods(vpa))
272269
}
273270
vpa.TargetRef = apiObject.Spec.TargetRef
274271
vpa.Annotations = annotationsMap
@@ -290,7 +287,6 @@ func (cluster *ClusterState) DeleteVpa(vpaID VpaID) error {
290287
}
291288
delete(cluster.Vpas, vpaID)
292289
delete(cluster.EmptyVPAs, vpaID)
293-
delete(cluster.VpaPodCount, vpaID)
294290
return nil
295291
}
296292

vertical-pod-autoscaler/pkg/recommender/model/cluster_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,7 @@ func TestVPAWithMatchingPods(t *testing.T) {
768768
containerID := ContainerID{testPodID, "foo"}
769769
assert.NoError(t, cluster.AddOrUpdateContainer(containerID, testRequest))
770770
}
771-
assert.Equal(t, tc.expectedMatch, cluster.VpaPodCount[vpa.ID])
771+
assert.Equal(t, tc.expectedMatch, cluster.Vpas[vpa.ID].PodCount)
772772
})
773773
}
774774
// Run with adding Pods first
@@ -781,7 +781,7 @@ func TestVPAWithMatchingPods(t *testing.T) {
781781
assert.NoError(t, cluster.AddOrUpdateContainer(containerID, testRequest))
782782
}
783783
vpa := addVpa(cluster, testVpaID, testAnnotations, tc.vpaSelector)
784-
assert.Equal(t, tc.expectedMatch, cluster.VpaPodCount[vpa.ID])
784+
assert.Equal(t, tc.expectedMatch, cluster.Vpas[vpa.ID].PodCount)
785785
})
786786
}
787787
}

vertical-pod-autoscaler/pkg/recommender/model/vpa.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2626
"k8s.io/apimachinery/pkg/labels"
2727
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
28+
metrics_quality "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/quality"
2829
vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa"
2930
)
3031

@@ -107,6 +108,8 @@ type Vpa struct {
107108
IsV1Beta1API bool
108109
// TargetRef points to the controller managing the set of pods.
109110
TargetRef *autoscaling.CrossVersionObjectReference
111+
// PodCount contains number of live Pods matching a given VPA object.
112+
PodCount int
110113
}
111114

112115
// NewVpa returns a new Vpa with a given ID and pod selector. Doesn't set the
@@ -121,6 +124,7 @@ func NewVpa(id VpaID, selector labels.Selector, created time.Time) *Vpa {
121124
Annotations: make(vpaAnnotationsMap),
122125
Conditions: make(vpaConditionsMap),
123126
IsV1Beta1API: false,
127+
PodCount: 0,
124128
}
125129
return vpa
126130
}
@@ -143,14 +147,15 @@ func (vpa *Vpa) UseAggregationIfMatching(aggregationKey AggregateStateKey, aggre
143147
// UpdateRecommendation updates the recommended resources in the VPA and its
144148
// aggregations with the given recommendation.
145149
func (vpa *Vpa) UpdateRecommendation(recommendation *vpa_types.RecommendedPodResources) {
146-
vpa.Recommendation = recommendation
147150
for _, containerRecommendation := range recommendation.ContainerRecommendations {
148151
for container, state := range vpa.aggregateContainerStates {
149152
if container.ContainerName() == containerRecommendation.ContainerName {
153+
metrics_quality.ObserveRecommendationChange(state.LastRecommendation, containerRecommendation.UncappedTarget, vpa.UpdateMode, vpa.PodCount)
150154
state.LastRecommendation = containerRecommendation.UncappedTarget
151155
}
152156
}
153157
}
158+
vpa.Recommendation = recommendation
154159
}
155160

156161
// UsesAggregation returns true iff an aggregation with the given key contributes to the VPA.

vertical-pod-autoscaler/pkg/recommender/routines/recommender.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,18 +103,17 @@ func (r *recommender) UpdateVPAs() {
103103
if vpa.HasRecommendation() && !had {
104104
metrics_recommender.ObserveRecommendationLatency(vpa.Created)
105105
}
106-
hasMatchingPods := r.clusterState.VpaPodCount[vpa.ID] > 0
106+
hasMatchingPods := vpa.PodCount > 0
107107
vpa.UpdateConditions(hasMatchingPods)
108108
if err := r.clusterState.RecordRecommendation(vpa, time.Now()); err != nil {
109109
klog.Warningf("%v", err)
110110
klog.V(4).Infof("VPA dump")
111111
klog.V(4).Infof("%+v", vpa)
112112
klog.V(4).Infof("HasMatchingPods: %v", hasMatchingPods)
113-
podCount := r.clusterState.VpaPodCount[vpa.ID]
114-
klog.V(4).Infof("VpaPodCount: %v", podCount)
113+
klog.V(4).Infof("PodCount: %v", vpa.PodCount)
115114
pods := r.clusterState.GetMatchingPods(vpa)
116115
klog.V(4).Infof("MatchingPods: %+v", pods)
117-
if len(pods) != podCount {
116+
if len(pods) != vpa.PodCount {
118117
klog.Errorf("ClusterState pod count and matching pods disagree for vpa %v/%v", vpa.ID.Namespace, vpa.ID.VpaName)
119118
}
120119
}

vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"strconv"
2323

2424
corev1 "k8s.io/api/core/v1"
25+
"k8s.io/apimachinery/pkg/api/resource"
2526
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
2627
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics"
2728
"k8s.io/klog"
@@ -38,6 +39,9 @@ var (
3839
cpuBuckets = prometheus.ExponentialBuckets(0.01, 2., 17)
3940
// Buckets between 1MB and 65.5 GB
4041
memoryBuckets = prometheus.ExponentialBuckets(1e6, 2., 17)
42+
// Buckets for relative comparisons, from -100% to x100
43+
relativeBuckets = []float64{-1., -.75, -.5, -.25, -.1, -.05, -0.025, -.01, -.005, -0.0025, -.001,
44+
0., .001, .0025, .005, .01, .025, .05, .1, .25, .5, .75, 1., 2.5, 5., 10., 25., 50., 100.}
4145
)
4246

4347
var (
@@ -46,8 +50,7 @@ var (
4650
Namespace: metricsNamespace,
4751
Name: "usage_recommendation_relative_diffs",
4852
Help: "Diffs between recommendation and usage, normalized by recommendation value",
49-
Buckets: []float64{-1., -.75, -.5, -.25, -.1, -.05, -0.025, -.01, -.005, -0.0025, -.001, 0.,
50-
.001, .0025, .005, .01, .025, .05, .1, .25, .5, .75, 1., 2.5, 5., 10., 25., 50., 100.},
53+
Buckets: relativeBuckets,
5154
}, []string{"update_mode", "resource", "is_oom"},
5255
)
5356
usageMissingRecommendationCounter = prometheus.NewCounterVec(
@@ -105,6 +108,14 @@ var (
105108
Buckets: memoryBuckets,
106109
}, []string{"update_mode", "is_oom"},
107110
)
111+
relativeRecommendationChange = prometheus.NewHistogramVec(
112+
prometheus.HistogramOpts{
113+
Namespace: metricsNamespace,
114+
Name: "relative_recommendation_changes",
115+
Help: "Changes between consecutive recommendation values, normalized by old value",
116+
Buckets: relativeBuckets,
117+
}, []string{"update_mode", "resource", "vpa_size_log2"},
118+
)
108119
)
109120

110121
// Register initializes all VPA quality metrics
@@ -117,6 +128,7 @@ func Register() {
117128
prometheus.MustRegister(memoryRecommendationLowerOrEqualUsageDiff)
118129
prometheus.MustRegister(cpuRecommendations)
119130
prometheus.MustRegister(memoryRecommendations)
131+
prometheus.MustRegister(relativeRecommendationChange)
120132
}
121133

122134
// observeUsageRecommendationRelativeDiff records relative diff between usage and
@@ -184,6 +196,44 @@ func ObserveQualityMetricsRecommendationMissing(usage float64, isOOM bool, resou
184196
observeUsageRecommendationDiff(usage, 0, true, isOOM, resource, updateMode)
185197
}
186198

199+
// ObserveRecommendationChange records relative_recommendation_changes metric.
200+
func ObserveRecommendationChange(previous, current corev1.ResourceList, updateMode *vpa_types.UpdateMode, vpaSize int) {
201+
// This will happen if there is no previous recommendation, we don't want to emit anything then.
202+
if previous == nil {
203+
return
204+
}
205+
// This is not really expected thus a warning.
206+
if current == nil {
207+
klog.Warningf("Cannot compare with current recommendation being nil. VPA mode: %v, size: %v", updateMode, vpaSize)
208+
return
209+
}
210+
211+
for resource, amount := range current {
212+
newValue := quantityAsFloat(resource, amount)
213+
oldValue := quantityAsFloat(resource, previous[resource])
214+
215+
if oldValue > 0.0 {
216+
diff := newValue/oldValue - 1.0 // -1.0 to report decreases as negative values and keep 0.0 neutral
217+
relativeRecommendationChange.WithLabelValues(updateModeToString(updateMode), string(resource), string(vpaSize)).Observe(diff)
218+
} else {
219+
klog.Warningf("Cannot compare as old recommendation for %v is 0. VPA mode: %v, size: %v", resource, updateMode, vpaSize)
220+
}
221+
}
222+
}
223+
224+
// quantityAsFloat converts resource.Quantity to a float64 value, in some scale (constant per resource but unspecified)
225+
func quantityAsFloat(resource corev1.ResourceName, quantity resource.Quantity) float64 {
226+
switch resource {
227+
case corev1.ResourceCPU:
228+
return float64(quantity.MilliValue())
229+
case corev1.ResourceMemory:
230+
return float64(quantity.Value())
231+
default:
232+
klog.Warningf("Unknown resource: %v", resource)
233+
return 0.0
234+
}
235+
}
236+
187237
func updateModeToString(updateMode *vpa_types.UpdateMode) string {
188238
if updateMode == nil {
189239
return ""

0 commit comments

Comments
 (0)