Skip to content

Commit 64d7c6f

Browse files
committed
Add bunch of metrics to monitor VPA Updater impact in a cluster.
1 parent 7b7d704 commit 64d7c6f

File tree

4 files changed

+140
-10
lines changed

4 files changed

+140
-10
lines changed

vertical-pod-autoscaler/pkg/updater/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,3 @@ before pod with 20% memory increase and no change in cpu).
2929

3030
# Missing parts
3131
* Recommendation API for fetching data from Vertical Pod Autoscaler Recommender.
32-
* Monitoring.

vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
apiv1 "k8s.io/api/core/v1"
2525
policyv1 "k8s.io/api/policy/v1beta1"
2626
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27-
metrics_updater "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/updater"
2827
appsinformer "k8s.io/client-go/informers/apps/v1"
2928
coreinformer "k8s.io/client-go/informers/core/v1"
3029
kube_client "k8s.io/client-go/kubernetes"
@@ -141,7 +140,6 @@ func (e *podsEvictionRestrictionImpl) Evict(podToEvict *apiv1.Pod, eventRecorder
141140
}
142141
eventRecorder.Event(podToEvict, apiv1.EventTypeNormal, "EvictedByVPA",
143142
"Pod was evicted by VPA Updater to apply resource recommendation.")
144-
metrics_updater.AddEvictedPod()
145143

146144
if podToEvict.Status.Phase != apiv1.PodPending {
147145
singleGroupStats, present := e.creatorToSingleGroupStatsMap[cr]

vertical-pod-autoscaler/pkg/updater/logic/updater.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,11 +177,29 @@ func (u *updater) RunOnce(ctx context.Context) {
177177
}
178178
timer.ObserveStep("AdmissionInit")
179179

180+
// wrappers for metrics which are computed every loop run
181+
controlledPodsCounter := metrics_updater.NewControlledPodsCounter()
182+
evictablePodsCounter := metrics_updater.NewEvictablePodsCounter()
183+
vpasWithEvictablePodsCounter := metrics_updater.NewVpasWithEvictablePodsCounter()
184+
vpasWithEvictedPodsCounter := metrics_updater.NewVpasWithEvictedPodsCounter()
185+
186+
// using defer to protect against 'return' after evictionRateLimiter.Wait
187+
defer controlledPodsCounter.Observe()
188+
defer evictablePodsCounter.Observe()
189+
defer vpasWithEvictablePodsCounter.Observe()
190+
defer vpasWithEvictedPodsCounter.Observe()
191+
180192
for vpa, livePods := range controlledPods {
193+
vpaSize := len(livePods)
194+
controlledPodsCounter.Add(vpaSize, vpaSize)
181195
evictionLimiter := u.evictionFactory.NewPodsEvictionRestriction(livePods)
182196
podsForUpdate := u.getPodsUpdateOrder(filterNonEvictablePods(livePods, evictionLimiter), vpa)
197+
evictablePodsCounter.Add(vpaSize, len(podsForUpdate))
183198

199+
withEvictable := false
200+
withEvicted := false
184201
for _, pod := range podsForUpdate {
202+
withEvictable = true
185203
if !evictionLimiter.CanEvict(pod) {
186204
continue
187205
}
@@ -194,8 +212,18 @@ func (u *updater) RunOnce(ctx context.Context) {
194212
evictErr := evictionLimiter.Evict(pod, u.eventRecorder)
195213
if evictErr != nil {
196214
klog.Warningf("evicting pod %v failed: %v", pod.Name, evictErr)
215+
} else {
216+
withEvicted = true
217+
metrics_updater.AddEvictedPod(vpaSize)
197218
}
198219
}
220+
221+
if withEvictable {
222+
vpasWithEvictablePodsCounter.Add(vpaSize, 1)
223+
}
224+
if withEvicted {
225+
vpasWithEvictedPodsCounter.Add(vpaSize, 1)
226+
}
199227
}
200228
timer.ObserveStep("EvictPods")
201229
}

vertical-pod-autoscaler/pkg/utils/metrics/updater/updater.go

Lines changed: 112 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,65 @@ limitations under the License.
1818
package updater
1919

2020
import (
21+
"strconv"
22+
2123
"github.com/prometheus/client_golang/prometheus"
2224
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics"
2325
)
2426

2527
const (
2628
metricsNamespace = metrics.TopMetricsNamespace + "updater"
29+
30+
maxVpaSizeLog = 20
31+
// maxVpaSize = 2 ^ maxVpaSizeLog
32+
maxVpaSize = 1024 * 1024
2733
)
2834

35+
// SizeBasedGauge is a wrapper for incrementally recording values indexed by log2(VPA size)
36+
type SizeBasedGauge struct {
37+
values map[int]int
38+
gauge *prometheus.GaugeVec
39+
}
40+
2941
var (
30-
evictedCount = prometheus.NewCounter(
42+
controlledCount = prometheus.NewGaugeVec(
43+
prometheus.GaugeOpts{
44+
Namespace: metricsNamespace,
45+
Name: "controlled_pods_total",
46+
Help: "Number of Pods controlled by VPA updater.",
47+
}, []string{"vpa_size_log2"},
48+
)
49+
50+
// evictableCount reports Pods matching eviction criteria, labelled by
// log2 of the size of the VPA they belong to.
evictableCount = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: metricsNamespace,
		Name:      "evictable_pods_total",
		Help:      "Number of Pods matching eviction criteria.",
	}, []string{"vpa_size_log2"},
)
57+
58+
evictedCount = prometheus.NewCounterVec(
3159
prometheus.CounterOpts{
3260
Namespace: metricsNamespace,
3361
Name: "evicted_pods_total",
3462
Help: "Number of Pods evicted by Updater to apply a new recommendation.",
35-
},
63+
}, []string{"vpa_size_log2"},
64+
)
65+
66+
// vpasWithEvictablePodsCount reports VPA objects that have at least one Pod
// matching eviction criteria, labelled by log2 of the VPA size.
vpasWithEvictablePodsCount = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: metricsNamespace,
		Name:      "vpas_with_evictable_pods_total",
		Help:      "Number of VPA objects with at least one Pod matching eviction criteria.",
	}, []string{"vpa_size_log2"},
)
73+
74+
vpasWithEvictedPodsCount = prometheus.NewGaugeVec(
75+
prometheus.GaugeOpts{
76+
Namespace: metricsNamespace,
77+
Name: "vpas_with_evicted_pods_total",
78+
Help: "Number of VPA objects with at least one evicted Pod.",
79+
}, []string{"vpa_size_log2"},
3680
)
3781

3882
functionLatency = metrics.CreateExecutionTimeMetric(metricsNamespace,
@@ -41,16 +85,77 @@ var (
4185

4286
// Register initializes all metrics for VPA Updater
4387
func Register() {
44-
prometheus.MustRegister(evictedCount)
45-
prometheus.MustRegister(functionLatency)
88+
prometheus.MustRegister(controlledCount, evictableCount, evictedCount, vpasWithEvictablePodsCount, vpasWithEvictedPodsCount, functionLatency)
4689
}
4790

4891
// NewExecutionTimer provides a timer for Updater's RunOnce execution
4992
func NewExecutionTimer() *metrics.ExecutionTimer {
5093
return metrics.NewExecutionTimer(functionLatency)
5194
}
5295

53-
// AddEvictedPod increases the counter of pods evicted by VPA
54-
func AddEvictedPod() {
55-
evictedCount.Add(1)
96+
// newSizeBasedGauge provides a wrapper for counting items in a loop
97+
func newSizeBasedGauge(gauge *prometheus.GaugeVec) *SizeBasedGauge {
98+
obj := SizeBasedGauge{
99+
values: make(map[int]int),
100+
gauge: gauge,
101+
}
102+
103+
// initialize with empty data so we can clean stale gauge values in Observe
104+
for i := 0; i <= maxVpaSizeLog; i++ {
105+
obj.values[i] = 0
106+
}
107+
108+
return &obj
109+
}
110+
111+
// NewControlledPodsCounter returns a wrapper for counting Pods controlled by Updater
112+
func NewControlledPodsCounter() *SizeBasedGauge {
113+
return newSizeBasedGauge(controlledCount)
114+
}
115+
116+
// NewEvictablePodsCounter returns a wrapper for counting Pods which are matching eviction criteria
117+
func NewEvictablePodsCounter() *SizeBasedGauge {
118+
return newSizeBasedGauge(evictableCount)
119+
}
120+
121+
// NewVpasWithEvictablePodsCounter returns a wrapper for counting VPA objects with Pods matching eviction criteria
122+
func NewVpasWithEvictablePodsCounter() *SizeBasedGauge {
123+
return newSizeBasedGauge(vpasWithEvictablePodsCount)
124+
}
125+
126+
// NewVpasWithEvictedPodsCounter returns a wrapper for counting VPA objects with evicted Pods
127+
func NewVpasWithEvictedPodsCounter() *SizeBasedGauge {
128+
return newSizeBasedGauge(vpasWithEvictedPodsCount)
129+
}
130+
131+
func getVpaSizeLog2(vpaSize int) int {
132+
if vpaSize >= maxVpaSize {
133+
return maxVpaSizeLog
134+
}
135+
136+
log2 := 0
137+
for vpaSize > 1 {
138+
log2++
139+
vpaSize >>= 1
140+
}
141+
return log2
142+
}
143+
144+
// AddEvictedPod increases the counter of pods evicted by Updater, by given VPA size
145+
func AddEvictedPod(vpaSize int) {
146+
log2 := getVpaSizeLog2(vpaSize)
147+
evictedCount.WithLabelValues(strconv.Itoa(log2)).Inc()
148+
}
149+
150+
// Add increases the counter for the given VPA size
151+
func (g *SizeBasedGauge) Add(vpaSize int, value int) {
152+
log2 := getVpaSizeLog2(vpaSize)
153+
g.values[log2] += value
154+
}
155+
156+
// Observe stores the recorded values into metrics object associated with the wrapper
157+
func (g *SizeBasedGauge) Observe() {
158+
for log2, value := range g.values {
159+
g.gauge.WithLabelValues(strconv.Itoa(log2)).Set(float64(value))
160+
}
56161
}

0 commit comments

Comments
 (0)