Skip to content

Commit 24ca89e

Browse files
committed
Add metrics for controller
1 parent 9a097d8 commit 24ca89e

File tree

2 files changed

+27
-0
lines changed

2 files changed

+27
-0
lines changed

components/node-labeler/cmd/metrics.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,21 @@ var (
2626
Help: "time it took for a pods to reach the running phase and the ready label was applied to the node",
2727
Buckets: []float64{5, 10, 15, 20, 25, 30, 45, 60, 75},
2828
}, []string{"component"})
29+
30+
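// NodeScaledownAnnotationReconcileDuration records how long NodeScaledownAnnotationController
// reconciliations take, in seconds. The "operation" label distinguishes a full pass over all
// nodes ("all_nodes") from the reconciliation of a single node ("node").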
31+
NodeScaledownAnnotationReconcileDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
32+
Namespace: metricsNamespace,
33+
Subsystem: metricsWorkspaceSubsystem,
34+
Name: "node_scaledown_annotation_reconcile_duration_seconds",
35+
Help: "Duration of NodeScaledownAnnotationController reconciliations",
36+
Buckets: []float64{0.1, 0.5, 1, 2.5, 5, 10, 30},
37+
}, []string{"operation"})
38+
39+
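// NodeScaledownAnnotationReconciliationQueueSize reports the number of nodes waiting in the
// NodeScaledownAnnotationController reconciliation queue.
// NOTE(review): in this change the gauge is only set from workspaceFilter, right after an
// enqueue attempt; it is not refreshed when items are dequeued, so it can overstate the
// backlog between workspace events — confirm whether the consumer should update it too.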
40+
NodeScaledownAnnotationReconciliationQueueSize = prometheus.NewGauge(prometheus.GaugeOpts{
41+
Namespace: metricsNamespace,
42+
Subsystem: metricsWorkspaceSubsystem,
43+
Name: "node_scaledown_annotation_reconciliation_queue_size",
44+
Help: "Current size of the NodeScaledownAnnotationController reconciliation queue",
45+
})
2946
)

components/node-labeler/cmd/run.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616

1717
"github.com/bombsimon/logrusr/v2"
1818
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
19+
"github.com/prometheus/client_golang/prometheus"
1920
"github.com/spf13/cobra"
2021
corev1 "k8s.io/api/core/v1"
2122
"k8s.io/apimachinery/pkg/api/errors"
@@ -143,6 +144,8 @@ var runCmd = &cobra.Command{
143144

144145
metrics.Registry.MustRegister(NodeLabelerCounterVec)
145146
metrics.Registry.MustRegister(NodeLabelerTimeHistVec)
147+
metrics.Registry.MustRegister(NodeScaledownAnnotationReconcileDuration)
148+
metrics.Registry.MustRegister(NodeScaledownAnnotationReconciliationQueueSize)
146149

147150
err = mgr.AddHealthzCheck("healthz", healthz.Ping)
148151
if err != nil {
@@ -356,6 +359,7 @@ func (c *NodeScaledownAnnotationController) workspaceFilter() predicate.Predicat
356359
default:
357360
log.WithField("node", ws.Status.Runtime.NodeName).Warn("reconciliation queue full")
358361
}
362+
NodeScaledownAnnotationReconciliationQueueSize.Set(float64(len(c.nodesToReconcile)))
359363
return true
360364
}
361365
return false
@@ -402,6 +406,9 @@ func (wc *NodeScaledownAnnotationController) Stop() {
402406

403407
// reconcileAllNodes lists all nodes and reconciles each one
404408
func (wc *NodeScaledownAnnotationController) reconcileAllNodes(ctx context.Context) (ctrl.Result, error) {
409+
timer := prometheus.NewTimer(NodeScaledownAnnotationReconcileDuration.WithLabelValues("all_nodes"))
410+
defer timer.ObserveDuration()
411+
405412
var nodes corev1.NodeList
406413
if err := wc.List(ctx, &nodes); err != nil {
407414
log.WithError(err).Error("failed to list nodes")
@@ -420,6 +427,9 @@ func (wc *NodeScaledownAnnotationController) reconcileAllNodes(ctx context.Conte
420427

421428
// reconcileNode counts the workspaces running on a node and updates the autoscaler annotation accordingly
422429
func (c *NodeScaledownAnnotationController) reconcileNode(ctx context.Context, nodeName string) error {
430+
timer := prometheus.NewTimer(NodeScaledownAnnotationReconcileDuration.WithLabelValues("node"))
431+
defer timer.ObserveDuration()
432+
423433
var workspaceList workspacev1.WorkspaceList
424434
if err := c.List(ctx, &workspaceList, client.MatchingFields{
425435
"status.runtime.nodeName": nodeName,

0 commit comments

Comments
 (0)