Skip to content

Commit 4be18f6

Browse files
Expose controller-runtime metrics externally
1 parent 71f7db5 commit 4be18f6

File tree

4 files changed

+41
-40
lines changed

4 files changed

+41
-40
lines changed

go.mod

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,15 @@ require (
3232
sigs.k8s.io/yaml v1.4.0
3333
)
3434

35+
require github.com/awslabs/operatorpkg v0.0.0-20241108234832-dacc8b988e70
36+
3537
require (
3638
cel.dev/expr v0.19.1 // indirect
3739
github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
3840
github.com/beorn7/perks v1.0.1 // indirect
3941
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
4042
github.com/cespare/xxhash/v2 v2.3.0 // indirect
41-
github.com/davecgh/go-spew v1.1.1 // indirect
43+
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
4244
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
4345
github.com/felixge/httpsnoop v1.0.4 // indirect
4446
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
@@ -64,6 +66,7 @@ require (
6466
github.com/pkg/errors v0.9.1 // indirect
6567
github.com/prometheus/common v0.62.0 // indirect
6668
github.com/prometheus/procfs v0.15.1 // indirect
69+
github.com/samber/lo v1.47.0 // indirect
6770
github.com/spf13/cobra v1.8.1 // indirect
6871
github.com/spf13/pflag v1.0.5 // indirect
6972
github.com/stoewer/go-strcase v1.3.0 // indirect

go.sum

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ cel.dev/expr v0.19.1 h1:NciYrtDRIR0lNCnH1LFJegdjspNx9fI59O7TWcua/W4=
22
cel.dev/expr v0.19.1/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw=
33
github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
44
github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
5+
github.com/awslabs/operatorpkg v0.0.0-20241108234832-dacc8b988e70 h1:VeYzU8sJEzx/raHd1+c74PgPrI+uxsHlW55CinIgln4=
6+
github.com/awslabs/operatorpkg v0.0.0-20241108234832-dacc8b988e70/go.mod h1:nq1PLBLCojzjfqSK8SG3ymxqwW6e/cHLJvddKOSFkfw=
57
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
68
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
79
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
@@ -13,12 +15,13 @@ github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL
1315
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
1416
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
1517
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
16-
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
1718
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
19+
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
20+
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
1821
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
1922
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
20-
github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k=
21-
github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ=
23+
github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U=
24+
github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
2225
github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU=
2326
github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM=
2427
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
@@ -113,6 +116,8 @@ github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoG
113116
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
114117
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
115118
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
119+
github.com/samber/lo v1.47.0 h1:z7RynLwP5nbyRscyvcD043DWYoOcYRv3mV8lBeqOCLc=
120+
github.com/samber/lo v1.47.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU=
116121
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
117122
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
118123
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=

pkg/internal/controller/controller.go

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ import (
3030
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
3131
"k8s.io/apimachinery/pkg/util/uuid"
3232
"k8s.io/client-go/util/workqueue"
33+
ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
3334

3435
"sigs.k8s.io/controller-runtime/pkg/controller/priorityqueue"
35-
ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/internal/controller/metrics"
3636
logf "sigs.k8s.io/controller-runtime/pkg/log"
3737
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3838
"sigs.k8s.io/controller-runtime/pkg/source"
@@ -101,7 +101,7 @@ type Controller[request comparable] struct {
101101
func (c *Controller[request]) Reconcile(ctx context.Context, req request) (_ reconcile.Result, err error) {
102102
defer func() {
103103
if r := recover(); r != nil {
104-
ctrlmetrics.ReconcilePanics.WithLabelValues(c.Name).Inc()
104+
ctrlmetrics.ReconcilePanics.Inc(map[string]string{"controller": c.Name})
105105

106106
if c.RecoverPanic == nil || *c.RecoverPanic {
107107
for _, fn := range utilruntime.PanicHandlers {
@@ -294,8 +294,8 @@ func (c *Controller[request]) processNextWorkItem(ctx context.Context) bool {
294294
// period.
295295
defer c.Queue.Done(obj)
296296

297-
ctrlmetrics.ActiveWorkers.WithLabelValues(c.Name).Add(1)
298-
defer ctrlmetrics.ActiveWorkers.WithLabelValues(c.Name).Add(-1)
297+
ctrlmetrics.ActiveWorkers.Set(1, map[string]string{"controller": c.Name})
298+
defer ctrlmetrics.ActiveWorkers.Set(0, map[string]string{"controller": c.Name})
299299

300300
c.reconcileHandler(ctx, obj, priority)
301301
return true
@@ -309,15 +309,15 @@ const (
309309
)
310310

311311
func (c *Controller[request]) initMetrics() {
312-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelError).Add(0)
313-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeueAfter).Add(0)
314-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeue).Add(0)
315-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelSuccess).Add(0)
316-
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Add(0)
317-
ctrlmetrics.TerminalReconcileErrors.WithLabelValues(c.Name).Add(0)
318-
ctrlmetrics.ReconcilePanics.WithLabelValues(c.Name).Add(0)
319-
ctrlmetrics.WorkerCount.WithLabelValues(c.Name).Set(float64(c.MaxConcurrentReconciles))
320-
ctrlmetrics.ActiveWorkers.WithLabelValues(c.Name).Set(0)
312+
ctrlmetrics.ReconcileTotal.Add(0, map[string]string{"controller": c.Name, "result": labelError})
313+
ctrlmetrics.ReconcileTotal.Add(0, map[string]string{"controller": c.Name, "result": labelRequeueAfter})
314+
ctrlmetrics.ReconcileTotal.Add(0, map[string]string{"controller": c.Name, "result": labelRequeue})
315+
ctrlmetrics.ReconcileTotal.Add(0, map[string]string{"controller": c.Name, "result": labelSuccess})
316+
ctrlmetrics.ReconcileErrors.Add(0, map[string]string{"controller": c.Name})
317+
ctrlmetrics.TerminalReconcileErrors.Add(0, map[string]string{"controller": c.Name})
318+
ctrlmetrics.ReconcilePanics.Add(0, map[string]string{"controller": c.Name})
319+
ctrlmetrics.WorkerCount.Set(float64(c.MaxConcurrentReconciles), map[string]string{"controller": c.Name})
320+
ctrlmetrics.ActiveWorkers.Set(0, map[string]string{"controller": c.Name})
321321
}
322322

323323
func (c *Controller[request]) reconcileHandler(ctx context.Context, req request, priority int) {
@@ -341,12 +341,12 @@ func (c *Controller[request]) reconcileHandler(ctx context.Context, req request,
341341
switch {
342342
case err != nil:
343343
if errors.Is(err, reconcile.TerminalError(nil)) {
344-
ctrlmetrics.TerminalReconcileErrors.WithLabelValues(c.Name).Inc()
344+
ctrlmetrics.TerminalReconcileErrors.Inc(map[string]string{"controller": c.Name})
345345
} else {
346346
c.Queue.AddWithOpts(priorityqueue.AddOpts{RateLimited: true, Priority: priority}, req)
347347
}
348-
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc()
349-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelError).Inc()
348+
ctrlmetrics.ReconcileErrors.Inc(map[string]string{"controller": c.Name})
349+
ctrlmetrics.ReconcileTotal.Inc(map[string]string{"controller": c.Name, "result": labelError})
350350
if !result.IsZero() {
351351
log.Info("Warning: Reconciler returned both a non-zero result and a non-nil error. The result will always be ignored if the error is non-nil and the non-nil error causes requeuing with exponential backoff. For more details, see: https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/reconcile#Reconciler")
352352
}
@@ -359,17 +359,17 @@ func (c *Controller[request]) reconcileHandler(ctx context.Context, req request,
359359
// to result.RequeueAfter
360360
c.Queue.Forget(req)
361361
c.Queue.AddWithOpts(priorityqueue.AddOpts{After: result.RequeueAfter, Priority: priority}, req)
362-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeueAfter).Inc()
362+
ctrlmetrics.ReconcileTotal.Inc(map[string]string{"controller": c.Name, "result": labelRequeueAfter})
363363
case result.Requeue: //nolint: staticcheck // We have to handle it until it is removed
364364
log.V(5).Info("Reconcile done, requeueing")
365365
c.Queue.AddWithOpts(priorityqueue.AddOpts{RateLimited: true, Priority: priority}, req)
366-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeue).Inc()
366+
ctrlmetrics.ReconcileTotal.Inc(map[string]string{"controller": c.Name, "result": labelRequeue})
367367
default:
368368
log.V(5).Info("Reconcile successful")
369369
// Finally, if no error occurs we Forget this item so it does not
370370
// get queued again until another change happens.
371371
c.Queue.Forget(req)
372-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelSuccess).Inc()
372+
ctrlmetrics.ReconcileTotal.Inc(map[string]string{"controller": c.Name, "result": labelSuccess})
373373
}
374374
}
375375

@@ -380,7 +380,7 @@ func (c *Controller[request]) GetLogger() logr.Logger {
380380

381381
// updateMetrics updates prometheus metrics within the controller.
382382
func (c *Controller[request]) updateMetrics(reconcileTime time.Duration) {
383-
ctrlmetrics.ReconcileTime.WithLabelValues(c.Name).Observe(reconcileTime.Seconds())
383+
ctrlmetrics.ReconcileTime.Observe(reconcileTime.Seconds(), map[string]string{"controller": c.Name})
384384
}
385385

386386
// ReconcileIDFromContext gets the reconcileID from the current context.

pkg/internal/controller/metrics/metrics.go renamed to pkg/metrics/metrics.go

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,45 +19,45 @@ package metrics
1919
import (
2020
"time"
2121

22+
opmetrics "github.com/awslabs/operatorpkg/metrics"
2223
"github.com/prometheus/client_golang/prometheus"
2324
"github.com/prometheus/client_golang/prometheus/collectors"
24-
"sigs.k8s.io/controller-runtime/pkg/metrics"
2525
)
2626

2727
var (
2828
// ReconcileTotal is a prometheus counter metric which holds the total
2929
// number of reconciliations per controller. It has two labels. controller label refers
3030
// to the controller name and result label refers to the reconcile result, i.e.
3131
// success, error, requeue, requeue_after.
32-
ReconcileTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
32+
ReconcileTotal = opmetrics.NewPrometheusCounter(Registry, prometheus.CounterOpts{
3333
Name: "controller_runtime_reconcile_total",
3434
Help: "Total number of reconciliations per controller",
3535
}, []string{"controller", "result"})
3636

3737
// ReconcileErrors is a prometheus counter metric which holds the total
3838
// number of errors from the Reconciler.
39-
ReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
39+
ReconcileErrors = opmetrics.NewPrometheusCounter(Registry, prometheus.CounterOpts{
4040
Name: "controller_runtime_reconcile_errors_total",
4141
Help: "Total number of reconciliation errors per controller",
4242
}, []string{"controller"})
4343

4444
// TerminalReconcileErrors is a prometheus counter metric which holds the total
4545
// number of terminal errors from the Reconciler.
46-
TerminalReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
46+
TerminalReconcileErrors = opmetrics.NewPrometheusCounter(Registry, prometheus.CounterOpts{
4747
Name: "controller_runtime_terminal_reconcile_errors_total",
4848
Help: "Total number of terminal reconciliation errors per controller",
4949
}, []string{"controller"})
5050

5151
// ReconcilePanics is a prometheus counter metric which holds the total
5252
// number of panics from the Reconciler.
53-
ReconcilePanics = prometheus.NewCounterVec(prometheus.CounterOpts{
53+
ReconcilePanics = opmetrics.NewPrometheusCounter(Registry, prometheus.CounterOpts{
5454
Name: "controller_runtime_reconcile_panics_total",
5555
Help: "Total number of reconciliation panics per controller",
5656
}, []string{"controller"})
5757

5858
// ReconcileTime is a prometheus metric which keeps track of the duration
5959
// of reconciliations.
60-
ReconcileTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
60+
ReconcileTime = opmetrics.NewPrometheusHistogram(Registry, prometheus.HistogramOpts{
6161
Name: "controller_runtime_reconcile_time_seconds",
6262
Help: "Length of time per reconciliation per controller",
6363
Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
@@ -69,28 +69,21 @@ var (
6969

7070
// WorkerCount is a prometheus metric which holds the number of
7171
// concurrent reconciles per controller.
72-
WorkerCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{
72+
WorkerCount = opmetrics.NewPrometheusGauge(Registry, prometheus.GaugeOpts{
7373
Name: "controller_runtime_max_concurrent_reconciles",
7474
Help: "Maximum number of concurrent reconciles per controller",
7575
}, []string{"controller"})
7676

7777
// ActiveWorkers is a prometheus metric which holds the number
7878
// of active workers per controller.
79-
ActiveWorkers = prometheus.NewGaugeVec(prometheus.GaugeOpts{
79+
ActiveWorkers = opmetrics.NewPrometheusGauge(Registry, prometheus.GaugeOpts{
8080
Name: "controller_runtime_active_workers",
8181
Help: "Number of currently used workers per controller",
8282
}, []string{"controller"})
8383
)
8484

8585
func init() {
86-
metrics.Registry.MustRegister(
87-
ReconcileTotal,
88-
ReconcileErrors,
89-
TerminalReconcileErrors,
90-
ReconcilePanics,
91-
ReconcileTime,
92-
WorkerCount,
93-
ActiveWorkers,
86+
Registry.MustRegister(
9487
// expose process metrics like CPU, Memory, file descriptor usage etc.
9588
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
9689
// expose all Go runtime metrics like GC stats, memory stats etc.

0 commit comments

Comments
 (0)