@@ -5,31 +5,49 @@ import (
55 "sigs.k8s.io/controller-runtime/pkg/metrics"
66)
77
8- // managerStartFailures is a monotonic counter which tracks the number of times the controller-runtime
9- // manager failed to start. To drive alerting based on this metric, it is recommended to use the rate
10- // of increase over a period of time. A positive rate of change indicates that the CNS is actively
11- // failing and retrying.
12- var managerStartFailures = prometheus .NewCounter (
13- prometheus.CounterOpts {
14- Name : "cns_ctrlmanager_start_failures_total" ,
15- Help : "Number of times the controller-runtime manager failed to start." ,
16- },
17- )
18-
19- // nncReconcilerStartFailures is a monotonic counter which tracks the number of times the NNC reconciler
20- // has failed to start within the timeout period. To drive alerting based on this metric, it is
21- // recommended to use the rate of increase over a period of time. A positive rate of change indicates
22- // that the CNS is actively failing and retrying.
23- var nncReconcilerStartFailures = prometheus .NewCounter (
24- prometheus.CounterOpts {
25- Name : "cns_nnc_reconciler_start_failures_total" ,
26- Help : "Number of times the NNC reconciler has failed to start within the timeout period." ,
27- },
8+ var ( // Prometheus metrics describing CNS controller-manager / NNC startup; registered in init below.
9+ // managerStartFailures is a monotonic counter which tracks the number of times the controller-runtime
10+ // manager failed to start. To drive alerting based on this metric, it is recommended to use the rate
11+ // of increase over a period of time. A positive rate of change indicates that the CNS is actively
12+ // failing and retrying.
13+ managerStartFailures = prometheus .NewCounter (
14+ prometheus.CounterOpts {
15+ Name : "cns_ctrlmanager_start_failures_total" ,
16+ Help : "Number of times the controller-runtime manager failed to start." ,
17+ },
18+ )
19+ // nncReconcilerStartFailures is a monotonic counter which tracks the number of times the NNC reconciler
20+ // has failed to start within the timeout period. To drive alerting based on this metric, it is
21+ // recommended to use the rate of increase over a period of time. A positive rate of change indicates
22+ // that the CNS is actively failing and retrying.
23+ nncReconcilerStartFailures = prometheus .NewCounter (
24+ prometheus.CounterOpts {
25+ Name : "cns_nnc_reconciler_start_failures_total" ,
26+ Help : "Number of times the NNC reconciler has failed to start within the timeout period." ,
27+ },
28+ )
29+ // nncInitFailure is a monotonic counter which tracks the number of times the initial NNC reconcile
30+ // has failed. It is exported under the name cns_nnc_init_failures_total.
31+ nncInitFailure = prometheus .NewCounter (
32+ prometheus.CounterOpts {
33+ Name : "cns_nnc_init_failures_total" ,
34+ Help : "Number of times the initial NNC reconcile has failed." ,
35+ },
36+ )
37+ // hasNNCInitialized is a gauge which tracks whether the initial NNC reconcile has completed. NOTE(review): the value convention (presumably 0/1) is set by callers not visible here; confirm there.
38+ hasNNCInitialized = prometheus .NewGauge (
39+ prometheus.GaugeOpts {
40+ Name : "cns_nnc_initialized" ,
41+ Help : "Whether the initial NNC reconcile has completed." ,
42+ },
43+ )
2844)
2945
3046func init () { // Registers the package's start-failure and NNC-initialization metrics with the controller-runtime metrics registry.
3147 metrics .Registry .MustRegister (
3248 managerStartFailures ,
3349 nncReconcilerStartFailures ,
50+ nncInitFailure ,
51+ hasNNCInitialized ,
3452 )
3553}
0 commit comments