Skip to content

Commit 5884308

Browse files
committed
feat(metrics) Limit Cardinality of CSV metrics
This commit introduces a change that limits the number of metrics that an OLM cluster reports at any given time for a CSV. The first metric introduced is called csv_succeeded, which tracks whether a CSV has reached the succeeded phase. The following information is provided about the CSV via labels: namespace, name, version. The value of this metric will always be 0 or 1 (1 when the CSV is in the succeeded phase, 0 otherwise). The second metric introduced is called csv_abnormal, which is reported whenever the CSV is updated and has not reached the succeeded phase. The following information is provided about the CSV via labels: namespace, name, version, phase, reason. Whenever a CSV is updated, the existing csv_abnormal time series for that CSV is deleted and, if the CSV is still not in the succeeded phase, replaced by an updated version.
1 parent 3255734 commit 5884308

File tree

3 files changed

+45
-14
lines changed

3 files changed

+45
-14
lines changed

pkg/controller/operators/olm/operator.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -930,8 +930,6 @@ func (a *Operator) syncClusterServiceVersion(obj interface{}) (syncError error)
930930
})
931931
logger.Debug("syncing CSV")
932932

933-
metrics.EmitCSVMetric(clusterServiceVersion)
934-
935933
if a.csvNotification != nil {
936934
a.csvNotification.OnAddOrUpdate(clusterServiceVersion)
937935
}
@@ -964,6 +962,8 @@ func (a *Operator) syncClusterServiceVersion(obj interface{}) (syncError error)
964962
} else {
965963
syncError = fmt.Errorf("error transitioning ClusterServiceVersion: %s and error updating CSV status: %s", syncError, updateErr)
966964
}
965+
} else {
966+
metrics.EmitCSVMetric(clusterServiceVersion, outCSV)
967967
}
968968
}
969969

pkg/metrics/metrics.go

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,17 @@ import (
55
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
66
"k8s.io/apimachinery/pkg/labels"
77

8+
olmv1alpha1 "github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1"
89
"github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned"
910
v1alpha1 "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/listers/operators/v1alpha1"
10-
olmv1alpha1 "github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1"
11-
1211
)
1312

1413
const (
1514
NAME_LABEL = "name"
1615
INSTALLED_LABEL = "installed"
16+
NAMESPACE_LABEL = "namespace"
1717
VERSION_LABEL = "version"
18-
PHASE_LABEL = "phase"
18+
PHASE_LABEL = "phase"
1919
REASON_LABEL = "reason"
2020
)
2121

@@ -151,18 +151,27 @@ var (
151151
[]string{NAME_LABEL, INSTALLED_LABEL},
152152
)
153153

154-
csvSyncCounter = prometheus.NewCounterVec(
155-
prometheus.CounterOpts{
156-
Name: "csv_sync_total",
157-
Help: "Monotonic count of CSV syncs",
154+
csvSucceeded = prometheus.NewGaugeVec(
155+
prometheus.GaugeOpts{
156+
Name: "csv_succeeded",
157+
Help: "Successful CSV install",
158+
},
159+
[]string{NAMESPACE_LABEL, NAME_LABEL, VERSION_LABEL},
160+
)
161+
162+
csvAbnormal = prometheus.NewGaugeVec(
163+
prometheus.GaugeOpts{
164+
Name: "csv_abnormal",
165+
Help: "CSV is not installed",
158166
},
159-
[]string{NAME_LABEL, VERSION_LABEL, PHASE_LABEL, REASON_LABEL},
167+
[]string{NAMESPACE_LABEL, NAME_LABEL, VERSION_LABEL, PHASE_LABEL, REASON_LABEL},
160168
)
161169
)
162170

163171
func RegisterOLM() {
164172
prometheus.MustRegister(csvCount)
165-
prometheus.MustRegister(csvSyncCounter)
173+
prometheus.MustRegister(csvSucceeded)
174+
prometheus.MustRegister(csvAbnormal)
166175
prometheus.MustRegister(CSVUpgradeCount)
167176
}
168177

@@ -177,6 +186,26 @@ func CounterForSubscription(name, installedCSV string) prometheus.Counter {
177186
return SubscriptionSyncCount.WithLabelValues(name, installedCSV)
178187
}
179188

180-
func EmitCSVMetric(csv *olmv1alpha1.ClusterServiceVersion){
181-
csvSyncCounter.WithLabelValues(csv.Name, csv.Spec.Version.String(), string(csv.Status.Phase), string(csv.Status.Reason)).Inc()
189+
func EmitCSVMetric(oldCSV *olmv1alpha1.ClusterServiceVersion, newCSV *olmv1alpha1.ClusterServiceVersion) {
190+
if oldCSV == nil || newCSV == nil {
191+
return
192+
}
193+
194+
// Don't update the metric for copies
195+
if newCSV.Status.Reason == olmv1alpha1.CSVReasonCopied {
196+
return
197+
}
198+
199+
// Delete the old CSV metrics
200+
csvAbnormal.DeleteLabelValues(oldCSV.Namespace, oldCSV.Name, oldCSV.Spec.Version.String(), string(oldCSV.Status.Phase), string(oldCSV.Status.Reason))
201+
202+
// Get the phase of the new CSV
203+
newCSVPhase := string(newCSV.Status.Phase)
204+
csvSucceededGauge := csvSucceeded.WithLabelValues(newCSV.Namespace, newCSV.Name, newCSV.Spec.Version.String())
205+
if newCSVPhase == string(olmv1alpha1.CSVPhaseSucceeded) {
206+
csvSucceededGauge.Set(1)
207+
} else {
208+
csvSucceededGauge.Set(0)
209+
csvAbnormal.WithLabelValues(newCSV.Namespace, newCSV.Name, newCSV.Spec.Version.String(), string(newCSV.Status.Phase), string(newCSV.Status.Reason)).Set(1)
210+
}
182211
}

test/e2e/metrics_e2e_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,13 @@ func TestMetricsEndpoint(t *testing.T) {
4949
}
5050

5151
// Verify metrics have been emitted for packageserver csv
52-
require.Contains(t, rawOutput, "csv_sync_total")
52+
require.Contains(t, rawOutput, "csv_abnormal")
5353
require.Contains(t, rawOutput, "name=\""+failingCSV.Name+"\"")
5454
require.Contains(t, rawOutput, "phase=\"Failed\"")
5555
require.Contains(t, rawOutput, "reason=\"UnsupportedOperatorGroup\"")
5656
require.Contains(t, rawOutput, "version=\"0.0.0\"")
57+
58+
require.Contains(t, rawOutput, "csv_succeeded")
5759
log.Info(rawOutput)
5860
}
5961

0 commit comments

Comments
 (0)