diff --git a/README.md b/README.md index 4f35903..1c777f1 100644 --- a/README.md +++ b/README.md @@ -206,15 +206,12 @@ comes instead of `meta.SetStatusCondition`. To delete the metrics for a given custom resource, simply call `RemoveConditionsFor` and pass the object. ```go -const ( - kind = "MyCr" -) - // SetStatusCondition utility function which replaces and wraps meta.SetStatusCondition calls func (r *MyReconciler) SetStatusCondition(cr *v1.MyCR, condition metav1.Condition) bool { - changed = meta.SetStatusCondition(&cr.Status.Conditions, condition) + changed := meta.SetStatusCondition(&cr.Status.Conditions, condition) + kind := cr.GetObjectKind().GroupVersionKind().Kind if changed { - r.RecordConditionFor(kind, cr, condition.Type, string(condition.Status), condition.Reason) + r.Recorder.RecordConditionFor(kind, cr, condition.Type, string(condition.Status), condition.Reason) } return changed } @@ -228,6 +225,7 @@ func (r *MyReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Re // Remove the metrics when the CR is deleted if cr.DeletionTimeStamp != nil { + kind := cr.GetObjectKind().GroupVersionKind().Kind r.Recorder.RemoveConditionsFor(kind, cr) } diff --git a/pkg/gauge_vec_set/gauge_vec_set.go b/pkg/gauge_vec_set/gauge_vec_set.go index 59577b8..63d2fee 100644 --- a/pkg/gauge_vec_set/gauge_vec_set.go +++ b/pkg/gauge_vec_set/gauge_vec_set.go @@ -2,6 +2,7 @@ package gauge_vec_set import ( "fmt" + "regexp" "strings" "sync" @@ -53,6 +54,26 @@ type GaugeVecSet struct { mu sync.RWMutex } +// validateLowercaseUnderscore panics if the input contains any character +// that is not a lowercase letter or underscore. +func validateLowercaseUnderscore(input string) { + invalidCharPattern := regexp.MustCompile(`[^a-z_]`) + if invalidCharPattern.MatchString(input) { + panic( + fmt.Errorf( + "NewGaugeVecSet: %q contains characters other than lowercase letters and underscores", input, + ), + ) + } + if strings.HasSuffix(input, "_") { + panic( + fmt.Errorf( + "NewGaugeVecSet: %q must not end with an underscore", input, + ), + ) + } +} + // NewGaugeVecSet constructs a GaugeVecSet. // // Parameters: @@ -74,6 +95,10 @@ func NewGaugeVecSet( groupLabels []string, extraLabels ...string, ) *GaugeVecSet { + validateLowercaseUnderscore(namespace) + validateLowercaseUnderscore(subsystem) + validateLowercaseUnderscore(name) + if len(indexLabels) == 0 { panic("NewMultiIndexGaugeCollector: at least one index label is required") } diff --git a/pkg/gauge_vec_set/gauge_vec_set_test.go b/pkg/gauge_vec_set/gauge_vec_set_test.go index 97376af..b23c221 100644 --- a/pkg/gauge_vec_set/gauge_vec_set_test.go +++ b/pkg/gauge_vec_set/gauge_vec_set_test.go @@ -350,6 +350,60 @@ func Test_DynamicGaugeCollector_ArityValidationPanics(t *testing.T) { }) } +func Test_DynamicGaugeCollector_MetricNamePanics(t *testing.T) { + cases := []struct { + name string + namespace string + subsystem string + metricName string + }{ + { + name: "invalid namespace contains special characters", + namespace: "n-s", + subsystem: "subsystem", + metricName: "name", + }, + { + name: "invalid subsystem contains special characters", + namespace: "namespace", + subsystem: "sub-system", + metricName: "name", + }, + { + name: "invalid metric contains special characters", + namespace: "namespace", + subsystem: "subsystem", + metricName: "na-me", + }, + { + name: "invalid namespace ends with underscore", + namespace: "namespace_", + subsystem: "subsystem", + metricName: "name", + }, + { + name: "invalid subsystem ends with underscore", + namespace: "namespace", + subsystem: "subsystem_", + metricName: "name", + }, + { + name: "invalid metric ends with underscore", + namespace: "namespace", + subsystem: "subsystem", + metricName: "name_", + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + assert.Panics(t, func() { + NewGaugeVecSet(c.namespace, c.subsystem, c.metricName, "", []string{"index"}, []string{"group"}) + }) + }) + } +} + func Test_DynamicGaugeCollector_LabelsWithHashCharacters(t *testing.T) { reg := prometheus.NewRegistry() diff --git a/pkg/operator_condition_metrics/operator_condition_metrics.go b/pkg/operator_condition_metrics/operator_condition_metrics.go index ca53a53..d28a5a5 100644 --- a/pkg/operator_condition_metrics/operator_condition_metrics.go +++ b/pkg/operator_condition_metrics/operator_condition_metrics.go @@ -14,7 +14,7 @@ and marking exactly one as active (1) while the others are inactive (0). Example kube_pod_status_phase{namespace="default", pod="nginx", phase="Pending"} 0 kube_pod_status_phase{namespace="default", pod="nginx", phase="Failed"} 0 -We adopt the same pattern for controller Conditions but we export one time series per (status, reason) variant +We adopt the same pattern for controller Conditions, but we export one time series per (status, reason) variant and enforce **exclusivity per condition**. For any given (controller, kind, name, namespace, condition) exactly one (status, reason) series is present at a time. @@ -24,13 +24,13 @@ Metric _controller_condition Labels (order matches registration) - - controller: controller name (e.g., "my-operator") - - kind: resource kind (e.g., "MyCRD") - - name: resource name - - namespace: resource namespace ("" for cluster-scoped) - - condition: condition type (e.g., "Ready", "Reconciled") - - status: "True" | "False" | "Unknown" - - reason: short machine-typed reason (often "" when status="True") + - controller: controller name (e.g., "my-operator") + - resource_kind: resource kind (e.g., "MyCRD") + - resource_name: resource name + - resource_namespace: resource namespace ("" for cluster-scoped) + - condition: condition type (e.g., "Ready", "Reconciled") + - status: "True" | "False" | "Unknown" + - reason: short machine-typed reason (often "" when status="True") Value - Always 1 for the single active (status, reason) series in the group. @@ -41,9 +41,9 @@ Examples: my_controller_condition{ controller="my-operator", - kind="MyCRD", - name="my-cr-1", - namespace="prod", + resource_kind="MyCRD", + resource_name="my-cr-1", + resource_namespace="prod", condition="Ready", status="True", reason="" @@ -73,11 +73,11 @@ Examples: Cleanup When the resource is deleted/pruned, all series for its index key - (controller, kind, name, namespace) are removed via DeleteByIndex(). + (controller, kind, resource_name, resource_namespace) are removed via DeleteByIndex(). Implementation Backed by a GaugeVecSet with: - indexLabels = [controller, kind, name, namespace] + indexLabels = [controller, resource_kind, resource_name, resource_namespace] groupLabels = [condition] extraLabels = [status, reason] Exclusivity is enforced with SetGroup(), which deletes sibling series. @@ -90,7 +90,7 @@ const ( ) var ( - indexLabels = []string{"controller", "kind", "name", "namespace"} + indexLabels = []string{"controller", "resource_kind", "resource_name", "resource_namespace"} groupLabels = []string{"condition"} extraLabels = []string{"status", "reason"} ) diff --git a/pkg/operator_condition_metrics/operator_condition_metrics_test.go b/pkg/operator_condition_metrics/operator_condition_metrics_test.go index 23cf2c5..c68d192 100644 --- a/pkg/operator_condition_metrics/operator_condition_metrics_test.go +++ b/pkg/operator_condition_metrics/operator_condition_metrics_test.go @@ -47,8 +47,8 @@ func TestConditionMetricRecorder_Record_Transition_And_SecondCondition(t *testin want := ` # HELP test_record_transition_and_second_condition_controller_condition Condition status for a custom resource; one active (status,reason) time series per (controller,kind,name,namespace,condition). # TYPE test_record_transition_and_second_condition_controller_condition gauge -test_record_transition_and_second_condition_controller_condition{condition="Ready",controller="my-controller",kind="MyCRD",name="cr-1",namespace="prod",reason="Failed",status="False"} 1 -test_record_transition_and_second_condition_controller_condition{condition="Synchronized",controller="my-controller",kind="MyCRD",name="cr-1",namespace="prod",reason="",status="True"} 1 +test_record_transition_and_second_condition_controller_condition{condition="Ready",controller="my-controller",reason="Failed",resource_kind="MyCRD",resource_name="cr-1",resource_namespace="prod",status="False"} 1 +test_record_transition_and_second_condition_controller_condition{condition="Synchronized",controller="my-controller",reason="",resource_kind="MyCRD",resource_name="cr-1",resource_namespace="prod",status="True",} 1 ` require.NoError(t, testutil.GatherAndCompare( @@ -114,7 +114,7 @@ func TestConditionMetricRecorder_SetsKindLabelFromObject(t *testing.T) { want := ` # HELP test_sets_kind_label_from_object_controller_condition Condition status for a custom resource; one active (status,reason) time series per (controller,kind,name,namespace,condition). # TYPE test_sets_kind_label_from_object_controller_condition gauge -test_sets_kind_label_from_object_controller_condition{condition="Ready",controller="my-controller",kind="FancyKind",name="obj-1",namespace="ns-1",reason="",status="True"} 1 +test_sets_kind_label_from_object_controller_condition{condition="Ready",controller="my-controller",reason="",resource_kind="FancyKind",resource_name="obj-1",resource_namespace="ns-1",status="True"} 1 ` require.NoError(t, testutil.GatherAndCompare(