Skip to content

Commit 678c67f

Browse files
authored
use last transition time as metric value (#13)
1 parent d943f69 commit 678c67f

File tree

4 files changed

+49
-26
lines changed

4 files changed

+49
-26
lines changed

README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ kube_pod_status_phase{namespace="default", pod="nginx", phase="Failed"} 0
134134

135135
We adopt the same pattern for controller Conditions, but we export only one time series per (status, reason) variant,
136136
meaning we delete all other variants in the group when we set the metric, ensuring the cardinality stays under control.
137+
Additionally, rather than exporting 1/0 to indicate whether a series is active, we set the last transition time of the
138+
condition as the value (unix timestamp).
137139

138140
Example metric:
139141

@@ -146,12 +148,13 @@ operator_controller_condition{
146148
condition="Ready",
147149
status="False",
148150
reason="FailedToProvision"
149-
} 1
151+
} 1759174210
150152
```
151153

152154
- **Index**: controller, resource_kind, resource_name, resource_namespace
153155
- **Group**: condition
154156
- **Extra**: status, reason
157+
- **Metric Value**: Unix timestamp of the last transition of the given condition
155158

156159
### Initialization
157160

@@ -223,10 +226,12 @@ const (
223226
)
224227

225228
// SetStatusCondition utility function which replaces and wraps meta.SetStatusCondition calls
226-
func (r *MyReconciler) SetStatusCondition(cr *v1.MyCR, condition metav1.Condition) bool {
227-
changed := meta.SetStatusCondition(&cr.Status.Conditions, condition)
229+
func (r *MyReconciler) SetStatusCondition(cr *v1.MyCR, cond metav1.Condition) bool {
230+
changed := meta.SetStatusCondition(&cr.Status.Conditions, cond)
228231
if changed {
229-
r.Recorder.RecordConditionFor(kind, cr, condition.Type, string(condition.Status), condition.Reason)
232+
r.Recorder.RecordConditionFor(
233+
kind, cr, cond.Type, string(cond.Status), cond.Reason, cond.LastTransitionTime,
234+
)
230235
}
231236
return changed
232237
}

pkg/operator_condition_metrics/operator_condition_metrics.go

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package operator_condition_metrics
22

33
import (
4+
"time"
5+
46
metrics "github.com/sourcehawk/go-prometheus-gaugevecset/pkg/gauge_vec_set"
57
)
68

@@ -15,7 +17,8 @@ and marking exactly one as active (1) while the others are inactive (0). Example
1517
kube_pod_status_phase{namespace="default", pod="nginx", phase="Failed"} 0
1618
1719
We adopt the same pattern for controller Conditions, but we export one time series per (status, reason) variant
18-
and enforce **exclusivity per condition**.
20+
and enforce **exclusivity per condition**. The metric value is set to the last transition time of the
21+
condition.
1922
2023
For any given (controller, kind, name, namespace, condition) exactly one (status, reason) series is present at a time.
2124
All other variants are **deleted**. This keeps cardinality under control.
@@ -33,7 +36,7 @@ Labels (order matches registration)
3336
- reason: short machine-typed reason (often "" when status="True")
3437
3538
Value
36-
- Always 1 for the single active (status, reason) series in the group.
39+
- The Unix timestamp (in seconds) of the condition's last transition.
3740
3841
Examples:
3942
@@ -47,7 +50,7 @@ Examples:
4750
condition="Ready",
4851
status="True",
4952
reason=""
50-
} 1
53+
} 1759174202
5154
5255
(Other status/reason variants for this condition are removed.)
5356
@@ -60,7 +63,7 @@ Examples:
6063
condition="Ready",
6164
status="False",
6265
reason="Failed"
63-
} 1
66+
} 1759174205
6467
6568
3. Another condition can be active simultaneously (different group):
6669
@@ -69,7 +72,7 @@ Examples:
6972
condition="Synchronized",
7073
status="True",
7174
reason=""
72-
} 1
75+
} 1759174210
7376
7477
Cleanup
7578
When the resource is deleted/pruned, all series for its index key
@@ -157,7 +160,9 @@ type ConditionMetricRecorder struct {
157160
// RecordConditionFor sets a condition metric for a given controller and object.
158161
//
159162
// It enforces exclusivity within the same (controller, name, namespace, condition) group,
160-
// ensuring that only the latest status (True/False/Unknown) is present for a given condition type.
163+
// ensuring that only the latest (status, reason) is present for a given condition type.
164+
//
165+
// If lastTransitionTime is zero, the metric value falls back to the unix timestamp of time.Now().UTC().
161166
//
162167
// The following label values are set:
163168
//
@@ -171,15 +176,20 @@ type ConditionMetricRecorder struct {
171176
//
172177
// Example:
173178
//
174-
// r.RecordConditionFor(kind, obj, "Ready", "True", "AppReady")
179+
// r.RecordConditionFor(kind, obj, "Ready", "True", "AppReady", lastTransitionTime)
175180
func (r *ConditionMetricRecorder) RecordConditionFor(
176-
kind string, object ObjectLike, conditionType, conditionStatus, conditionReason string,
181+
kind string, object ObjectLike,
182+
conditionType, conditionStatus, conditionReason string, lastTransitionTime time.Time,
177183
) {
178184
indexValues := []string{r.Controller, kind, object.GetName(), object.GetNamespace()}
179185
groupValues := []string{conditionType}
180186
extraValues := []string{conditionStatus, conditionReason}
181187

182-
r.OperatorConditionsGauge.SetGroup(1, indexValues, groupValues, extraValues...)
188+
if lastTransitionTime.IsZero() {
189+
lastTransitionTime = time.Now().UTC()
190+
}
191+
192+
r.OperatorConditionsGauge.SetGroup(float64(lastTransitionTime.Unix()), indexValues, groupValues, extraValues...)
183193
}
184194

185195
// RemoveConditionsFor deletes all condition metrics for a given resource.

pkg/operator_condition_metrics/operator_condition_metrics_benchmark_test.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"fmt"
66
"testing"
7+
"time"
78

89
"github.com/prometheus/client_golang/prometheus"
910
"github.com/prometheus/common/expfmt"
@@ -65,6 +66,7 @@ func createBenchmarkScenario(tb testing.TB, registry *prometheus.Registry) *Cond
6566
}
6667

6768
obj := &FakeObject{}
69+
transitionTime := time.Now().UTC()
6870

6971
condition := &FakeCondition{
7072
Status: "True", // doesn't matter, cardinality decided by Reason
@@ -82,7 +84,7 @@ func createBenchmarkScenario(tb testing.TB, registry *prometheus.Registry) *Cond
8284

8385
for v := 0; v < variantsPerCondition; v++ {
8486
condition.Reason = generatedName("variant", v)
85-
rec.RecordConditionFor(kind, obj, condition.Type, condition.Reason, condition.Reason)
87+
rec.RecordConditionFor(kind, obj, condition.Type, condition.Reason, condition.Reason, transitionTime)
8688
}
8789
}
8890
}
@@ -106,6 +108,7 @@ func Benchmark_ConditionMetricsRecorder_TimePerCall(b *testing.B) {
106108
Name: "Resource0",
107109
Namespace: "namespace0",
108110
}
111+
transitionTime := time.Now().UTC()
109112

110113
// Two variants in the same (controller,kind,name,namespace,condition) group.
111114
condTrue := &FakeCondition{
@@ -126,9 +129,9 @@ func Benchmark_ConditionMetricsRecorder_TimePerCall(b *testing.B) {
126129
for i := 0; i < b.N; i++ {
127130
// Flip between two variants
128131
if (i & 1) == 0 {
129-
rec.RecordConditionFor(kind, obj, condTrue.Type, condTrue.Status, condTrue.Reason)
132+
rec.RecordConditionFor(kind, obj, condTrue.Type, condTrue.Status, condTrue.Reason, transitionTime)
130133
} else {
131-
rec.RecordConditionFor(kind, obj, condFalse.Type, condFalse.Status, condFalse.Reason)
134+
rec.RecordConditionFor(kind, obj, condFalse.Type, condFalse.Status, condFalse.Reason, transitionTime)
132135
}
133136
}
134137
})
@@ -140,7 +143,7 @@ func Benchmark_ConditionMetricsRecorder_TimePerCall(b *testing.B) {
140143
for i := 0; i < b.N; i++ {
141144
// Ensure there is something to remove, but do not count the set time.
142145
b.StopTimer()
143-
rec.RecordConditionFor(kind, obj, condTrue.Type, condTrue.Status, condTrue.Reason)
146+
rec.RecordConditionFor(kind, obj, condTrue.Type, condTrue.Status, condTrue.Reason, transitionTime)
144147
b.StartTimer()
145148

146149
rec.RemoveConditionsFor(kind, obj)

pkg/operator_condition_metrics/operator_condition_metrics_test.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package operator_condition_metrics
33
import (
44
"strings"
55
"testing"
6+
"time"
67

78
"github.com/prometheus/client_golang/prometheus"
89
"github.com/prometheus/client_golang/prometheus/testutil"
@@ -30,23 +31,25 @@ func TestConditionMetricRecorder_Record_Transition_And_SecondCondition(t *testin
3031
kind := "MyCRD"
3132
name := "cr-1"
3233
ns := "prod"
34+
transitionTime := time.Date(2025, time.January, 1, 0, 0, 0, 0, time.UTC)
35+
3336
obj := makeObj(name, ns)
3437

3538
// Record Ready=True
36-
rec.RecordConditionFor(kind, obj, "Ready", "True", "")
39+
rec.RecordConditionFor(kind, obj, "Ready", "True", "", transitionTime)
3740

3841
// Flip Ready -> False with reason
39-
rec.RecordConditionFor(kind, obj, "Ready", "False", "Failed")
42+
rec.RecordConditionFor(kind, obj, "Ready", "False", "Failed", transitionTime)
4043

4144
// Another condition Synchronized=True (independent group)
42-
rec.RecordConditionFor(kind, obj, "Synchronized", "True", "")
45+
rec.RecordConditionFor(kind, obj, "Synchronized", "True", "", transitionTime)
4346

4447
// Expect: Ready False(reason) and Synchronized True, each valued at transitionTime's Unix timestamp
4548
want := `
4649
# HELP test_record_transition_and_second_condition_controller_condition Condition status for a custom resource; one active (status,reason) time series per (controller,kind,name,namespace,condition).
4750
# TYPE test_record_transition_and_second_condition_controller_condition gauge
48-
test_record_transition_and_second_condition_controller_condition{condition="Ready",controller="my-controller",reason="Failed",resource_kind="MyCRD",resource_name="cr-1",resource_namespace="prod",status="False"} 1
49-
test_record_transition_and_second_condition_controller_condition{condition="Synchronized",controller="my-controller",reason="",resource_kind="MyCRD",resource_name="cr-1",resource_namespace="prod",status="True",} 1
51+
test_record_transition_and_second_condition_controller_condition{condition="Ready",controller="my-controller",reason="Failed",resource_kind="MyCRD",resource_name="cr-1",resource_namespace="prod",status="False"} 1735689600
52+
test_record_transition_and_second_condition_controller_condition{condition="Synchronized",controller="my-controller",reason="",resource_kind="MyCRD",resource_name="cr-1",resource_namespace="prod",status="True",} 1735689600
5053
`
5154
require.NoError(t,
5255
testutil.GatherAndCompare(
@@ -72,10 +75,11 @@ func TestConditionMetricRecorder_RemoveConditionsFor(t *testing.T) {
7275
kind := "MyCRD"
7376
name := "cr-2"
7477
ns := "staging"
78+
transitionTime := time.Date(2025, time.January, 1, 0, 0, 0, 0, time.UTC)
7579
obj := makeObj(name, ns)
7680

77-
rec.RecordConditionFor(kind, obj, "Ready", "True", "")
78-
rec.RecordConditionFor(kind, obj, "Synchronized", "False", "SyncPending")
81+
rec.RecordConditionFor(kind, obj, "Ready", "True", "", transitionTime)
82+
rec.RecordConditionFor(kind, obj, "Synchronized", "False", "SyncPending", transitionTime)
7983

8084
// Remove all condition series for this object
8185
removed := rec.RemoveConditionsFor(kind, obj)
@@ -103,16 +107,17 @@ func TestConditionMetricRecorder_SetsKindLabelFromObject(t *testing.T) {
103107
kind := "FancyKind"
104108
name := "obj-1"
105109
ns := "ns-1"
110+
transitionTime := time.Date(2025, time.January, 1, 0, 0, 0, 0, time.UTC)
106111
obj := makeObj(name, ns)
107112

108113
// Record a condition
109-
rec.RecordConditionFor(kind, obj, "Ready", "True", "")
114+
rec.RecordConditionFor(kind, obj, "Ready", "True", "", transitionTime)
110115

111116
// Expect the 'kind' label to reflect the object's Kind
112117
want := `
113118
# HELP test_sets_kind_label_from_object_controller_condition Condition status for a custom resource; one active (status,reason) time series per (controller,kind,name,namespace,condition).
114119
# TYPE test_sets_kind_label_from_object_controller_condition gauge
115-
test_sets_kind_label_from_object_controller_condition{condition="Ready",controller="my-controller",reason="",resource_kind="FancyKind",resource_name="obj-1",resource_namespace="ns-1",status="True"} 1
120+
test_sets_kind_label_from_object_controller_condition{condition="Ready",controller="my-controller",reason="",resource_kind="FancyKind",resource_name="obj-1",resource_namespace="ns-1",status="True"} 1735689600
116121
`
117122
require.NoError(t,
118123
testutil.GatherAndCompare(

0 commit comments

Comments
 (0)