Skip to content

Commit ced36b0

Browse files
committed
add kube_janitor_resource_deleted_total
Signed-off-by: Markus Blaschke <[email protected]>
1 parent 6d8d6cf commit ced36b0

File tree

3 files changed

+47
-18
lines changed

3 files changed

+47
-18
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,5 +67,6 @@ Supported relative timestamps ([`time.Duration`](https://pkg.go.dev/time) and [`
6767

6868
| Metric | Description |
6969
|-------------------------------------------------------|-----------------------------------------------------------------------------------------------------|
70+
| `kube_janitor_resource_deleted_total` | Total number of deleted resources (labels: `rule`, `groupVersionKind`, `namespace`) |
7071
| `kube_janitor_resource_ttl_expiry_timestamp_seconds` | Expiry date (unix timestamp) for every resource which was detected matching the TTL expiry |
7172
| `kube_janitor_resource_rule_expiry_timestamp_seconds` | Expiry date (unix timestamp) for every resource which was detected matching the static expiry rules |

kube_janitor/metrics.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,41 @@ import (
66

77
type (
88
JanitorMetrics struct {
9-
ttl *prometheus.GaugeVec
10-
rule *prometheus.GaugeVec
9+
deleted *prometheus.CounterVec
10+
ttl *prometheus.GaugeVec
11+
rule *prometheus.GaugeVec
1112
}
1213
)
1314

1415
// setupMetrics sets up all Prometheus metrics with name, help and corresponding labels
1516
func (j *Janitor) setupMetrics() {
16-
commonLabels := []string{
17+
ttlLabels := []string{
1718
"rule",
18-
"version",
19-
"kind",
19+
"groupVersionKind",
2020
"namespace",
2121
"name",
2222
"ttl",
2323
}
2424

25+
j.prometheus.deleted = prometheus.NewCounterVec(
26+
prometheus.CounterOpts{
27+
Name: "kube_janitor_resource_deleted_total",
28+
Help: "Total count of deleted Kubernetes resources",
29+
},
30+
[]string{
31+
"rule",
32+
"groupVersionKind",
33+
"namespace",
34+
},
35+
)
36+
prometheus.MustRegister(j.prometheus.deleted)
37+
2538
j.prometheus.ttl = prometheus.NewGaugeVec(
2639
prometheus.GaugeOpts{
2740
Name: "kube_janitor_resource_ttl_expiry_timestamp_seconds",
2841
Help: "Expiry unix timestamp for Kubernetes resources by ttl",
2942
},
30-
commonLabels,
43+
ttlLabels,
3144
)
3245
prometheus.MustRegister(j.prometheus.ttl)
3346

@@ -36,7 +49,7 @@ func (j *Janitor) setupMetrics() {
3649
Name: "kube_janitor_resource_rule_expiry_timestamp_seconds",
3750
Help: "Expiry unix timestamp for Kubernetes resources by rule",
3851
},
39-
commonLabels,
52+
ttlLabels,
4053
)
4154
prometheus.MustRegister(j.prometheus.rule)
4255
}

kube_janitor/task.common.go

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,6 @@ func (j *Janitor) runRule(ctx context.Context, logger *slogger.Logger, rule *Con
7878
rule.Id,
7979
ttl,
8080
metricList,
81-
prometheus.Labels{
82-
"rule": rule.String(),
83-
},
8481
)
8582
})
8683
if err != nil {
@@ -95,13 +92,15 @@ func (j *Janitor) runRule(ctx context.Context, logger *slogger.Logger, rule *Con
9592
}
9693

9794
// checkResourceTtlAndTriggerDeleteIfExpired checks the resource against the defined TTL and deletes the resource if it is expired
98-
func (j *Janitor) checkResourceTtlAndTriggerDeleteIfExpired(ctx context.Context, logger *slogger.Logger, resourceConfig *ConfigResource, resource unstructured.Unstructured, ruleId string, ttlValue string, metricResourceTtl *prometheusCommon.MetricList, labels prometheus.Labels) error {
95+
func (j *Janitor) checkResourceTtlAndTriggerDeleteIfExpired(ctx context.Context, logger *slogger.Logger, resourceConfig *ConfigResource, resource unstructured.Unstructured, ruleId string, ttlValue string, metricResourceTtl *prometheusCommon.MetricList) error {
9996
resourceLogger := logger.WithGroup("resource").With(
10097
slog.String("namespace", resource.GetNamespace()),
10198
slog.String("name", resource.GetName()),
10299
slog.String("ttl", ttlValue),
103100
)
104101

102+
groupVersionKind := resource.GroupVersionKind()
103+
105104
// no ttl, no processing
106105
// better safe than sorry
107106
if ttlValue == "" {
@@ -143,13 +142,6 @@ func (j *Janitor) checkResourceTtlAndTriggerDeleteIfExpired(ctx context.Context,
143142
return nil
144143
}
145144

146-
labels["version"] = resource.GetAPIVersion()
147-
labels["kind"] = resource.GetKind()
148-
labels["namespace"] = resource.GetNamespace()
149-
labels["name"] = resource.GetName()
150-
labels["ttl"] = ttlValue
151-
metricResourceTtl.AddTime(labels, *parsedDate)
152-
153145
resourceLogger.Debug("found resource with valid TTL", slog.Time("expiry", *parsedDate))
154146

155147
if expired {
@@ -162,6 +154,15 @@ func (j *Janitor) checkResourceTtlAndTriggerDeleteIfExpired(ctx context.Context,
162154
return err
163155
}
164156

157+
// increase deleted counter
158+
j.prometheus.deleted.With(
159+
prometheus.Labels{
160+
"rule": ruleId,
161+
"groupVersionKind": fmt.Sprintf("%s/%s/%s", groupVersionKind.Group, groupVersionKind.Version, groupVersionKind.Kind),
162+
"namespace": resource.GetNamespace(),
163+
},
164+
).Inc()
165+
165166
reason := "TimeToLiveExpired"
166167
message := fmt.Sprintf(`TTL of "%v" is expired and resource is being deleted (%s)`, ttlValue, ruleId)
167168

@@ -170,6 +171,20 @@ func (j *Janitor) checkResourceTtlAndTriggerDeleteIfExpired(ctx context.Context,
170171
resourceLogger.Error("unable to create Kubernetes Event", slog.Any("error", err))
171172
}
172173
}
174+
} else {
175+
// resource is not yet expired; record its expiry time as a metric
176+
177+
metricResourceTtl.AddTime(
178+
prometheus.Labels{
179+
"rule": ruleId,
180+
"groupVersionKind": fmt.Sprintf("%s/%s/%s", groupVersionKind.Group, groupVersionKind.Version, groupVersionKind.Kind),
181+
"namespace": resource.GetNamespace(),
182+
"name": resource.GetName(),
183+
"ttl": ttlValue,
184+
},
185+
*parsedDate,
186+
)
187+
173188
}
174189

175190
return nil

0 commit comments

Comments
 (0)