Skip to content

Commit 181bffc

Browse files
authored
Merge pull request #6932 from jabellard/metric-labels
Standardize Karmada metrics label for member clusters
2 parents c670824 + c340e8d commit 181bffc

File tree

4 files changed

+216
-73
lines changed

4 files changed

+216
-73
lines changed

pkg/metrics/cluster.go

Lines changed: 58 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -41,68 +41,76 @@ const (
4141
evictionKindTotalMetricsName = "eviction_kind_total"
4242
evictionProcessingLatencyMetricsName = "eviction_processing_latency_seconds"
4343
evictionProcessingTotalMetricsName = "eviction_processing_total"
44+
45+
// Canonical label for Karmada member clusters.
46+
memberClusterLabel = "member_cluster"
47+
48+
// Deprecated: cluster_name is superseded by member_cluster (target removal: 1.18).
49+
// Rationale: avoid collisions with Prometheus external_labels such as "cluster", and standardize on a single label name that denotes a Karmada member cluster across all metrics.
50+
// Migration: use member_cluster instead across all queries and dashboards.
51+
clusterNameLabel = "cluster_name"
4452
)
4553

4654
var (
4755
// clusterReadyGauge reports if the cluster is ready.
4856
clusterReadyGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
4957
Name: clusterReadyMetricsName,
50-
Help: "State of the cluster(1 if ready, 0 otherwise).",
51-
}, []string{"cluster_name"})
58+
Help: "State of the cluster (1 if ready, 0 otherwise). [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]",
59+
}, []string{memberClusterLabel, clusterNameLabel})
5260

5361
// clusterTotalNodeNumberGauge reports the number of nodes in the given cluster.
5462
clusterTotalNodeNumberGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
5563
Name: clusterTotalNodeNumberMetricsName,
56-
Help: "Number of nodes in the cluster.",
57-
}, []string{"cluster_name"})
64+
Help: "Number of nodes in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]",
65+
}, []string{memberClusterLabel, clusterNameLabel})
5866

5967
// clusterReadyNodeNumberGauge reports the number of ready nodes in the given cluster.
6068
clusterReadyNodeNumberGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
6169
Name: clusterReadyNodeNumberMetricsName,
62-
Help: "Number of ready nodes in the cluster.",
63-
}, []string{"cluster_name"})
70+
Help: "Number of ready nodes in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]",
71+
}, []string{memberClusterLabel, clusterNameLabel})
6472

6573
// clusterMemoryAllocatableGauge reports the allocatable memory in the given cluster.
6674
clusterMemoryAllocatableGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
6775
Name: clusterMemoryAllocatableMetricsName,
68-
Help: "Allocatable cluster memory resource in bytes.",
69-
}, []string{"cluster_name"})
76+
Help: "Allocatable cluster memory resource in bytes. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]",
77+
}, []string{memberClusterLabel, clusterNameLabel})
7078

7179
// clusterCPUAllocatableGauge reports the allocatable CPU in the given cluster.
7280
clusterCPUAllocatableGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
7381
Name: clusterCPUAllocatableMetricsName,
74-
Help: "Number of allocatable CPU in the cluster.",
75-
}, []string{"cluster_name"})
82+
Help: "Number of allocatable CPU in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]",
83+
}, []string{memberClusterLabel, clusterNameLabel})
7684

7785
// clusterPodAllocatableGauge reports the allocatable Pod number in the given cluster.
7886
clusterPodAllocatableGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
7987
Name: clusterPodAllocatableMetricsName,
80-
Help: "Number of allocatable pods in the cluster.",
81-
}, []string{"cluster_name"})
88+
Help: "Number of allocatable pods in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]",
89+
}, []string{memberClusterLabel, clusterNameLabel})
8290

8391
// clusterMemoryAllocatedGauge reports the allocated memory in the given cluster.
8492
clusterMemoryAllocatedGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
8593
Name: clusterMemoryAllocatedMetricsName,
86-
Help: "Allocated cluster memory resource in bytes.",
87-
}, []string{"cluster_name"})
94+
Help: "Allocated cluster memory resource in bytes. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]",
95+
}, []string{memberClusterLabel, clusterNameLabel})
8896

8997
// clusterCPUAllocatedGauge reports the allocated CPU in the given cluster.
9098
clusterCPUAllocatedGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
9199
Name: clusterCPUAllocatedMetricsName,
92-
Help: "Number of allocated CPU in the cluster.",
93-
}, []string{"cluster_name"})
100+
Help: "Number of allocated CPU in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]",
101+
}, []string{memberClusterLabel, clusterNameLabel})
94102

95103
// clusterPodAllocatedGauge reports the allocated Pod number in the given cluster.
96104
clusterPodAllocatedGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
97105
Name: clusterPodAllocatedMetricsName,
98-
Help: "Number of allocated pods in the cluster.",
99-
}, []string{"cluster_name"})
106+
Help: "Number of allocated pods in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]",
107+
}, []string{memberClusterLabel, clusterNameLabel})
100108

101109
// clusterSyncStatusDuration reports the duration of the given cluster syncing status.
102110
clusterSyncStatusDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
103111
Name: clusterSyncStatusDurationMetricsName,
104-
Help: "Duration in seconds for syncing the status of the cluster once.",
105-
}, []string{"cluster_name"})
112+
Help: "Duration in seconds for syncing the status of the cluster once. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]",
113+
}, []string{memberClusterLabel, clusterNameLabel})
106114

107115
evictionQueueMetrics = prometheus.NewGaugeVec(prometheus.GaugeOpts{
108116
Name: evictionQueueDepthMetricsName,
@@ -111,8 +119,8 @@ var (
111119

112120
evictionKindTotalMetrics = prometheus.NewGaugeVec(prometheus.GaugeOpts{
113121
Name: evictionKindTotalMetricsName,
114-
Help: "Number of resources in the eviction queue by resource kind",
115-
}, []string{"member_cluster", "resource_kind"})
122+
Help: "Number of resources in the eviction queue by resource kind [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]",
123+
}, []string{memberClusterLabel, clusterNameLabel, "resource_kind"})
116124

117125
evictionProcessingLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{
118126
Name: evictionProcessingLatencyMetricsName,
@@ -128,50 +136,55 @@ var (
128136

129137
// RecordClusterStatus records the status of the given cluster.
130138
func RecordClusterStatus(cluster *v1alpha1.Cluster) {
131-
clusterReadyGauge.WithLabelValues(cluster.Name).Set(func() float64 {
139+
labels := []string{cluster.Name, cluster.Name} // member_cluster, cluster_name
140+
141+
clusterReadyGauge.WithLabelValues(labels...).Set(func() float64 {
132142
if util.IsClusterReady(&cluster.Status) {
133143
return 1
134144
}
135145
return 0
136146
}())
137147

138148
if cluster.Status.NodeSummary != nil {
139-
clusterTotalNodeNumberGauge.WithLabelValues(cluster.Name).Set(float64(cluster.Status.NodeSummary.TotalNum))
140-
clusterReadyNodeNumberGauge.WithLabelValues(cluster.Name).Set(float64(cluster.Status.NodeSummary.ReadyNum))
149+
clusterTotalNodeNumberGauge.WithLabelValues(labels...).Set(float64(cluster.Status.NodeSummary.TotalNum))
150+
clusterReadyNodeNumberGauge.WithLabelValues(labels...).Set(float64(cluster.Status.NodeSummary.ReadyNum))
141151
}
142152

143153
if cluster.Status.ResourceSummary != nil {
144154
if cluster.Status.ResourceSummary.Allocatable != nil {
145-
clusterMemoryAllocatableGauge.WithLabelValues(cluster.Name).Set(cluster.Status.ResourceSummary.Allocatable.Memory().AsApproximateFloat64())
146-
clusterCPUAllocatableGauge.WithLabelValues(cluster.Name).Set(cluster.Status.ResourceSummary.Allocatable.Cpu().AsApproximateFloat64())
147-
clusterPodAllocatableGauge.WithLabelValues(cluster.Name).Set(cluster.Status.ResourceSummary.Allocatable.Pods().AsApproximateFloat64())
155+
clusterMemoryAllocatableGauge.WithLabelValues(labels...).Set(cluster.Status.ResourceSummary.Allocatable.Memory().AsApproximateFloat64())
156+
clusterCPUAllocatableGauge.WithLabelValues(labels...).Set(cluster.Status.ResourceSummary.Allocatable.Cpu().AsApproximateFloat64())
157+
clusterPodAllocatableGauge.WithLabelValues(labels...).Set(cluster.Status.ResourceSummary.Allocatable.Pods().AsApproximateFloat64())
148158
}
149159

150160
if cluster.Status.ResourceSummary.Allocated != nil {
151-
clusterMemoryAllocatedGauge.WithLabelValues(cluster.Name).Set(cluster.Status.ResourceSummary.Allocated.Memory().AsApproximateFloat64())
152-
clusterCPUAllocatedGauge.WithLabelValues(cluster.Name).Set(cluster.Status.ResourceSummary.Allocated.Cpu().AsApproximateFloat64())
153-
clusterPodAllocatedGauge.WithLabelValues(cluster.Name).Set(cluster.Status.ResourceSummary.Allocated.Pods().AsApproximateFloat64())
161+
clusterMemoryAllocatedGauge.WithLabelValues(labels...).Set(cluster.Status.ResourceSummary.Allocated.Memory().AsApproximateFloat64())
162+
clusterCPUAllocatedGauge.WithLabelValues(labels...).Set(cluster.Status.ResourceSummary.Allocated.Cpu().AsApproximateFloat64())
163+
clusterPodAllocatedGauge.WithLabelValues(labels...).Set(cluster.Status.ResourceSummary.Allocated.Pods().AsApproximateFloat64())
154164
}
155165
}
156166
}
157167

158168
// RecordClusterSyncStatusDuration records the duration of the given cluster syncing status
159169
func RecordClusterSyncStatusDuration(cluster *v1alpha1.Cluster, startTime time.Time) {
160-
clusterSyncStatusDuration.WithLabelValues(cluster.Name).Observe(utilmetrics.DurationInSeconds(startTime))
170+
labels := []string{cluster.Name, cluster.Name}
171+
clusterSyncStatusDuration.WithLabelValues(labels...).Observe(utilmetrics.DurationInSeconds(startTime))
161172
}
162173

163174
// CleanupMetricsForCluster removes the cluster status metrics after the cluster is deleted.
164175
func CleanupMetricsForCluster(clusterName string) {
165-
clusterReadyGauge.DeleteLabelValues(clusterName)
166-
clusterTotalNodeNumberGauge.DeleteLabelValues(clusterName)
167-
clusterReadyNodeNumberGauge.DeleteLabelValues(clusterName)
168-
clusterMemoryAllocatableGauge.DeleteLabelValues(clusterName)
169-
clusterCPUAllocatableGauge.DeleteLabelValues(clusterName)
170-
clusterPodAllocatableGauge.DeleteLabelValues(clusterName)
171-
clusterMemoryAllocatedGauge.DeleteLabelValues(clusterName)
172-
clusterCPUAllocatedGauge.DeleteLabelValues(clusterName)
173-
clusterPodAllocatedGauge.DeleteLabelValues(clusterName)
174-
clusterSyncStatusDuration.DeleteLabelValues(clusterName)
176+
labels := []string{clusterName, clusterName}
177+
178+
clusterReadyGauge.DeleteLabelValues(labels...)
179+
clusterTotalNodeNumberGauge.DeleteLabelValues(labels...)
180+
clusterReadyNodeNumberGauge.DeleteLabelValues(labels...)
181+
clusterMemoryAllocatableGauge.DeleteLabelValues(labels...)
182+
clusterCPUAllocatableGauge.DeleteLabelValues(labels...)
183+
clusterPodAllocatableGauge.DeleteLabelValues(labels...)
184+
clusterMemoryAllocatedGauge.DeleteLabelValues(labels...)
185+
clusterCPUAllocatedGauge.DeleteLabelValues(labels...)
186+
clusterPodAllocatedGauge.DeleteLabelValues(labels...)
187+
clusterSyncStatusDuration.DeleteLabelValues(labels...)
175188
}
176189

177190
// RecordEvictionQueueMetrics records the depth of the EvictionQueue.
@@ -186,10 +199,11 @@ func RecordEvictionKindMetrics(clusterName, resourceKind string, increase bool)
186199
return
187200
}
188201

202+
labels := []string{clusterName, clusterName, resourceKind}
189203
if increase {
190-
evictionKindTotalMetrics.WithLabelValues(clusterName, resourceKind).Inc()
204+
evictionKindTotalMetrics.WithLabelValues(labels...).Inc()
191205
} else {
192-
evictionKindTotalMetrics.WithLabelValues(clusterName, resourceKind).Dec()
206+
evictionKindTotalMetrics.WithLabelValues(labels...).Dec()
193207
}
194208
}
195209

pkg/metrics/cluster_test.go

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ func TestClusterReadyMetrics(t *testing.T) {
5353
},
5454
},
5555
want: `
56-
# HELP cluster_ready_state State of the cluster(1 if ready, 0 otherwise).
56+
# HELP cluster_ready_state State of the cluster (1 if ready, 0 otherwise). [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]
5757
# TYPE cluster_ready_state gauge
58-
cluster_ready_state{cluster_name="foo"} 1
58+
cluster_ready_state{cluster_name="foo",member_cluster="foo"} 1
5959
`,
6060
},
6161
{
@@ -74,9 +74,9 @@ cluster_ready_state{cluster_name="foo"} 1
7474
},
7575
},
7676
want: `
77-
# HELP cluster_ready_state State of the cluster(1 if ready, 0 otherwise).
77+
# HELP cluster_ready_state State of the cluster (1 if ready, 0 otherwise). [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]
7878
# TYPE cluster_ready_state gauge
79-
cluster_ready_state{cluster_name="foo"} 0
79+
cluster_ready_state{cluster_name="foo",member_cluster="foo"} 0
8080
`,
8181
},
8282
}
@@ -110,9 +110,9 @@ func TestClusterTotalNodeNumberMetrics(t *testing.T) {
110110
},
111111
}
112112
want := `
113-
# HELP cluster_node_number Number of nodes in the cluster.
113+
# HELP cluster_node_number Number of nodes in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]
114114
# TYPE cluster_node_number gauge
115-
cluster_node_number{cluster_name="foo"} 100
115+
cluster_node_number{cluster_name="foo",member_cluster="foo"} 100
116116
`
117117
clusterTotalNodeNumberGauge.Reset()
118118
RecordClusterStatus(testCluster)
@@ -143,9 +143,9 @@ func TestClusterReadyNodeNumberMetrics(t *testing.T) {
143143
},
144144
}
145145
want := `
146-
# HELP cluster_ready_node_number Number of ready nodes in the cluster.
146+
# HELP cluster_ready_node_number Number of ready nodes in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]
147147
# TYPE cluster_ready_node_number gauge
148-
cluster_ready_node_number{cluster_name="foo"} 10
148+
cluster_ready_node_number{cluster_name="foo",member_cluster="foo"} 10
149149
`
150150
clusterReadyNodeNumberGauge.Reset()
151151
RecordClusterStatus(testCluster)
@@ -177,9 +177,9 @@ func TestClusterMemoryAllocatableMetrics(t *testing.T) {
177177
},
178178
}
179179
want := `
180-
# HELP cluster_memory_allocatable_bytes Allocatable cluster memory resource in bytes.
180+
# HELP cluster_memory_allocatable_bytes Allocatable cluster memory resource in bytes. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]
181181
# TYPE cluster_memory_allocatable_bytes gauge
182-
cluster_memory_allocatable_bytes{cluster_name="foo"} 200
182+
cluster_memory_allocatable_bytes{cluster_name="foo",member_cluster="foo"} 200
183183
`
184184
clusterMemoryAllocatableGauge.Reset()
185185
RecordClusterStatus(testCluster)
@@ -211,9 +211,9 @@ func TestClusterCPUAllocatableMetrics(t *testing.T) {
211211
},
212212
}
213213
want := `
214-
# HELP cluster_cpu_allocatable_number Number of allocatable CPU in the cluster.
214+
# HELP cluster_cpu_allocatable_number Number of allocatable CPU in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]
215215
# TYPE cluster_cpu_allocatable_number gauge
216-
cluster_cpu_allocatable_number{cluster_name="foo"} 0.2
216+
cluster_cpu_allocatable_number{cluster_name="foo",member_cluster="foo"} 0.2
217217
`
218218
clusterCPUAllocatableGauge.Reset()
219219
RecordClusterStatus(testCluster)
@@ -245,9 +245,9 @@ func TestClusterPodAllocatableMetrics(t *testing.T) {
245245
},
246246
}
247247
want := `
248-
# HELP cluster_pod_allocatable_number Number of allocatable pods in the cluster.
248+
# HELP cluster_pod_allocatable_number Number of allocatable pods in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]
249249
# TYPE cluster_pod_allocatable_number gauge
250-
cluster_pod_allocatable_number{cluster_name="foo"} 110
250+
cluster_pod_allocatable_number{cluster_name="foo",member_cluster="foo"} 110
251251
`
252252
clusterPodAllocatableGauge.Reset()
253253
RecordClusterStatus(testCluster)
@@ -279,9 +279,9 @@ func TestClusterMemoryAllocatedMetrics(t *testing.T) {
279279
},
280280
}
281281
want := `
282-
# HELP cluster_memory_allocated_bytes Allocated cluster memory resource in bytes.
282+
# HELP cluster_memory_allocated_bytes Allocated cluster memory resource in bytes. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]
283283
# TYPE cluster_memory_allocated_bytes gauge
284-
cluster_memory_allocated_bytes{cluster_name="foo"} 200
284+
cluster_memory_allocated_bytes{cluster_name="foo",member_cluster="foo"} 200
285285
`
286286
clusterMemoryAllocatedGauge.Reset()
287287
RecordClusterStatus(testCluster)
@@ -313,9 +313,9 @@ func TestClusterCPUAllocatedMetrics(t *testing.T) {
313313
},
314314
}
315315
want := `
316-
# HELP cluster_cpu_allocated_number Number of allocated CPU in the cluster.
316+
# HELP cluster_cpu_allocated_number Number of allocated CPU in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]
317317
# TYPE cluster_cpu_allocated_number gauge
318-
cluster_cpu_allocated_number{cluster_name="foo"} 0.2
318+
cluster_cpu_allocated_number{cluster_name="foo",member_cluster="foo"} 0.2
319319
`
320320
clusterCPUAllocatedGauge.Reset()
321321
RecordClusterStatus(testCluster)
@@ -347,9 +347,9 @@ func TestClusterPodAllocatedMetrics(t *testing.T) {
347347
},
348348
}
349349
want := `
350-
# HELP cluster_pod_allocated_number Number of allocated pods in the cluster.
350+
# HELP cluster_pod_allocated_number Number of allocated pods in the cluster. [Label deprecation: cluster_name deprecated in 1.16; use member_cluster. Removal planned 1.18.]
351351
# TYPE cluster_pod_allocated_number gauge
352-
cluster_pod_allocated_number{cluster_name="foo"} 110
352+
cluster_pod_allocated_number{cluster_name="foo",member_cluster="foo"} 110
353353
`
354354
clusterPodAllocatedGauge.Reset()
355355
RecordClusterStatus(testCluster)

0 commit comments

Comments
 (0)