Skip to content

Commit 44c380b

Browse files
SerjKol80Sergey Kolosov
andcommitted
Add 'store' label to metric pd_cluster_status in 8.5. (#9865)
close #9855 Add 'store' label to metric pd_cluster_status in 8.5. Signed-off-by: Sergey Kolosov <[email protected]> Co-authored-by: Sergey Kolosov <[email protected]> (cherry picked from commit b0500cd) Signed-off-by: Sergey Kolosov <[email protected]>
1 parent 48eca84 commit 44c380b

File tree

4 files changed

+102
-90
lines changed

4 files changed

+102
-90
lines changed

pkg/statistics/metrics.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ var (
4747
Subsystem: "cluster",
4848
Name: "status",
4949
Help: "Status of the cluster.",
50-
}, []string{"type"})
50+
}, []string{"type", "store"})
5151

5252
placementStatusGauge = prometheus.NewGaugeVec(
5353
prometheus.GaugeOpts{

pkg/statistics/store_collection.go

Lines changed: 79 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -29,29 +29,31 @@ import (
2929
const (
3030
unknown = "unknown"
3131
labelType = "label"
32+
33+
clusterStatusStoreUpCount = "store_up_count"
34+
clusterStatusStoreDisconnectedCount = "store_disconnected_count"
35+
clusterStatusStoreSlowCount = "store_slow_count"
36+
clusterStatusStoreDownCount = "store_down_count"
37+
clusterStatusStoreUnhealthCount = "store_unhealth_count"
38+
clusterStatusStoreOfflineCount = "store_offline_count"
39+
clusterStatusStoreTombstoneCount = "store_tombstone_count"
40+
clusterStatusStoreLowSpaceCount = "store_low_space_count"
41+
clusterStatusStorePreparingCount = "store_preparing_count"
42+
clusterStatusStoreServingCount = "store_serving_count"
43+
clusterStatusStoreRemovingCount = "store_removing_count"
44+
clusterStatusStoreRemovedCount = "store_removed_count"
45+
46+
clusterStatusRegionCount = "region_count"
47+
clusterStatusLeaderCount = "leader_count"
48+
clusterStatusWitnessCount = "witness_count"
49+
clusterStatusLearnerCount = "learner_count"
50+
clusterStatusStorageSize = "storage_size"
51+
clusterStatusStorageCapacity = "storage_capacity"
3252
)
3353

3454
type storeStatistics struct {
35-
opt config.ConfProvider
36-
Up int
37-
Disconnect int
38-
Unhealthy int
39-
Down int
40-
Offline int
41-
Tombstone int
42-
LowSpace int
43-
Slow int
44-
StorageSize uint64
45-
StorageCapacity uint64
46-
RegionCount int
47-
LeaderCount int
48-
LearnerCount int
49-
WitnessCount int
50-
LabelCounter map[string]int
51-
Preparing int
52-
Serving int
53-
Removing int
54-
Removed int
55+
opt config.ConfProvider
56+
LabelCounter map[string]int
5557
}
5658

5759
func newStoreStatistics(opt config.ConfProvider) *storeStatistics {
@@ -61,61 +63,84 @@ func newStoreStatistics(opt config.ConfProvider) *storeStatistics {
6163
}
6264
}
6365

64-
func (s *storeStatistics) Observe(store *core.StoreInfo) {
65-
for _, k := range s.opt.GetLocationLabels() {
66-
v := store.GetLabelValue(k)
67-
if v == "" {
68-
v = unknown
69-
}
70-
key := fmt.Sprintf("%s:%s", k, v)
71-
// exclude tombstone
72-
if !store.IsRemoved() {
73-
s.LabelCounter[key]++
74-
}
66+
func (s *storeStatistics) observeStoreStatus(store *core.StoreInfo) map[string]float64 {
67+
result := map[string]float64{
68+
clusterStatusStoreUpCount: 0,
69+
clusterStatusStoreDisconnectedCount: 0,
70+
clusterStatusStoreSlowCount: 0,
71+
clusterStatusStoreDownCount: 0,
72+
clusterStatusStoreUnhealthCount: 0,
73+
clusterStatusStoreOfflineCount: 0,
74+
clusterStatusStoreTombstoneCount: 0,
75+
clusterStatusStoreLowSpaceCount: 0,
76+
clusterStatusStorePreparingCount: 0,
77+
clusterStatusStoreServingCount: 0,
78+
clusterStatusStoreRemovingCount: 0,
79+
clusterStatusStoreRemovedCount: 0,
7580
}
76-
storeAddress := store.GetAddress()
77-
id := strconv.FormatUint(store.GetID(), 10)
81+
7882
// Store state.
7983
isDown := false
8084
switch store.GetNodeState() {
8185
case metapb.NodeState_Preparing, metapb.NodeState_Serving:
8286
if store.DownTime() >= s.opt.GetMaxStoreDownTime() {
8387
isDown = true
84-
s.Down++
88+
result[clusterStatusStoreDownCount]++
8589
} else if store.IsUnhealthy() {
86-
s.Unhealthy++
90+
result[clusterStatusStoreUnhealthCount]++
8791
} else if store.IsDisconnected() {
88-
s.Disconnect++
92+
result[clusterStatusStoreDisconnectedCount]++
8993
} else if store.IsSlow() {
90-
s.Slow++
94+
result[clusterStatusStoreSlowCount]++
9195
} else {
92-
s.Up++
96+
result[clusterStatusStoreUpCount]++
9397
}
9498
if store.IsPreparing() {
95-
s.Preparing++
99+
result[clusterStatusStorePreparingCount]++
96100
} else {
97-
s.Serving++
101+
result[clusterStatusStoreServingCount]++
98102
}
99103
case metapb.NodeState_Removing:
100-
s.Offline++
101-
s.Removing++
104+
result[clusterStatusStoreOfflineCount]++
105+
result[clusterStatusStoreRemovingCount]++
102106
case metapb.NodeState_Removed:
103-
s.Tombstone++
104-
s.Removed++
105-
return
107+
result[clusterStatusStoreTombstoneCount]++
108+
result[clusterStatusStoreRemovedCount]++
109+
return result
106110
}
107111

108112
if !isDown && store.IsLowSpace(s.opt.GetLowSpaceRatio()) {
109-
s.LowSpace++
113+
result[clusterStatusStoreLowSpaceCount]++
114+
}
115+
return result
116+
}
117+
118+
func (s *storeStatistics) Observe(store *core.StoreInfo) {
119+
for _, k := range s.opt.GetLocationLabels() {
120+
v := store.GetLabelValue(k)
121+
if v == "" {
122+
v = unknown
123+
}
124+
key := fmt.Sprintf("%s:%s", k, v)
125+
// exclude tombstone
126+
if !store.IsRemoved() {
127+
s.LabelCounter[key]++
128+
}
129+
}
130+
storeAddress := store.GetAddress()
131+
id := strconv.FormatUint(store.GetID(), 10)
132+
storeStatusStats := s.observeStoreStatus(store)
133+
for statusType, value := range storeStatusStats {
134+
clusterStatusGauge.WithLabelValues(statusType, id).Set(value)
110135
}
111136

112137
// Store stats.
113-
s.StorageSize += store.StorageSize()
114-
s.StorageCapacity += store.GetCapacity()
115-
s.RegionCount += store.GetRegionCount()
116-
s.LeaderCount += store.GetLeaderCount()
117-
s.WitnessCount += store.GetWitnessCount()
118-
s.LearnerCount += store.GetLearnerCount()
138+
clusterStatusGauge.WithLabelValues(clusterStatusStorageSize, id).Set(float64(store.StorageSize()))
139+
clusterStatusGauge.WithLabelValues(clusterStatusStorageCapacity, id).Set(float64(store.GetCapacity()))
140+
clusterStatusGauge.WithLabelValues(clusterStatusRegionCount, id).Set(float64(store.GetRegionCount()))
141+
clusterStatusGauge.WithLabelValues(clusterStatusLeaderCount, id).Set(float64(store.GetLeaderCount()))
142+
clusterStatusGauge.WithLabelValues(clusterStatusWitnessCount, id).Set(float64(store.GetWitnessCount()))
143+
clusterStatusGauge.WithLabelValues(clusterStatusLearnerCount, id).Set(float64(store.GetLearnerCount()))
119144
limit, ok := store.GetStoreLimit().(*storelimit.SlidingWindows)
120145
if ok {
121146
cap := limit.GetCap()
@@ -181,30 +206,6 @@ func (s *storeStatistics) ObserveHotStat(store *core.StoreInfo, stats *StoresSta
181206
func (s *storeStatistics) Collect() {
182207
placementStatusGauge.Reset()
183208

184-
metrics := make(map[string]float64)
185-
metrics["store_up_count"] = float64(s.Up)
186-
metrics["store_disconnected_count"] = float64(s.Disconnect)
187-
metrics["store_down_count"] = float64(s.Down)
188-
metrics["store_unhealth_count"] = float64(s.Unhealthy)
189-
metrics["store_offline_count"] = float64(s.Offline)
190-
metrics["store_tombstone_count"] = float64(s.Tombstone)
191-
metrics["store_low_space_count"] = float64(s.LowSpace)
192-
metrics["store_slow_count"] = float64(s.Slow)
193-
metrics["store_preparing_count"] = float64(s.Preparing)
194-
metrics["store_serving_count"] = float64(s.Serving)
195-
metrics["store_removing_count"] = float64(s.Removing)
196-
metrics["store_removed_count"] = float64(s.Removed)
197-
metrics["region_count"] = float64(s.RegionCount)
198-
metrics["leader_count"] = float64(s.LeaderCount)
199-
metrics["witness_count"] = float64(s.WitnessCount)
200-
metrics["learner_count"] = float64(s.LearnerCount)
201-
metrics["storage_size"] = float64(s.StorageSize)
202-
metrics["storage_capacity"] = float64(s.StorageCapacity)
203-
204-
for typ, value := range metrics {
205-
clusterStatusGauge.WithLabelValues(typ).Set(value)
206-
}
207-
208209
// Current scheduling configurations of the cluster
209210
configs := make(map[string]float64)
210211
configs["leader-schedule-limit"] = float64(s.opt.GetLeaderScheduleLimit())
@@ -290,6 +291,7 @@ func ResetStoreStatistics(storeAddress string, id string) {
290291
for _, m := range metrics {
291292
storeStatusGauge.DeleteLabelValues(storeAddress, id, m)
292293
}
294+
clusterStatusGauge.DeletePartialMatch(utils.SingleLabel("store", id))
293295
}
294296

295297
type storeStatisticsMap struct {
@@ -322,4 +324,5 @@ func Reset() {
322324
storeStatusGauge.Reset()
323325
clusterStatusGauge.Reset()
324326
placementStatusGauge.Reset()
327+
clusterStatusGauge.Reset()
325328
}

pkg/statistics/store_collection_test.go

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -72,19 +72,6 @@ func TestStoreStatistics(t *testing.T) {
7272
}
7373
stats := storeStats.stats
7474

75-
re.Equal(6, stats.Up)
76-
re.Equal(7, stats.Preparing)
77-
re.Equal(0, stats.Serving)
78-
re.Equal(1, stats.Removing)
79-
re.Equal(1, stats.Removed)
80-
re.Equal(1, stats.Down)
81-
re.Equal(1, stats.Offline)
82-
re.Equal(0, stats.RegionCount)
83-
re.Equal(0, stats.WitnessCount)
84-
re.Equal(0, stats.Unhealthy)
85-
re.Equal(0, stats.Disconnect)
86-
re.Equal(1, stats.Tombstone)
87-
re.Equal(1, stats.LowSpace)
8875
re.Equal(2, stats.LabelCounter["zone:z1"])
8976
re.Equal(2, stats.LabelCounter["zone:z2"])
9077
re.Equal(2, stats.LabelCounter["zone:z3"])

pkg/statistics/utils/labels.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright 2020 TiKV Project Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package utils
16+
17+
import "github.com/prometheus/client_golang/prometheus"
18+
19+
// SingleLabel builds a labels map containing only a single label.
20+
func SingleLabel(key, value string) prometheus.Labels {
21+
return prometheus.Labels{key: value}
22+
}

0 commit comments

Comments
 (0)