Skip to content

Commit 9861229

Browse files
authored
Add zone label to ring_members metric. (#6900)
* add zone label to ring_members metric

  Signed-off-by: Alex Le <[email protected]>

* update changelog

  Signed-off-by: Alex Le <[email protected]>

* fix nil pointer

  Signed-off-by: Alex Le <[email protected]>

---------

Signed-off-by: Alex Le <[email protected]>
1 parent 6b3bd7b commit 9861229

File tree

3 files changed

+164
-31
lines changed

3 files changed

+164
-31
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
* [ENHANCEMENT] Distributor: Add native histograms max sample size bytes limit validation. #6834
5858
* [ENHANCEMENT] Querier: Support caching parquet labels file in parquet queryable. #6835
5959
* [ENHANCEMENT] Querier: Support query limits in parquet queryable. #6870
60+
* [ENHANCEMENT] Ring: Add zone label to ring_members metric. #6900
6061
* [ENHANCEMENT] Ingester: Add new metric `cortex_ingester_push_errors_total` to track reasons for ingester request failures. #6901
6162
* [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring. #6517
6263
* [BUGFIX] Ingester: Fix labelset data race condition. #6573

pkg/ring/ring.go

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,8 @@ type Ring struct {
201201

202202
// List of zones for which there's at least 1 instance in the ring. This list is guaranteed
203203
// to be sorted alphabetically.
204-
ringZones []string
204+
ringZones []string
205+
previousRingZones []string
205206

206207
// Cache of shuffle-sharded subrings per identifier. Invalidated when topology changes.
207208
// If set to nil, no caching is done (used by tests, and subrings).
@@ -262,7 +263,7 @@ func NewWithStoreClientAndStrategy(cfg Config, name, key string, store kv.Client
262263
Name: "ring_members",
263264
Help: "Number of members in the ring",
264265
ConstLabels: map[string]string{"name": name}},
265-
[]string{"state"}),
266+
[]string{"state", "zone"}),
266267
totalTokensGauge: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
267268
Name: "ring_tokens_total",
268269
Help: "Number of tokens in the ring",
@@ -362,6 +363,7 @@ func (r *Ring) updateRingState(ringDesc *Desc) {
362363
r.ringTokensByZone = ringTokensByZone
363364
r.ringInstanceByToken = ringInstanceByToken
364365
r.ringInstanceIdByAddr = ringInstanceByAddr
366+
r.previousRingZones = r.ringZones
365367
r.ringZones = ringZones
366368
r.lastTopologyChange = now
367369
if r.shuffledSubringCache != nil {
@@ -665,12 +667,19 @@ func (r *Ring) updateRingMetrics(compareResult CompareResult) {
665667
return
666668
}
667669

668-
numByState := map[string]int{}
670+
numByStateByZone := map[string]map[string]int{}
669671
oldestTimestampByState := map[string]int64{}
670672

671673
// Initialized to zero so we emit zero-metrics (instead of not emitting anything)
672674
for _, s := range []string{unhealthy, ACTIVE.String(), LEAVING.String(), PENDING.String(), JOINING.String(), READONLY.String()} {
673-
numByState[s] = 0
675+
numByStateByZone[s] = map[string]int{}
676+
// Also seed zones from the previous ring state, so a zone that has since been removed is reported as 0 rather than keeping its last value.
677+
for _, zone := range r.previousRingZones {
678+
numByStateByZone[s][zone] = 0
679+
}
680+
for _, zone := range r.ringZones {
681+
numByStateByZone[s][zone] = 0
682+
}
674683
oldestTimestampByState[s] = 0
675684
}
676685

@@ -679,14 +688,19 @@ func (r *Ring) updateRingMetrics(compareResult CompareResult) {
679688
if !r.IsHealthy(&instance, Reporting, r.KVClient.LastUpdateTime(r.key)) {
680689
s = unhealthy
681690
}
682-
numByState[s]++
691+
if _, ok := numByStateByZone[s]; !ok {
692+
numByStateByZone[s] = map[string]int{}
693+
}
694+
numByStateByZone[s][instance.Zone]++
683695
if oldestTimestampByState[s] == 0 || instance.Timestamp < oldestTimestampByState[s] {
684696
oldestTimestampByState[s] = instance.Timestamp
685697
}
686698
}
687699

688-
for state, count := range numByState {
689-
r.numMembersGaugeVec.WithLabelValues(state).Set(float64(count))
700+
for state, zones := range numByStateByZone {
701+
for zone, count := range zones {
702+
r.numMembersGaugeVec.WithLabelValues(state, zone).Set(float64(count))
703+
}
690704
}
691705
for state, timestamp := range oldestTimestampByState {
692706
r.oldestTimestampGaugeVec.WithLabelValues(state).Set(float64(timestamp))

pkg/ring/ring_test.go

Lines changed: 142 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3202,12 +3202,12 @@ func TestUpdateMetrics(t *testing.T) {
32023202
ring_member_ownership_percent{member="B",name="test"} 0.5000000002328306
32033203
# HELP ring_members Number of members in the ring
32043204
# TYPE ring_members gauge
3205-
ring_members{name="test",state="ACTIVE"} 2
3206-
ring_members{name="test",state="JOINING"} 0
3207-
ring_members{name="test",state="LEAVING"} 0
3208-
ring_members{name="test",state="PENDING"} 0
3209-
ring_members{name="test",state="READONLY"} 0
3210-
ring_members{name="test",state="Unhealthy"} 0
3205+
ring_members{name="test",state="ACTIVE",zone=""} 2
3206+
ring_members{name="test",state="JOINING",zone=""} 0
3207+
ring_members{name="test",state="LEAVING",zone=""} 0
3208+
ring_members{name="test",state="PENDING",zone=""} 0
3209+
ring_members{name="test",state="READONLY",zone=""} 0
3210+
ring_members{name="test",state="Unhealthy",zone=""} 0
32113211
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
32123212
# TYPE ring_oldest_member_timestamp gauge
32133213
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
@@ -3230,12 +3230,12 @@ func TestUpdateMetrics(t *testing.T) {
32303230
Expected: `
32313231
# HELP ring_members Number of members in the ring
32323232
# TYPE ring_members gauge
3233-
ring_members{name="test",state="ACTIVE"} 2
3234-
ring_members{name="test",state="JOINING"} 0
3235-
ring_members{name="test",state="LEAVING"} 0
3236-
ring_members{name="test",state="PENDING"} 0
3237-
ring_members{name="test",state="READONLY"} 0
3238-
ring_members{name="test",state="Unhealthy"} 0
3233+
ring_members{name="test",state="ACTIVE",zone=""} 2
3234+
ring_members{name="test",state="JOINING",zone=""} 0
3235+
ring_members{name="test",state="LEAVING",zone=""} 0
3236+
ring_members{name="test",state="PENDING",zone=""} 0
3237+
ring_members{name="test",state="READONLY",zone=""} 0
3238+
ring_members{name="test",state="Unhealthy",zone=""} 0
32393239
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
32403240
# TYPE ring_oldest_member_timestamp gauge
32413241
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
@@ -3310,12 +3310,12 @@ func TestUpdateMetricsWithRemoval(t *testing.T) {
33103310
ring_member_ownership_percent{member="B",name="test"} 0.5000000002328306
33113311
# HELP ring_members Number of members in the ring
33123312
# TYPE ring_members gauge
3313-
ring_members{name="test",state="ACTIVE"} 2
3314-
ring_members{name="test",state="JOINING"} 0
3315-
ring_members{name="test",state="LEAVING"} 0
3316-
ring_members{name="test",state="PENDING"} 0
3317-
ring_members{name="test",state="READONLY"} 0
3318-
ring_members{name="test",state="Unhealthy"} 0
3313+
ring_members{name="test",state="ACTIVE",zone=""} 2
3314+
ring_members{name="test",state="JOINING",zone=""} 0
3315+
ring_members{name="test",state="LEAVING",zone=""} 0
3316+
ring_members{name="test",state="PENDING",zone=""} 0
3317+
ring_members{name="test",state="READONLY",zone=""} 0
3318+
ring_members{name="test",state="Unhealthy",zone=""} 0
33193319
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
33203320
# TYPE ring_oldest_member_timestamp gauge
33213321
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
@@ -3347,12 +3347,130 @@ func TestUpdateMetricsWithRemoval(t *testing.T) {
33473347
ring_member_ownership_percent{member="A",name="test"} 1
33483348
# HELP ring_members Number of members in the ring
33493349
# TYPE ring_members gauge
3350-
ring_members{name="test",state="ACTIVE"} 1
3351-
ring_members{name="test",state="JOINING"} 0
3352-
ring_members{name="test",state="LEAVING"} 0
3353-
ring_members{name="test",state="PENDING"} 0
3354-
ring_members{name="test",state="READONLY"} 0
3355-
ring_members{name="test",state="Unhealthy"} 0
3350+
ring_members{name="test",state="ACTIVE",zone=""} 1
3351+
ring_members{name="test",state="JOINING",zone=""} 0
3352+
ring_members{name="test",state="LEAVING",zone=""} 0
3353+
ring_members{name="test",state="PENDING",zone=""} 0
3354+
ring_members{name="test",state="READONLY",zone=""} 0
3355+
ring_members{name="test",state="Unhealthy",zone=""} 0
3356+
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
3357+
# TYPE ring_oldest_member_timestamp gauge
3358+
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 22
3359+
ring_oldest_member_timestamp{name="test",state="JOINING"} 0
3360+
ring_oldest_member_timestamp{name="test",state="LEAVING"} 0
3361+
ring_oldest_member_timestamp{name="test",state="PENDING"} 0
3362+
ring_oldest_member_timestamp{name="test",state="READONLY"} 0
3363+
ring_oldest_member_timestamp{name="test",state="Unhealthy"} 0
3364+
# HELP ring_tokens_owned The number of tokens in the ring owned by the member
3365+
# TYPE ring_tokens_owned gauge
3366+
ring_tokens_owned{member="A",name="test"} 2
3367+
# HELP ring_tokens_total Number of tokens in the ring
3368+
# TYPE ring_tokens_total gauge
3369+
ring_tokens_total{name="test"} 2
3370+
`))
3371+
assert.NoError(t, err)
3372+
}
3373+
3374+
func TestUpdateMetricsWithZone(t *testing.T) {
3375+
cfg := Config{
3376+
KVStore: kv.Config{},
3377+
HeartbeatTimeout: 0, // get healthy stats
3378+
ReplicationFactor: 3,
3379+
ZoneAwarenessEnabled: true,
3380+
DetailedMetricsEnabled: true,
3381+
}
3382+
3383+
registry := prometheus.NewRegistry()
3384+
3385+
// create the ring to set up metrics, but do not start
3386+
ring, err := NewWithStoreClientAndStrategy(cfg, testRingName, testRingKey, &MockClient{}, NewDefaultReplicationStrategy(), registry, log.NewNopLogger())
3387+
require.NoError(t, err)
3388+
3389+
ringDesc := Desc{
3390+
Ingesters: map[string]InstanceDesc{
3391+
"A": {Addr: "127.0.0.1", Timestamp: 22, Zone: "zone1", Tokens: []uint32{math.MaxUint32 / 6, (math.MaxUint32 / 6) * 4}},
3392+
"B": {Addr: "127.0.0.2", Timestamp: 11, Zone: "zone2", Tokens: []uint32{(math.MaxUint32 / 6) * 2, (math.MaxUint32 / 6) * 5}},
3393+
"C": {Addr: "127.0.0.3", Timestamp: 33, Zone: "zone3", Tokens: []uint32{(math.MaxUint32 / 6) * 3, math.MaxUint32}},
3394+
},
3395+
}
3396+
ring.updateRingState(&ringDesc)
3397+
3398+
err = testutil.GatherAndCompare(registry, bytes.NewBufferString(`
3399+
# HELP ring_member_ownership_percent The percent ownership of the ring by member
3400+
# TYPE ring_member_ownership_percent gauge
3401+
ring_member_ownership_percent{member="A",name="test"} 0.3333333332557231
3402+
ring_member_ownership_percent{member="B",name="test"} 0.3333333330228925
3403+
ring_member_ownership_percent{member="C",name="test"} 0.3333333337213844
3404+
# HELP ring_members Number of members in the ring
3405+
# TYPE ring_members gauge
3406+
ring_members{name="test",state="ACTIVE",zone="zone1"} 1
3407+
ring_members{name="test",state="ACTIVE",zone="zone2"} 1
3408+
ring_members{name="test",state="ACTIVE",zone="zone3"} 1
3409+
ring_members{name="test",state="JOINING",zone="zone1"} 0
3410+
ring_members{name="test",state="JOINING",zone="zone2"} 0
3411+
ring_members{name="test",state="JOINING",zone="zone3"} 0
3412+
ring_members{name="test",state="LEAVING",zone="zone1"} 0
3413+
ring_members{name="test",state="LEAVING",zone="zone2"} 0
3414+
ring_members{name="test",state="LEAVING",zone="zone3"} 0
3415+
ring_members{name="test",state="PENDING",zone="zone1"} 0
3416+
ring_members{name="test",state="PENDING",zone="zone2"} 0
3417+
ring_members{name="test",state="PENDING",zone="zone3"} 0
3418+
ring_members{name="test",state="READONLY",zone="zone1"} 0
3419+
ring_members{name="test",state="READONLY",zone="zone2"} 0
3420+
ring_members{name="test",state="READONLY",zone="zone3"} 0
3421+
ring_members{name="test",state="Unhealthy",zone="zone1"} 0
3422+
ring_members{name="test",state="Unhealthy",zone="zone2"} 0
3423+
ring_members{name="test",state="Unhealthy",zone="zone3"} 0
3424+
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
3425+
# TYPE ring_oldest_member_timestamp gauge
3426+
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
3427+
ring_oldest_member_timestamp{name="test",state="JOINING"} 0
3428+
ring_oldest_member_timestamp{name="test",state="LEAVING"} 0
3429+
ring_oldest_member_timestamp{name="test",state="PENDING"} 0
3430+
ring_oldest_member_timestamp{name="test",state="READONLY"} 0
3431+
ring_oldest_member_timestamp{name="test",state="Unhealthy"} 0
3432+
# HELP ring_tokens_owned The number of tokens in the ring owned by the member
3433+
# TYPE ring_tokens_owned gauge
3434+
ring_tokens_owned{member="A",name="test"} 2
3435+
ring_tokens_owned{member="B",name="test"} 2
3436+
ring_tokens_owned{member="C",name="test"} 2
3437+
# HELP ring_tokens_total Number of tokens in the ring
3438+
# TYPE ring_tokens_total gauge
3439+
ring_tokens_total{name="test"} 6
3440+
`))
3441+
require.NoError(t, err)
3442+
3443+
ringDescNew := Desc{
3444+
Ingesters: map[string]InstanceDesc{
3445+
"A": {Addr: "127.0.0.1", Timestamp: 22, Zone: "zone1", Tokens: []uint32{math.MaxUint32 / 6, (math.MaxUint32 / 6) * 4}},
3446+
},
3447+
}
3448+
ring.updateRingState(&ringDescNew)
3449+
3450+
err = testutil.GatherAndCompare(registry, bytes.NewBufferString(`
3451+
# HELP ring_member_ownership_percent The percent ownership of the ring by member
3452+
# TYPE ring_member_ownership_percent gauge
3453+
ring_member_ownership_percent{member="A",name="test"} 1
3454+
# HELP ring_members Number of members in the ring
3455+
# TYPE ring_members gauge
3456+
ring_members{name="test",state="ACTIVE",zone="zone1"} 1
3457+
ring_members{name="test",state="ACTIVE",zone="zone2"} 0
3458+
ring_members{name="test",state="ACTIVE",zone="zone3"} 0
3459+
ring_members{name="test",state="JOINING",zone="zone1"} 0
3460+
ring_members{name="test",state="JOINING",zone="zone2"} 0
3461+
ring_members{name="test",state="JOINING",zone="zone3"} 0
3462+
ring_members{name="test",state="LEAVING",zone="zone1"} 0
3463+
ring_members{name="test",state="LEAVING",zone="zone2"} 0
3464+
ring_members{name="test",state="LEAVING",zone="zone3"} 0
3465+
ring_members{name="test",state="PENDING",zone="zone1"} 0
3466+
ring_members{name="test",state="PENDING",zone="zone2"} 0
3467+
ring_members{name="test",state="PENDING",zone="zone3"} 0
3468+
ring_members{name="test",state="READONLY",zone="zone1"} 0
3469+
ring_members{name="test",state="READONLY",zone="zone2"} 0
3470+
ring_members{name="test",state="READONLY",zone="zone3"} 0
3471+
ring_members{name="test",state="Unhealthy",zone="zone1"} 0
3472+
ring_members{name="test",state="Unhealthy",zone="zone2"} 0
3473+
ring_members{name="test",state="Unhealthy",zone="zone3"} 0
33563474
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
33573475
# TYPE ring_oldest_member_timestamp gauge
33583476
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 22

0 commit comments

Comments
 (0)