Skip to content

Commit 9a324b2

Browse files
authored
metrics, elect: Show the metrics reader owner on Grafana (#615)
1 parent 08e6cec commit 9a324b2

File tree

10 files changed

+102
-57
lines changed

10 files changed

+102
-57
lines changed

pkg/balance/metricsreader/backend_reader.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ import (
3636

3737
const (
3838
// readerOwnerKeyPrefix is the key prefix in etcd for backend reader owner election.
39-
// For global owner, the key is "/tiproxy/metricreader/owner".
40-
// For zonal owner, the key is "/tiproxy/metricreader/{zone}/owner".
41-
readerOwnerKeyPrefix = "/tiproxy/metricreader"
39+
// For global owner, the key is "/tiproxy/metric_reader/owner".
40+
// For zonal owner, the key is "/tiproxy/metric_reader/{zone}/owner".
41+
readerOwnerKeyPrefix = "/tiproxy/metric_reader"
4242
readerOwnerKeySuffix = "owner"
4343
// sessionTTL is the session's TTL in seconds for backend reader owner election.
4444
sessionTTL = 30
@@ -225,9 +225,9 @@ func (br *BackendReader) queryAllOwners(ctx context.Context) (zones, owners []st
225225

226226
var zone string
227227
if strings.HasPrefix(key, readerOwnerKeySuffix) {
228-
// global owner key, such as "/tiproxy/metricreader/owner/leaseID"
228+
// global owner key, such as "/tiproxy/metric_reader/owner/leaseID"
229229
} else if endIdx := strings.Index(key, "/"); endIdx > 0 && strings.HasPrefix(key[endIdx+1:], readerOwnerKeySuffix) {
230-
// zonal owner key, such as "/tiproxy/metricreader/east/owner/leaseID"
230+
// zonal owner key, such as "/tiproxy/metric_reader/east/owner/leaseID"
231231
zone = key[:endIdx]
232232
} else {
233233
continue

pkg/manager/elect/election.go

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@ package elect
55

66
import (
77
"context"
8+
"strings"
89
"sync/atomic"
910
"time"
1011

1112
"github.com/pingcap/tiproxy/lib/util/errors"
1213
"github.com/pingcap/tiproxy/lib/util/retry"
1314
"github.com/pingcap/tiproxy/lib/util/waitgroup"
15+
"github.com/pingcap/tiproxy/pkg/metrics"
1416
"github.com/pingcap/tiproxy/pkg/util/etcd"
1517
"github.com/siddontang/go/hack"
1618
"go.etcd.io/etcd/api/v3/mvccpb"
@@ -21,7 +23,9 @@ import (
2123
)
2224

2325
// Package-level constants for the election.
const (
24-
logInterval = 10
26+
logInterval = 10
27+
// ownerKeyPrefix is trimmed from the front of the election key when building
// the label value of the owner metric (see NewElection).
ownerKeyPrefix = "/tiproxy/"
28+
// ownerKeySuffix is trimmed from the end of the election key when building
// the label value of the owner metric (see NewElection).
ownerKeySuffix = "/owner"
2529
)
2630

2731
type Member interface {
@@ -65,26 +69,29 @@ var _ Election = (*election)(nil)
6569
// election is the Election implementation of this package
// (asserted by `var _ Election = (*election)(nil)`).
type election struct {
6670
cfg ElectionConfig
6771
// id is typically the instance address
68-
id string
69-
key string
70-
lg *zap.Logger
71-
etcdCli *clientv3.Client
72-
elec atomic.Pointer[concurrency.Election]
73-
wg waitgroup.WaitGroup
74-
cancel context.CancelFunc
75-
member Member
72+
id string
73+
key string
74+
// trimedKey is key with ownerKeyPrefix trimmed from the front and ownerKeySuffix
// trimmed from the end. It is used as the label value of the owner metric,
// which is shown on Grafana.
75+
trimedKey string
76+
lg *zap.Logger
77+
etcdCli *clientv3.Client
78+
elec atomic.Pointer[concurrency.Election]
79+
wg waitgroup.WaitGroup
80+
cancel context.CancelFunc
81+
member Member
7682
}
7783

7884
// NewElection creates an Election.
//
// The returned election logs with the "key" and "id" fields attached.
// Its metric label (trimedKey) is derived from key by trimming ownerKeyPrefix
// from the front and ownerKeySuffix from the end; if key lacks the prefix or
// the suffix, that side is left unchanged.
7985
func NewElection(lg *zap.Logger, etcdCli *clientv3.Client, cfg ElectionConfig, id, key string, member Member) *election {
8086
lg = lg.With(zap.String("key", key), zap.String("id", id))
8187
return &election{
82-
lg: lg,
83-
etcdCli: etcdCli,
84-
cfg: cfg,
85-
id: id,
86-
key: key,
87-
member: member,
88+
lg: lg,
89+
etcdCli: etcdCli,
90+
cfg: cfg,
91+
id: id,
92+
key: key,
93+
trimedKey: strings.TrimSuffix(strings.TrimPrefix(key, ownerKeyPrefix), ownerKeySuffix),
94+
member: member,
8895
}
8996
}
9097

@@ -186,12 +193,15 @@ func (m *election) campaignLoop(ctx context.Context) {
186193
// onElected handles this instance becoming the owner: it notifies the member,
// stores the election handle, sets the owner gauge for this election's label
// to 1 and logs the event.
func (m *election) onElected(elec *concurrency.Election) {
	m.member.OnElected()
	m.elec.Store(elec)

	// Expose this instance as the owner of this election on the metrics endpoint.
	ownerGauge := metrics.OwnerGauge.WithLabelValues(m.trimedKey)
	ownerGauge.Set(1)

	m.lg.Info("elected as the owner")
}
191199

192200
// onRetired handles this instance losing ownership: it notifies the member,
// clears the stored election handle, removes the owner gauge series for this
// election's label and logs the event.
func (m *election) onRetired() {
	m.member.OnRetired()
	m.elec.Store(nil)

	// Remove the series instead of setting it to 0, so that it doesn't show on Grafana.
	ownerLabels := map[string]string{metrics.LblType: m.trimedKey}
	metrics.OwnerGauge.DeletePartialMatch(ownerLabels)

	m.lg.Info("the owner retires")
}
197207

pkg/manager/elect/election_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import (
88
"testing"
99
"time"
1010

11+
"github.com/pingcap/tiproxy/lib/util/logger"
12+
"github.com/pingcap/tiproxy/pkg/metrics"
1113
"github.com/stretchr/testify/require"
1214
)
1315

@@ -108,3 +110,37 @@ func TestOwnerHang(t *testing.T) {
108110
ownerID = ts.getOwnerID()
109111
require.Equal(t, "1", ownerID)
110112
}
113+
114+
func TestOwnerMetric(t *testing.T) {
115+
lg, _ := logger.CreateLoggerForTest(t)
116+
checkMetric := func(key string, expectedFound bool) {
117+
results, err := metrics.Collect(metrics.OwnerGauge)
118+
require.NoError(t, err)
119+
found := false
120+
for _, result := range results {
121+
if *result.Label[0].Value == key {
122+
require.EqualValues(t, 1, *result.Gauge.Value)
123+
found = true
124+
break
125+
}
126+
}
127+
require.Equal(t, expectedFound, found)
128+
}
129+
130+
elec1 := NewElection(lg, nil, electionConfigForTest(1), "1", ownerKeyPrefix+"key"+ownerKeySuffix, newMockMember())
131+
elec1.onElected(nil)
132+
checkMetric("key", true)
133+
134+
elec2 := NewElection(lg, nil, electionConfigForTest(1), "1", "key2/1", newMockMember())
135+
elec2.onElected(nil)
136+
checkMetric("key2/1", true)
137+
138+
elec3 := NewElection(lg, nil, electionConfigForTest(1), "1", ownerKeyPrefix+"key3/1", newMockMember())
139+
elec3.onElected(nil)
140+
checkMetric("key3/1", true)
141+
142+
elec1.onRetired()
143+
checkMetric("key", false)
144+
checkMetric("key2/1", true)
145+
checkMetric("key3/1", true)
146+
}

pkg/manager/vip/manager.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99

1010
"github.com/pingcap/tiproxy/lib/config"
1111
"github.com/pingcap/tiproxy/pkg/manager/elect"
12-
"github.com/pingcap/tiproxy/pkg/metrics"
1312
clientv3 "go.etcd.io/etcd/client/v3"
1413
"go.uber.org/zap"
1514
)
@@ -82,7 +81,6 @@ func (vm *vipManager) Start(ctx context.Context, etcdCli *clientv3.Client) error
8281
}
8382

8483
func (vm *vipManager) OnElected() {
85-
metrics.VIPGauge.Set(1)
8684
hasIP, err := vm.operation.HasIP()
8785
if err != nil {
8886
vm.lg.Error("checking addresses failed", zap.Error(err))
@@ -104,7 +102,6 @@ func (vm *vipManager) OnElected() {
104102
}
105103

106104
func (vm *vipManager) OnRetired() {
107-
metrics.VIPGauge.Set(0)
108105
hasIP, err := vm.operation.HasIP()
109106
if err != nil {
110107
vm.lg.Error("checking addresses failed", zap.Error(err))

pkg/manager/vip/manager_test.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111

1212
"github.com/pingcap/tiproxy/lib/config"
1313
"github.com/pingcap/tiproxy/lib/util/logger"
14-
"github.com/pingcap/tiproxy/pkg/metrics"
1514
"github.com/stretchr/testify/require"
1615
)
1716

@@ -139,14 +138,6 @@ func TestNetworkOperation(t *testing.T) {
139138
return strings.Contains(text.String()[logIdx:], test.expectedLog)
140139
}, 3*time.Second, 10*time.Millisecond, "case %d", i)
141140
logIdx = len(text.String())
142-
143-
expectedVIPGauge := 0
144-
if test.eventType == eventTypeElected {
145-
expectedVIPGauge = 1
146-
}
147-
vipGauge, err := metrics.ReadGauge(metrics.VIPGauge)
148-
require.NoError(t, err)
149-
require.EqualValues(t, expectedVIPGauge, vipGauge, "case %d", i)
150141
}
151142
cancel()
152143
vm.Close()

pkg/metrics/grafana/tiproxy_summary.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -667,7 +667,7 @@
667667
"dashLength": 10,
668668
"dashes": false,
669669
"datasource": "${DS_TEST-CLUSTER}",
670-
"description": "1 indicates the VIP owner.",
670+
"description": "The TiProxy owner of each job type.",
671671
"fill": 1,
672672
"fillGradient": 0,
673673
"gridPos": {
@@ -704,17 +704,17 @@
704704
"steppedLine": false,
705705
"targets": [
706706
{
707-
"expr": "tiproxy_server_vip{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}",
707+
"expr": "tiproxy_server_owner{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}",
708708
"format": "time_series",
709709
"intervalFactor": 2,
710-
"legendFormat": "{{instance}}",
710+
"legendFormat": "{{instance}} - {{type}}",
711711
"refId": "A"
712712
}
713713
],
714714
"thresholds": [ ],
715715
"timeFrom": null,
716716
"timeShift": null,
717-
"title": "VIP Owner",
717+
"title": "Owner",
718718
"tooltip": {
719719
"shared": true,
720720
"sort": 0,

pkg/metrics/grafana/tiproxy_summary.jsonnet

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -209,17 +209,17 @@ local uptimeP = graphPanel.new(
209209
)
210210
);
211211

212-
local vipP = graphPanel.new(
213-
title='VIP Owner',
212+
local ownerP = graphPanel.new(
213+
title='Owner',
214214
datasource=myDS,
215215
legend_rightSide=true,
216-
description='1 indicates the VIP owner.',
216+
description='The TiProxy owner of each job type.',
217217
format='short',
218218
)
219219
.addTarget(
220220
prometheus.target(
221-
'tiproxy_server_vip{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}',
222-
legendFormat='{{instance}}',
221+
'tiproxy_server_owner{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}',
222+
legendFormat='{{instance}} - {{type}}',
223223
)
224224
);
225225

@@ -579,7 +579,7 @@ newDash
579579
.addPanel(createConnP, gridPos=leftPanelPos)
580580
.addPanel(disconnP, gridPos=rightPanelPos)
581581
.addPanel(goroutineP, gridPos=leftPanelPos)
582-
.addPanel(vipP, gridPos=rightPanelPos)
582+
.addPanel(ownerP, gridPos=rightPanelPos)
583583
,
584584
gridPos=rowPos
585585
)

pkg/metrics/metrics.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ func init() {
9999
CreateConnCounter,
100100
DisConnCounter,
101101
MaxProcsGauge,
102-
VIPGauge,
102+
OwnerGauge,
103103
ServerEventCounter,
104104
ServerErrCounter,
105105
TimeJumpBackCounter,
@@ -159,3 +159,20 @@ func ReadGauge(gauge prometheus.Gauge) (float64, error) {
159159
}
160160
return metric.Gauge.GetValue(), nil
161161
}
162+
163+
func Collect(coll prometheus.Collector) ([]dto.Metric, error) {
164+
results := make([]dto.Metric, 0)
165+
ch := make(chan prometheus.Metric)
166+
go func() {
167+
coll.Collect(ch)
168+
close(ch)
169+
}()
170+
for m := range ch {
171+
var metric dto.Metric
172+
if err := m.Write(&metric); err != nil {
173+
return nil, err
174+
}
175+
results = append(results, metric)
176+
}
177+
return results, nil
178+
}

pkg/metrics/metrics_test.go

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111

1212
"github.com/pingcap/tiproxy/lib/util/logger"
1313
"github.com/prometheus/client_golang/prometheus"
14-
dto "github.com/prometheus/client_model/go"
1514
"github.com/stretchr/testify/require"
1615
)
1716

@@ -64,16 +63,11 @@ func TestDelLabelValues(t *testing.T) {
6463
}
6564

6665
getAddrs := func(coll prometheus.Collector) []string {
67-
ch := make(chan prometheus.Metric)
68-
go func() {
69-
coll.Collect(ch)
70-
close(ch)
71-
}()
66+
results, err := Collect(coll)
67+
require.NoError(t, err)
7268
addrs := make([]string, 0, 3)
73-
for m := range ch {
74-
var metric dto.Metric
75-
require.NoError(t, m.Write(&metric))
76-
for _, l := range metric.Label {
69+
for _, m := range results {
70+
for _, l := range m.Label {
7771
if strings.HasPrefix(*l.Value, "addr") && !slices.Contains(addrs, *l.Value) {
7872
addrs = append(addrs, *l.Value)
7973
}

pkg/metrics/server.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,13 @@ var (
4242
Help: "Number of disconnections.",
4343
}, []string{LblType})
4444

45-
VIPGauge = prometheus.NewGauge(
45+
OwnerGauge = prometheus.NewGaugeVec(
4646
prometheus.GaugeOpts{
4747
Namespace: ModuleProxy,
4848
Subsystem: LabelServer,
49-
Name: "vip",
50-
Help: "VIP owner.",
51-
})
49+
Name: "owner",
50+
Help: "The TiProxy owner of each job type.",
51+
}, []string{LblType})
5252

5353
MaxProcsGauge = prometheus.NewGauge(
5454
prometheus.GaugeOpts{

0 commit comments

Comments
 (0)