Skip to content

Commit 7e6371f

Browse files
authored
Merge pull request #860 from ydb-platform/metrics
* Refactored traces and metrics
2 parents 63560a5 + 72a0f51 commit 7e6371f

File tree

6 files changed

+160
-111
lines changed

6 files changed

+160
-111
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
* Refactored traces and metrics
12
* Renamed `{retry,table}.WithID` option to `{retry,table}.WithLabel`
23
* Added `ydb.WithTraceRetry` option
34
* Moved `internal/allocator.Buffers` to package `internal/xstring`

metrics/driver.go

Lines changed: 48 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,34 @@ import (
1010

1111
// driver makes a trace.Driver whose callbacks publish driver, balancer and connection metrics
1212
func driver(config Config) (t trace.Driver) {
13+
config = config.WithSystem("driver")
14+
endpoints := config.WithSystem("balancer").GaugeVec("endpoints", "local_dc", "az")
15+
balancersDiscoveries := config.WithSystem("balancer").CounterVec("discoveries", "status", "cause")
16+
balancerUpdates := config.WithSystem("balancer").CounterVec("updates", "cause")
17+
conns := config.GaugeVec("conns", "endpoint", "node_id")
18+
banned := config.WithSystem("conn").GaugeVec("banned", "endpoint", "node_id", "cause")
19+
requests := config.WithSystem("conn").CounterVec("requests", "status", "method", "endpoint", "node_id")
20+
tli := config.CounterVec("transaction_locks_invalidated")
21+
1322
type endpointKey struct {
1423
localDC bool
1524
az string
1625
}
17-
18-
config = config.WithSystem("driver")
19-
endpoints := config.WithSystem("balancer").GaugeVec("endpoints", "local_dc", "az")
20-
balancerUpdates := config.WithSystem("balancer").CounterVec("updates", "force")
21-
conns := config.GaugeVec("conns", "address", "node_id")
22-
banned := config.WithSystem("conn").GaugeVec("banned", "address", "node_id", "cause")
23-
requests := config.WithSystem("conn").CounterVec("requests", "status", "method")
24-
tli := config.CounterVec("transaction_locks_invalidated")
2526
knownEndpoints := make(map[endpointKey]struct{})
27+
2628
t.OnConnInvoke = func(info trace.DriverConnInvokeStartInfo) func(trace.DriverConnInvokeDoneInfo) {
27-
method := info.Method
29+
var (
30+
method = info.Method
31+
endpoint = info.Endpoint.Address()
32+
nodeID = info.Endpoint.NodeID()
33+
)
2834
return func(info trace.DriverConnInvokeDoneInfo) {
2935
if config.Details()&trace.DriverConnEvents != 0 {
3036
requests.With(map[string]string{
31-
"status": errorBrief(info.Error),
32-
"method": string(method),
37+
"status": errorBrief(info.Error),
38+
"method": string(method),
39+
"endpoint": endpoint,
40+
"node_id": strconv.FormatUint(uint64(nodeID), 10),
3341
}).Inc()
3442
if xerrors.IsOperationErrorTransactionLocksInvalidated(info.Error) {
3543
tli.With(nil).Inc()
@@ -42,13 +50,19 @@ func driver(config Config) (t trace.Driver) {
4250
) func(
4351
trace.DriverConnNewStreamDoneInfo,
4452
) {
45-
method := info.Method
53+
var (
54+
method = info.Method
55+
endpoint = info.Endpoint.Address()
56+
nodeID = info.Endpoint.NodeID()
57+
)
4658
return func(info trace.DriverConnNewStreamRecvInfo) func(trace.DriverConnNewStreamDoneInfo) {
4759
return func(info trace.DriverConnNewStreamDoneInfo) {
4860
if config.Details()&trace.DriverConnEvents != 0 {
4961
requests.With(map[string]string{
50-
"status": errorBrief(info.Error),
51-
"method": string(method),
62+
"status": errorBrief(info.Error),
63+
"method": string(method),
64+
"endpoint": endpoint,
65+
"node_id": strconv.FormatUint(uint64(nodeID), 10),
5266
}).Inc()
5367
}
5468
}
@@ -57,19 +71,30 @@ func driver(config Config) (t trace.Driver) {
5771
t.OnConnBan = func(info trace.DriverConnBanStartInfo) func(trace.DriverConnBanDoneInfo) {
5872
if config.Details()&trace.DriverConnEvents != 0 {
5973
banned.With(map[string]string{
60-
"address": info.Endpoint.Address(),
61-
"node_id": idToString(info.Endpoint.NodeID()),
62-
"cause": errorBrief(info.Cause),
74+
"endpoint": info.Endpoint.Address(),
75+
"node_id": idToString(info.Endpoint.NodeID()),
76+
"cause": errorBrief(info.Cause),
6377
}).Add(1)
6478
}
6579
return nil
6680
}
81+
t.OnBalancerClusterDiscoveryAttempt = func(info trace.DriverBalancerClusterDiscoveryAttemptStartInfo) func(
82+
trace.DriverBalancerClusterDiscoveryAttemptDoneInfo,
83+
) {
84+
eventType := repeater.EventType(*info.Context)
85+
return func(info trace.DriverBalancerClusterDiscoveryAttemptDoneInfo) {
86+
balancersDiscoveries.With(map[string]string{
87+
"status": errorBrief(info.Error),
88+
"cause": eventType,
89+
}).Inc()
90+
}
91+
}
6792
t.OnBalancerUpdate = func(info trace.DriverBalancerUpdateStartInfo) func(trace.DriverBalancerUpdateDoneInfo) {
6893
eventType := repeater.EventType(*info.Context)
6994
return func(info trace.DriverBalancerUpdateDoneInfo) {
7095
if config.Details()&trace.DriverBalancerEvents != 0 {
7196
balancerUpdates.With(map[string]string{
72-
"force": strconv.FormatBool(eventType == repeater.EventForce),
97+
"cause": eventType,
7398
}).Inc()
7499
newEndpoints := make(map[endpointKey]int, len(info.Endpoints))
75100
for _, e := range info.Endpoints {
@@ -99,14 +124,14 @@ func driver(config Config) (t trace.Driver) {
99124
}
100125
}
101126
t.OnConnDial = func(info trace.DriverConnDialStartInfo) func(trace.DriverConnDialDoneInfo) {
102-
address := info.Endpoint.Address()
127+
endpoint := info.Endpoint.Address()
103128
nodeID := info.Endpoint.NodeID()
104129
return func(info trace.DriverConnDialDoneInfo) {
105130
if config.Details()&trace.DriverConnEvents != 0 {
106131
if info.Error == nil {
107132
conns.With(map[string]string{
108-
"address": address,
109-
"node_id": idToString(nodeID),
133+
"endpoint": endpoint,
134+
"node_id": idToString(nodeID),
110135
}).Add(1)
111136
}
112137
}
@@ -115,8 +140,8 @@ func driver(config Config) (t trace.Driver) {
115140
t.OnConnClose = func(info trace.DriverConnCloseStartInfo) func(trace.DriverConnCloseDoneInfo) {
116141
if config.Details()&trace.DriverConnEvents != 0 {
117142
conns.With(map[string]string{
118-
"address": info.Endpoint.Address(),
119-
"node_id": idToString(info.Endpoint.NodeID()),
143+
"endpoint": info.Endpoint.Address(),
144+
"node_id": idToString(info.Endpoint.NodeID()),
120145
}).Add(-1)
121146
}
122147
return nil

metrics/retry.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,47 @@
11
package metrics
22

33
import (
4+
"time"
5+
46
"github.com/ydb-platform/ydb-go-sdk/v3/trace"
57
)
68

79
func retry(config Config) (t trace.Retry) {
10+
config = config.WithSystem("retry")
11+
errs := config.CounterVec("errors", "status", "retry_label", "final")
12+
attempts := config.HistogramVec("attempts", []float64{0, 1, 2, 3, 4, 5, 7, 10}, "retry_label")
13+
latency := config.TimerVec("latency", "status", "retry_label")
14+
t.OnRetry = func(info trace.RetryLoopStartInfo) func(trace.RetryLoopIntermediateInfo) func(trace.RetryLoopDoneInfo) {
15+
label := info.Label
16+
if label == "" {
17+
return nil
18+
}
19+
start := time.Now()
20+
return func(info trace.RetryLoopIntermediateInfo) func(trace.RetryLoopDoneInfo) {
21+
if info.Error != nil && config.Details()&trace.RetryEvents != 0 {
22+
errs.With(map[string]string{
23+
"status": errorBrief(info.Error),
24+
"retry_label": label,
25+
"final": "false",
26+
}).Inc()
27+
}
28+
return func(info trace.RetryLoopDoneInfo) {
29+
if config.Details()&trace.RetryEvents != 0 {
30+
attempts.With(map[string]string{
31+
"retry_label": label,
32+
}).Record(float64(info.Attempts))
33+
errs.With(map[string]string{
34+
"status": errorBrief(info.Error),
35+
"retry_label": label,
36+
"final": "true",
37+
}).Inc()
38+
latency.With(map[string]string{
39+
"status": errorBrief(info.Error),
40+
"retry_label": label,
41+
}).Record(time.Since(start))
42+
}
43+
}
44+
}
45+
}
846
return t
947
}

metrics/sql.go

Lines changed: 73 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,23 @@
11
package metrics
22

33
import (
4+
"time"
5+
46
"github.com/ydb-platform/ydb-go-sdk/v3/trace"
57
)
68

79
// databaseSQL makes a trace.DatabaseSQL that measures `database/sql` events
810
func databaseSQL(config Config) (t trace.DatabaseSQL) {
911
config = config.WithSystem("database").WithSystem("sql")
1012
conns := config.GaugeVec("conns")
11-
txs := config.GaugeVec("txs")
13+
inflight := config.WithSystem("conns").GaugeVec("inflight")
14+
query := config.CounterVec("query", "status", "query_mode")
15+
queryLatency := config.WithSystem("query").TimerVec("latency", "status", "query_mode")
16+
exec := config.CounterVec("exec", "status", "query_label", "query_mode")
17+
execLatency := config.WithSystem("exec").TimerVec("latency", "status", "query_mode")
18+
txBegin := config.WithSystem("tx").CounterVec("begin", "status")
19+
txCommit := config.WithSystem("tx").CounterVec("commit", "status")
20+
txRollback := config.WithSystem("tx").CounterVec("rollback", "status")
1221
t.OnConnectorConnect = func(info trace.DatabaseSQLConnectorConnectStartInfo) func(
1322
trace.DatabaseSQLConnectorConnectDoneInfo,
1423
) {
@@ -32,32 +41,80 @@ func databaseSQL(config Config) (t trace.DatabaseSQL) {
3241
t.OnConnBegin = func(info trace.DatabaseSQLConnBeginStartInfo) func(trace.DatabaseSQLConnBeginDoneInfo) {
3342
if config.Details()&trace.DatabaseSQLTxEvents != 0 {
3443
return func(info trace.DatabaseSQLConnBeginDoneInfo) {
35-
if info.Tx != nil {
36-
txs.With(nil).Add(1)
37-
}
44+
txBegin.With(map[string]string{
45+
"status": errorBrief(info.Error),
46+
}).Inc()
3847
}
3948
}
4049
return nil
4150
}
4251
t.OnTxCommit = func(info trace.DatabaseSQLTxCommitStartInfo) func(trace.DatabaseSQLTxCommitDoneInfo) {
43-
if config.Details()&trace.DatabaseSQLTxEvents != 0 {
44-
return func(info trace.DatabaseSQLTxCommitDoneInfo) {
45-
if info.Error == nil {
46-
txs.With(nil).Add(-1)
47-
}
52+
return func(info trace.DatabaseSQLTxCommitDoneInfo) {
53+
if config.Details()&trace.DatabaseSQLTxEvents != 0 {
54+
txCommit.With(map[string]string{
55+
"status": errorBrief(info.Error),
56+
}).Inc()
4857
}
4958
}
50-
return nil
5159
}
5260
t.OnTxRollback = func(info trace.DatabaseSQLTxRollbackStartInfo) func(trace.DatabaseSQLTxRollbackDoneInfo) {
53-
if config.Details()&trace.DatabaseSQLTxEvents != 0 {
54-
return func(info trace.DatabaseSQLTxRollbackDoneInfo) {
55-
if info.Error == nil {
56-
txs.With(nil).Add(-1)
57-
}
61+
return func(info trace.DatabaseSQLTxRollbackDoneInfo) {
62+
if config.Details()&trace.DatabaseSQLTxEvents != 0 {
63+
txRollback.With(map[string]string{
64+
"status": errorBrief(info.Error),
65+
}).Inc()
66+
}
67+
}
68+
}
69+
t.OnConnExec = func(info trace.DatabaseSQLConnExecStartInfo) func(trace.DatabaseSQLConnExecDoneInfo) {
70+
if config.Details()&trace.DatabaseSQLEvents != 0 {
71+
inflight.With(nil).Add(1)
72+
}
73+
var (
74+
mode = info.Mode
75+
start = time.Now()
76+
)
77+
return func(info trace.DatabaseSQLConnExecDoneInfo) {
78+
if config.Details()&trace.DatabaseSQLEvents != 0 {
79+
inflight.With(nil).Add(-1)
80+
}
81+
if config.Details()&trace.DatabaseSQLConnEvents != 0 {
82+
status := errorBrief(info.Error)
83+
exec.With(map[string]string{
84+
"status": status,
85+
"query_mode": mode,
86+
}).Inc()
87+
execLatency.With(map[string]string{
88+
"status": status,
89+
"query_mode": mode,
90+
}).Record(time.Since(start))
91+
}
92+
}
93+
}
94+
t.OnConnQuery = func(info trace.DatabaseSQLConnQueryStartInfo) func(trace.DatabaseSQLConnQueryDoneInfo) {
95+
if config.Details()&trace.DatabaseSQLEvents != 0 {
96+
inflight.With(nil).Add(1)
97+
}
98+
var (
99+
mode = info.Mode
100+
start = time.Now()
101+
)
102+
return func(info trace.DatabaseSQLConnQueryDoneInfo) {
103+
if config.Details()&trace.DatabaseSQLEvents != 0 {
104+
inflight.With(nil).Add(-1)
105+
}
106+
if config.Details()&trace.DatabaseSQLConnEvents != 0 {
107+
status := errorBrief(info.Error)
108+
query.With(map[string]string{
109+
"status": status,
110+
"query_mode": mode,
111+
}).Inc()
112+
queryLatency.With(map[string]string{
113+
"status": status,
114+
"query_mode": mode,
115+
}).Record(time.Since(start))
58116
}
59117
}
60-
return nil
61118
}
62119
return t
63120
}

metrics/table.go

Lines changed: 0 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -17,81 +17,11 @@ func table(config Config) (t trace.Table) {
1717
wait := config.WithSystem("pool").GaugeVec("wait")
1818
waitLatency := config.WithSystem("pool").WithSystem("wait").TimerVec("latency")
1919
alive := config.GaugeVec("sessions", "node_id")
20-
doAttempts := config.WithSystem("do").HistogramVec("attempts", []float64{0, 1, 2, 5, 10}, "name")
21-
doErrors := config.WithSystem("do").CounterVec("errors", "status", "name")
22-
doIntermediateErrors := config.WithSystem("do").WithSystem("intermediate").CounterVec("errors", "status", "name")
23-
doLatency := config.WithSystem("do").TimerVec("latency", "status", "name")
24-
doTxAttempts := config.WithSystem("doTx").HistogramVec("attempts", []float64{0, 1, 2, 5, 10}, "name")
25-
doTxIntermediateErrors := config.WithSystem("doTx").WithSystem("intermediate").CounterVec("errors", "status", "name")
26-
doTxErrors := config.WithSystem("doTx").CounterVec("errors", "status", "name")
27-
doTxLatency := config.WithSystem("doTx").TimerVec("latency", "status", "name")
2820
t.OnInit = func(info trace.TableInitStartInfo) func(trace.TableInitDoneInfo) {
2921
return func(info trace.TableInitDoneInfo) {
3022
limit.With(nil).Set(float64(info.Limit))
3123
}
3224
}
33-
t.OnDo = func(info trace.TableDoStartInfo) func(
34-
info trace.TableDoIntermediateInfo,
35-
) func(
36-
trace.TableDoDoneInfo,
37-
) {
38-
var (
39-
label = info.Label
40-
start = time.Now()
41-
)
42-
return func(info trace.TableDoIntermediateInfo) func(trace.TableDoDoneInfo) {
43-
if info.Error != nil && config.Details()&trace.TableEvents != 0 {
44-
doIntermediateErrors.With(map[string]string{
45-
"status": errorBrief(info.Error),
46-
"label": label,
47-
}).Inc()
48-
}
49-
return func(info trace.TableDoDoneInfo) {
50-
if config.Details()&trace.TableEvents != 0 {
51-
doAttempts.With(nil).Record(float64(info.Attempts))
52-
doErrors.With(map[string]string{
53-
"status": errorBrief(info.Error),
54-
"label": label,
55-
}).Inc()
56-
doLatency.With(map[string]string{
57-
"status": errorBrief(info.Error),
58-
"label": label,
59-
}).Record(time.Since(start))
60-
}
61-
}
62-
}
63-
}
64-
t.OnDoTx = func(info trace.TableDoTxStartInfo) func(
65-
info trace.TableDoTxIntermediateInfo,
66-
) func(
67-
trace.TableDoTxDoneInfo,
68-
) {
69-
var (
70-
label = info.Label
71-
start = time.Now()
72-
)
73-
return func(info trace.TableDoTxIntermediateInfo) func(trace.TableDoTxDoneInfo) {
74-
if info.Error != nil && config.Details()&trace.TableEvents != 0 {
75-
doTxIntermediateErrors.With(map[string]string{
76-
"status": errorBrief(info.Error),
77-
"label": label,
78-
}).Inc()
79-
}
80-
return func(info trace.TableDoTxDoneInfo) {
81-
if config.Details()&trace.TableEvents != 0 {
82-
doTxAttempts.With(nil).Record(float64(info.Attempts))
83-
doTxErrors.With(map[string]string{
84-
"status": errorBrief(info.Error),
85-
"label": label,
86-
}).Inc()
87-
doTxLatency.With(map[string]string{
88-
"status": errorBrief(info.Error),
89-
"label": label,
90-
}).Record(time.Since(start))
91-
}
92-
}
93-
}
94-
}
9525
t.OnSessionNew = func(info trace.TableSessionNewStartInfo) func(trace.TableSessionNewDoneInfo) {
9626
return func(info trace.TableSessionNewDoneInfo) {
9727
if info.Error == nil && config.Details()&trace.TableSessionEvents != 0 {

0 commit comments

Comments
 (0)