Skip to content

Commit 30ce8bc

Browse files
authored
feat(syncer/metrics): add metrics to syncer (#136)
1 parent aa5a016 commit 30ce8bc

File tree

3 files changed

+196
-20
lines changed

3 files changed

+196
-20
lines changed

sync/metrics.go

Lines changed: 182 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,48 +3,216 @@ package sync
33
import (
44
"context"
55
"sync/atomic"
6+
"time"
67

78
"go.opentelemetry.io/otel"
9+
"go.opentelemetry.io/otel/attribute"
810
"go.opentelemetry.io/otel/metric"
911
)
1012

1113
var meter = otel.Meter("header/sync")
1214

1315
type metrics struct {
14-
totalSynced atomic.Int64
15-
totalSyncedGauge metric.Float64ObservableGauge
16+
syncerReg metric.Registration
17+
18+
subjectiveHeadInst metric.Int64ObservableGauge
19+
syncLoopRunningInst metric.Int64ObservableGauge
20+
21+
syncLoopStarted metric.Int64Counter
22+
trustedPeersOutOfSync metric.Int64Counter
23+
unrecentHeader metric.Int64Counter
24+
subjectiveInit metric.Int64Counter
25+
26+
subjectiveHead atomic.Int64
27+
28+
syncLoopDurationHist metric.Float64Histogram
29+
syncLoopActive atomic.Int64
30+
syncStartedTs time.Time
31+
32+
requestRangeTimeHist metric.Float64Histogram
33+
requestRangeStartTs time.Time
34+
35+
blockTime metric.Float64Histogram
36+
prevHeader time.Time
1637
}
1738

1839
func newMetrics() (*metrics, error) {
19-
totalSynced, err := meter.Float64ObservableGauge(
20-
"total_synced_headers",
21-
metric.WithDescription("total synced headers"),
40+
syncLoopStarted, err := meter.Int64Counter(
41+
"hdr_sync_loop_started_counter",
42+
metric.WithDescription("sync loop started shows that syncing is in progress"),
2243
)
2344
if err != nil {
2445
return nil, err
2546
}
2647

27-
m := &metrics{
28-
totalSyncedGauge: totalSynced,
48+
trustedPeersOutOfSync, err := meter.Int64Counter(
49+
"hdr_sync_trust_peers_out_of_sync_counter",
50+
metric.WithDescription("trusted peers out of sync and gave outdated header"),
51+
)
52+
if err != nil {
53+
return nil, err
2954
}
3055

31-
callback := func(ctx context.Context, observer metric.Observer) error {
32-
observer.ObserveFloat64(totalSynced, float64(m.totalSynced.Load()))
33-
return nil
56+
unrecentHeader, err := meter.Int64Counter(
57+
"hdr_sync_unrecent_header_counter",
58+
metric.WithDescription("tracks every time Syncer returns an unrecent header"),
59+
)
60+
if err != nil {
61+
return nil, err
3462
}
35-
_, err = meter.RegisterCallback(callback, totalSynced)
63+
64+
subjectiveInit, err := meter.Int64Counter(
65+
"hdr_sync_subjective_init_counter",
66+
metric.WithDescription(
67+
"tracks how many times is the node initialized ",
68+
),
69+
)
70+
if err != nil {
71+
return nil, err
72+
}
73+
74+
subjectiveHead, err := meter.Int64ObservableGauge(
75+
"hdr_sync_subjective_head_gauge",
76+
metric.WithDescription("subjective head height"),
77+
)
78+
if err != nil {
79+
return nil, err
80+
}
81+
82+
syncLoopDurationHist, err := meter.Float64Histogram(
83+
"hdr_sync_loop_time_hist",
84+
metric.WithDescription("tracks the duration of syncing"))
85+
if err != nil {
86+
return nil, err
87+
}
88+
89+
requestRangeTimeHist, err := meter.Float64Histogram("hdr_sync_range_request_time_hist",
90+
metric.WithDescription("tracks the duration of GetRangeByHeight requests"))
91+
if err != nil {
92+
return nil, err
93+
}
94+
95+
syncLoopRunningInst, err := meter.Int64ObservableGauge(
96+
"hdr_sync_loop_status_gauge",
97+
metric.WithDescription("reports whether syncing is active or not"))
98+
if err != nil {
99+
return nil, err
100+
}
101+
102+
blockTime, err := meter.Float64Histogram(
103+
"hdr_sync_actual_blockTime_ts_hist",
104+
metric.WithDescription("duration between creation of 2 blocks"),
105+
)
106+
if err != nil {
107+
return nil, err
108+
}
109+
110+
m := &metrics{
111+
syncLoopStarted: syncLoopStarted,
112+
trustedPeersOutOfSync: trustedPeersOutOfSync,
113+
unrecentHeader: unrecentHeader,
114+
subjectiveInit: subjectiveInit,
115+
syncLoopDurationHist: syncLoopDurationHist,
116+
syncLoopRunningInst: syncLoopRunningInst,
117+
requestRangeTimeHist: requestRangeTimeHist,
118+
blockTime: blockTime,
119+
subjectiveHeadInst: subjectiveHead,
120+
}
121+
122+
m.syncerReg, err = meter.RegisterCallback(m.observeMetrics, m.subjectiveHeadInst, m.syncLoopRunningInst)
36123
if err != nil {
37124
return nil, err
38125
}
39126

40127
return m, nil
41128
}
42129

43-
// recordTotalSynced records the total amount of synced headers.
44-
func (m *metrics) recordTotalSynced(totalSynced int) {
130+
func (m *metrics) observeMetrics(_ context.Context, obs metric.Observer) error {
131+
obs.ObserveInt64(m.subjectiveHeadInst, m.subjectiveHead.Load())
132+
obs.ObserveInt64(m.syncLoopRunningInst, m.syncLoopActive.Load())
133+
return nil
134+
}
135+
136+
func (m *metrics) syncStarted(ctx context.Context) {
137+
m.observe(ctx, func(ctx context.Context) {
138+
m.syncStartedTs = time.Now()
139+
m.syncLoopStarted.Add(ctx, 1)
140+
m.syncLoopActive.Store(1)
141+
})
142+
}
143+
144+
func (m *metrics) syncFinished(ctx context.Context) {
145+
m.observe(ctx, func(ctx context.Context) {
146+
m.syncLoopActive.Store(0)
147+
m.syncLoopDurationHist.Record(ctx, time.Since(m.syncStartedTs).Seconds())
148+
})
149+
}
150+
151+
func (m *metrics) unrecentHead(ctx context.Context) {
152+
m.observe(ctx, func(ctx context.Context) {
153+
m.unrecentHeader.Add(ctx, 1)
154+
})
155+
}
156+
157+
func (m *metrics) trustedPeersOutOufSync(ctx context.Context) {
158+
m.observe(ctx, func(ctx context.Context) {
159+
m.trustedPeersOutOfSync.Add(ctx, 1)
160+
})
161+
}
162+
163+
func (m *metrics) subjectiveInitialization(ctx context.Context) {
164+
m.observe(ctx, func(ctx context.Context) {
165+
m.subjectiveInit.Add(ctx, 1)
166+
})
167+
}
168+
169+
func (m *metrics) updateGetRangeRequestInfo(ctx context.Context, amount int, failed bool) {
170+
m.observe(ctx, func(ctx context.Context) {
171+
m.requestRangeTimeHist.Record(ctx, time.Since(m.requestRangeStartTs).Seconds(),
172+
metric.WithAttributes(
173+
attribute.Int("headers amount", amount),
174+
attribute.Bool("request failed", failed),
175+
))
176+
})
177+
}
178+
179+
func (m *metrics) newSubjectiveHead(ctx context.Context, height uint64, timestamp time.Time) {
180+
m.observe(ctx, func(ctx context.Context) {
181+
m.subjectiveHead.Store(int64(height))
182+
183+
if !m.prevHeader.IsZero() {
184+
m.blockTime.Record(ctx, timestamp.Sub(m.prevHeader).Seconds())
185+
}
186+
})
187+
}
188+
189+
func (m *metrics) rangeRequestStart() {
190+
if m == nil {
191+
return
192+
}
193+
m.requestRangeStartTs = time.Now()
194+
}
195+
196+
func (m *metrics) rangeRequestStop() {
45197
if m == nil {
46198
return
47199
}
200+
m.requestRangeStartTs = time.Time{}
201+
}
48202

49-
m.totalSynced.Add(int64(totalSynced))
203+
func (m *metrics) observe(ctx context.Context, observeFn func(context.Context)) {
204+
if m == nil {
205+
return
206+
}
207+
if ctx.Err() != nil {
208+
ctx = context.Background()
209+
}
210+
observeFn(ctx)
211+
}
212+
213+
func (m *metrics) Close() error {
214+
if m == nil {
215+
return nil
216+
}
217+
return m.syncerReg.Unregister()
50218
}

sync/sync.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ func (s *Syncer[H]) Start(ctx context.Context) error {
109109
// Stop stops Syncer.
110110
func (s *Syncer[H]) Stop(context.Context) error {
111111
s.cancel()
112-
return nil
112+
return s.metrics.Close()
113113
}
114114

115115
// SyncWait blocks until ongoing sync is done.
@@ -158,7 +158,7 @@ func (s *Syncer[H]) State() State {
158158

159159
head, err := s.store.Head(s.ctx)
160160
if err == nil {
161-
state.Height = uint64(head.Height())
161+
state.Height = head.Height()
162162
} else if state.Error == "" {
163163
// don't ignore the error if we can show it in the state
164164
state.Error = err.Error()
@@ -180,7 +180,9 @@ func (s *Syncer[H]) syncLoop() {
180180
for {
181181
select {
182182
case <-s.triggerSync:
183+
s.metrics.syncStarted(s.ctx)
183184
s.sync(s.ctx)
185+
s.metrics.syncFinished(s.ctx)
184186
case <-s.ctx.Done():
185187
return
186188
}
@@ -239,8 +241,8 @@ func (s *Syncer[H]) sync(ctx context.Context) {
239241
func (s *Syncer[H]) doSync(ctx context.Context, fromHead, toHead H) (err error) {
240242
s.stateLk.Lock()
241243
s.state.ID++
242-
s.state.FromHeight = uint64(fromHead.Height()) + 1
243-
s.state.ToHeight = uint64(toHead.Height())
244+
s.state.FromHeight = fromHead.Height() + 1
245+
s.state.ToHeight = toHead.Height()
244246
s.state.FromHash = fromHead.Hash()
245247
s.state.ToHash = toHead.Hash()
246248
s.state.Start = time.Now()
@@ -315,7 +317,10 @@ func (s *Syncer[H]) requestHeaders(
315317
}
316318

317319
to := fromHead.Height() + size + 1
320+
s.metrics.rangeRequestStart()
318321
headers, err := s.getter.GetRangeByHeight(ctx, fromHead, to)
322+
s.metrics.updateGetRangeRequestInfo(s.ctx, int(size)/100, err != nil)
323+
s.metrics.rangeRequestStop()
319324
if err != nil {
320325
return err
321326
}
@@ -338,7 +343,5 @@ func (s *Syncer[H]) storeHeaders(ctx context.Context, headers ...H) error {
338343
if err != nil {
339344
return err
340345
}
341-
342-
s.metrics.recordTotalSynced(len(headers))
343346
return nil
344347
}

sync/sync_head.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ func (s *Syncer[H]) Head(ctx context.Context, _ ...header.HeadOption[H]) (H, err
3939
// if we can't get it - give what we have
4040
reqCtx, cancel := context.WithTimeout(ctx, time.Second*2) // TODO(@vgonkivs): make timeout configurable
4141
defer cancel()
42+
s.metrics.unrecentHead(s.ctx)
4243
netHead, err := s.getter.Head(reqCtx, header.WithTrustedHead[H](sbjHead))
4344
if err != nil {
4445
log.Warnw("failed to get recent head, returning current subjective", "sbjHead", sbjHead.Height(), "err", err)
@@ -85,10 +86,12 @@ func (s *Syncer[H]) subjectiveHead(ctx context.Context) (H, error) {
8586
return s.subjectiveHead(ctx)
8687
}
8788
defer s.getter.Unlock()
89+
8890
trustHead, err := s.getter.Head(ctx)
8991
if err != nil {
9092
return trustHead, err
9193
}
94+
s.metrics.subjectiveInitialization(s.ctx)
9295
// and set it as the new subjective head without validation,
9396
// or, in other words, do 'automatic subjective initialization'
9497
// NOTE: we avoid validation as the head expired to prevent possibility of the Long-Range Attack
@@ -103,6 +106,7 @@ func (s *Syncer[H]) subjectiveHead(ctx context.Context) (H, error) {
103106
log.Warnw("subjective initialization with an old header", "height", trustHead.Height())
104107
}
105108
log.Warn("trusted peer is out of sync")
109+
s.metrics.trustedPeersOutOufSync(s.ctx)
106110
return trustHead, nil
107111
}
108112

@@ -121,6 +125,7 @@ func (s *Syncer[H]) setSubjectiveHead(ctx context.Context, netHead H) {
121125
"hash", netHead.Hash().String(),
122126
"err", err)
123127
}
128+
s.metrics.newSubjectiveHead(s.ctx, netHead.Height(), netHead.Time())
124129

125130
storeHead, err := s.store.Head(ctx)
126131
if err == nil && storeHead.Height() >= netHead.Height() {

0 commit comments

Comments
 (0)