Skip to content

Commit aa5c4a6

Browse files
authored
chore(sync): Improve metrics, remove unnecessary ones (#354)
Removes unused metrics from the syncer and adds a better range-request metric. I do not consider this breaking, as the metrics were relatively useless to begin with and no one relies on them. Overrides #352.
1 parent 425f0dc commit aa5c4a6

File tree

3 files changed

+20
-96
lines changed

3 files changed

+20
-96
lines changed

sync/metrics.go

Lines changed: 17 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -11,47 +11,28 @@ import (
1111
"go.opentelemetry.io/otel/attribute"
1212
"go.opentelemetry.io/otel/metric"
1313

14-
otelattr "github.com/celestiaorg/go-header/internal/otelattr"
14+
"github.com/celestiaorg/go-header"
1515
)
1616

1717
var meter = otel.Meter("header/sync")
1818

1919
type metrics struct {
2020
syncerReg metric.Registration
2121

22-
subjectiveHeadInst metric.Int64ObservableGauge
23-
syncLoopRunningInst metric.Int64ObservableGauge
24-
25-
syncLoopStarted metric.Int64Counter
2622
trustedPeersOutOfSync metric.Int64Counter
2723
outdatedHeader metric.Int64Counter
2824
subjectiveInit metric.Int64Counter
2925

3026
bifurcations metric.Int64Counter
3127
failedBifurcations metric.Int64Counter
3228

33-
subjectiveHead atomic.Uint64
34-
35-
syncLoopDurationHist metric.Float64Histogram
36-
syncLoopActive atomic.Int64
37-
syncStartedTS time.Time
29+
subjectiveHeadInst metric.Int64ObservableGauge
30+
subjectiveHead atomic.Uint64
3831

3932
requestRangeTimeHist metric.Float64Histogram
40-
requestRangeStartTS time.Time
41-
42-
blockTime metric.Float64Histogram
43-
prevHeader time.Time
4433
}
4534

4635
func newMetrics() (*metrics, error) {
47-
syncLoopStarted, err := meter.Int64Counter(
48-
"hdr_sync_loop_started_counter",
49-
metric.WithDescription("sync loop started shows that syncing is in progress"),
50-
)
51-
if err != nil {
52-
return nil, err
53-
}
54-
5536
trustedPeersOutOfSync, err := meter.Int64Counter(
5637
"hdr_sync_trust_peers_out_of_sync_counter",
5738
metric.WithDescription("trusted peers out of sync and gave outdated header"),
@@ -106,52 +87,25 @@ func newMetrics() (*metrics, error) {
10687
return nil, err
10788
}
10889

109-
syncLoopDurationHist, err := meter.Float64Histogram(
110-
"hdr_sync_loop_time_hist",
111-
metric.WithDescription("tracks the duration of syncing"))
112-
if err != nil {
113-
return nil, err
114-
}
115-
11690
requestRangeTimeHist, err := meter.Float64Histogram("hdr_sync_range_request_time_hist",
11791
metric.WithDescription("tracks the duration of GetRangeByHeight requests"))
11892
if err != nil {
11993
return nil, err
12094
}
12195

122-
syncLoopRunningInst, err := meter.Int64ObservableGauge(
123-
"hdr_sync_loop_status_gauge",
124-
metric.WithDescription("reports whether syncing is active or not"))
125-
if err != nil {
126-
return nil, err
127-
}
128-
129-
blockTime, err := meter.Float64Histogram(
130-
"hdr_sync_actual_blockTime_ts_hist",
131-
metric.WithDescription("duration between creation of 2 blocks"),
132-
)
133-
if err != nil {
134-
return nil, err
135-
}
136-
13796
m := &metrics{
138-
syncLoopStarted: syncLoopStarted,
13997
trustedPeersOutOfSync: trustedPeersOutOfSync,
14098
outdatedHeader: outdatedHeader,
14199
subjectiveInit: subjectiveInit,
142100
bifurcations: bifurcations,
143101
failedBifurcations: failedBifurcations,
144-
syncLoopDurationHist: syncLoopDurationHist,
145-
syncLoopRunningInst: syncLoopRunningInst,
146102
requestRangeTimeHist: requestRangeTimeHist,
147-
blockTime: blockTime,
148103
subjectiveHeadInst: subjectiveHead,
149104
}
150105

151106
m.syncerReg, err = meter.RegisterCallback(
152107
m.observeMetrics,
153108
m.subjectiveHeadInst,
154-
m.syncLoopRunningInst,
155109
)
156110
if err != nil {
157111
return nil, err
@@ -166,25 +120,9 @@ func (m *metrics) observeMetrics(_ context.Context, obs metric.Observer) error {
166120
return fmt.Errorf("height overflows int64: %d", headHeight)
167121
}
168122
obs.ObserveInt64(m.subjectiveHeadInst, int64(headHeight))
169-
obs.ObserveInt64(m.syncLoopRunningInst, m.syncLoopActive.Load())
170123
return nil
171124
}
172125

173-
func (m *metrics) syncStarted(ctx context.Context) {
174-
m.observe(ctx, func(ctx context.Context) {
175-
m.syncStartedTS = time.Now()
176-
m.syncLoopStarted.Add(ctx, 1)
177-
m.syncLoopActive.Store(1)
178-
})
179-
}
180-
181-
func (m *metrics) syncFinished(ctx context.Context) {
182-
m.observe(ctx, func(ctx context.Context) {
183-
m.syncLoopActive.Store(0)
184-
m.syncLoopDurationHist.Record(ctx, time.Since(m.syncStartedTS).Seconds())
185-
})
186-
}
187-
188126
func (m *metrics) outdatedHead(ctx context.Context) {
189127
m.observe(ctx, func(ctx context.Context) {
190128
m.outdatedHeader.Add(ctx, 1)
@@ -203,23 +141,9 @@ func (m *metrics) subjectiveInitialization(ctx context.Context) {
203141
})
204142
}
205143

206-
func (m *metrics) updateGetRangeRequestInfo(ctx context.Context, amount uint64, failed bool) {
207-
m.observe(ctx, func(ctx context.Context) {
208-
m.requestRangeTimeHist.Record(ctx, time.Since(m.requestRangeStartTS).Seconds(),
209-
metric.WithAttributes(
210-
otelattr.Uint64("headers amount", amount),
211-
attribute.Bool("request failed", failed),
212-
))
213-
})
214-
}
215-
216-
func (m *metrics) newSubjectiveHead(ctx context.Context, height uint64, timestamp time.Time) {
217-
m.observe(ctx, func(ctx context.Context) {
144+
func (m *metrics) newSubjectiveHead(ctx context.Context, height uint64) {
145+
m.observe(ctx, func(_ context.Context) {
218146
m.subjectiveHead.Store(height)
219-
220-
if !m.prevHeader.IsZero() {
221-
m.blockTime.Record(ctx, timestamp.Sub(m.prevHeader).Seconds())
222-
}
223147
})
224148
}
225149

@@ -245,18 +169,21 @@ func (m *metrics) failedBifurcation(ctx context.Context, height uint64, hash str
245169
})
246170
}
247171

248-
func (m *metrics) rangeRequestStart() {
249-
if m == nil {
250-
return
251-
}
252-
m.requestRangeStartTS = time.Now()
253-
}
254-
255-
func (m *metrics) rangeRequestStop() {
172+
// recordRangeRequest will record the duration it takes to request a batch of
173+
// headers. It will also record whether the range request was the full
174+
// MaxRangeRequestSize (64) or not, giving us a lower-cardinality way to derive
175+
// sync speed.
176+
func (m *metrics) recordRangeRequest(ctx context.Context, duration time.Duration, size uint64) {
256177
if m == nil {
257178
return
258179
}
259-
m.requestRangeStartTS = time.Time{}
180+
m.observe(ctx, func(ctx context.Context) {
181+
m.requestRangeTimeHist.Record(ctx, duration.Seconds(),
182+
metric.WithAttributes(
183+
attribute.Bool("is_max_range_req_size", size == header.MaxRangeRequestSize),
184+
),
185+
)
186+
})
260187
}
261188

262189
func (m *metrics) observe(ctx context.Context, observeFn func(context.Context)) {

sync/syncer.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,7 @@ func (s *Syncer[H]) syncLoop() {
223223
for {
224224
select {
225225
case <-s.triggerSync:
226-
s.metrics.syncStarted(s.ctx)
227226
s.sync(s.ctx)
228-
s.metrics.syncFinished(s.ctx)
229227
case <-s.ctx.Done():
230228
return
231229
}
@@ -360,10 +358,9 @@ func (s *Syncer[H]) requestHeaders(
360358
}
361359

362360
to := fromHead.Height() + size + 1
363-
s.metrics.rangeRequestStart()
361+
start := time.Now()
364362
headers, err := s.getter.GetRangeByHeight(ctx, fromHead, to)
365-
s.metrics.updateGetRangeRequestInfo(s.ctx, size/100, err != nil)
366-
s.metrics.rangeRequestStop()
363+
s.metrics.recordRangeRequest(ctx, time.Since(start), size)
367364
if err != nil {
368365
return err
369366
}

sync/syncer_head.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ func (s *Syncer[H]) setLocalHead(ctx context.Context, netHead H) {
201201
"hash", netHead.Hash().String(),
202202
"err", err)
203203
}
204-
s.metrics.newSubjectiveHead(s.ctx, netHead.Height(), netHead.Time())
204+
s.metrics.newSubjectiveHead(s.ctx, netHead.Height())
205205

206206
storeHead, err := s.store.Head(ctx)
207207
if err == nil && storeHead.Height() >= netHead.Height() {

0 commit comments

Comments (0)