Skip to content

Commit 8419a99

Browse files
fix(reacher): add metrics
1 parent 4bf9b11 commit 8419a99

File tree

3 files changed

+71
-2
lines changed

3 files changed

+71
-2
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright 2021 The Swarm Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package reacher
6+
7+
import (
8+
m "github.com/ethersphere/bee/v2/pkg/metrics"
9+
"github.com/prometheus/client_golang/prometheus"
10+
)
11+
12+
// metrics groups reacher related prometheus counters.
13+
type metrics struct {
14+
Peers prometheus.Gauge
15+
PingAttemptCount prometheus.Counter
16+
PingErrorCount prometheus.Counter
17+
PingDuration prometheus.Histogram
18+
}
19+
20+
// newMetrics is a convenient constructor for creating new metrics.
21+
func newMetrics() metrics {
22+
const subsystem = "reacher"
23+
24+
return metrics{
25+
Peers: prometheus.NewGauge(prometheus.GaugeOpts{
26+
Namespace: m.Namespace,
27+
Subsystem: subsystem,
28+
Name: "peers",
29+
Help: "Number of peers currently in the reacher queue.",
30+
}),
31+
PingAttemptCount: prometheus.NewCounter(prometheus.CounterOpts{
32+
Namespace: m.Namespace,
33+
Subsystem: subsystem,
34+
Name: "ping_attempt_count",
35+
Help: "Number of ping attempts.",
36+
}),
37+
PingErrorCount: prometheus.NewCounter(prometheus.CounterOpts{
38+
Namespace: m.Namespace,
39+
Subsystem: subsystem,
40+
Name: "ping_error_count",
41+
Help: "Number of failed ping attempts.",
42+
}),
43+
PingDuration: prometheus.NewHistogram(prometheus.HistogramOpts{
44+
Namespace: m.Namespace,
45+
Subsystem: subsystem,
46+
Name: "ping_duration_seconds",
47+
Help: "Ping latency distribution in seconds.",
48+
Buckets: []float64{.1, .25, .5, 1, 2, 5, 10, 15},
49+
}),
50+
}
51+
}
52+
53+
// Metrics returns set of prometheus collectors.
54+
func (r *reacher) Metrics() []prometheus.Collector {
55+
return m.PrometheusCollectorsFromFields(r.metrics)
56+
}

pkg/p2p/libp2p/internal/reacher/reacher.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import (
2121

2222
const (
2323
pingTimeout = time.Second * 15
24-
workers = 4
24+
workers = 8
2525
retryAfterDuration = time.Minute * 5
2626
maxFailBackoffExponent = 4 // caps failure backoff at retryAfterDuration * 2^4 = 80 min
2727
maxSuccessBackoffExponent = 2 // caps success backoff at retryAfterDuration * 2^2 = 20 min
@@ -51,6 +51,7 @@ type reacher struct {
5151

5252
wg sync.WaitGroup
5353

54+
metrics metrics
5455
options *Options
5556
logger log.Logger
5657
}
@@ -70,6 +71,7 @@ func New(streamer p2p.Pinger, notifier p2p.ReachableNotifier, o *Options, log lo
7071
peerHeap: make(peerHeap, 0),
7172
peerIndex: make(map[string]*peer),
7273
notifier: notifier,
74+
metrics: newMetrics(),
7375
logger: log.WithName("reacher").Register(),
7476
}
7577

@@ -144,14 +146,19 @@ func (r *reacher) ping(c chan peer, ctx context.Context) {
144146
defer r.wg.Done()
145147
for p := range c {
146148
func() {
149+
r.metrics.PingAttemptCount.Inc()
147150
ctxt, cancel := context.WithTimeout(ctx, r.options.PingTimeout)
148151
defer cancel()
152+
start := time.Now()
149153
rtt, err := r.pinger.Ping(ctxt, p.addr)
150154
if err != nil {
155+
r.metrics.PingDuration.Observe(time.Since(start).Seconds())
156+
r.metrics.PingErrorCount.Inc()
151157
r.logger.Debug("ping failed", "peer", p.overlay.String(), "addr", p.addr.String(), "error", err)
152158
r.notifier.Reachable(p.overlay, p2p.ReachabilityStatusPrivate)
153159
r.notifyResult(p.overlay, false, p.generation)
154160
} else {
161+
r.metrics.PingDuration.Observe(rtt.Seconds())
155162
r.logger.Debug("ping succeeded", "peer", p.overlay.String(), "addr", p.addr.String(), "rtt", rtt)
156163
r.notifier.Reachable(p.overlay, p2p.ReachabilityStatusPublic)
157164
r.notifyResult(p.overlay, true, p.generation)
@@ -210,6 +217,7 @@ func (r *reacher) Connected(overlay swarm.Address, addr ma.Multiaddr) {
210217
p := &peer{overlay: overlay, addr: addr}
211218
r.peerIndex[key] = p
212219
heap.Push(&r.peerHeap, p)
220+
r.metrics.Peers.Inc()
213221
}
214222

215223
select {
@@ -267,6 +275,7 @@ func (r *reacher) Disconnected(overlay swarm.Address) {
267275
if p, ok := r.peerIndex[key]; ok {
268276
heap.Remove(&r.peerHeap, p.index)
269277
delete(r.peerIndex, key)
278+
r.metrics.Peers.Dec()
270279
}
271280
}
272281

pkg/p2p/libp2p/metrics.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,11 @@ func newMetrics() metrics {
121121
}
122122

123123
func (s *Service) Metrics() []prometheus.Collector {
124-
return append(m.PrometheusCollectorsFromFields(s.metrics), s.handshakeService.Metrics()...)
124+
collectors := append(m.PrometheusCollectorsFromFields(s.metrics), s.handshakeService.Metrics()...)
125+
if mc, ok := s.reacher.(interface{ Metrics() []prometheus.Collector }); ok {
126+
collectors = append(collectors, mc.Metrics()...)
127+
}
128+
return collectors
125129
}
126130

127131
// StatusMetrics exposes metrics that are exposed on the status protocol.

0 commit comments

Comments
 (0)