Skip to content

Commit 1239354

Browse files
authored
rcmgr: expose resource limits to Prometheus (#3433)
Signed-off-by: sneax <paladesh600@gmail.com>
1 parent dd26469 commit 1239354

File tree

3 files changed

+201
-3
lines changed

3 files changed

+201
-3
lines changed
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
package rcmgr
2+
3+
import (
4+
"testing"
5+
6+
"github.com/prometheus/client_golang/prometheus"
7+
dto "github.com/prometheus/client_model/go"
8+
)
9+
10+
func TestReportSystemLimits(t *testing.T) {
11+
// Register the metrics
12+
reg := prometheus.NewRegistry()
13+
reg.MustRegister(limits)
14+
15+
// Create a simple limiter with known limits
16+
limiter := NewFixedLimiter(ConcreteLimitConfig{
17+
system: BaseLimit{
18+
Memory: 1024 * 1024 * 1024, // 1GB
19+
FD: 256,
20+
Conns: 100,
21+
ConnsInbound: 50,
22+
ConnsOutbound: 50,
23+
Streams: 200,
24+
StreamsInbound: 100,
25+
StreamsOutbound: 100,
26+
},
27+
transient: BaseLimit{
28+
Memory: 512 * 1024 * 1024, // 512MB
29+
FD: 128,
30+
Conns: 50,
31+
ConnsInbound: 25,
32+
ConnsOutbound: 25,
33+
Streams: 100,
34+
StreamsInbound: 50,
35+
StreamsOutbound: 50,
36+
},
37+
})
38+
39+
// Create a stats reporter
40+
reporter, err := NewStatsTraceReporter()
41+
if err != nil {
42+
t.Fatal(err)
43+
}
44+
45+
// Report the limits
46+
reporter.ReportSystemLimits(limiter)
47+
48+
// Verify that metrics were set
49+
metrics, err := reg.Gather()
50+
if err != nil {
51+
t.Fatal(err)
52+
}
53+
54+
// Find the limits metric
55+
var limitsMetric *dto.MetricFamily
56+
for _, m := range metrics {
57+
if m.GetName() == "libp2p_rcmgr_limit" {
58+
limitsMetric = m
59+
break
60+
}
61+
}
62+
63+
if limitsMetric == nil {
64+
t.Fatal("limits metric not found")
65+
}
66+
67+
// Verify we have metrics for both system and transient scopes
68+
foundSystem := false
69+
foundTransient := false
70+
for _, metric := range limitsMetric.GetMetric() {
71+
for _, label := range metric.GetLabel() {
72+
if label.GetName() == "scope" {
73+
if label.GetValue() == "system" {
74+
foundSystem = true
75+
}
76+
if label.GetValue() == "transient" {
77+
foundTransient = true
78+
}
79+
}
80+
}
81+
}
82+
83+
if !foundSystem {
84+
t.Error("system scope limits not reported")
85+
}
86+
if !foundTransient {
87+
t.Error("transient scope limits not reported")
88+
}
89+
90+
// Verify specific limit values
91+
expectedLimits := map[string]map[string]float64{
92+
"system": {
93+
"memory": 1024 * 1024 * 1024,
94+
"fd": 256,
95+
"conns": 100,
96+
"conns_inbound": 50,
97+
"conns_outbound": 50,
98+
"streams": 200,
99+
"streams_inbound": 100,
100+
"streams_outbound": 100,
101+
},
102+
"transient": {
103+
"memory": 512 * 1024 * 1024,
104+
"fd": 128,
105+
"conns": 50,
106+
"conns_inbound": 25,
107+
"conns_outbound": 25,
108+
"streams": 100,
109+
"streams_inbound": 50,
110+
"streams_outbound": 50,
111+
},
112+
}
113+
114+
for _, metric := range limitsMetric.GetMetric() {
115+
var scope, resource string
116+
for _, label := range metric.GetLabel() {
117+
if label.GetName() == "scope" {
118+
scope = label.GetValue()
119+
}
120+
if label.GetName() == "resource" {
121+
resource = label.GetValue()
122+
}
123+
}
124+
125+
if scope == "" || resource == "" {
126+
continue
127+
}
128+
129+
expectedValue, ok := expectedLimits[scope][resource]
130+
if !ok {
131+
continue
132+
}
133+
134+
actualValue := metric.GetGauge().GetValue()
135+
if actualValue != expectedValue {
136+
t.Errorf("limit mismatch for %s/%s: expected %v, got %v", scope, resource, expectedValue, actualValue)
137+
}
138+
}
139+
}
140+
141+
func TestReportSystemLimitsNilLimiter(t *testing.T) {
142+
reporter, err := NewStatsTraceReporter()
143+
if err != nil {
144+
t.Fatal(err)
145+
}
146+
147+
// Should not panic with nil limiter
148+
reporter.ReportSystemLimits(nil)
149+
}

p2p/host/resource-manager/rcmgr.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,17 +178,20 @@ func NewResourceManager(limits Limiter, opts ...Option) (network.ResourceManager
178178
r.verifySourceAddressRateLimiter = newVerifySourceAddressRateLimiter(r.connLimiter)
179179

180180
if !r.disableMetrics {
181-
var sr TraceReporter
182181
sr, err := NewStatsTraceReporter()
183182
if err != nil {
184183
log.Error("failed to initialise StatsTraceReporter", "err", err)
185184
} else {
185+
// Report system limits to Prometheus
186+
sr.ReportSystemLimits(limits)
187+
186188
if r.trace == nil {
187189
r.trace = &trace{}
188190
}
189191
found := false
190192
for _, rep := range r.trace.reporters {
191-
if rep == sr {
193+
// Compare the actual reporter, not the interface
194+
if _, ok := rep.(StatsTraceReporter); ok {
192195
found = true
193196
break
194197
}

p2p/host/resource-manager/stats.go

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package rcmgr
33
import (
44
"strings"
55

6+
"github.com/libp2p/go-libp2p/core/network"
67
"github.com/libp2p/go-libp2p/p2p/metricshelper"
78
"github.com/prometheus/client_golang/prometheus"
89
)
@@ -123,6 +124,13 @@ var (
123124
Name: "blocked_resources",
124125
Help: "Number of blocked resources",
125126
}, []string{"dir", "scope", "resource"})
127+
128+
// System limits
129+
limits = prometheus.NewGaugeVec(prometheus.GaugeOpts{
130+
Namespace: metricNamespace,
131+
Name: "limit",
132+
Help: "Resource manager limits",
133+
}, []string{"scope", "resource"})
126134
)
127135

128136
var (
@@ -157,6 +165,7 @@ func MustRegisterWith(reg prometheus.Registerer) {
157165
previousConnMemory,
158166
fds,
159167
blockedResources,
168+
limits,
160169
)
161170
}
162171

@@ -171,10 +180,47 @@ func WithMetricsDisabled() Option {
171180
type StatsTraceReporter struct{}
172181

173182
func NewStatsTraceReporter() (StatsTraceReporter, error) {
174-
// TODO tell prometheus the system limits
175183
return StatsTraceReporter{}, nil
176184
}
177185

186+
// reportLimit reports a limit value to Prometheus
187+
func reportLimit(scope, resource string, value int64) {
188+
limits.With(prometheus.Labels{
189+
"scope": scope,
190+
"resource": resource,
191+
}).Set(float64(value))
192+
}
193+
194+
// ReportSystemLimits reports the system limits to Prometheus.
195+
// This should be called after creating the StatsTraceReporter with the resource manager's limits.
196+
func (r StatsTraceReporter) ReportSystemLimits(limiter Limiter) {
197+
if limiter == nil {
198+
return
199+
}
200+
201+
// System limits
202+
systemLimits := limiter.GetSystemLimits()
203+
reportLimit("system", "memory", systemLimits.GetMemoryLimit())
204+
reportLimit("system", "fd", int64(systemLimits.GetFDLimit()))
205+
reportLimit("system", "conns", int64(systemLimits.GetConnTotalLimit()))
206+
reportLimit("system", "conns_inbound", int64(systemLimits.GetConnLimit(network.DirInbound)))
207+
reportLimit("system", "conns_outbound", int64(systemLimits.GetConnLimit(network.DirOutbound)))
208+
reportLimit("system", "streams", int64(systemLimits.GetStreamTotalLimit()))
209+
reportLimit("system", "streams_inbound", int64(systemLimits.GetStreamLimit(network.DirInbound)))
210+
reportLimit("system", "streams_outbound", int64(systemLimits.GetStreamLimit(network.DirOutbound)))
211+
212+
// Transient limits
213+
transientLimits := limiter.GetTransientLimits()
214+
reportLimit("transient", "memory", transientLimits.GetMemoryLimit())
215+
reportLimit("transient", "fd", int64(transientLimits.GetFDLimit()))
216+
reportLimit("transient", "conns", int64(transientLimits.GetConnTotalLimit()))
217+
reportLimit("transient", "conns_inbound", int64(transientLimits.GetConnLimit(network.DirInbound)))
218+
reportLimit("transient", "conns_outbound", int64(transientLimits.GetConnLimit(network.DirOutbound)))
219+
reportLimit("transient", "streams", int64(transientLimits.GetStreamTotalLimit()))
220+
reportLimit("transient", "streams_inbound", int64(transientLimits.GetStreamLimit(network.DirInbound)))
221+
reportLimit("transient", "streams_outbound", int64(transientLimits.GetStreamLimit(network.DirOutbound)))
222+
}
223+
178224
func (r StatsTraceReporter) ConsumeEvent(evt TraceEvt) {
179225
tags := metricshelper.GetStringSlice()
180226
defer metricshelper.PutStringSlice(tags)

0 commit comments

Comments
 (0)