Skip to content

Commit 36b4c92

Browse files
committed
feat: quantile instead of percentile
1 parent bbdbca8 commit 36b4c92

File tree

2 files changed

+70
-24
lines changed

2 files changed

+70
-24
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,13 +137,13 @@ When metrics are enabled, the following metrics are exposed:
137137

138138
### `ev_metrics_jsonrpc_request_slo_seconds`
139139
- **Type**: Gauge
140-
- **Labels**: `chain_id`, `percentile`
140+
- **Labels**: `chain_id`, `quantile`
141141
- **Description**: SLO thresholds for JSON-RPC request duration (enabled when `--evm-rpc-url` is provided)
142142
- **Values**:
143-
- `p50`: 0.2s
144-
- `p90`: 0.35s
145-
- `p95`: 0.4s
146-
- `p99`: 0.5s
143+
- `0.5`: 0.2s
144+
- `0.9`: 0.35s
145+
- `0.95`: 0.4s
146+
- `0.99`: 0.5s
147147

148148
### Block Height Drift Metrics
149149

pkg/metrics/metrics.go

Lines changed: 65 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,22 @@ type Metrics struct {
2424
CurrentBlockHeight *prometheus.GaugeVec
2525
// BlockHeightDrift tracks the drift between reference and target endpoints for a specific node.
2626
BlockHeightDrift *prometheus.GaugeVec
27-
// SubmissionDuration tracks DA blob submission duration quantiles over a rolling window.
27+
// SubmissionDuration tracks DA blob submission duration quantiles over a rolling window.
2828
SubmissionDuration *prometheus.SummaryVec
2929
// SubmissionDaHeight tracks the DA height at which blocks were submitted.
3030
SubmissionDaHeight *prometheus.GaugeVec
31-
// BlockTime tracks the time between consecutive blocks over a rolling window.
32-
BlockTime *prometheus.SummaryVec
31+
// BlockTime tracks the time between consecutive blocks with histogram buckets for accurate SLO calculations.
32+
BlockTime *prometheus.HistogramVec
33+
// BlockReceiveDelay tracks the delay between block creation and reception with histogram buckets.
34+
BlockReceiveDelay *prometheus.HistogramVec
3335
// JsonRpcRequestDuration tracks the duration of JSON-RPC requests to the EVM node.
3436
JsonRpcRequestDuration *prometheus.HistogramVec
3537
// JsonRpcRequestSloSeconds exports constant SLO thresholds for JSON-RPC requests.
3638
JsonRpcRequestSloSeconds *prometheus.GaugeVec
39+
// BlockTimeSloSeconds exports constant SLO thresholds for block time.
40+
BlockTimeSloSeconds *prometheus.GaugeVec
41+
// BlockReceiveDelaySloSeconds exports constant SLO thresholds for block receive delay.
42+
BlockReceiveDelaySloSeconds *prometheus.GaugeVec
3743
// EndpointAvailability tracks whether an endpoint is reachable (1.0 = available, 0.0 = unavailable).
3844
EndpointAvailability *prometheus.GaugeVec
3945
// EndpointErrors tracks endpoint connection errors by type.
@@ -136,18 +142,21 @@ func NewWithRegistry(namespace string, registerer prometheus.Registerer) *Metric
136142
},
137143
[]string{"chain_id", "type"},
138144
),
139-
BlockTime: factory.NewSummaryVec(
140-
prometheus.SummaryOpts{
145+
BlockTime: factory.NewHistogramVec(
146+
prometheus.HistogramOpts{
141147
Namespace: namespace,
142148
Name: "block_time_seconds",
143-
Help: "time between consecutive blocks over rolling window",
144-
Objectives: map[float64]float64{
145-
0.5: 0.05, // median block time
146-
0.9: 0.01, // p90
147-
0.99: 0.01, // p99
148-
},
149-
MaxAge: 5 * time.Minute,
150-
AgeBuckets: 5,
149+
Help: "time between consecutive blocks with histogram buckets for accurate SLO calculations",
150+
Buckets: []float64{0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0, 7.5, 10.0, 15.0, 20.0, 30.0},
151+
},
152+
[]string{"chain_id"},
153+
),
154+
BlockReceiveDelay: factory.NewHistogramVec(
155+
prometheus.HistogramOpts{
156+
Namespace: namespace,
157+
Name: "block_receive_delay_seconds",
158+
Help: "delay between block creation and reception with histogram buckets",
159+
Buckets: []float64{0.1, 0.25, 0.5, 1.0, 2.0, 3.0, 5.0, 10.0, 15.0, 30.0, 60.0},
151160
},
152161
[]string{"chain_id"},
153162
),
@@ -166,7 +175,23 @@ func NewWithRegistry(namespace string, registerer prometheus.Registerer) *Metric
166175
Name: "jsonrpc_request_slo_seconds",
167176
Help: "SLO thresholds for JSON-RPC request duration",
168177
},
169-
[]string{"chain_id", "percentile"},
178+
[]string{"chain_id", "quantile"},
179+
),
180+
BlockTimeSloSeconds: factory.NewGaugeVec(
181+
prometheus.GaugeOpts{
182+
Namespace: namespace,
183+
Name: "block_time_slo_seconds",
184+
Help: "SLO thresholds for block time",
185+
},
186+
[]string{"chain_id", "quantile"},
187+
),
188+
BlockReceiveDelaySloSeconds: factory.NewGaugeVec(
189+
prometheus.GaugeOpts{
190+
Namespace: namespace,
191+
Name: "block_receive_delay_slo_seconds",
192+
Help: "SLO thresholds for block receive delay",
193+
},
194+
[]string{"chain_id", "quantile"},
170195
),
171196
EndpointAvailability: factory.NewGaugeVec(
172197
prometheus.GaugeOpts{
@@ -439,17 +464,38 @@ func (m *Metrics) RecordBlockTime(chainID string, arrivalTime time.Time) {
439464
m.lastBlockArrivalTime[chainID] = arrivalTime
440465
}
441466

467+
// RecordBlockReceiveDelay observes the delay between block creation and reception, in seconds, for the given chain.
468+
func (m *Metrics) RecordBlockReceiveDelay(chainID string, delay time.Duration) {
469+
m.BlockReceiveDelay.WithLabelValues(chainID).Observe(delay.Seconds())
470+
}
471+
442472
// RecordJsonRpcRequestDuration observes the duration of a JSON-RPC request, in seconds, for the given chain.
443473
func (m *Metrics) RecordJsonRpcRequestDuration(chainID string, duration time.Duration) {
444474
m.JsonRpcRequestDuration.WithLabelValues(chainID).Observe(duration.Seconds())
445475
}
446476

447-
// InitializeJsonRpcSloThresholds initializes the constant SLO threshold gauges
477+
// InitializeJsonRpcSloThresholds sets the constant JSON-RPC request SLO threshold gauges for the given chain, one per quantile label.
448478
func (m *Metrics) InitializeJsonRpcSloThresholds(chainID string) {
449-
m.JsonRpcRequestSloSeconds.WithLabelValues(chainID, "p50").Set(0.2)
450-
m.JsonRpcRequestSloSeconds.WithLabelValues(chainID, "p90").Set(0.35)
451-
m.JsonRpcRequestSloSeconds.WithLabelValues(chainID, "p95").Set(0.4)
452-
m.JsonRpcRequestSloSeconds.WithLabelValues(chainID, "p99").Set(0.5)
479+
m.JsonRpcRequestSloSeconds.WithLabelValues(chainID, "0.5").Set(0.2)
480+
m.JsonRpcRequestSloSeconds.WithLabelValues(chainID, "0.9").Set(0.35)
481+
m.JsonRpcRequestSloSeconds.WithLabelValues(chainID, "0.95").Set(0.4)
482+
m.JsonRpcRequestSloSeconds.WithLabelValues(chainID, "0.99").Set(0.5)
483+
}
484+
485+
// InitializeBlockTimeSloThresholds sets the constant block time SLO threshold gauges for the given chain, one per quantile label.
486+
func (m *Metrics) InitializeBlockTimeSloThresholds(chainID string) {
487+
m.BlockTimeSloSeconds.WithLabelValues(chainID, "0.5").Set(2.0)
488+
m.BlockTimeSloSeconds.WithLabelValues(chainID, "0.9").Set(3.0)
489+
m.BlockTimeSloSeconds.WithLabelValues(chainID, "0.95").Set(4.0)
490+
m.BlockTimeSloSeconds.WithLabelValues(chainID, "0.99").Set(5.0)
491+
}
492+
493+
// InitializeBlockReceiveDelaySloThresholds sets the constant block receive delay SLO threshold gauges for the given chain, one per quantile label.
494+
func (m *Metrics) InitializeBlockReceiveDelaySloThresholds(chainID string) {
495+
m.BlockReceiveDelaySloSeconds.WithLabelValues(chainID, "0.5").Set(1.0)
496+
m.BlockReceiveDelaySloSeconds.WithLabelValues(chainID, "0.9").Set(3.0)
497+
m.BlockReceiveDelaySloSeconds.WithLabelValues(chainID, "0.95").Set(5.0)
498+
m.BlockReceiveDelaySloSeconds.WithLabelValues(chainID, "0.99").Set(10.0)
453499
}
454500

455501
// RecordEndpointAvailability records whether an endpoint is reachable

0 commit comments

Comments
 (0)