Skip to content

Commit 88ee34c

Browse files
authored
feat(metric): add AltDA metrics to batcher
* Adds a new `eigenda_failover_total` counter metric to the batcher, tracking the total number of failovers from AltDA to EthDA.
* Adds a `datype` label to the `batcherTxEvs` metrics.
1 parent 6519ff6 commit 88ee34c

File tree

4 files changed

+36
-18
lines changed

4 files changed

+36
-18
lines changed

op-batcher/batcher/channel.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ func (c *channel) rewindAltDAFrameCursor(txData txData) {
8181
// It rewinds the channelBuilder's frameCursor to the first frame of the failed txData,
8282
// so that the frames can be resubmitted. failoverToEthDA should be set to true when using altDA
8383
// and altDA is down. This will switch the channel to submit frames to ethDA instead.
84-
// TODO: add a metric for altDA submission failures.
8584
func (c *channel) AltDASubmissionFailed(id string, failoverToEthDA bool) {
8685
// We coopt TxFailed to rewind the frame cursor.
8786
// This will force a resubmit of all the following frames as well,
@@ -100,6 +99,7 @@ func (c *channel) AltDASubmissionFailed(id string, failoverToEthDA bool) {
10099
// batcherService.initChannelConfig function stateless so that we can reuse it.
101100
c.log.Info("Failing over to calldata txs", "id", c.ID())
102101
c.cfg.DaType = DaTypeCalldata
102+
c.metr.RecordFailoverToEthDA()
103103
}
104104
}
105105

@@ -124,13 +124,13 @@ func (c *channel) TxFailed(id string) {
124124
} else {
125125
c.log.Warn("unknown transaction marked as failed", "id", id)
126126
}
127-
c.metr.RecordBatchTxFailed()
127+
c.metr.RecordBatchTxFailed(c.cfg.DaType.String())
128128
}
129129

130130
// TxConfirmed marks a transaction as confirmed on L1. Returns a bool indicating
131131
// whether the channel timed out on chain.
132132
func (c *channel) TxConfirmed(id string, inclusionBlock eth.BlockID) bool {
133-
c.metr.RecordBatchTxSuccess()
133+
c.metr.RecordBatchTxSuccess(c.cfg.DaType.String())
134134
c.log.Debug("marked transaction as confirmed", "id", id, "block", inclusionBlock)
135135
if _, ok := c.pendingTransactions[id]; !ok {
136136
c.log.Warn("unknown transaction marked as confirmed", "id", id, "block", inclusionBlock)

op-batcher/batcher/channel_manager.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,16 +147,18 @@ func (s *channelManager) TxFailed(_id txID) {
147147
func (s *channelManager) TxConfirmed(_id txID, inclusionBlock eth.BlockID) {
148148

149149
id := _id.String()
150+
daType := DaTypeCalldata
150151
if channel, ok := s.txChannels[id]; ok {
151152
delete(s.txChannels, id)
152153
if timedOut := channel.TxConfirmed(id, inclusionBlock); timedOut {
153154
s.log.Warn("channel timed out on chain", "channel_id", channel.ID(), "tx_id", id)
154155
s.handleChannelInvalidated(channel)
155156
}
157+
daType = channel.cfg.DaType
156158
} else {
157159
s.log.Warn("transaction from unknown channel marked as confirmed", "id", id)
158160
}
159-
s.metr.RecordBatchTxSubmitted()
161+
s.metr.RecordBatchTxSubmitted(daType.String())
160162
s.log.Debug("marked transaction as confirmed", "id", id, "block", inclusionBlock)
161163
}
162164

op-batcher/metrics/metrics.go

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,17 @@ type Metricer interface {
5959
// It should be called when clearing the ChannelManager state.
6060
ClearAllStateMetrics()
6161

62-
RecordBatchTxSubmitted()
63-
RecordBatchTxSuccess()
64-
RecordBatchTxFailed()
62+
RecordBatchTxSubmitted(daType string)
63+
RecordBatchTxSuccess(daType string)
64+
RecordBatchTxFailed(daType string)
6565

6666
RecordBlobUsedBytes(num int)
6767

6868
Document() []opmetrics.DocumentedMetric
6969

7070
PendingDABytes() float64
71+
72+
RecordFailoverToEthDA()
7173
}
7274

7375
type Metrics struct {
@@ -122,6 +124,8 @@ type Metrics struct {
122124
pidControllerIntegral prometheus.Gauge
123125
pidControllerDerivative prometheus.Gauge
124126
pidResponseTime prometheus.Histogram
127+
128+
eigenDAFailoverToEthDA prometheus.Counter
125129
}
126130

127131
var _ Metricer = (*Metrics)(nil)
@@ -235,7 +239,7 @@ func NewMetrics(procName string) *Metrics {
235239
Buckets: prometheus.LinearBuckets(0.0, eth.MaxBlobDataSize/13, 14),
236240
}),
237241

238-
batcherTxEvs: opmetrics.NewEventVec(factory, ns, "", "batcher_tx", "BatcherTx", []string{"stage"}),
242+
batcherTxEvs: opmetrics.NewEventVec(factory, ns, "", "batcher_tx", "BatcherTx", []string{"stage", "datype"}),
239243

240244
throttleIntensity: *factory.NewGaugeVec(prometheus.GaugeOpts{
241245
Namespace: ns,
@@ -298,6 +302,11 @@ func NewMetrics(procName string) *Metrics {
298302
Name: "unsafe_da_bytes",
299303
Help: "The estimated number of unsafe DA bytes",
300304
}),
305+
eigenDAFailoverToEthDA: factory.NewCounter(prometheus.CounterOpts{
306+
Namespace: ns,
307+
Name: "eigenda_failover_total",
308+
Help: "Total number of failovers to EthDA",
309+
}),
301310
}
302311
m.pendingDABytesGaugeFunc = factory.NewGaugeFunc(prometheus.GaugeOpts{
303312
Namespace: ns,
@@ -428,16 +437,16 @@ func (m *Metrics) RecordChannelTimedOut(id derive.ChannelID) {
428437
m.channelEvs.Record(StageTimedOut)
429438
}
430439

431-
func (m *Metrics) RecordBatchTxSubmitted() {
432-
m.batcherTxEvs.Record(TxStageSubmitted)
440+
func (m *Metrics) RecordBatchTxSubmitted(daType string) {
441+
m.batcherTxEvs.Record(TxStageSubmitted, daType)
433442
}
434443

435-
func (m *Metrics) RecordBatchTxSuccess() {
436-
m.batcherTxEvs.Record(TxStageSuccess)
444+
func (m *Metrics) RecordBatchTxSuccess(daType string) {
445+
m.batcherTxEvs.Record(TxStageSuccess, daType)
437446
}
438447

439-
func (m *Metrics) RecordBatchTxFailed() {
440-
m.batcherTxEvs.Record(TxStageFailed)
448+
func (m *Metrics) RecordBatchTxFailed(daType string) {
449+
m.batcherTxEvs.Record(TxStageFailed, daType)
441450
}
442451

443452
func (m *Metrics) RecordBlobUsedBytes(num int) {
@@ -511,3 +520,8 @@ func (m *Metrics) RecordThrottleControllerState(error, integral, derivative floa
511520
func (m *Metrics) RecordThrottleResponseTime(duration time.Duration) {
512521
m.pidResponseTime.Observe(duration.Seconds())
513522
}
523+
524+
// RecordFailoverToEthDA increments the eigenda_failover_total counter; it is called each time a channel fails over to EthDA.
525+
func (m *Metrics) RecordFailoverToEthDA() {
526+
m.eigenDAFailoverToEthDA.Inc()
527+
}

op-batcher/metrics/noop.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,10 @@ func (*noopMetrics) RecordUnsafeDABytes(int64) {}
5656
func (*noopMetrics) RecordThrottleControllerState(error, integral, derivative float64) {}
5757
func (*noopMetrics) RecordThrottleResponseTime(duration time.Duration) {}
5858

59-
func (*noopMetrics) RecordBatchTxSubmitted() {}
60-
func (*noopMetrics) RecordBatchTxSuccess() {}
61-
func (*noopMetrics) RecordBatchTxFailed() {}
62-
func (*noopMetrics) RecordBlobUsedBytes(int) {}
59+
func (*noopMetrics) RecordBatchTxSubmitted(string) {}
60+
func (*noopMetrics) RecordBatchTxSuccess(string) {}
61+
func (*noopMetrics) RecordBatchTxFailed(string) {}
62+
func (*noopMetrics) RecordBlobUsedBytes(int) {}
6363
func (*noopMetrics) StartBalanceMetrics(log.Logger, *ethclient.Client, common.Address) io.Closer {
6464
return nil
6565
}
@@ -78,3 +78,5 @@ func (nm *ThrottlingMetrics) PendingDABytes() float64 {
7878
}
7979

8080
func (*noopMetrics) ClearAllStateMetrics() {}
81+
82+
func (*noopMetrics) RecordFailoverToEthDA() {}

0 commit comments

Comments
 (0)