Skip to content

Commit d589ea5

Browse files
authored
feat: add user error metrics (#1356)
1 parent 5a5cfda commit d589ea5

File tree

3 files changed

+179
-6
lines changed

3 files changed

+179
-6
lines changed

batcher/aligned-batcher/src/lib.rs

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,7 @@ impl Batcher {
399399
Ok(msg) => msg,
400400
Err(e) => {
401401
warn!("Failed to deserialize message: {}", e);
402+
self.metrics.user_error(&["deserialize_error", ""]);
402403
return Ok(());
403404
}
404405
};
@@ -419,7 +420,7 @@ impl Batcher {
419420
ValidityResponseMessage::InvalidChainId,
420421
)
421422
.await;
422-
423+
self.metrics.user_error(&["invalid_chain_id", ""]);
423424
return Ok(());
424425
}
425426

@@ -435,7 +436,8 @@ impl Batcher {
435436
),
436437
)
437438
.await;
438-
439+
self.metrics
440+
.user_error(&["invalid_payment_service_address", ""]);
439441
return Ok(());
440442
}
441443

@@ -447,6 +449,7 @@ impl Batcher {
447449
ValidityResponseMessage::InvalidSignature,
448450
)
449451
.await;
452+
self.metrics.user_error(&["invalid_signature", ""]);
450453
return Ok(());
451454
};
452455
info!("Message signature verified");
@@ -455,6 +458,7 @@ impl Batcher {
455458
if proof_size > self.max_proof_size {
456459
error!("Proof size exceeds the maximum allowed size.");
457460
send_message(ws_conn_sink.clone(), ValidityResponseMessage::ProofTooLarge).await;
461+
self.metrics.user_error(&["proof_too_large", ""]);
458462
return Ok(());
459463
}
460464

@@ -478,6 +482,10 @@ impl Batcher {
478482
)),
479483
)
480484
.await;
485+
self.metrics.user_error(&[
486+
"disabled_verifier",
487+
&format!("{}", verification_data.proving_system),
488+
]);
481489
return Ok(());
482490
}
483491

@@ -488,6 +496,10 @@ impl Batcher {
488496
ValidityResponseMessage::InvalidProof(ProofInvalidReason::RejectedProof),
489497
)
490498
.await;
499+
self.metrics.user_error(&[
500+
"rejected_proof",
501+
&format!("{}", verification_data.proving_system),
502+
]);
491503
return Ok(());
492504
}
493505
}
@@ -509,6 +521,7 @@ impl Batcher {
509521
ValidityResponseMessage::InsufficientBalance(addr),
510522
)
511523
.await;
524+
self.metrics.user_error(&["insufficient_balance", ""]);
512525
return Ok(());
513526
}
514527

@@ -530,6 +543,7 @@ impl Batcher {
530543
"Failed to get user nonce from Ethereum for address {addr:?}. Error: {e:?}"
531544
);
532545
send_message(ws_conn_sink.clone(), ValidityResponseMessage::InvalidNonce).await;
546+
self.metrics.user_error(&["invalid_nonce", ""]);
533547
return Ok(());
534548
}
535549
};
@@ -548,6 +562,7 @@ impl Batcher {
548562
let Some(user_balance) = self.get_user_balance(&addr).await else {
549563
error!("Could not get balance for address {addr:?}");
550564
send_message(ws_conn_sink.clone(), ValidityResponseMessage::EthRpcError).await;
565+
self.metrics.user_error(&["eth_rpc_error", ""]);
551566
return Ok(());
552567
};
553568

@@ -560,6 +575,7 @@ impl Batcher {
560575
error!("Failed to get user proof count: User not found in user states, but it should have been already inserted");
561576
std::mem::drop(batch_state_lock);
562577
send_message(ws_conn_sink.clone(), ValidityResponseMessage::InvalidNonce).await;
578+
self.metrics.user_error(&["invalid_nonce", ""]);
563579
return Ok(());
564580
};
565581

@@ -570,6 +586,7 @@ impl Batcher {
570586
ValidityResponseMessage::InsufficientBalance(addr),
571587
)
572588
.await;
589+
self.metrics.user_error(&["insufficient_balance", ""]);
573590
return Ok(());
574591
}
575592

@@ -578,13 +595,15 @@ impl Batcher {
578595
error!("Failed to get cached user nonce: User not found in user states, but it should have been already inserted");
579596
std::mem::drop(batch_state_lock);
580597
send_message(ws_conn_sink.clone(), ValidityResponseMessage::InvalidNonce).await;
598+
self.metrics.user_error(&["invalid_nonce", ""]);
581599
return Ok(());
582600
};
583601

584602
if expected_nonce < msg_nonce {
585603
std::mem::drop(batch_state_lock);
586604
warn!("Invalid nonce for address {addr}, had nonce {expected_nonce:?} < {msg_nonce:?}");
587605
send_message(ws_conn_sink.clone(), ValidityResponseMessage::InvalidNonce).await;
606+
self.metrics.user_error(&["invalid_nonce", ""]);
588607
return Ok(());
589608
}
590609

@@ -608,13 +627,15 @@ impl Batcher {
608627
let Some(user_min_fee) = batch_state_lock.get_user_min_fee(&addr).await else {
609628
std::mem::drop(batch_state_lock);
610629
send_message(ws_conn_sink.clone(), ValidityResponseMessage::InvalidNonce).await;
630+
self.metrics.user_error(&["invalid_nonce", ""]);
611631
return Ok(());
612632
};
613633

614634
if msg_max_fee > user_min_fee {
615635
std::mem::drop(batch_state_lock);
616636
warn!("Invalid max fee for address {addr}, had fee {user_min_fee:?} < {msg_max_fee:?}");
617637
send_message(ws_conn_sink.clone(), ValidityResponseMessage::InvalidMaxFee).await;
638+
self.metrics.user_error(&["invalid_max_fee", ""]);
618639
return Ok(());
619640
}
620641

@@ -634,6 +655,7 @@ impl Batcher {
634655
{
635656
error!("Error while adding entry to batch: {e:?}");
636657
send_message(ws_conn_sink, ValidityResponseMessage::AddToBatchError).await;
658+
self.metrics.user_error(&["add_to_batch_error", ""]);
637659
return Ok(());
638660
};
639661

@@ -674,6 +696,7 @@ impl Batcher {
674696
std::mem::drop(batch_state_lock);
675697
warn!("Invalid nonce for address {addr}. Queue entry with nonce {nonce} not found");
676698
send_message(ws_conn_sink.clone(), ValidityResponseMessage::InvalidNonce).await;
699+
self.metrics.user_error(&["invalid_nonce", ""]);
677700
return;
678701
};
679702

@@ -686,7 +709,8 @@ impl Batcher {
686709
ValidityResponseMessage::InvalidReplacementMessage,
687710
)
688711
.await;
689-
712+
self.metrics
713+
.user_error(&["invalid_replacement_message", ""]);
690714
return;
691715
}
692716

@@ -723,6 +747,8 @@ impl Batcher {
723747
ValidityResponseMessage::InvalidReplacementMessage,
724748
)
725749
.await;
750+
self.metrics
751+
.user_error(&["invalid_replacement_message", ""]);
726752
return;
727753
}
728754

batcher/aligned-batcher/src/metrics.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
use std::{thread, time::Duration};
22

33
// Prometheus
4-
use prometheus::{opts, register_int_counter, register_int_gauge, IntCounter, IntGauge};
4+
use prometheus::{
5+
opts, register_int_counter, register_int_counter_vec, register_int_gauge, IntCounter,
6+
IntCounterVec, IntGauge,
7+
};
58

69
use warp::{Filter, Rejection, Reply};
710

@@ -12,6 +15,7 @@ pub struct BatcherMetrics {
1215
pub sent_batches: IntCounter,
1316
pub reverted_batches: IntCounter,
1417
pub canceled_batches: IntCounter,
18+
pub user_errors: IntCounterVec,
1519
pub batcher_started: IntCounter,
1620
pub gas_price_used_on_latest_batch: IntGauge,
1721
pub broken_ws_connections: IntCounter,
@@ -28,6 +32,10 @@ impl BatcherMetrics {
2832
register_int_counter!(opts!("reverted_batches", "Reverted Batches"))?;
2933
let canceled_batches =
3034
register_int_counter!(opts!("canceled_batches", "Canceled Batches"))?;
35+
let user_errors = register_int_counter_vec!(
36+
opts!("user_errors", "User Errors"),
37+
&["error_type", "proving_system"]
38+
)?;
3139
let batcher_started = register_int_counter!(opts!("batcher_started", "Batcher Started"))?;
3240
let gas_price_used_on_latest_batch =
3341
register_int_gauge!(opts!("gas_price_used_on_latest_batch", "Gas Price"))?;
@@ -41,6 +49,7 @@ impl BatcherMetrics {
4149
registry.register(Box::new(sent_batches.clone()))?;
4250
registry.register(Box::new(reverted_batches.clone()))?;
4351
registry.register(Box::new(canceled_batches.clone()))?;
52+
registry.register(Box::new(user_errors.clone()))?;
4453
registry.register(Box::new(gas_price_used_on_latest_batch.clone()))?;
4554
registry.register(Box::new(batcher_started.clone()))?;
4655
registry.register(Box::new(broken_ws_connections.clone()))?;
@@ -61,6 +70,7 @@ impl BatcherMetrics {
6170
sent_batches,
6271
reverted_batches,
6372
canceled_batches,
73+
user_errors,
6474
batcher_started,
6575
gas_price_used_on_latest_batch,
6676
broken_ws_connections,
@@ -89,4 +99,8 @@ impl BatcherMetrics {
8999
thread::sleep(Duration::from_secs(2));
90100
self.batcher_started.inc();
91101
}
102+
103+
pub fn user_error(&self, label_values: &[&str]) {
104+
self.user_errors.with_label_values(label_values).inc();
105+
}
92106
}

grafana/provisioning/dashboards/aligned/aggregator_batcher.json

Lines changed: 135 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,7 @@
943943
},
944944
"gridPos": {
945945
"h": 8,
946-
"w": 12,
946+
"w": 10,
947947
"x": 0,
948948
"y": 25
949949
},
@@ -982,6 +982,139 @@
982982
"title": "Broken websocket connections",
983983
"type": "timeseries"
984984
},
985+
{
986+
"datasource": {
987+
"type": "prometheus",
988+
"uid": "prometheus"
989+
},
990+
"fieldConfig": {
991+
"defaults": {
992+
"color": {
993+
"mode": "palette-classic"
994+
},
995+
"custom": {
996+
"axisCenteredZero": false,
997+
"axisColorMode": "text",
998+
"axisLabel": "",
999+
"axisPlacement": "auto",
1000+
"barAlignment": 0,
1001+
"drawStyle": "line",
1002+
"fillOpacity": 0,
1003+
"gradientMode": "none",
1004+
"hideFrom": {
1005+
"legend": false,
1006+
"tooltip": false,
1007+
"viz": false
1008+
},
1009+
"insertNulls": false,
1010+
"lineInterpolation": "linear",
1011+
"lineWidth": 1,
1012+
"pointSize": 5,
1013+
"scaleDistribution": {
1014+
"type": "linear"
1015+
},
1016+
"showPoints": "auto",
1017+
"spanNulls": false,
1018+
"stacking": {
1019+
"group": "A",
1020+
"mode": "none"
1021+
},
1022+
"thresholdsStyle": {
1023+
"mode": "off"
1024+
}
1025+
},
1026+
"mappings": [],
1027+
"thresholds": {
1028+
"mode": "absolute",
1029+
"steps": [
1030+
{
1031+
"color": "green",
1032+
"value": null
1033+
},
1034+
{
1035+
"color": "red",
1036+
"value": 80
1037+
}
1038+
]
1039+
}
1040+
},
1041+
"overrides": []
1042+
},
1043+
"gridPos": {
1044+
"h": 8,
1045+
"w": 11,
1046+
"x": 10,
1047+
"y": 25
1048+
},
1049+
"id": 24,
1050+
"options": {
1051+
"legend": {
1052+
"calcs": [],
1053+
"displayMode": "list",
1054+
"placement": "right",
1055+
"showLegend": true
1056+
},
1057+
"tooltip": {
1058+
"mode": "single",
1059+
"sort": "none"
1060+
}
1061+
},
1062+
"targets": [
1063+
{
1064+
"datasource": {
1065+
"type": "prometheus",
1066+
"uid": "prometheus"
1067+
},
1068+
"disableTextWrap": false,
1069+
"editorMode": "builder",
1070+
"expr": "user_errors",
1071+
"fullMetaSearch": false,
1072+
"includeNullMetadata": true,
1073+
"instant": false,
1074+
"legendFormat": "{{error_type}}",
1075+
"range": true,
1076+
"refId": "A",
1077+
"useBackend": false
1078+
}
1079+
],
1080+
"title": "User Error Count",
1081+
"transformations": [
1082+
{
1083+
"id": "calculateField",
1084+
"options": {
1085+
"alias": "proof_rejected",
1086+
"mode": "reduceRow",
1087+
"reduce": {
1088+
"include": [
1089+
"rejected_proof"
1090+
],
1091+
"reducer": "sum"
1092+
}
1093+
}
1094+
},
1095+
{
1096+
"id": "organize",
1097+
"options": {
1098+
"excludeByName": {
1099+
"rejected_proof": true
1100+
},
1101+
"indexByName": {},
1102+
"renameByName": {}
1103+
}
1104+
},
1105+
{
1106+
"id": "calculateField",
1107+
"options": {
1108+
"alias": "total",
1109+
"mode": "reduceRow",
1110+
"reduce": {
1111+
"reducer": "sum"
1112+
}
1113+
}
1114+
}
1115+
],
1116+
"type": "timeseries"
1117+
},
9851118
{
9861119
"collapsed": true,
9871120
"gridPos": {
@@ -1513,6 +1646,6 @@
15131646
"timezone": "browser",
15141647
"title": "Aggregator Data",
15151648
"uid": "aggregator",
1516-
"version": 2,
1649+
"version": 3,
15171650
"weekStart": ""
15181651
}

0 commit comments

Comments
 (0)