Skip to content

Commit 3a3bb3d

Browse files
authored
BM-282: Shift error logs to warns (github#98)
This PR adjusts certain classes of errors that were non-fatal / non-critical to `warn` logs. It does this by moving a bunch of Anyhow errors to classified errors to allow for best effort classification of what errors should be marked as `warn` closes github#96 closes BM-282
1 parent 908f7f4 commit 3a3bb3d

File tree

7 files changed

+110
-22
lines changed

7 files changed

+110
-22
lines changed

crates/api/src/lib.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,11 @@ impl IntoResponse for AppError {
140140
Self::ReceiptMissing(_) | Self::JournalMissing(_) => StatusCode::NOT_FOUND,
141141
Self::InternalErr(_) | Self::DbError(_) => StatusCode::INTERNAL_SERVER_ERROR,
142142
};
143-
tracing::error!("api error, code {code}: {self:?}");
143+
144+
match self {
145+
Self::ImgAlreadyExists(_) => tracing::warn!("api warn, code: {code}, {self:?}"),
146+
_ => tracing::error!("api error, code {code}: {self:?}"),
147+
}
144148

145149
(code, Json(ErrMsg { r#type: self.type_str(), msg: self.to_string() })).into_response()
146150
}

crates/boundless-market/src/contracts/proof_market.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,22 @@ use super::{
2626
pub enum MarketError {
2727
#[error("Transaction error: {0}")]
2828
TxnError(#[from] TxnErr),
29+
2930
#[error("Request is not fulfilled {0}")]
3031
RequestNotFulfilled(U256),
32+
3133
#[error("Request has expired {0}")]
3234
RequestHasExpired(U256),
35+
3336
#[error("Proof not found {0}")]
3437
ProofNotFound(U256),
38+
39+
#[error("Lockin reverted, possibly outbid: txn_hash: {0}")]
40+
LockRevert(B256),
41+
3542
#[error("Market error: {0}")]
3643
Error(#[from] anyhow::Error),
44+
3745
#[error("Timeout: {0}")]
3846
TimeoutReached(U256),
3947
}
@@ -252,10 +260,7 @@ where
252260

253261
if !receipt.status() {
254262
// TODO: Get + print revertReason
255-
return Err(MarketError::Error(anyhow!(
256-
"LockinRequest failed [{}], possibly outbid",
257-
receipt.transaction_hash
258-
)));
263+
return Err(MarketError::LockRevert(receipt.transaction_hash));
259264
}
260265

261266
tracing::info!("Registered request {:x}: {}", request.id, receipt.transaction_hash);

crates/broker/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ pub struct Args {
9292
}
9393

9494
/// Status of a order as it moves through the lifecycle
95-
#[derive(Clone, sqlx::Type, Debug, PartialEq, Serialize, Deserialize)]
95+
#[derive(Clone, Copy, sqlx::Type, Debug, PartialEq, Serialize, Deserialize)]
9696
enum OrderStatus {
9797
/// New order found on chain, waiting pricing analysis
9898
New,

crates/broker/src/market_monitor.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use boundless_market::contracts::{proof_market::ProofMarketService, IProofMarket
1717
use futures_util::StreamExt;
1818

1919
use crate::{
20+
db::DbError,
2021
task::{RetryRes, RetryTask, SupervisorErr},
2122
DbObj, Order,
2223
};
@@ -178,7 +179,20 @@ where
178179
)
179180
.await
180181
{
181-
tracing::error!("Failed to add new order into DB: {err:?}");
182+
match err {
183+
DbError::SqlErr(sqlx::Error::Database(db_err)) => {
184+
if db_err.is_unique_violation() {
185+
tracing::warn!("Duplicate order detected: {db_err:?}");
186+
} else {
187+
tracing::error!(
188+
"Failed to add new order into DB: {db_err:?}"
189+
);
190+
}
191+
}
192+
_ => {
193+
tracing::error!("Failed to add new order into DB: {err:?}");
194+
}
195+
}
182196
}
183197
}
184198
Err(err) => {

crates/broker/src/order_monitor.rs

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,29 @@ use alloy::{
1414
providers::{Provider, WalletProvider},
1515
transports::Transport,
1616
};
17-
use anyhow::{bail, Context, Result};
18-
use boundless_market::contracts::{proof_market::ProofMarketService, ProofStatus};
17+
use anyhow::{Context, Result};
18+
use boundless_market::contracts::{
19+
proof_market::{MarketError, ProofMarketService},
20+
ProofStatus,
21+
};
1922
use std::sync::Arc;
2023
use std::time::Duration;
24+
use thiserror::Error;
25+
26+
#[derive(Error, Debug)]
27+
pub enum LockOrderErr {
28+
#[error("Failed to fetch / push image: {0}")]
29+
OrderLockedInBlock(MarketError),
30+
31+
#[error("Invalid order status for locking: {0:?}")]
32+
InvalidStatus(OrderStatus),
33+
34+
#[error("Order already locked")]
35+
AlreadyLocked,
36+
37+
#[error("Other: {0}")]
38+
OtherErr(#[from] anyhow::Error),
39+
}
2140

2241
#[derive(Clone)]
2342
pub struct OrderMonitor<T, P> {
@@ -57,9 +76,9 @@ where
5776
Ok(Self { db, provider, block_time, config, market })
5877
}
5978

60-
async fn lock_order(&self, order_id: U256, order: &Order) -> Result<()> {
79+
async fn lock_order(&self, order_id: U256, order: &Order) -> Result<(), LockOrderErr> {
6180
if order.status != OrderStatus::Locking {
62-
bail!("Invalid order status for locking: {:?}", order.status);
81+
return Err(LockOrderErr::InvalidStatus(order.status));
6382
}
6483

6584
let order_status =
@@ -68,7 +87,7 @@ where
6887
tracing::warn!("Order {order_id:x} not open: {order_status:?}, skipping");
6988
// TODO: fetch some chain data to find out who / and for how much the order
7089
// was locked in at
71-
bail!("Order already locked");
90+
return Err(LockOrderErr::AlreadyLocked);
7291
}
7392

7493
let conf_priority_gas = {
@@ -83,7 +102,8 @@ where
83102
let lock_block = self
84103
.market
85104
.lockin_request(&order.request, &order.client_sig, conf_priority_gas)
86-
.await?;
105+
.await
106+
.map_err(LockOrderErr::OrderLockedInBlock)?;
87107

88108
let lock_price = self
89109
.market
@@ -104,8 +124,16 @@ where
104124
for (order_id, order) in orders.iter() {
105125
match self.lock_order(*order_id, order).await {
106126
Ok(_) => tracing::info!("Locked order: {order_id:x}"),
107-
Err(err) => {
108-
tracing::error!("Failed to lock order: {order_id:x} {err}");
127+
Err(ref err) => {
128+
match err {
129+
LockOrderErr::OtherErr(err) => {
130+
tracing::error!("Failed to lock order: {order_id:x} {err:?}");
131+
}
132+
// Only warn on known / classified errors
133+
_ => {
134+
tracing::warn!("Soft failed to lock order: {order_id:x} {err:?}");
135+
}
136+
}
109137
if let Err(err) = self.db.set_order_failure(*order_id, format!("{err:?}")).await
110138
{
111139
tracing::error!(

crates/broker/src/order_picker.rs

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,27 @@ use alloy::{
1515
};
1616
use anyhow::{Context, Result};
1717
use boundless_market::contracts::proof_market::ProofMarketService;
18+
use thiserror::Error;
19+
20+
#[derive(Error, Debug)]
21+
pub enum PriceOrderErr {
22+
#[error("Failed to fetch / push input: {0}")]
23+
FetchInputErr(anyhow::Error),
24+
25+
#[error("Failed to fetch / push image: {0}")]
26+
FetchImageErr(anyhow::Error),
27+
28+
#[error("Guest execution faulted: {0}")]
29+
GuestPanic(String),
30+
31+
#[error("Other: {0}")]
32+
OtherErr(#[from] anyhow::Error),
33+
}
1834

1935
use crate::{
2036
config::ConfigLock,
2137
db::DbObj,
22-
provers::ProverObj,
38+
provers::{ProverError, ProverObj},
2339
task::{RetryRes, RetryTask, SupervisorErr},
2440
Order,
2541
};
@@ -55,7 +71,7 @@ where
5571
Self { db, config, prover, block_time, provider, market }
5672
}
5773

58-
async fn price_order(&self, order_id: U256, order: &Order) -> Result<()> {
74+
async fn price_order(&self, order_id: U256, order: &Order) -> Result<(), PriceOrderErr> {
5975
tracing::debug!("Processing order {order_id:x}: {order:?}");
6076

6177
let (min_deadline, allowed_addresses_opt) = {
@@ -154,11 +170,11 @@ where
154170
// TODO: Move URI handling like this into the prover impls
155171
let image_id = crate::upload_image_uri(&self.prover, order, max_size, fetch_retries)
156172
.await
157-
.context("Failed to fetch and upload image_id")?;
173+
.map_err(PriceOrderErr::FetchImageErr)?;
158174

159175
let input_id = crate::upload_input_uri(&self.prover, order, max_size, fetch_retries)
160176
.await
161-
.context("Failed to fetch and upload input_id")?;
177+
.map_err(PriceOrderErr::FetchInputErr)?;
162178

163179
// Record the image/input IDs for proving stage
164180
self.db
@@ -200,7 +216,18 @@ where
200216
/* TODO assumptions */ Some(exec_limit * 1024 * 1024),
201217
)
202218
.await
203-
.context("Preflight failed")?;
219+
.map_err(|err| match err {
220+
ProverError::ProvingFailed(ref err_msg) => {
221+
// TODO: Get enum'd errors from the SDK to prevent str
222+
// checks
223+
if err_msg.contains("GuestPanic") {
224+
PriceOrderErr::GuestPanic(err_msg.clone())
225+
} else {
226+
PriceOrderErr::OtherErr(err.into())
227+
}
228+
}
229+
_ => PriceOrderErr::OtherErr(err.into()),
230+
})?;
204231

205232
// TODO: this only checks that we could prove this at peak_khz, not if the cluster currently
206233
// can absorb that proving load, we need to cordinate this check with parallel
@@ -356,7 +383,17 @@ where
356383
.set_order_failure(order_id, err.to_string())
357384
.await
358385
.expect("Failed to set order failure");
359-
tracing::error!("pricing order failed: {order_id:x} {err:?}");
386+
match err {
387+
PriceOrderErr::OtherErr(err) => {
388+
tracing::error!("Pricing order failed: {order_id:x} {err:?}");
389+
}
390+
// Only warn on known / classified errors
391+
_ => {
392+
tracing::warn!(
393+
"Pricing order soft failed: {order_id:x} {err:?}"
394+
);
395+
}
396+
}
360397
}
361398
});
362399
}

crates/broker/src/task.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ pub async fn supervisor(count: usize, task: Arc<impl RetryTask>) -> AnyhowRes<()
4040
Ok(_) => tracing::debug!("Task exited cleanly"),
4141
Err(err) => match err {
4242
SupervisorErr::Recover(err) => {
43-
tracing::error!(
43+
tracing::warn!(
4444
"Recoverable failure detected: {err:?}, spawning replacement"
4545
);
4646
tasks.spawn(task.spawn());

0 commit comments

Comments
 (0)