diff --git a/apps/fortuna/Cargo.lock b/apps/fortuna/Cargo.lock index 4588b93f6b..ea81a7e2b9 100644 --- a/apps/fortuna/Cargo.lock +++ b/apps/fortuna/Cargo.lock @@ -1647,7 +1647,7 @@ dependencies = [ [[package]] name = "fortuna" -version = "7.6.3" +version = "7.6.4" dependencies = [ "anyhow", "axum", diff --git a/apps/fortuna/Cargo.toml b/apps/fortuna/Cargo.toml index e0ff4f758d..340cb4f908 100644 --- a/apps/fortuna/Cargo.toml +++ b/apps/fortuna/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fortuna" -version = "7.6.3" +version = "7.6.4" edition = "2021" [lib] diff --git a/apps/fortuna/src/chain/ethereum.rs b/apps/fortuna/src/chain/ethereum.rs index 943f0f36b3..22b3d606f7 100644 --- a/apps/fortuna/src/chain/ethereum.rs +++ b/apps/fortuna/src/chain/ethereum.rs @@ -33,7 +33,9 @@ use { // contract in the same repo. abigen!( PythRandom, - "../../target_chains/ethereum/entropy_sdk/solidity/abis/IEntropy.json" + "../../target_chains/ethereum/entropy_sdk/solidity/abis/IEntropy.json"; + PythRandomErrors, + "../../target_chains/ethereum/entropy_sdk/solidity/abis/EntropyErrors.json" ); pub type MiddlewaresWrapper = LegacyTxMiddleware< diff --git a/apps/fortuna/src/eth_utils/utils.rs b/apps/fortuna/src/eth_utils/utils.rs index f89b3f6b23..ca256dc5e4 100644 --- a/apps/fortuna/src/eth_utils/utils.rs +++ b/apps/fortuna/src/eth_utils/utils.rs @@ -4,11 +4,15 @@ use { backoff::ExponentialBackoff, ethabi::ethereum_types::U64, ethers::{ - contract::ContractCall, + contract::{ContractCall, ContractError}, middleware::Middleware, - types::{TransactionReceipt, U256}, + providers::ProviderError, + types::{transaction::eip2718::TypedTransaction, TransactionReceipt, U256}, + }, + std::{ + fmt::Display, + sync::{atomic::AtomicU64, Arc}, }, - std::sync::{atomic::AtomicU64, Arc}, tokio::time::{timeout, Duration}, tracing, }; @@ -151,12 +155,17 @@ pub async fn estimate_tx_cost( /// the transaction exceeds this limit, the transaction is not submitted. /// Note however that any gas_escalation policy is applied to the estimate, so the actual gas used may exceed the limit. /// The transaction is retried until it is confirmed on chain or the maximum number of retries is reached. +/// You can pass an `error_mapper` function that will be called on each retry with the number of retries and the error. +/// This lets you customize the backoff behavior based on the error type. pub async fn submit_tx_with_backoff( middleware: Arc, call: ContractCall, gas_limit: U256, escalation_policy: EscalationPolicy, -) -> Result { + error_mapper: Option< + impl Fn(u64, backoff::Error>) -> backoff::Error>, + >, +) -> Result> { let start_time = std::time::Instant::now(); tracing::info!("Started processing event"); @@ -176,14 +185,19 @@ pub async fn submit_tx_with_backoff( let gas_multiplier_pct = escalation_policy.get_gas_multiplier_pct(num_retries); let fee_multiplier_pct = escalation_policy.get_fee_multiplier_pct(num_retries); - submit_tx( + let result = submit_tx( middleware.clone(), &call, padded_gas_limit, gas_multiplier_pct, fee_multiplier_pct, ) - .await + .await; + if let Some(ref mapper) = error_mapper { + result.map_err(|e| mapper(num_retries, e)) + } else { + result + } }, |e, dur| { let retry_number = num_retries.load(std::sync::atomic::Ordering::Relaxed); @@ -210,6 +224,51 @@ pub async fn submit_tx_with_backoff( }) } +pub enum SubmitTxError { + GasUsageEstimateError(ContractError), + GasLimitExceeded { estimate: U256, limit: U256 }, + GasPriceEstimateError(::Error), + SubmissionError(TypedTransaction, ::Error), + ConfirmationTimeout(TypedTransaction), + ConfirmationError(TypedTransaction, ProviderError), + ReceiptError(TypedTransaction, TransactionReceipt), +} + +impl Display for SubmitTxError +where + T: Middleware + NonceManaged + 'static, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SubmitTxError::GasUsageEstimateError(e) => { + write!(f, "Error estimating gas for reveal: {:?}", e) + } + SubmitTxError::GasLimitExceeded { estimate, limit } => write!( + f, + "Gas estimate for reveal with callback is higher than the gas limit {} > {}", + estimate, limit + ), + SubmitTxError::GasPriceEstimateError(e) => write!(f, "Gas price estimate error: {}", e), + SubmitTxError::SubmissionError(tx, e) => write!( + f, + "Error submitting the reveal transaction. Tx:{:?}, Error:{:?}", + tx, e + ), + SubmitTxError::ConfirmationTimeout(tx) => { + write!(f, "Tx stuck in mempool. Resetting nonce. Tx:{:?}", tx) + } + SubmitTxError::ConfirmationError(tx, e) => write!( + f, + "Error waiting for transaction receipt. Tx:{:?} Error:{:?}", + tx, e + ), + SubmitTxError::ReceiptError(tx, _) => { + write!(f, "Reveal transaction reverted on-chain. Tx:{:?}", tx,) + } + } + } +} + /// Submit a transaction to the blockchain. It estimates the gas for the transaction, /// pads both the gas and fee estimates using the provided multipliers, and submits the transaction. /// It will return a permanent or transient error depending on the error type and whether @@ -221,24 +280,23 @@ pub async fn submit_tx( // A value of 100 submits the tx with the same gas/fee as the estimate. gas_estimate_multiplier_pct: u64, fee_estimate_multiplier_pct: u64, -) -> Result> { +) -> Result>> { let gas_estimate_res = call.estimate_gas().await; let gas_estimate = gas_estimate_res.map_err(|e| { // we consider the error transient even if it is a contract revert since // it can be because of routing to a lagging RPC node. Retrying such errors will // incur a few additional RPC calls, but it is fine. - backoff::Error::transient(anyhow!("Error estimating gas for reveal: {:?}", e)) + backoff::Error::transient(SubmitTxError::GasUsageEstimateError(e)) })?; // The gas limit on the simulated transaction is the maximum expected tx gas estimate, // but we are willing to pad the gas a bit to ensure reliable submission. if gas_estimate > gas_limit { - return Err(backoff::Error::permanent(anyhow!( - "Gas estimate for reveal with callback is higher than the gas limit {} > {}", - gas_estimate, - gas_limit - ))); + return Err(backoff::Error::permanent(SubmitTxError::GasLimitExceeded { + estimate: gas_estimate, + limit: gas_limit, + })); } // Pad the gas estimate after checking it against the simulation gas limit. @@ -247,13 +305,11 @@ pub async fn submit_tx( let call = call.clone().gas(gas_estimate); let mut transaction = call.tx.clone(); - // manually fill the tx with the gas info, so we can log the details in case of error + // manually fill the tx with the gas price info, so we can log the details in case of error client .fill_transaction(&mut transaction, None) .await - .map_err(|e| { - backoff::Error::transient(anyhow!("Error filling the reveal transaction: {:?}", e)) - })?; + .map_err(|e| backoff::Error::transient(SubmitTxError::GasPriceEstimateError(e)))?; // Apply the fee escalation policy. Note: the unwrap_or_default should never default as we have a gas oracle // in the client that sets the gas price. @@ -271,11 +327,7 @@ pub async fn submit_tx( .send_transaction(transaction.clone(), None) .await .map_err(|e| { - backoff::Error::transient(anyhow!( - "Error submitting the reveal transaction. Tx:{:?}, Error:{:?}", - transaction, - e - )) + backoff::Error::transient(SubmitTxError::SubmissionError(transaction.clone(), e)) })?; let reset_nonce = || { @@ -292,34 +344,24 @@ pub async fn submit_tx( // in this case ethers internal polling will not reduce the number of retries // and keep retrying indefinitely. So we set a manual timeout here and reset the nonce. reset_nonce(); - backoff::Error::transient(anyhow!( - "Tx stuck in mempool. Resetting nonce. Tx:{:?}", - transaction - )) + backoff::Error::transient(SubmitTxError::ConfirmationTimeout(transaction.clone())) })?; let receipt = pending_receipt .map_err(|e| { - backoff::Error::transient(anyhow!( - "Error waiting for transaction receipt. Tx:{:?} Error:{:?}", - transaction, - e - )) + backoff::Error::transient(SubmitTxError::ConfirmationError(transaction.clone(), e)) })? .ok_or_else(|| { // RPC may not return an error on tx submission if the nonce is too high. // But we will never get a receipt. So we reset the nonce manager to get the correct nonce. reset_nonce(); - backoff::Error::transient(anyhow!( - "Can't verify the reveal, probably dropped from mempool. Resetting nonce. Tx:{:?}", - transaction - )) + backoff::Error::transient(SubmitTxError::ConfirmationTimeout(transaction.clone())) })?; if receipt.status == Some(U64::from(0)) { - return Err(backoff::Error::transient(anyhow!( - "Reveal transaction reverted on-chain. Tx:{:?}", - transaction + return Err(backoff::Error::transient(SubmitTxError::ReceiptError( + transaction.clone(), + receipt.clone(), ))); } diff --git a/apps/fortuna/src/keeper/process_event.rs b/apps/fortuna/src/keeper/process_event.rs index 823d03c896..707fcc3413 100644 --- a/apps/fortuna/src/keeper/process_event.rs +++ b/apps/fortuna/src/keeper/process_event.rs @@ -1,12 +1,14 @@ use { super::keeper_metrics::AccountLabel, crate::{ - chain::reader::RequestedWithCallbackEvent, - eth_utils::utils::submit_tx_with_backoff, + chain::{ethereum::PythRandomErrorsErrors, reader::RequestedWithCallbackEvent}, + eth_utils::utils::{submit_tx_with_backoff, SubmitTxError}, history::{RequestEntryState, RequestStatus}, keeper::block::ProcessParams, }, anyhow::{anyhow, Result}, + ethers::{abi::AbiDecode, contract::ContractError}, + std::time::Duration, tracing, }; @@ -74,12 +76,43 @@ pub async fn process_event_with_backoff( event.user_random_number, provider_revelation, ); + let error_mapper = |num_retries, e| { + if let backoff::Error::Transient { + err: SubmitTxError::GasUsageEstimateError(ContractError::Revert(revert)), + .. + } = &e + { + if let Ok(PythRandomErrorsErrors::NoSuchRequest(_)) = + PythRandomErrorsErrors::decode(revert) + { + let err = + SubmitTxError::GasUsageEstimateError(ContractError::Revert(revert.clone())); + // Slow down the retries if the request is not found. + // This probably means that the request is already fulfilled via another process. + // After 5 retries, we return the error permanently. + if num_retries >= 5 { + return backoff::Error::Permanent(err); + } + let retry_after_seconds = match num_retries { + 0 => 5, + 1 => 10, + _ => 60, + }; + return backoff::Error::Transient { + err, + retry_after: Some(Duration::from_secs(retry_after_seconds)), + }; + } + } + e + }; let success = submit_tx_with_backoff( contract.client(), contract_call, gas_limit, escalation_policy, + Some(error_mapper), ) .await; @@ -160,16 +193,45 @@ pub async fn process_event_with_backoff( .get_request(event.provider_address, event.sequence_number) .await; - tracing::error!("Failed to process event: {:?}. Request: {:?}", e, req); - // We only count failures for cases where we are completely certain that the callback failed. - if req.is_ok_and(|x| x.is_some()) { + if req.as_ref().is_ok_and(|x| x.is_some()) { + tracing::error!("Failed to process event: {}. Request: {:?}", e, req); metrics .requests_processed_failure .get_or_create(&account_label) .inc(); + // Do not display the internal error, it might include RPC details. + let reason = match e { + SubmitTxError::GasUsageEstimateError(ContractError::Revert(revert)) => { + format!("Reverted: {}", revert) + } + SubmitTxError::GasLimitExceeded { limit, estimate } => format!( + "Gas limit exceeded: limit = {}, estimate = {}", + limit, estimate + ), + SubmitTxError::GasUsageEstimateError(_) => { + "Unable to estimate gas usage".to_string() + } + SubmitTxError::GasPriceEstimateError(_) => { + "Unable to estimate gas price".to_string() + } + SubmitTxError::SubmissionError(_, _) => { + "Error submitting the transaction on-chain".to_string() + } + SubmitTxError::ConfirmationTimeout(tx) => format!( + "Transaction was submitted, but never confirmed. Hash: {}", + tx.sighash() + ), + SubmitTxError::ConfirmationError(tx, _) => format!( + "Transaction was submitted, but never confirmed. Hash: {}", + tx.sighash() + ), + SubmitTxError::ReceiptError(tx, _) => { + format!("Reveal transaction failed on-chain. Hash: {}", tx.sighash()) + } + }; status.state = RequestEntryState::Failed { - reason: format!("Error revealing: {:?}", e), + reason, provider_random_number: Some(provider_revelation), }; history.add(&status);