Skip to content

Commit b233284

Browse files
fedackingmpaulucciCopilot
authored
fix(l1): exit on irrecoverable error on sync (#5005)
**Motivation** If we have an irrecoverable error in sync, we should exit the node. At the beginning we need to handle deleting the leaves of previous snapsync in case the node was in half of the download. **Description** - Made it so that when the sync_cycle function finds an irrecuperable error, it exits the node. - Change `validate_folders` to `deletes_leaves_folder` to clean up the leaves at start in stead of stopping the node. --------- Co-authored-by: Martin Paulucci <[email protected]> Co-authored-by: Copilot <[email protected]>
1 parent 1f422e9 commit b233284

File tree

2 files changed

+77
-41
lines changed

2 files changed

+77
-41
lines changed

crates/networking/p2p/sync.rs

Lines changed: 67 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ use crate::sync::code_collector::CodeHashCollector;
99
use crate::sync::state_healing::heal_state_trie_wrap;
1010
use crate::sync::storage_healing::heal_storage_trie;
1111
use crate::utils::{
12-
current_unix_time, get_account_state_snapshots_dir, get_account_storages_snapshots_dir,
13-
get_code_hashes_snapshots_dir, validate_folders,
12+
current_unix_time, delete_leaves_folder, get_account_state_snapshots_dir,
13+
get_account_storages_snapshots_dir, get_code_hashes_snapshots_dir,
1414
};
1515
use crate::{
1616
metrics::METRICS,
@@ -148,15 +148,34 @@ impl Syncer {
148148
match self.sync_cycle(sync_head, store).await {
149149
Ok(()) => {
150150
info!(
151-
"Sync cycle finished, time elapsed: {} secs",
152-
start_time.elapsed().as_secs()
151+
time_elapsed_s = start_time.elapsed().as_secs(),
152+
%sync_head,
153+
"Sync cycle finished successfully",
153154
);
154155
}
155-
// TODO #2767: If the error is irrecoverable, we should exit ethrex
156-
Err(error) => error!(
157-
"Sync cycle failed due to {error}, time elapsed: {} secs ",
158-
start_time.elapsed().as_secs()
159-
),
156+
157+
// If the error is irrecoverable, we exit ethrex
158+
Err(error) => {
159+
match error.is_recoverable() {
160+
false => {
161+
// We exit the node, as we can't recover this error
162+
error!(
163+
time_elapsed_s = start_time.elapsed().as_secs(),
164+
%sync_head,
165+
%error, "Sync cycle failed, exiting as the error is irrecoverable",
166+
);
167+
std::process::exit(2);
168+
}
169+
true => {
170+
// We do nothing, as the error is recoverable
171+
error!(
172+
time_elapsed_s = start_time.elapsed().as_secs(),
173+
%sync_head,
174+
%error, "Sync cycle failed, retrying",
175+
);
176+
}
177+
}
178+
}
160179
}
161180
}
162181

@@ -197,18 +216,8 @@ impl Syncer {
197216
};
198217

199218
// We validate that we have the folders that are being used empty, as we currently assume
200-
// they are.
201-
if !validate_folders(&self.datadir) {
202-
// Temp std::process::exit until #2767 is done
203-
error!(
204-
"One of the folders used for temporary leaves during snap is still used. Delete them in {}",
205-
&self.datadir.to_str().unwrap_or_default()
206-
);
207-
std::process::exit(1);
208-
// Cloning a single string, the node should stop after this
209-
// return Err(SyncError::NotEmptyDatadirFolders(self.datadir.clone()));
210-
}
211-
219+
// they are. If they are not empty we empty the folder
220+
delete_leaves_folder(&self.datadir);
212221
loop {
213222
debug!("Sync Log 1: In snap sync");
214223
debug!(
@@ -1153,8 +1162,6 @@ pub enum SyncError {
11531162
CodeHashesSnapshotDecodeError(PathBuf),
11541163
#[error("Failed to get account state for block {0:?} and account hash {1:?}")]
11551164
AccountState(H256, H256),
1156-
#[error("Failed to acquire lock on maybe_big_account_storage")]
1157-
MaybeBigAccount,
11581165
#[error("Failed to fetch bytecodes from peers")]
11591166
BytecodesNotFound,
11601167
#[error("Failed to get account state snapshots directory")]
@@ -1167,12 +1174,6 @@ pub enum SyncError {
11671174
DifferentStateRoots(H256, H256, H256),
11681175
#[error("Failed to get block headers")]
11691176
NoBlockHeaders,
1170-
#[error("The download datadir folders at {0} are not empty, delete them first")]
1171-
NotEmptyDatadirFolders(PathBuf),
1172-
#[error("Couldn't create a thread")]
1173-
ThreadCreationError,
1174-
#[error("Called update_pivot outside snapsync mode")]
1175-
NotInSnapSync,
11761177
#[error("Peer handler error: {0}")]
11771178
PeerHandler(#[from] PeerHandlerError),
11781179
#[error("Corrupt Path")]
@@ -1191,6 +1192,43 @@ pub enum SyncError {
11911192
PeerTableError(#[from] PeerTableError),
11921193
}
11931194

1195+
impl SyncError {
1196+
pub fn is_recoverable(&self) -> bool {
1197+
match self {
1198+
SyncError::SnapshotReadError(_, _)
1199+
| SyncError::SnapshotDecodeError(_)
1200+
| SyncError::CodeHashesSnapshotDecodeError(_)
1201+
| SyncError::AccountState(_, _)
1202+
| SyncError::BytecodesNotFound
1203+
| SyncError::AccountStateSnapshotsDirNotFound
1204+
| SyncError::AccountStoragesSnapshotsDirNotFound
1205+
| SyncError::CodeHashesSnapshotsDirNotFound
1206+
| SyncError::DifferentStateRoots(_, _, _)
1207+
| SyncError::NoBlockHeaders
1208+
| SyncError::PeerHandler(_)
1209+
| SyncError::CorruptPath
1210+
| SyncError::TrieGenerationError(_)
1211+
| SyncError::AccountTempDBDirNotFound
1212+
| SyncError::StorageTempDBDirNotFound
1213+
| SyncError::RocksDBError(_)
1214+
| SyncError::BytecodeFileError
1215+
| SyncError::NoLatestCanonical
1216+
| SyncError::PeerTableError(_) => false,
1217+
SyncError::Chain(_)
1218+
| SyncError::Store(_)
1219+
| SyncError::Send(_)
1220+
| SyncError::Trie(_)
1221+
| SyncError::Rlp(_)
1222+
| SyncError::JoinHandle(_)
1223+
| SyncError::CorruptDB
1224+
| SyncError::BodiesNotFound
1225+
| SyncError::InvalidRangeReceived
1226+
| SyncError::BlockNumber(_)
1227+
| SyncError::NoBlocks => true,
1228+
}
1229+
}
1230+
}
1231+
11941232
impl<T> From<SendError<T>> for SyncError {
11951233
fn from(value: SendError<T>) -> Self {
11961234
Self::Send(value.to_string())

crates/networking/p2p/utils.rs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,20 +39,18 @@ pub fn public_key_from_signing_key(signer: &SecretKey) -> H512 {
3939
H512::from_slice(&encoded[1..])
4040
}
4141

42-
/// Returns true if the folders used for the partial download of the leaves exists. if they do exist, it
43-
/// should be deleted. We don't auto delete them on the off chance we want to backup the files
44-
pub fn validate_folders(datadir: &Path) -> bool {
45-
let folder_doesnt_exist = !std::fs::exists(get_account_state_snapshots_dir(datadir))
46-
.is_ok_and(|exists| exists)
47-
&& !std::fs::exists(get_account_storages_snapshots_dir(datadir)).is_ok_and(|exists| exists)
48-
&& !std::fs::exists(get_code_hashes_snapshots_dir(datadir)).is_ok_and(|exists| exists);
42+
/// Deletes the snap folders needed for downloading the leaves during the initial
43+
/// step of snap sync.
44+
pub fn delete_leaves_folder(datadir: &Path) {
45+
// We ignore the errors because this can happen when the folders don't exist
46+
let _ = std::fs::remove_dir_all(get_account_state_snapshots_dir(datadir));
47+
let _ = std::fs::remove_dir_all(get_account_storages_snapshots_dir(datadir));
48+
let _ = std::fs::remove_dir_all(get_code_hashes_snapshots_dir(datadir));
4949
#[cfg(feature = "rocksdb")]
50-
let folder_doesnt_exist = {
51-
folder_doesnt_exist
52-
&& !std::fs::exists(get_rocksdb_temp_accounts_dir(datadir)).is_ok_and(|exists| exists)
53-
&& !std::fs::exists(get_rocksdb_temp_storage_dir(datadir)).is_ok_and(|exists| exists)
50+
{
51+
let _ = std::fs::remove_dir_all(get_rocksdb_temp_accounts_dir(datadir));
52+
let _ = std::fs::remove_dir_all(get_rocksdb_temp_storage_dir(datadir));
5453
};
55-
folder_doesnt_exist
5654
}
5755

5856
pub fn get_account_storages_snapshots_dir(datadir: &Path) -> PathBuf {

0 commit comments

Comments
 (0)