Skip to content

Commit 5fd44d0

Browse files
committed
Fix blocks persistence
Signed-off-by: bakhtin <a@bakhtin.net>
1 parent bef3d7b commit 5fd44d0

File tree

5 files changed

+99
-4
lines changed

5 files changed

+99
-4
lines changed

crates/engine/primitives/src/config.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ pub const DEFAULT_PERSISTENCE_THRESHOLD: u64 = 2;
99
/// How close to the canonical head we persist blocks.
1010
pub const DEFAULT_MEMORY_BLOCK_BUFFER_TARGET: u64 = 0;
1111

12+
/// Default maximum number of entries the persistence pruner may delete per run.
13+
/// Caps MDBX dirty page accumulation to ~400 MB (100k entries × ~4 KB pages).
14+
pub const DEFAULT_PERSISTENCE_PRUNER_DELETE_LIMIT: usize = 100_000;
15+
1216
/// Returns the default number of storage worker threads based on available parallelism.
1317
fn default_storage_worker_count() -> usize {
1418
#[cfg(feature = "std")]
@@ -186,6 +190,13 @@ pub struct TreeConfig {
186190
/// computation is spawned in parallel and whichever finishes first is used.
187191
/// If `None`, the timeout fallback is disabled.
188192
state_root_task_timeout: Option<Duration>,
193+
/// Maximum number of entries the persistence pruner may delete in a single run.
194+
/// Limits MDBX dirty page accumulation to prevent OOM during the first prune after startup.
195+
persistence_pruner_delete_limit: usize,
196+
/// Timeout for the persistence pruner per run. Prevents a single prune from blocking
197+
/// block persistence for too long. Account and Storage History segments treat this as
198+
/// a soft limit. If `None`, no timeout is passed to the pruner builder.
199+
persistence_pruner_timeout: Option<Duration>,
189200
}
190201

191202
impl Default for TreeConfig {
@@ -220,6 +231,8 @@ impl Default for TreeConfig {
220231
sparse_trie_max_storage_tries: DEFAULT_SPARSE_TRIE_MAX_STORAGE_TRIES,
221232
disable_sparse_trie_cache_pruning: false,
222233
state_root_task_timeout: Some(DEFAULT_STATE_ROOT_TASK_TIMEOUT),
234+
persistence_pruner_delete_limit: DEFAULT_PERSISTENCE_PRUNER_DELETE_LIMIT,
235+
persistence_pruner_timeout: None,
223236
}
224237
}
225238
}
@@ -286,6 +299,8 @@ impl TreeConfig {
286299
sparse_trie_max_storage_tries,
287300
disable_sparse_trie_cache_pruning: false,
288301
state_root_task_timeout,
302+
persistence_pruner_delete_limit: DEFAULT_PERSISTENCE_PRUNER_DELETE_LIMIT,
303+
persistence_pruner_timeout: None,
289304
}
290305
}
291306

@@ -656,4 +671,26 @@ impl TreeConfig {
656671
self.state_root_task_timeout = timeout;
657672
self
658673
}
674+
675+
/// Returns the persistence pruner delete limit.
676+
pub const fn persistence_pruner_delete_limit(&self) -> usize {
677+
self.persistence_pruner_delete_limit
678+
}
679+
680+
/// Setter for persistence pruner delete limit.
681+
pub const fn with_persistence_pruner_delete_limit(mut self, limit: usize) -> Self {
682+
self.persistence_pruner_delete_limit = limit;
683+
self
684+
}
685+
686+
/// Returns the persistence pruner timeout.
687+
pub const fn persistence_pruner_timeout(&self) -> Option<Duration> {
688+
self.persistence_pruner_timeout
689+
}
690+
691+
/// Setter for persistence pruner timeout.
692+
pub const fn with_persistence_pruner_timeout(mut self, timeout: Option<Duration>) -> Self {
693+
self.persistence_pruner_timeout = timeout;
694+
self
695+
}
659696
}

crates/engine/tree/src/persistence.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,11 @@ where
154154
let start_time = Instant::now();
155155

156156
if let Some(last) = last_block {
157+
// Commit block data immediately so persistence is not blocked by pruning.
158+
// The pruner can take hours on the first run after startup (its
159+
// previous_tip_block_number starts at None, triggering a full segment scan
160+
// even when the pipeline already pruned). Running it in a separate transaction
161+
// ensures blocks become durable on disk without waiting for pruning to finish.
157162
let provider_rw = self.provider.database_provider_rw()?;
158163
provider_rw.save_blocks(blocks, SaveBlocksMode::Full)?;
159164

@@ -164,14 +169,16 @@ where
164169
provider_rw.save_safe_block_number(safe)?;
165170
}
166171

172+
provider_rw.commit()?;
173+
167174
if self.pruner.is_pruning_needed(last.number) {
168175
debug!(target: "engine::persistence", block_num=?last.number, "Running pruner");
169176
let prune_start = Instant::now();
177+
let provider_rw = self.provider.database_provider_rw()?;
170178
let _ = self.pruner.run_with_provider(&provider_rw, last.number)?;
179+
provider_rw.commit()?;
171180
self.metrics.prune_before_duration_seconds.record(prune_start.elapsed());
172181
}
173-
174-
provider_rw.commit()?;
175182
}
176183

177184
debug!(target: "engine::persistence", first=?first_block, last=?last_block, "Saved range of blocks");

crates/engine/tree/src/tree/mod.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,6 +1292,17 @@ where
12921292
} else if self.should_persist() {
12931293
let blocks_to_persist =
12941294
self.get_canonical_blocks_to_persist(PersistTarget::Threshold)?;
1295+
if blocks_to_persist.is_empty() {
1296+
// This most likely means the canonical head block is not in blocks_by_hash,
1297+
// which prevents the chain walk from collecting blocks to persist.
1298+
warn!(
1299+
target: "engine::tree",
1300+
canonical_head = ?self.state.tree_state.canonical_head(),
1301+
last_persisted = ?self.persistence_state.last_persisted_block,
1302+
"should_persist=true but no blocks found to persist; \
1303+
canonical head may not be in memory"
1304+
);
1305+
}
12951306
self.persist_blocks(blocks_to_persist);
12961307
}
12971308
}

crates/node/builder/src/launch/engine.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,11 @@ impl EngineNodeLauncher {
170170
pruner_builder =
171171
pruner_builder.finished_exex_height(exex_manager_handle.finished_height());
172172
}
173+
pruner_builder =
174+
pruner_builder.delete_limit(engine_tree_config.persistence_pruner_delete_limit());
175+
if let Some(timeout) = engine_tree_config.persistence_pruner_timeout() {
176+
pruner_builder = pruner_builder.timeout(timeout);
177+
}
173178
let pruner = pruner_builder.build_with_provider_factory(ctx.provider_factory().clone());
174179
let pruner_events = pruner.events();
175180
info!(target: "reth::cli", prune_config=?ctx.prune_config(), "Pruner initialized");

crates/node/core/src/args/engine.rs

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
33
use clap::{builder::Resettable, Args};
44
use reth_engine_primitives::{
5-
TreeConfig, DEFAULT_MULTIPROOF_TASK_CHUNK_SIZE, DEFAULT_SPARSE_TRIE_MAX_STORAGE_TRIES,
6-
DEFAULT_SPARSE_TRIE_PRUNE_DEPTH,
5+
TreeConfig, DEFAULT_MULTIPROOF_TASK_CHUNK_SIZE, DEFAULT_PERSISTENCE_PRUNER_DELETE_LIMIT,
6+
DEFAULT_SPARSE_TRIE_MAX_STORAGE_TRIES, DEFAULT_SPARSE_TRIE_PRUNE_DEPTH,
77
};
88
use std::{sync::OnceLock, time::Duration};
99

@@ -400,6 +400,25 @@ pub struct EngineArgs {
400400
default_value = DefaultEngineValues::get_global().state_root_task_timeout.as_deref().unwrap_or("1s"),
401401
)]
402402
pub state_root_task_timeout: Option<Duration>,
403+
404+
/// Maximum number of entries the persistence pruner may delete in a single run.
405+
/// Limits MDBX dirty page accumulation to prevent OOM when the pruner runs for the
406+
/// first time after startup on a large database.
407+
///
408+
/// Set to 0 to disable the limit (unlimited).
409+
#[arg(long = "engine.persistence-pruner-delete-limit", default_value_t = DEFAULT_PERSISTENCE_PRUNER_DELETE_LIMIT)]
410+
pub persistence_pruner_delete_limit: usize,
411+
412+
/// Timeout for the persistence pruner per run. Prevents a single prune from blocking
413+
/// block persistence for too long.
414+
///
415+
/// CAUTION: Account and Storage History segments treat this as a soft limit.
416+
///
417+
/// Unset by default. Set to 0s to disable.
418+
///
419+
/// --engine.persistence-pruner-timeout 30s
420+
#[arg(long = "engine.persistence-pruner-timeout", value_parser = humantime::parse_duration)]
421+
pub persistence_pruner_timeout: Option<Duration>,
403422
}
404423

405424
#[allow(deprecated)]
@@ -464,6 +483,8 @@ impl Default for EngineArgs {
464483
state_root_task_timeout: state_root_task_timeout
465484
.as_deref()
466485
.map(|s| humantime::parse_duration(s).expect("valid default duration")),
486+
persistence_pruner_delete_limit: DEFAULT_PERSISTENCE_PRUNER_DELETE_LIMIT,
487+
persistence_pruner_timeout: None,
467488
}
468489
}
469490
}
@@ -498,6 +519,14 @@ impl EngineArgs {
498519
.with_sparse_trie_max_storage_tries(self.sparse_trie_max_storage_tries)
499520
.with_disable_sparse_trie_cache_pruning(self.disable_sparse_trie_cache_pruning)
500521
.with_state_root_task_timeout(self.state_root_task_timeout.filter(|d| !d.is_zero()))
522+
.with_persistence_pruner_delete_limit(if self.persistence_pruner_delete_limit == 0 {
523+
usize::MAX
524+
} else {
525+
self.persistence_pruner_delete_limit
526+
})
527+
.with_persistence_pruner_timeout(
528+
self.persistence_pruner_timeout.filter(|d| !d.is_zero()),
529+
)
501530
}
502531
}
503532

@@ -553,6 +582,8 @@ mod tests {
553582
sparse_trie_max_storage_tries: 100,
554583
disable_sparse_trie_cache_pruning: true,
555584
state_root_task_timeout: Some(Duration::from_secs(2)),
585+
persistence_pruner_delete_limit: 100_000,
586+
persistence_pruner_timeout: Some(Duration::from_secs(30)),
556587
};
557588

558589
let parsed_args = CommandParser::<EngineArgs>::parse_from([
@@ -591,6 +622,10 @@ mod tests {
591622
"--engine.disable-sparse-trie-cache-pruning",
592623
"--engine.state-root-task-timeout",
593624
"2s",
625+
"--engine.persistence-pruner-delete-limit",
626+
"100000",
627+
"--engine.persistence-pruner-timeout",
628+
"30s",
594629
])
595630
.args;
596631

0 commit comments

Comments
 (0)