Skip to content

Commit 42b32e0

Browse files
ilitteriavilagaston9gianbelinche
authored andcommitted
fix(l2): remove one-time checkpoint if already exists (#5083)
**Motivation** In a previous PR, DB checkpoints were introduced to ensure old state availability in the current path-based fashion. Every time a batch is sealed, a checkpoint whose state is the state of the latest block of the sealed batch is created to be used in the next batch. The checkpoint is needed in two different steps of the batch commitment: for batch preparation (this is essentially building the batch) and for witness generation. Both steps need a non-modified checkpoint, but they both need to modify the checkpoint to be able to re-execute the batch. As batch preparation occurs before witness generation, we opted to create a one-time checkpoint out of the main checkpoint that can be modified during batch preparation if needed (sometimes the batch was already available in the DB, and there's no need to re-execute anything); then, witness generation modifies the original checkpoint as needed because it is no longer needed. Once the one-time checkpoint fulfills its purpose, it is removed. Currently, if batch preparation fails, the one-time checkpoint is not removed, and after retrying batch preparation, there's another attempt at creating the one-time checkpoint, which ends in an error because the directory already exists. We need to either avoid creating the one-time checkpoint again or to remove the existing one. **Description** Remove the existing one-time checkpoint if it already exists. --------- Co-authored-by: avilagaston9 <[email protected]> Co-authored-by: Gianbelinche <[email protected]>
1 parent 393cfd7 commit 42b32e0

File tree

1 file changed

+64
-75
lines changed

1 file changed

+64
-75
lines changed

crates/l2/sequencer/l1_committer.rs

Lines changed: 64 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ use ethrex_storage::EngineType;
4646
use ethrex_storage::Store;
4747
use ethrex_storage_rollup::StoreRollup;
4848
use ethrex_vm::{BlockExecutionResult, Evm};
49+
use rand::Rng;
4950
use serde::Serialize;
5051
use std::{
5152
collections::{BTreeMap, HashMap},
@@ -270,16 +271,50 @@ impl L1Committer {
270271
)?;
271272
let first_block_to_commit = last_block + 1;
272273

274+
// We need to guarantee that the checkpoint path is new
275+
// to avoid causing a lock error under rocksdb feature.
276+
let rand_suffix: u32 = rand::thread_rng().r#gen();
277+
let one_time_checkpoint_path = self
278+
.checkpoints_dir
279+
.join(format!("temp_checkpoint_{batch_to_commit}_{rand_suffix}"));
280+
281+
// For re-execution we need to use a checkpoint to the previous state
282+
// (i.e. checkpoint of the state to the latest block from the previous
283+
// batch, or the state of the genesis if this is the first batch).
284+
// We already have this initial checkpoint as part of the L1Committer
285+
// struct, but we need to create a one-time copy of it because
286+
// we still need to use the current checkpoint store later for witness
287+
// generation.
288+
289+
let (one_time_checkpoint_store, one_time_checkpoint_blockchain) = self
290+
.create_checkpoint(&self.current_checkpoint_store, &one_time_checkpoint_path)
291+
.await?;
292+
273293
// Try to prepare batch
294+
let result = self
295+
.prepare_batch_from_block(
296+
*last_block,
297+
batch_to_commit,
298+
one_time_checkpoint_store,
299+
one_time_checkpoint_blockchain,
300+
)
301+
.await;
302+
303+
if one_time_checkpoint_path.exists() {
304+
let _ = remove_dir_all(&one_time_checkpoint_path).inspect_err(|e| {
305+
error!(
306+
"Failed to remove one-time checkpoint directory at path {one_time_checkpoint_path:?}. Should be removed manually. Error: {}", e.to_string()
307+
)
308+
});
309+
}
310+
274311
let (
275312
blobs_bundle,
276313
new_state_root,
277314
message_hashes,
278315
privileged_transactions_hash,
279316
last_block_of_batch,
280-
) = self
281-
.prepare_batch_from_block(*last_block, batch_to_commit)
282-
.await?;
317+
) = result?;
283318

284319
if *last_block == last_block_of_batch {
285320
debug!("No new blocks to commit, skipping");
@@ -370,6 +405,8 @@ impl L1Committer {
370405
&mut self,
371406
mut last_added_block_number: BlockNumber,
372407
batch_number: u64,
408+
one_time_checkpoint_store: Store,
409+
one_time_checkpoint_blockchain: Arc<Blockchain>,
373410
) -> Result<(BlobsBundle, H256, Vec<H256>, H256, BlockNumber), CommitterError> {
374411
let first_block_of_batch = last_added_block_number + 1;
375412
let mut blobs_bundle = BlobsBundle::default();
@@ -391,21 +428,6 @@ impl L1Committer {
391428

392429
info!("Preparing state diff from block {first_block_of_batch}, {batch_number}");
393430

394-
let one_time_checkpoint_path = self
395-
.checkpoints_dir
396-
.join(format!("temp_checkpoint_batch_{batch_number}"));
397-
398-
// For re-execution we need to use a checkpoint to the previous state
399-
// (i.e. checkpoint of the state to the latest block from the previous
400-
// batch, or the state of the genesis if this is the first batch).
401-
// We already have this initial checkpoint as part of the L1Committer
402-
// struct, but we need to create a one-time copy of it because
403-
// we still need to use the current checkpoint store later for witness
404-
// generation.
405-
let (one_time_checkpoint_store, one_time_checkpoint_blockchain) = self
406-
.create_checkpoint(&self.current_checkpoint_store, &one_time_checkpoint_path)
407-
.await?;
408-
409431
loop {
410432
let block_to_commit_number = last_added_block_number + 1;
411433

@@ -660,12 +682,6 @@ impl L1Committer {
660682
let privileged_transactions_hash =
661683
compute_privileged_transactions_hash(privileged_transactions_hashes)?;
662684

663-
remove_dir_all(&one_time_checkpoint_path).map_err(|e| {
664-
CommitterError::FailedToCreateCheckpoint(format!(
665-
"Failed to remove one-time checkpoint directory {one_time_checkpoint_path:?}: {e}"
666-
))
667-
})?;
668-
669685
Ok((
670686
blobs_bundle,
671687
new_state_root,
@@ -686,11 +702,30 @@ impl L1Committer {
686702
)
687703
.await?;
688704

689-
let batch_witness = self
690-
.current_checkpoint_blockchain
705+
let rand_suffix: u32 = rand::thread_rng().r#gen();
706+
let one_time_checkpoint_path = self.checkpoints_dir.join(format!(
707+
"temp_checkpoint_witness_{}_{rand_suffix}",
708+
batch.number
709+
));
710+
// We need to create a one-time checkpoint copy because if witness generation fails the checkpoint would be modified
711+
let (_, one_time_checkpoint_blockchain) = self
712+
.create_checkpoint(&self.current_checkpoint_store, &one_time_checkpoint_path)
713+
.await?;
714+
715+
let result = one_time_checkpoint_blockchain
691716
.generate_witness_for_blocks_with_fee_configs(&blocks, Some(&fee_configs))
692717
.await
693-
.map_err(CommitterError::FailedToGenerateBatchWitness)?;
718+
.map_err(CommitterError::FailedToGenerateBatchWitness);
719+
720+
if one_time_checkpoint_path.exists() {
721+
let _ = remove_dir_all(&one_time_checkpoint_path).inspect_err(|e| {
722+
error!(
723+
"Failed to remove one-time checkpoint directory at path {one_time_checkpoint_path:?}. Should be removed manually. Error: {}", e.to_string()
724+
)
725+
});
726+
}
727+
728+
let batch_witness = result?;
694729

695730
// We still need to differentiate the validium case because for validium
696731
// we are generating the BlobsBundle with BlobsBundle::default which
@@ -755,7 +790,7 @@ impl L1Committer {
755790
// We need to skip checkpoint creation if the directory already exists.
756791
// Sometimes the commit_next_batch task is retried after a failure, and in
757792
// that case we would try to create a checkpoint again at the same path,
758-
// causing an lock error under rocksdb feature.
793+
// causing a lock error under rocksdb feature.
759794
if new_checkpoint_path.exists() {
760795
debug!("Checkpoint at path {new_checkpoint_path:?} already exists, skipping creation");
761796
return Ok(());
@@ -778,7 +813,7 @@ impl L1Committer {
778813
/// 1. Creates a checkpoint of the provided store at the specified path.
779814
/// 2. Initializes a new store and blockchain for the checkpoint.
780815
/// 3. Regenerates the head state in the checkpoint store.
781-
/// 4. Validates that the checkpoint store's head block number and latest block match those of the original store.
816+
/// 4. TODO: Validates that the checkpoint contains the needed state root.
782817
async fn create_checkpoint(
783818
&self,
784819
checkpointee: &Store,
@@ -806,54 +841,8 @@ impl L1Committer {
806841
self.blockchain.options.clone(),
807842
));
808843

809-
let checkpoint_head_block_number = checkpoint_store.get_latest_block_number().await?;
810-
811-
let db_head_block_number = checkpointee.get_latest_block_number().await?;
812-
813-
if checkpoint_head_block_number != db_head_block_number {
814-
return Err(CommitterError::FailedToCreateCheckpoint(
815-
"checkpoint store head block number does not match main store head block number before regeneration".to_string(),
816-
));
817-
}
818-
819844
regenerate_head_state(&checkpoint_store, &checkpoint_blockchain).await?;
820845

821-
let checkpoint_latest_block_number = checkpoint_store.get_latest_block_number().await?;
822-
823-
let db_latest_block_number = checkpointee.get_latest_block_number().await?;
824-
825-
let checkpoint_latest_block = checkpoint_store
826-
.get_block_by_number(checkpoint_latest_block_number)
827-
.await?
828-
.ok_or(CommitterError::FailedToCreateCheckpoint(
829-
"latest block not found in checkpoint store".to_string(),
830-
))?;
831-
832-
let db_latest_block = checkpointee
833-
.get_block_by_number(db_latest_block_number)
834-
.await?
835-
.ok_or(CommitterError::FailedToCreateCheckpoint(
836-
"latest block not found in main store".to_string(),
837-
))?;
838-
839-
if !checkpoint_store.has_state_root(checkpoint_latest_block.header.state_root)? {
840-
return Err(CommitterError::FailedToCreateCheckpoint(
841-
"checkpoint store state is not regenerated properly".to_string(),
842-
));
843-
}
844-
845-
if checkpoint_latest_block_number != db_head_block_number {
846-
return Err(CommitterError::FailedToCreateCheckpoint(
847-
"checkpoint store latest block number does not match main store head block number after regeneration".to_string(),
848-
));
849-
}
850-
851-
if checkpoint_latest_block.hash() != db_latest_block.hash() {
852-
return Err(CommitterError::FailedToCreateCheckpoint(
853-
"checkpoint store latest block hash does not match main store latest block hash after regeneration".to_string(),
854-
));
855-
}
856-
857846
Ok((checkpoint_store, checkpoint_blockchain))
858847
}
859848

0 commit comments

Comments
 (0)