From 8bd73864be62c92e07b3099f00cfc701d5505ef0 Mon Sep 17 00:00:00 2001 From: Pana Date: Fri, 22 Aug 2025 18:34:06 +0800 Subject: [PATCH 1/9] add dump subcommand which can dump all espace account as json file --- Cargo.lock | 71 +++++ Cargo.toml | 1 + bins/conflux/src/cli.rs | 4 + bins/conflux/src/command/dump.rs | 154 +++++++++++ bins/conflux/src/command/mod.rs | 1 + bins/conflux/src/main.rs | 15 +- crates/cfx_types/src/lib.rs | 3 + crates/client/src/lib.rs | 1 + crates/client/src/state_dump.rs | 248 ++++++++++++++++++ crates/dbs/statedb/src/lib.rs | 45 ++++ .../dbs/storage/src/impls/replicated_state.rs | 3 + crates/primitives/src/storage_key.rs | 8 + crates/rpc/rpc-eth-types/Cargo.toml | 1 + crates/rpc/rpc-eth-types/src/lib.rs | 2 + crates/rpc/rpc-eth-types/src/state_dump.rs | 112 ++++++++ crates/rpc/rpc-primitives/src/bytes.rs | 26 ++ tools/consensus_bench/Cargo.lock | 77 ++++++ tools/evm-spec-tester/Cargo.lock | 71 +++++ 18 files changed, 842 insertions(+), 1 deletion(-) create mode 100644 bins/conflux/src/command/dump.rs create mode 100644 crates/client/src/state_dump.rs create mode 100644 crates/rpc/rpc-eth-types/src/state_dump.rs diff --git a/Cargo.lock b/Cargo.lock index a6d3820b93..de5a329d85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1513,6 +1513,7 @@ dependencies = [ "rlp 0.4.6", "serde", "serde_json", + "serde_with", "similar-asserts", "thiserror 2.0.11", ] @@ -1964,6 +1965,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-targets 0.52.6", ] @@ -2605,6 +2607,41 @@ dependencies = [ "hibitset", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.96", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.96", +] + [[package]] name = "db" version = "0.1.0" @@ -4491,6 +4528,12 @@ dependencies = [ "syn 2.0.96", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.3" @@ -7987,6 +8030,34 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" +dependencies = [ + "base64 0.22.1", + "chrono", + "hex", + "serde", + "serde_derive", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.96", +] + [[package]] name = "serde_yaml" version = "0.8.26" diff --git a/Cargo.toml b/Cargo.toml index e9aed9dcd7..171ddd679d 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -272,6 +272,7 @@ serde = { version = "1.0", features = [ ], default-features = false } serde_json = { version = "1.0", default-features = false, features = ["alloc"] } serde_derive = { version = "1.0", default-features = false } +serde_with = { version = "3", default-features = false, features = ["macros"] } hex = "0.4" rustc-hex = "2.1" hex-literal = "1.0" diff --git a/bins/conflux/src/cli.rs b/bins/conflux/src/cli.rs index 15ec87e631..08f2454212 100644 --- a/bins/conflux/src/cli.rs +++ b/bins/conflux/src/cli.rs @@ -1,3 +1,4 @@ +use crate::command::dump::DumpCommand; use clap::{Args, Parser, Subcommand, ValueEnum}; /// Conflux client @@ -267,6 +268,9 @@ pub enum Commands { /// Manage accounts #[command(subcommand_required = true, arg_required_else_help = true)] Account(AccountSubcommands), + /// Dump eSpace account state at a given block number + #[command(subcommand_required = false, arg_required_else_help = false)] + Dump(DumpCommand), /// RPC based subcommands to query blockchain information and send /// transactions #[command(subcommand_required = true, arg_required_else_help = true)] diff --git a/bins/conflux/src/command/dump.rs b/bins/conflux/src/command/dump.rs new file mode 100644 index 0000000000..985bf022d0 --- /dev/null +++ b/bins/conflux/src/command/dump.rs @@ -0,0 +1,154 @@ +use cfx_types::Address; +use clap::{ArgMatches, Args}; +use client::{ + configuration::Configuration, + state_dump::{dump_whole_state, StateDumpConfig}, +}; +use parking_lot::{Condvar, Mutex}; +use serde_json; +use std::{collections::HashMap, fs, path::Path, str::FromStr, sync::Arc}; + +#[derive(Args, Debug)] +pub struct DumpCommand { + /// Include accounts for which we don't have the address (missing preimage) + // #[arg(id = "incompletes", long = "incompletes")] + // incompletes: bool, + /// Print streaming JSON iteratively, delimited by newlines + // #[arg(id = "iterative", long = "iterative", default_value = "true")] + // iterative: bool, + /// Max number of elements (0 = no limit) + #[arg( + id = "limit", + long = "limit", + value_name = "NUM", + default_value = "0" + )] + limit: u64, + /// Target block number, if not specified, the latest block will be used + #[arg(id = "block", long = "block", value_name = "NUM")] + block: Option, + /// Exclude contract code (save db lookups) + #[arg(id = "nocode", long = "nocode")] + no_code: bool, + /// Exclude storage entries (save db lookups) + #[arg(id = "nostorage", long = "nostorage")] + no_storage: bool, + /// Start position address + #[arg( + id = "start", + long = "start", + value_name = "String", + default_value = "0x0000000000000000000000000000000000000000" + )] + start: String, + /// Path to the output folder (default: ./dump) + #[arg(id = "output", long = "output", value_name = "PATH")] + output: Option, + /// Multi file mode + #[arg(id = "multifile", long = "multifile")] + multi_file: bool, +} + +impl DumpCommand { + pub fn parse(matches: &ArgMatches) -> Result { + let output = matches.get_one::("output").cloned(); + Ok(Self { + block: matches.get_one::("block").cloned(), + // incompletes: matches.get_flag("incompletes"), + // iterative: matches.get_flag("iterative"), + limit: matches.get_one::("limit").cloned().unwrap_or(0), + no_code: matches.get_flag("nocode"), + no_storage: matches.get_flag("nostorage"), + start: matches.get_one::("start").cloned().unwrap_or( + "0x0000000000000000000000000000000000000000".to_string(), + ), + output, + multi_file: matches.get_flag("multifile"), + }) + } + + fn 
get_state_dump_config(&self) -> Result { + let address_str = self.start.strip_prefix("0x").unwrap_or(&self.start); + let start_address = Address::from_str(address_str) + .map_err(|e| format!("Invalid address: {}", e))?; + Ok(StateDumpConfig { + start_address, + limit: self.limit, + block: self.block, + no_code: self.no_code, + no_storage: self.no_storage, + }) + } + + pub fn execute(&self, conf: &mut Configuration) -> Result { + // Determine output directory + let output_path = match self.output { + Some(ref path) => path, + None => { + "./dump" // Default to "./dump" if no output specified + } + }; + + // Ensure the directory exists + if !Path::new(output_path).exists() { + fs::create_dir_all(output_path).map_err(|e| { + format!("Failed to create output directory: {}", e) + })?; + } + + let exit = Arc::new((Mutex::new(false), Condvar::new())); + + let config = self.get_state_dump_config()?; + let state = dump_whole_state(conf, exit, &config)?; + let total_accounts = state.accounts.len(); + + if self.multi_file { + // Write to multiple files + for (address, account_state) in state.accounts { + // Create filename using address (without 0x prefix) + let filename = format!("{}.json", address); + let file_path = Path::new(output_path).join(&filename); + + // Serialize account_state to JSON + let json_content = serde_json::to_string_pretty(&account_state) + .map_err(|e| { + format!( + "Failed to serialize account state for {}: {}", + address, e + ) + })?; + + // Write to file + fs::write(&file_path, json_content).map_err(|e| { + format!( + "Failed to write file {}: {}", + file_path.display(), + e + ) + })?; + } + + // Write meta info + let meta_file_path = Path::new(output_path).join("meta.json"); + let mut meta_info = HashMap::new(); + meta_info.insert("root".to_string(), state.root); + let meta_content = serde_json::to_string_pretty(&meta_info) + .map_err(|e| format!("Failed to serialize state: {}", e))?; + fs::write(&meta_file_path, meta_content) + .map_err(|e| format!("Failed to write meta file: {}", e))?; + } else { + // Write to a single file + let file_path = Path::new(output_path).join("state.json"); + let json_content = serde_json::to_string_pretty(&state) + .map_err(|e| format!("Failed to serialize state: {}", e))?; + fs::write(&file_path, json_content).map_err(|e| { + format!("Failed to write file {}: {}", file_path.display(), e) + })?; + } + + Ok(format!( + "Dumped {} account state to output directory: {}", + total_accounts, output_path + )) + } +} diff --git a/bins/conflux/src/command/mod.rs b/bins/conflux/src/command/mod.rs index f908b8674e..fc391d2412 100644 --- a/bins/conflux/src/command/mod.rs +++ b/bins/conflux/src/command/mod.rs @@ -3,5 +3,6 @@ // See http://www.gnu.org/licenses/ pub mod account; +pub mod dump; pub mod helpers; pub mod rpc; diff --git a/bins/conflux/src/main.rs b/bins/conflux/src/main.rs index 8d40263a2c..b14f9b57fa 100644 --- a/bins/conflux/src/main.rs +++ b/bins/conflux/src/main.rs @@ -19,7 +19,10 @@ use client::{ full::FullClient, light::LightClient, }; -use command::account::{AccountCmd, ImportAccounts, ListAccounts, NewAccount}; +use command::{ + account::{AccountCmd, ImportAccounts, ListAccounts, NewAccount}, + dump::DumpCommand, +}; use log::{info, LevelFilter}; use log4rs::{ append::{console::ConsoleAppender, file::FileAppender}, @@ -144,6 +147,16 @@ fn handle_sub_command(matches: &ArgMatches) -> Result, String> { return Ok(Some(execute_output)); } + // dump sub-commands + if let Some(("dump", dump_matches)) = matches.subcommand() { + let dump_cmd = 
DumpCommand::parse(dump_matches).map_err(|e| { + format!("Failed to parse dump command arguments: {}", e) + })?; + let mut conf = Configuration::parse(&matches)?; + let execute_output = dump_cmd.execute(&mut conf)?; + return Ok(Some(execute_output)); + } + // general RPC commands let mut subcmd_matches = matches; while let Some(m) = subcmd_matches.subcommand() { diff --git a/crates/cfx_types/src/lib.rs b/crates/cfx_types/src/lib.rs index 3324fbf8bf..ea5f3d1258 100644 --- a/crates/cfx_types/src/lib.rs +++ b/crates/cfx_types/src/lib.rs @@ -7,6 +7,9 @@ pub use ethereum_types::{ H160, H256, H512, H520, H64, U128, U256, U512, U64, }; +pub type StorageKey = H256; +pub type StorageValue = U256; + mod space; pub use space::{Space, SpaceMap}; diff --git a/crates/client/src/lib.rs b/crates/client/src/lib.rs index 12e21d175c..ea9caa24aa 100644 --- a/crates/client/src/lib.rs +++ b/crates/client/src/lib.rs @@ -13,3 +13,4 @@ mod node_types; pub mod rpc; pub use cfx_config as configuration; pub use node_types::{archive, full, light}; +pub mod state_dump; diff --git a/crates/client/src/state_dump.rs b/crates/client/src/state_dump.rs new file mode 100644 index 0000000000..7c8a29221d --- /dev/null +++ b/crates/client/src/state_dump.rs @@ -0,0 +1,248 @@ +use crate::common::initialize_not_light_node_modules; +use cfx_config::Configuration; +use cfx_rpc_eth_types::{AccountState, StateDump, EOA_STORAGE_ROOT_H256}; +use cfx_rpc_primitives::Bytes; +use cfx_statedb::{StateDbExt, StateDbGeneric}; +use cfx_storage::state_manager::StateManagerTrait; +use cfx_types::{Address, Space, H256}; +use cfxcore::NodeType; +use keccak_hash::{keccak, KECCAK_EMPTY}; +use parking_lot::{Condvar, Mutex}; +use primitives::{ + Account, SkipInputCheck, StorageKey, StorageKeyWithSpace, StorageValue, +}; +use rlp::Rlp; +use std::{ + collections::{BTreeMap, HashMap}, + ops::Deref, + sync::Arc, + thread, + time::Duration, +}; + +pub struct StateDumpConfig { + pub start_address: Address, + pub limit: u64, + pub block: Option, + pub no_code: bool, + pub no_storage: bool, +} + +pub fn dump_whole_state( + conf: &mut Configuration, exit_cond_var: Arc<(Mutex, Condvar)>, + config: &StateDumpConfig, +) -> Result { + println!("Preparing state..."); + let ( + data_man, + _, + _, + consensus, + sync_service, + _, + _, + _, + _, + _, + _, + _, + _, + _, + _, + _, + ) = initialize_not_light_node_modules( + conf, + exit_cond_var, + NodeType::Archive, + )?; + + while sync_service.catch_up_mode() { + thread::sleep(Duration::from_secs(1)); + } + + /* + 1. Get the state at the target epoch, or the latest state if target_epoch is None + 2. Iterate through the state, and dump the account state + */ + + let state_manager = data_man.storage_manager.clone(); + let target_height = match config.block { + Some(epoch) => epoch, + None => consensus.latest_confirmed_epoch_number(), + }; + + println!("Start to dump state at epoch: {:?}", target_height); + + let epoch_hash = consensus + .get_hash_from_epoch_number(target_height.into()) + .map_err(|e| e.to_string())?; + + let block = consensus + .get_phantom_block_by_hash(&epoch_hash, false)? + .expect("Failed to get block"); + + let state_root = block.pivot_header.deferred_state_root(); + + let state_index = data_man + .get_state_readonly_index(&epoch_hash) + .ok_or("Failed to get state index")?; + + let state = state_manager + .get_state_no_commit(state_index, true, Some(Space::Ethereum)) + .map_err(|e| e.to_string())? 
+ .ok_or("Failed to get state")?; + + let mut state_db = StateDbGeneric::new(state); + + let accounts = + export_space_accounts(&mut state_db, Space::Ethereum, config) + .map_err(|e| e.to_string())?; + + let state_dump = StateDump { + root: state_root.clone(), + accounts, + next: None, + }; + + Ok(state_dump) +} + +fn export_space_accounts( + state: &mut StateDbGeneric, space: Space, config: &StateDumpConfig, +) -> Result, Box> { + let empty_key = StorageKey::EmptyKey.with_space(space); + let kv_pairs = state.read_all(empty_key, None)?; + + let mut accounts_map = BTreeMap::new(); + let mut codes_map = HashMap::new(); + let mut storage_map = HashMap::new(); + + for (key, value) in kv_pairs { + let storage_key_with_space = + StorageKeyWithSpace::from_key_bytes::(&key); + if storage_key_with_space.space != space { + continue; + } + match storage_key_with_space.key { + StorageKey::AccountKey(address_bytes) => { + let address = Address::from_slice(address_bytes); + println!("Find account: {:?}", address); + let account = + Account::new_from_rlp(address, &Rlp::new(&value))?; + accounts_map.insert(address, account); + } + StorageKey::CodeKey { + address_bytes, + code_hash_bytes: _, + } => { + if config.no_code { + continue; + } + + let address = Address::from_slice(address_bytes); + let code = Bytes(value.to_vec()); + codes_map.insert(address, code); + } + StorageKey::StorageKey { + address_bytes, + storage_key, + } => { + if config.no_storage { + continue; + } + + let address = Address::from_slice(address_bytes); + let h256_storage_key = H256::from_slice(storage_key); + let storage_value_with_owner: StorageValue = + rlp::decode(&value)?; + let account_storage_map = + storage_map.entry(address).or_insert(BTreeMap::new()); + account_storage_map + .insert(h256_storage_key, storage_value_with_owner.value); + } + _ => { + continue; + } + } + } + + let mut accounts = BTreeMap::new(); + + for (address, account) in accounts_map { + let is_contract = account.code_hash != KECCAK_EMPTY; + // conflux state tree don't have storage root, so we use a fixed value + let root = EOA_STORAGE_ROOT_H256; + let address_hash = keccak(address); + + let code = if is_contract { + codes_map.get(&address).cloned() + } else { + None + }; + + let storage = if is_contract { + storage_map.get(&address).cloned() + } else { + None + }; + + let account_state = AccountState { + balance: account.balance, + nonce: account.nonce.as_u64(), + root, + code_hash: account.code_hash, + code, + storage, + address: Some(address), + address_hash: Some(address_hash), + }; + + accounts.insert(address, account_state); + + if config.limit > 0 && accounts.len() >= config.limit as usize { + break; + } + } + + Ok(accounts) +} + +#[allow(unused)] +fn get_account_state( + state: &mut StateDbGeneric, account: &Account, config: &StateDumpConfig, +) -> Result> { + let address = account.address(); + + let is_contract = account.code_hash != KECCAK_EMPTY; + // get code + let code = if is_contract && !config.no_code { + state + .get_code(address, &account.code_hash)? 
+ .map(|code_info| Bytes(code_info.code.deref().to_vec())) + } else { + None + }; + + let storage = if is_contract && !config.no_storage { + let storage = state.get_account_storage_entries(&address, None)?; + Some(storage) + } else { + None + }; + + // conflux state tree don't have storage root, so we use a fixed value + let root = EOA_STORAGE_ROOT_H256; + + let address_hash = keccak(address.address); + + Ok(AccountState { + balance: account.balance, + nonce: account.nonce.as_u64(), + root, + code_hash: account.code_hash, + code, + storage, + address: Some(address.address), + address_hash: Some(address_hash), + }) +} diff --git a/crates/dbs/statedb/src/lib.rs b/crates/dbs/statedb/src/lib.rs index 543128a090..9261cfeba2 100644 --- a/crates/dbs/statedb/src/lib.rs +++ b/crates/dbs/statedb/src/lib.rs @@ -11,6 +11,8 @@ pub mod global_params; #[cfg(feature = "testonly_code")] mod in_memory_storage; mod statedb_ext; +use cfx_types::H256; +use primitives::StorageValue; use cfx_db_errors::statedb as error; @@ -179,6 +181,13 @@ mod impls { self.modify_single_value(key, None) } + pub fn read_all( + &mut self, key_prefix: StorageKeyWithSpace, + debug_record: Option<&mut ComputeEpochDebugRecord>, + ) -> Result> { + self.delete_all::(key_prefix, debug_record) + } + pub fn delete_all( &mut self, key_prefix: StorageKeyWithSpace, debug_record: Option<&mut ComputeEpochDebugRecord>, @@ -253,6 +262,42 @@ mod impls { Ok(deleted_kvs) } + pub fn get_account_storage_entries( + &mut self, address: &AddressWithSpace, + debug_record: Option<&mut ComputeEpochDebugRecord>, + ) -> Result> + { + let mut storage = BTreeMap::new(); + + let storage_prefix = + StorageKey::new_storage_root_key(&address.address) + .with_space(address.space); + + let kv_pairs = self.read_all(storage_prefix, debug_record)?; + for (key, value) in kv_pairs { + let storage_key_with_space = + StorageKeyWithSpace::from_key_bytes::(&key); + if let StorageKey::StorageKey { + address_bytes: _, + storage_key, + } = storage_key_with_space.key + { + let h256_storage_key = H256::from_slice(storage_key); + let storage_value_with_owner: StorageValue = + rlp::decode(&value)?; + storage.insert( + h256_storage_key, + storage_value_with_owner.value, + ); + } else { + trace!("Not an storage key: {:?}", storage_key_with_space); + continue; + } + } + + Ok(storage) + } + /// Load the storage layout for state commits. /// Modification to storage layout is the same as modification of /// any other key-values. 
But as required by MPT structure we diff --git a/crates/dbs/storage/src/impls/replicated_state.rs b/crates/dbs/storage/src/impls/replicated_state.rs index ed8ce9ab01..5445fdab7a 100644 --- a/crates/dbs/storage/src/impls/replicated_state.rs +++ b/crates/dbs/storage/src/impls/replicated_state.rs @@ -160,6 +160,7 @@ enum OwnedStorageKey { }, DepositListKey(Vec), VoteListKey(Vec), + EmptyKey, } impl OwnedStorageKey { @@ -194,6 +195,7 @@ impl OwnedStorageKey { OwnedStorageKey::VoteListKey(k) => { StorageKey::VoteListKey(k.as_slice()) } + OwnedStorageKey::EmptyKey => StorageKey::EmptyKey, } } } @@ -245,6 +247,7 @@ impl<'a> From> for OwnedStorageKey { StorageKey::VoteListKey(k) => { OwnedStorageKey::VoteListKey(k.to_vec()) } + StorageKey::EmptyKey => OwnedStorageKey::EmptyKey, } } } diff --git a/crates/primitives/src/storage_key.rs b/crates/primitives/src/storage_key.rs index 43cd9073be..f55ea534ea 100644 --- a/crates/primitives/src/storage_key.rs +++ b/crates/primitives/src/storage_key.rs @@ -58,6 +58,8 @@ pub enum StorageKey<'a> { }, DepositListKey(&'a [u8]), VoteListKey(&'a [u8]), + // Empty key is used to traverse all key and value pairs. + EmptyKey, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -195,6 +197,9 @@ impl<'a> StorageKeyWithSpace<'a> { StorageKey::VoteListKey(address_bytes) => { delta_mpt_storage_key::new_vote_list_key(address_bytes, padding) } + StorageKey::EmptyKey => { + return vec![]; + } }; return if self.space == Space::Native { @@ -284,6 +289,9 @@ impl<'a> StorageKeyWithSpace<'a> { key } + StorageKey::EmptyKey => { + return vec![]; + } }; return if self.space == Space::Native { diff --git a/crates/rpc/rpc-eth-types/Cargo.toml b/crates/rpc/rpc-eth-types/Cargo.toml index 1e3562271f..548dfcda95 100644 --- a/crates/rpc/rpc-eth-types/Cargo.toml +++ b/crates/rpc/rpc-eth-types/Cargo.toml @@ -15,6 +15,7 @@ license-file.workspace = true [dependencies] serde = { workspace = true } serde_json = { workspace = true } +serde_with = { workspace = true, features = ["base64"] } cfx-types = { workspace = true } cfx-bytes = { workspace = true } thiserror = { workspace = true } diff --git a/crates/rpc/rpc-eth-types/src/lib.rs b/crates/rpc/rpc-eth-types/src/lib.rs index 29afd76396..9424798937 100644 --- a/crates/rpc/rpc-eth-types/src/lib.rs +++ b/crates/rpc/rpc-eth-types/src/lib.rs @@ -11,6 +11,7 @@ mod log; mod receipt; mod simulate; mod state; +mod state_dump; mod sync; pub mod trace; pub mod trace_filter; @@ -35,6 +36,7 @@ pub use state::{ AccountOverride, AccountStateOverrideMode, EvmOverrides, RpcAccountOverride, RpcStateOverride, StateOverride, }; +pub use state_dump::*; pub use sync::{SyncInfo, SyncStatus}; pub use trace::*; pub use trace_filter::TraceFilter; diff --git a/crates/rpc/rpc-eth-types/src/state_dump.rs b/crates/rpc/rpc-eth-types/src/state_dump.rs new file mode 100644 index 0000000000..17b46ff323 --- /dev/null +++ b/crates/rpc/rpc-eth-types/src/state_dump.rs @@ -0,0 +1,112 @@ +use cfx_rpc_primitives::Bytes; +use cfx_types::{Address, StorageKey, StorageValue, H256, U256}; +use serde::{Deserialize, Serialize}; +use serde_with::{base64::Base64, serde_as}; +use std::collections::BTreeMap; + +// Empty storage trie root +// 0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421 +pub const EOA_STORAGE_ROOT_H256: H256 = H256([ + 0x56, 0xe8, 0x1f, 0x17, 0x1b, 0xcc, 0x55, 0xa6, 0xff, 0x83, 0x45, 0xe6, + 0x92, 0xc0, 0xf8, 0x6e, 0x5b, 0x48, 0xe0, 0x1b, 0x99, 0x6c, 0xad, 0xc0, + 0x01, 0x62, 0x2f, 0xb5, 0xe3, 0x63, 0xb4, 0x21, +]); + +/// Represents the state of an 
account in the Ethereum state trie. +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct AccountState { + /// The balance of the account + pub balance: U256, + /// The nonce of the account + pub nonce: u64, + /// The root hash of the account + pub root: H256, + /// The code hash of the account + pub code_hash: H256, + /// The code of the account + #[serde(skip_serializing_if = "Option::is_none")] + pub code: Option, + /// A map of storage slots, indexed by storage key + #[serde(skip_serializing_if = "Option::is_none")] + pub storage: Option>, + /// Address only present in iterative (line-by-line) mode + #[serde(skip_serializing_if = "Option::is_none")] + pub address: Option
, + /// If we don't have address, we can output the key + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "key")] + pub address_hash: Option, +} + +/// Represents a state dump, which includes the root hash of the state trie, +/// Note: There are some differences in JSON serialization compared to geth's +/// output, such as: +/// - The root field in geth doesn't have a 0x prefix, while here it does +/// - The balance field of accounts in geth is a decimal string, while here it's +/// a hexadecimal string +/// - The value field of storage in geth doesn't have a 0x prefix, while here it +/// does +#[serde_as] +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct StateDump { + /// The root hash of the state trie + pub root: H256, + /// A map of accounts, indexed by address + pub accounts: BTreeMap, + /// Next can be set to represent that this dump is only partial, and Next + /// is where an iterator should be positioned in order to continue the + /// dump. + #[serde(skip_serializing_if = "Option::is_none")] + #[serde_as(as = "Option")] + pub next: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn test_state_dump_serialization() { + let json_input = json!({ + "root": "0x5a1f70040e967bef6a32ee65e7fa2c3ea580e277e42cf3e3daf60a677ef18127", + "accounts": { + "0x000baa01f2a21d29dce20b88032752b990dac124": { + "balance": "0x10000000000000000000", + "nonce": 0, + "root": "0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421", + "codeHash": "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470", + "address": "0x000baa01f2a21d29dce20b88032752b990dac124", + "key": "0x000108a52c8b050f1098144f89e0b8e7e41310ea139f020b690b56e424508f4c" + }, + "0x201d43c399f2495e19a591eab93fa3384ec6c72e": { + "balance": "0x0", + "nonce": 1, + "root": "0x297c068574a50ffef03843dda4075c3b6b5790be78b30e3c9df4e02e4ba9125c", + "codeHash": "0xbe6e2f7cdf118a0b2092927e0a0cf4a54316165ac5172bcda327939e04c9818f", + "code": "0x36602c57343d527f9e4ac34f21c619cefc926c8bd93b54bf5a39c7ab2127a895af1cc0691d7e3dff593da1005b363d3d373d3d3d3d610076806062363936013d732efa42b7d7591cbf436cce4973f900d8314c86dd5af43d3d93803e606057fd5bf34ad30ecfb92b9311a853d296c515fb0d6505d89c68db32372fd77e57b0879f97224bb89dac59e267486b38ee20309c8cc1acfb854eb9303a31c50a42f48a8fcc63b84d60abf8c5408ea569569af66c0cc3a76f6e00000000000000000000000000000000000000000000000000000000000af9ac0076", + "storage": { + "0x0000000000000000000000000000000000000000000000000000000000000000": "0x100000000000000000000000000686f559c", + "0x0000000000000000000000000000000000000000000000000000000000000002": "0x1", + "0x0000000000000000000000000000000000000000000000000000000000000008": "0xdead000000000000000000000000000000000000000000000000000000000000", + "0x000000000000000000000000000000000000000000000000000000000000000a": "0x1", + "0x405787fa12a823e0f2b7631cc41b3ba8828b3321ca811111fa75cd3aa3bb5ace": "0xffffffff", + "0x405787fa12a823e0f2b7631cc41b3ba8828b3321ca811111fa75cd3aa3bb5acf": "0x4ad30ecfb92b9311a853d296c515fb0d6505d89c", + "0x405787fa12a823e0f2b7631cc41b3ba8828b3321ca811111fa75cd3aa3bb5ad1": "0x68db32372fd77e57b0879f97224bb89dac59e267486b38ee20309c8cc1acfb85", + "0x405787fa12a823e0f2b7631cc41b3ba8828b3321ca811111fa75cd3aa3bb5ad2": "0x686f559c00000000000000000000000000000001" + }, + "address": "0x201d43c399f2495e19a591eab93fa3384ec6c72e", + "key": "0x0000e65fdfaa2681656a211a55bc6fdcfe918f34cc037407ba12874c16cd7da9" + } + }, + 
"next": "AAEx7TCXUlkysLMMJcS/W974Ue7bbhgSK3EUHVNFCtQ=" + }); + + let parsed: StateDump = + serde_json::from_value(json_input.clone()).unwrap(); + let output = serde_json::to_value(&parsed).unwrap(); + assert_eq!(json_input, output); + } +} diff --git a/crates/rpc/rpc-primitives/src/bytes.rs b/crates/rpc/rpc-primitives/src/bytes.rs index 79bf4c7a21..cbf8b79792 100644 --- a/crates/rpc/rpc-primitives/src/bytes.rs +++ b/crates/rpc/rpc-primitives/src/bytes.rs @@ -20,6 +20,10 @@ //! Serializable wrapper around vector of bytes +use core::{ + borrow::Borrow, + ops::{Deref, DerefMut}, +}; use rustc_hex::{FromHex, ToHex}; use serde::{ de::{Error, Visitor}, @@ -49,6 +53,28 @@ impl Into> for Bytes { fn into(self) -> Vec { self.0 } } +impl Deref for Bytes { + type Target = Vec; + + #[inline] + fn deref(&self) -> &Self::Target { &self.0 } +} + +impl DerefMut for Bytes { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } +} + +impl AsRef<[u8]> for Bytes { + #[inline] + fn as_ref(&self) -> &[u8] { self.0.as_ref() } +} + +impl Borrow<[u8]> for Bytes { + #[inline] + fn borrow(&self) -> &[u8] { self.as_ref() } +} + impl Serialize for Bytes { fn serialize(&self, serializer: S) -> Result where S: Serializer { diff --git a/tools/consensus_bench/Cargo.lock b/tools/consensus_bench/Cargo.lock index 19d95df6c3..1e6d79fcdc 100644 --- a/tools/consensus_bench/Cargo.lock +++ b/tools/consensus_bench/Cargo.lock @@ -632,6 +632,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "base64ct" version = "1.1.1" @@ -1175,6 +1181,7 @@ dependencies = [ "rlp 0.4.6", "serde", "serde_json", + "serde_with", "similar-asserts", "thiserror 2.0.11", ] @@ -1533,6 +1540,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-targets 0.52.6", ] @@ -1935,6 +1943,41 @@ dependencies = [ "hibitset", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.96", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.96", +] + [[package]] name = "db" version = "0.1.0" @@ -3450,6 +3493,12 @@ dependencies = [ "syn 2.0.96", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.3" @@ -5953,6 +6002,34 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" +dependencies = [ + "base64 0.22.1", + "chrono", + "hex", + "serde", + "serde_derive", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.96", +] + [[package]] name = "serde_yaml" version = "0.8.26" diff --git a/tools/evm-spec-tester/Cargo.lock b/tools/evm-spec-tester/Cargo.lock index 64d0b3416c..bdcef8601b 100644 --- a/tools/evm-spec-tester/Cargo.lock +++ b/tools/evm-spec-tester/Cargo.lock @@ -1338,6 +1338,7 @@ dependencies = [ "rlp 0.4.6", "serde", "serde_json", + "serde_with", "similar-asserts", "thiserror 2.0.11", ] @@ -1716,6 +1717,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-targets 0.52.6", ] @@ -2148,6 +2150,41 @@ dependencies = [ "hibitset", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.96", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.96", +] + [[package]] name = "db" version = "0.1.0" @@ -3798,6 +3835,12 @@ dependencies = [ "syn 2.0.96", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.3" @@ -6595,6 +6638,34 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" +dependencies = [ + "base64 0.22.1", + "chrono", + "hex", + "serde", + "serde_derive", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.96", +] + [[package]] name = "serde_yaml" version = "0.8.26" From 3e6a5af380620006ba69aa13ce0e09c4d9adffcc Mon Sep 17 00:00:00 2001 From: Pana Date: Tue, 26 Aug 2025 10:26:26 +0800 Subject: [PATCH 2/9] add dump command doc --- docs/commands/readme.md | 80 +++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/docs/commands/readme.md b/docs/commands/readme.md index 7431824d5d..57f73be6dd 100644 --- a/docs/commands/readme.md +++ b/docs/commands/readme.md @@ -6,34 +6,26 @@ Currently Available Subcommands: - `account`:Account Management - `rpc`:RPC-based subcommands, used for querying blockchain 
information and sending transactions +- `dump`: Dump eSpace account state at a given block number - `help`:Print help message ```sh -./conflux -h -conflux conflux-rust/v2.4.0-82500ad-20250418/x86_64-linux-gnu/rustc1.77.2 -The Conflux Team -Conflux client. +Conflux client -USAGE: - conflux [FLAGS] [OPTIONS] [SUBCOMMAND] +Usage: conflux [OPTIONS] [COMMAND] -FLAGS: - --archive - --full - -h, --help Prints help information - --light - --tg_archive - -V, --version Prints version information - -OPTIONS: - -c, --config Sets a custom config file. - ... - +Commands: + account Manage accounts + dump Dump eSpace account state at a given block number + rpc RPC based subcommands to query blockchain information and send transactions + help Print this message or the help of the given subcommand(s) -SUBCOMMANDS: - account Manage accounts - help Prints this message or the help of the given subcommand(s) - rpc RPC based subcommands to query blockchain information and send transactions +Options: + --mode + Use the preset testing configurations. dev or test + -p, --port + Specify the port for P2P connections + ... ``` Each command-line tool comes with its own help information. Users can view the help message by using the -h or --help flag. @@ -57,4 +49,46 @@ SUBCOMMANDS: import Import accounts from JSON UTC keystore files to the specified --chain (default conflux) list List existing accounts of the given --chain (default conflux). new Create a new account (and its associated key) for the given --chain (default conflux). -``` \ No newline at end of file +``` + +## dump subcommand + +This command can be used to export all account states at a certain block height in eSpace to JSON files, facilitating development and debugging. The exported data structure example is as follows + +```sh +$ ./conflux --config devnode.toml dump --block 1000 # export state at height 1000 +{ + "root": "0xdd606752e465cb6a1e2f0df718057536ab00cd66d9c6fa46085309145823d3c0", + "accounts": { + "0x004e322e7ea7e63547d25639d8e8ed282318eec9": { + "balance": "0x152cfd9872b245dcbcae", + "nonce": 210, + "root": "0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421", + "codeHash": "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470", + "address": "0x004e322e7ea7e63547d25639d8e8ed282318eec9", + "key": "0x0c1bad9586421be5b0d8eda4446cac4ce7692d67301d07146a87455e7bc9d30e" + }, + "0x0c80d6926edc73977dce4c97ff8966abf04fe80e": { + "balance": "0x0", + "nonce": 2, + "root": "0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421", + "codeHash": "0xe79d1e04e3004c8d97ad51f5f08cfd1a79e6cdcce2a3a6d59676a9858bccd173", + "code": "0xf90338b903206080604052600436106100385760003.....", + "storage": { + "0x0000000000000000000000000000000000000000000000000000000000000000": "0xc", + "0x0000000000000000000000000000000000000000000000000000000000000001": "0x27e26b9234ec81a0247a6083edf8b329fb1ccde9" + }, + "address": "0x0c80d6926edc73977dce4c97ff8966abf04fe80e", + "key": "0x691460d9548cee180ba8cd9f0960fee74fed16501d80cdb3182aa0f41b160e54" + } + } +} +``` + +Note: + +1. Conflux contract data storage differs significantly from Ethereum, as it is not stored in separate MPT form, therefore the storage root cannot be obtained. The exported data's `account.root` is fixed as `0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421` +2. The exported root information is the full state root of Conflux dual-space (Core Space, eSpace), only for maintaining data format consistency, not the state root of all eSpace accounts. +3. 
When exporting mainnet state data, due to the large number of accounts, high machine configuration is required, and the runtime will be quite long; if you want to export the state at a certain height in history, it needs to be performed on a fullstate node data. +4. When performing state export, please stop the node program first, then execute the export operation in the node directory. +5. Please use the binary corresponding to the network and execute the export operation in the corresponding network data directory; do not use testnet or master code compiled binary to execute export operations on mainnet data. \ No newline at end of file From 655a35846e4926be69d167a7bcdd57cd046e1269 Mon Sep 17 00:00:00 2001 From: Pana Date: Wed, 27 Aug 2025 15:35:24 +0800 Subject: [PATCH 3/9] support iterate dump mode --- Cargo.lock | 1 + bins/conflux/src/command/dump.rs | 74 +++++---- crates/client/Cargo.toml | 1 + crates/client/src/state_dump.rs | 155 ++++++++++++++++-- crates/dbs/statedb/src/lib.rs | 11 +- .../storage/src/impls/recording_storage.rs | 2 + .../dbs/storage/src/impls/replicated_state.rs | 11 +- .../dbs/storage/src/impls/single_mpt_state.rs | 47 +++++- crates/dbs/storage/src/impls/state.rs | 110 +++++++++++++ crates/dbs/storage/src/lib.rs | 3 +- crates/dbs/storage/src/state.rs | 7 + 11 files changed, 365 insertions(+), 57 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index de5a329d85..c6382ab322 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2115,6 +2115,7 @@ dependencies = [ "diem-crypto", "diem-types", "dir", + "fallible-iterator", "futures 0.3.30", "itertools 0.14.0", "jsonrpc-core", diff --git a/bins/conflux/src/command/dump.rs b/bins/conflux/src/command/dump.rs index 985bf022d0..e95a693efd 100644 --- a/bins/conflux/src/command/dump.rs +++ b/bins/conflux/src/command/dump.rs @@ -1,12 +1,12 @@ -use cfx_types::Address; +use cfx_types::parse_hex_string; use clap::{ArgMatches, Args}; use client::{ configuration::Configuration, - state_dump::{dump_whole_state, StateDumpConfig}, + state_dump::{dump_whole_state, iterate_dump_whole_state, StateDumpConfig}, }; use parking_lot::{Condvar, Mutex}; use serde_json; -use std::{collections::HashMap, fs, path::Path, str::FromStr, sync::Arc}; +use std::{collections::HashMap, fs, path::Path, sync::Arc}; #[derive(Args, Debug)] pub struct DumpCommand { @@ -68,8 +68,7 @@ impl DumpCommand { } fn get_state_dump_config(&self) -> Result { - let address_str = self.start.strip_prefix("0x").unwrap_or(&self.start); - let start_address = Address::from_str(address_str) + let start_address = parse_hex_string(&self.start) .map_err(|e| format!("Invalid address: {}", e))?; Ok(StateDumpConfig { start_address, @@ -97,46 +96,56 @@ impl DumpCommand { } let exit = Arc::new((Mutex::new(false), Condvar::new())); - let config = self.get_state_dump_config()?; - let state = dump_whole_state(conf, exit, &config)?; - let total_accounts = state.accounts.len(); - if self.multi_file { + let _total_accounts = if self.multi_file { // Write to multiple files - for (address, account_state) in state.accounts { - // Create filename using address (without 0x prefix) - let filename = format!("{}.json", address); - let file_path = Path::new(output_path).join(&filename); + let state_root = iterate_dump_whole_state( + conf, + exit, + &config, + |account_state| { + let address = + account_state.address.expect("address is not set"); + let filename = format!("{:?}.json", address); + let file_path = Path::new(output_path).join(&filename); - // Serialize account_state to JSON - let json_content = 
serde_json::to_string_pretty(&account_state) - .map_err(|e| { - format!( + // Serialize account_state to JSON + let json_content = + serde_json::to_string_pretty(&account_state) + .map_err(|e| { + format!( "Failed to serialize account state for {}: {}", address, e ) - })?; + }) + .expect("Failed to serialize account state"); - // Write to file - fs::write(&file_path, json_content).map_err(|e| { - format!( - "Failed to write file {}: {}", - file_path.display(), - e - ) - })?; - } + // Write to file + fs::write(&file_path, json_content) + .map_err(|e| { + format!( + "Failed to write file {}: {}", + file_path.display(), + e + ) + }) + .expect("Failed to write file"); + }, + )?; // Write meta info - let meta_file_path = Path::new(output_path).join("meta.json"); let mut meta_info = HashMap::new(); - meta_info.insert("root".to_string(), state.root); + meta_info.insert("root".to_string(), state_root); + let meta_file_path = Path::new(output_path).join("meta.json"); let meta_content = serde_json::to_string_pretty(&meta_info) .map_err(|e| format!("Failed to serialize state: {}", e))?; fs::write(&meta_file_path, meta_content) .map_err(|e| format!("Failed to write meta file: {}", e))?; + 0 } else { + let state = dump_whole_state(conf, exit, &config)?; + let total_accounts = state.accounts.len(); // Write to a single file let file_path = Path::new(output_path).join("state.json"); let json_content = serde_json::to_string_pretty(&state) @@ -144,11 +153,12 @@ impl DumpCommand { fs::write(&file_path, json_content).map_err(|e| { format!("Failed to write file {}: {}", file_path.display(), e) })?; - } + total_accounts + }; Ok(format!( - "Dumped {} account state to output directory: {}", - total_accounts, output_path + "Dumped account state to output directory: {}", + output_path )) } } diff --git a/crates/client/Cargo.toml b/crates/client/Cargo.toml index 87e03cdd28..6d8767e498 100644 --- a/crates/client/Cargo.toml +++ b/crates/client/Cargo.toml @@ -79,6 +79,7 @@ cfx-parity-trace-types = { workspace = true } cfx-tasks = { workspace = true } cfx-config = { workspace = true } cfxcore-types = { workspace = true } +fallible-iterator = { workspace = true } [dev-dependencies] criterion = { workspace = true } diff --git a/crates/client/src/state_dump.rs b/crates/client/src/state_dump.rs index 7c8a29221d..b00e7eef7a 100644 --- a/crates/client/src/state_dump.rs +++ b/crates/client/src/state_dump.rs @@ -3,9 +3,13 @@ use cfx_config::Configuration; use cfx_rpc_eth_types::{AccountState, StateDump, EOA_STORAGE_ROOT_H256}; use cfx_rpc_primitives::Bytes; use cfx_statedb::{StateDbExt, StateDbGeneric}; -use cfx_storage::state_manager::StateManagerTrait; +use cfx_storage::{ + state_manager::StateManagerTrait, utils::to_key_prefix_iter_upper_bound, + KeyValueDbIterableTrait, +}; use cfx_types::{Address, Space, H256}; use cfxcore::NodeType; +use fallible_iterator::FallibleIterator; use keccak_hash::{keccak, KECCAK_EMPTY}; use parking_lot::{Condvar, Mutex}; use primitives::{ @@ -13,7 +17,7 @@ use primitives::{ }; use rlp::Rlp; use std::{ - collections::{BTreeMap, HashMap}, + collections::{BTreeMap, HashMap, HashSet}, ops::Deref, sync::Arc, thread, @@ -32,6 +36,44 @@ pub fn dump_whole_state( conf: &mut Configuration, exit_cond_var: Arc<(Mutex, Condvar)>, config: &StateDumpConfig, ) -> Result { + let (mut state_db, state_root) = + prepare_state_db(conf, exit_cond_var, config)?; + + let accounts = + export_space_accounts(&mut state_db, Space::Ethereum, config) + .map_err(|e| e.to_string())?; + + let state_dump = StateDump { + root: 
state_root, + accounts, + next: None, + }; + + Ok(state_dump) +} + +pub fn iterate_dump_whole_state( + conf: &mut Configuration, exit_cond_var: Arc<(Mutex, Condvar)>, + config: &StateDumpConfig, callback: F, +) -> Result { + let (mut state_db, state_root) = + prepare_state_db(conf, exit_cond_var, config)?; + + export_space_accounts_with_iterator( + &mut state_db, + Space::Ethereum, + config, + callback, + ) + .map_err(|e| e.to_string())?; + + Ok(state_root) +} + +fn prepare_state_db( + conf: &mut Configuration, exit_cond_var: Arc<(Mutex, Condvar)>, + config: &StateDumpConfig, +) -> Result<(StateDbGeneric, H256), String> { println!("Preparing state..."); let ( data_man, @@ -71,8 +113,6 @@ pub fn dump_whole_state( None => consensus.latest_confirmed_epoch_number(), }; - println!("Start to dump state at epoch: {:?}", target_height); - let epoch_hash = consensus .get_hash_from_epoch_number(target_height.into()) .map_err(|e| e.to_string())?; @@ -92,19 +132,9 @@ pub fn dump_whole_state( .map_err(|e| e.to_string())? .ok_or("Failed to get state")?; - let mut state_db = StateDbGeneric::new(state); - - let accounts = - export_space_accounts(&mut state_db, Space::Ethereum, config) - .map_err(|e| e.to_string())?; - - let state_dump = StateDump { - root: state_root.clone(), - accounts, - next: None, - }; + let state_db = StateDbGeneric::new(state); - Ok(state_dump) + Ok((state_db, state_root.clone())) } fn export_space_accounts( @@ -177,12 +207,18 @@ fn export_space_accounts( let code = if is_contract { codes_map.get(&address).cloned() } else { + if let Some(code) = codes_map.get(&address) { + println!("no-contract account have code: {:?}", code); + } None }; let storage = if is_contract { storage_map.get(&address).cloned() } else { + if let Some(storage) = storage_map.get(&address) { + println!("no-contract account have storage: {:?}", storage); + } None }; @@ -207,6 +243,93 @@ fn export_space_accounts( Ok(accounts) } +fn export_space_accounts_with_iterator( + state: &mut StateDbGeneric, space: Space, config: &StateDumpConfig, + callback: F, +) -> Result<(), Box> { + let empty_key = StorageKey::EmptyKey.with_space(space); + let (kvs, maybe_kv_iterator) = state.read_all_iterator(empty_key)?; + + let mut deleted_keys = HashSet::new(); + let mut found_accounts = 0; + + // Iterate key value pairs from delta trie and intermediate trie + for (k, v) in kvs { + let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); + let key = storage_key.to_key_bytes(); + deleted_keys.insert(key.clone()); + + let storage_key_with_space = + StorageKeyWithSpace::from_key_bytes::(&key); + if storage_key_with_space.space != space { + continue; + } + + if let StorageKey::AccountKey(address_bytes) = + storage_key_with_space.key + { + let address = Address::from_slice(address_bytes); + println!("Find account: {:?}", address); + let account = Account::new_from_rlp(address, &Rlp::new(&v))?; + + let account_state = get_account_state(state, &account, config)?; + callback(account_state); + found_accounts += 1; + + if config.limit > 0 && found_accounts >= config.limit as usize { + break; + } + } else { + continue; + } + } + + let lower_bound_incl = empty_key.to_key_bytes(); + let upper_bound_excl = to_key_prefix_iter_upper_bound(&lower_bound_incl); + + if let Some(mut kv_iterator) = maybe_kv_iterator { + let mut kvs = kv_iterator + .iter_range( + lower_bound_incl.as_slice(), + upper_bound_excl.as_ref().map(|v| &**v), + )? + .take(); + + while let Some((key, value)) = kvs.next()? 
{ + if deleted_keys.contains(&key) { + continue; + } + + let storage_key_with_space = + StorageKeyWithSpace::from_key_bytes::(&key); + if storage_key_with_space.space != space { + continue; + } + + if let StorageKey::AccountKey(address_bytes) = + storage_key_with_space.key + { + let address = Address::from_slice(address_bytes); + println!("Find account: {:?}", address); + let account = + Account::new_from_rlp(address, &Rlp::new(&value))?; + + let account_state = get_account_state(state, &account, config)?; + callback(account_state); + found_accounts += 1; + + if config.limit > 0 && found_accounts >= config.limit as usize { + break; + } + } else { + continue; + } + } + } + + Ok(()) +} + #[allow(unused)] fn get_account_state( state: &mut StateDbGeneric, account: &Account, config: &StateDumpConfig, diff --git a/crates/dbs/statedb/src/lib.rs b/crates/dbs/statedb/src/lib.rs index 9261cfeba2..e310d328de 100644 --- a/crates/dbs/statedb/src/lib.rs +++ b/crates/dbs/statedb/src/lib.rs @@ -188,6 +188,15 @@ mod impls { self.delete_all::(key_prefix, debug_record) } + pub fn read_all_iterator( + &mut self, access_key_prefix: StorageKeyWithSpace, + ) -> Result<(Vec, Option>>)> + { + self.storage + .read_all_iterator(access_key_prefix) + .map_err(|err| err.into()) + } + pub fn delete_all( &mut self, key_prefix: StorageKeyWithSpace, debug_record: Option<&mut ComputeEpochDebugRecord>, @@ -535,7 +544,7 @@ mod impls { }; use cfx_storage::{ utils::{access_mode, to_key_prefix_iter_upper_bound}, - MptKeyValue, StorageStateTrait, + KvdbSqliteSharded, MptKeyValue, StorageStateTrait, }; use cfx_types::{ address_util::AddressUtil, Address, AddressWithSpace, Space, diff --git a/crates/dbs/storage/src/impls/recording_storage.rs b/crates/dbs/storage/src/impls/recording_storage.rs index e13252c952..e6adfe3d32 100644 --- a/crates/dbs/storage/src/impls/recording_storage.rs +++ b/crates/dbs/storage/src/impls/recording_storage.rs @@ -54,6 +54,7 @@ impl StateTrait fn compute_state_root(&mut self) -> Result; fn get_state_root(&self) -> Result; fn commit(&mut self, epoch_id: EpochId) -> Result; + fn read_all_iterator(&mut self, access_key_prefix: StorageKeyWithSpace) -> Result<(Vec, Option>>)>; } } @@ -94,6 +95,7 @@ impl StateTrait use crate::{ impls::{ errors::*, merkle_patricia_trie::MptKeyValue, state_proof::StateProof, + storage_db::kvdb_sqlite_sharded::KvdbSqliteSharded, }, state::*, StateProofMerger, diff --git a/crates/dbs/storage/src/impls/replicated_state.rs b/crates/dbs/storage/src/impls/replicated_state.rs index 5445fdab7a..0dfc12a494 100644 --- a/crates/dbs/storage/src/impls/replicated_state.rs +++ b/crates/dbs/storage/src/impls/replicated_state.rs @@ -1,6 +1,7 @@ use crate::{ - impls::errors::*, state::StateTrait, MptKeyValue, NodeMerkleProof, - StateProof, StorageStateTraitExt, + impls::{errors::*, storage_db::kvdb_sqlite_sharded::KvdbSqliteSharded}, + state::StateTrait, + MptKeyValue, NodeMerkleProof, StateProof, StorageStateTraitExt, }; use cfx_internal_common::StateRootWithAuxInfo; use cfx_types::Space; @@ -306,6 +307,12 @@ impl StateTrait for ReplicatedState
{ self.state.read_all(access_key_prefix) } + fn read_all_iterator( + &mut self, access_key_prefix: StorageKeyWithSpace, + ) -> Result<(Vec, Option>>)> { + self.state.read_all_iterator(access_key_prefix) + } + fn compute_state_root(&mut self) -> Result { self.replication_handler .send_op(StateOperation::ComputeStateRoot); diff --git a/crates/dbs/storage/src/impls/single_mpt_state.rs b/crates/dbs/storage/src/impls/single_mpt_state.rs index 004ed33525..e3513a15b7 100644 --- a/crates/dbs/storage/src/impls/single_mpt_state.rs +++ b/crates/dbs/storage/src/impls/single_mpt_state.rs @@ -1,5 +1,8 @@ use crate::{ - impls::{errors::*, state::ChildrenMerkleMap}, + impls::{ + errors::*, state::ChildrenMerkleMap, + storage_db::kvdb_sqlite_sharded::KvdbSqliteSharded, + }, state::StateTrait, utils::access_mode, CowNodeRef, DeltaMpt, MptKeyValue, NodeRefDeltaMpt, OwnedNodeSet, @@ -10,7 +13,7 @@ use primitives::{ EpochId, MerkleHash, MptValue, StateRoot, StorageKeyWithSpace, MERKLE_NULL_NODE, }; -use std::{cell::UnsafeCell, collections::HashSet, sync::Arc}; +use std::{cell::UnsafeCell, sync::Arc}; pub struct SingleMptState { trie: Arc, @@ -292,13 +295,10 @@ impl SingleMptState { }; let mut result = Vec::new(); - // This is used to keep track of the deleted keys. - let mut deleted_keys = HashSet::new(); if let Some(kvs) = trie_kvs { for (k, v) in kvs { let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); let k = storage_key.to_key_bytes(); - deleted_keys.insert(k.clone()); if v.len() > 0 { result.push((k, v)); } @@ -310,6 +310,37 @@ impl SingleMptState { Ok(Some(result)) } } + + fn read_all_iterator_impl( + &mut self, access_key_prefix: StorageKeyWithSpace, + ) -> Result<(Vec, Option>>)> { + self.ensure_temp_slab_for_db_load(); + + // Retrieve and delete key/value pairs from delta trie + let trie_kvs = { + let key_prefix = access_key_prefix.to_key_bytes(); + let deleted = SubTrieVisitor::new( + &self.trie, + self.trie_root.clone(), + &mut self.owned_node_set, + )? + .traversal(&key_prefix, &key_prefix)?; + deleted + }; + + let mut result = Vec::new(); + if let Some(kvs) = trie_kvs { + for (k, v) in kvs { + let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); + let k = storage_key.to_key_bytes(); + if v.len() > 0 { + result.push((k, v)); + } + } + } + + Ok((result, None)) + } } impl StateTrait for SingleMptState { @@ -373,6 +404,12 @@ impl StateTrait for SingleMptState { self.delete_all_impl::(access_key_prefix) } + fn read_all_iterator( + &mut self, access_key_prefix: StorageKeyWithSpace, + ) -> Result<(Vec, Option>>)> { + self.read_all_iterator_impl(access_key_prefix) + } + fn compute_state_root(&mut self) -> Result { self.ensure_temp_slab_for_db_load(); diff --git a/crates/dbs/storage/src/impls/state.rs b/crates/dbs/storage/src/impls/state.rs index 3627e3dbc3..071335608b 100644 --- a/crates/dbs/storage/src/impls/state.rs +++ b/crates/dbs/storage/src/impls/state.rs @@ -309,6 +309,12 @@ impl StateTrait for State { self.delete_all_impl::(access_key_prefix) } + fn read_all_iterator( + &mut self, access_key_prefix: StorageKeyWithSpace, + ) -> Result<(Vec, Option>>)> { + self.read_all_iterator_impl(access_key_prefix) + } + fn compute_state_root(&mut self) -> Result { self.ensure_temp_slab_for_db_load(); @@ -952,6 +958,109 @@ impl State { Ok(Some(result)) } } + + /// Read all key/value pairs with access_key_prefix as prefix. + /// It will return data from delta trie, intermediate trie as a vector, + /// and data from snapshot as a iterator. 
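+ /// Only the delta/intermediate results are collected into the vector up
+ /// front; the snapshot part is returned lazily so the whole snapshot does
+ /// not have to be loaded into memory at once.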
+ /// To use the iterator, you need to call `take()` on it. + /// ```rust + /// let (kvs, kv_iterator) = state.read_all_impl(access_key_prefix)?; + /// let lower_bound_incl = access_key_prefix.to_key_bytes(); + /// let upper_bound_excl = to_key_prefix_iter_upper_bound(&lower_bound_incl); + /// let mut kvs = kv_iterator + /// .iter_range( + /// lower_bound_incl.as_slice(), + /// upper_bound_excl.as_ref().map(|v| &**v), + /// )? + /// .take(); + /// + /// // use it as a iterator + /// let mut snapshot_kvs = Vec::new(); + /// while let Some((key, value)) = kvs.next()? { + /// snapshot_kvs.push((key, value)); + /// } + /// ``` + /// Note: In the iterator, the keys may be duplicated with the keys in the + /// vector. + pub fn read_all_iterator_impl( + &mut self, access_key_prefix: StorageKeyWithSpace, + ) -> Result<(Vec, Option>>)> { + self.ensure_temp_slab_for_db_load(); + + // Retrieve and delete key/value pairs from delta trie + let delta_trie_kvs = match &self.delta_trie_root { + None => None, + Some(old_root_node) => { + let delta_mpt_key_prefix = access_key_prefix + .to_delta_mpt_key_bytes(&self.delta_trie_key_padding); + let deleted = SubTrieVisitor::new( + &self.delta_trie, + old_root_node.clone(), + &mut self.owned_node_set, + )? + .traversal(&delta_mpt_key_prefix, &delta_mpt_key_prefix)?; + deleted + } + }; + + // Retrieve key/value pairs from intermediate trie + let intermediate_trie_kvs = match &self.intermediate_trie_root { + None => None, + Some(root_node) => { + if self.maybe_intermediate_trie_key_padding.is_some() + && self.maybe_intermediate_trie.is_some() + { + let intermediate_trie_key_padding = self + .maybe_intermediate_trie_key_padding + .as_ref() + .unwrap(); + let intermediate_mpt_key_prefix = access_key_prefix + .to_delta_mpt_key_bytes(intermediate_trie_key_padding); + let values = SubTrieVisitor::new( + self.maybe_intermediate_trie.as_ref().unwrap(), + root_node.clone(), + &mut self.owned_node_set, + )? + .traversal( + &intermediate_mpt_key_prefix, + &intermediate_mpt_key_prefix, + )?; + + values + } else { + None + } + } + }; + + let mut result = Vec::new(); + // This is used to keep track of the deleted keys. 
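+ // The set records keys already produced by the delta trie so that older
+ // versions of the same key in the intermediate trie are skipped; the
+ // snapshot iterator returned below is not filtered, so callers must
+ // de-duplicate it against the keys in the returned vector.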
+ let mut deleted_keys = HashSet::new(); + if let Some(kvs) = delta_trie_kvs { + for (k, v) in kvs { + deleted_keys.insert(k.clone()); + if v.len() > 0 { + result.push((k, v)); + } + } + } + + if let Some(kvs) = intermediate_trie_kvs { + for (k, v) in kvs { + if !deleted_keys.contains(&k) { + deleted_keys.insert(k.clone()); + if v.len() > 0 { + result.push((k, v)); + } + } + } + } + + // Retrieve key/value pairs from snapshot + let kv_iterator = self.snapshot_db.snapshot_kv_iterator()?.take(); + + Ok((result, Some(kv_iterator))) + } } use crate::{ @@ -965,6 +1074,7 @@ use crate::{ node_merkle_proof::NodeMerkleProof, state_manager::*, state_proof::StateProof, + storage_db::kvdb_sqlite_sharded::KvdbSqliteSharded, }, state::*, storage_db::*, diff --git a/crates/dbs/storage/src/lib.rs b/crates/dbs/storage/src/lib.rs index ecd4f758cf..c9ddedd784 100644 --- a/crates/dbs/storage/src/lib.rs +++ b/crates/dbs/storage/src/lib.rs @@ -213,6 +213,7 @@ pub use self::{ storage_db::{ kvdb_rocksdb::KvdbRocksdb, kvdb_sqlite::{KvdbSqlite, KvdbSqliteStatements}, + kvdb_sqlite_sharded::KvdbSqliteSharded, snapshot_db_manager_sqlite::SnapshotDbManagerSqlite, sqlite::SqliteConnection, }, @@ -226,7 +227,7 @@ pub use self::{ StateIndex, StateManager as StorageManager, StateManagerTrait as StorageManagerTrait, }, - storage_db::KeyValueDbTrait, + storage_db::{KeyValueDbIterableTrait, KeyValueDbTrait}, }; #[cfg(any(test, feature = "testonly_code"))] diff --git a/crates/dbs/storage/src/state.rs b/crates/dbs/storage/src/state.rs index 9b6a06183e..4f0f91295f 100644 --- a/crates/dbs/storage/src/state.rs +++ b/crates/dbs/storage/src/state.rs @@ -39,6 +39,12 @@ pub trait StateTrait: Sync + Send { &mut self, access_key_prefix: StorageKeyWithSpace, ) -> Result>>; + fn read_all_iterator( + &mut self, _access_key_prefix: StorageKeyWithSpace, + ) -> Result<(Vec, Option>>)> { + Err(Error::Msg("Not implemented".into())) + } + // Finalize /// It's costly to compute state root however it's only necessary to compute /// state root once before committing. 
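Reviewer note: as a minimal, self-contained sketch (the types below are simplified stand-ins, not the crate's real `StateTrait` or iterator types), this is roughly how a caller is expected to consume the pair returned by the new `read_all_iterator` method — the delta/intermediate results arrive eagerly as a `Vec`, the snapshot arrives lazily as an iterator, and, as the doc comment above warns, the snapshot may repeat keys already in the vector, so the caller de-duplicates:

```rust
use std::collections::HashSet;

// Simplified stand-in for the crate's MptKeyValue alias.
type MptKeyValue = (Vec<u8>, Box<[u8]>);

/// Merge the eager trie results with the lazy snapshot results,
/// keeping the trie value whenever a key appears in both sources.
fn merge_trie_and_snapshot(
    trie_kvs: Vec<MptKeyValue>,
    snapshot_kvs: impl Iterator<Item = MptKeyValue>,
) -> Vec<MptKeyValue> {
    let mut seen: HashSet<Vec<u8>> = HashSet::new();
    let mut merged = Vec::new();
    for (k, v) in trie_kvs {
        seen.insert(k.clone());
        if !v.is_empty() {
            merged.push((k, v));
        }
    }
    for (k, v) in snapshot_kvs {
        // Keys already returned by the delta/intermediate tries shadow the
        // snapshot entries, so duplicates are skipped here.
        if !seen.contains(&k) && !v.is_empty() {
            merged.push((k, v));
        }
    }
    merged
}
```

In the actual implementation the snapshot side is a fallible iterator driven with `iter_range(..)` and `next()?` (as shown in the doc comment above), so errors propagate instead of being swallowed; the sketch uses a plain `Iterator` only to stay self-contained.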
@@ -78,6 +84,7 @@ pub trait StateDbGetOriginalMethods { use super::{ impls::{ errors::*, node_merkle_proof::NodeMerkleProof, state_proof::StateProof, + storage_db::kvdb_sqlite_sharded::KvdbSqliteSharded, }, MptKeyValue, StateRootWithAuxInfo, }; From 5118c30a00a4c39ee69b85b6660706ef5586cd97 Mon Sep 17 00:00:00 2001 From: Pana Date: Wed, 27 Aug 2025 17:45:32 +0800 Subject: [PATCH 4/9] add time to log add log add read_all_with_callback split state dump into multiple task add more log to indicate progress --- Cargo.lock | 1 + crates/client/Cargo.toml | 1 + crates/client/src/state_dump.rs | 134 +++++------- crates/dbs/statedb/src/lib.rs | 10 +- .../src/impls/delta_mpt/cow_node_ref.rs | 51 +++++ .../src/impls/delta_mpt/subtrie_visitor.rs | 62 ++++++ .../storage/src/impls/recording_storage.rs | 3 +- .../dbs/storage/src/impls/replicated_state.rs | 20 +- .../dbs/storage/src/impls/single_mpt_state.rs | 70 +++--- crates/dbs/storage/src/impls/state.rs | 207 ++++++++++-------- crates/dbs/storage/src/state.rs | 6 +- crates/primitives/src/storage_key.rs | 15 ++ docs/commands/readme.md | 15 +- 13 files changed, 384 insertions(+), 211 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c6382ab322..f0210ab909 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2106,6 +2106,7 @@ dependencies = [ "cfxcore-types", "cfxkey", "cfxstore", + "chrono", "consensus-types", "criterion", "ctrlc", diff --git a/crates/client/Cargo.toml b/crates/client/Cargo.toml index 6d8767e498..61453ce5e1 100644 --- a/crates/client/Cargo.toml +++ b/crates/client/Cargo.toml @@ -80,6 +80,7 @@ cfx-tasks = { workspace = true } cfx-config = { workspace = true } cfxcore-types = { workspace = true } fallible-iterator = { workspace = true } +chrono = { workspace = true } [dev-dependencies] criterion = { workspace = true } diff --git a/crates/client/src/state_dump.rs b/crates/client/src/state_dump.rs index b00e7eef7a..c49eada067 100644 --- a/crates/client/src/state_dump.rs +++ b/crates/client/src/state_dump.rs @@ -3,13 +3,10 @@ use cfx_config::Configuration; use cfx_rpc_eth_types::{AccountState, StateDump, EOA_STORAGE_ROOT_H256}; use cfx_rpc_primitives::Bytes; use cfx_statedb::{StateDbExt, StateDbGeneric}; -use cfx_storage::{ - state_manager::StateManagerTrait, utils::to_key_prefix_iter_upper_bound, - KeyValueDbIterableTrait, -}; +use cfx_storage::state_manager::StateManagerTrait; use cfx_types::{Address, Space, H256}; use cfxcore::NodeType; -use fallible_iterator::FallibleIterator; +use chrono::Utc; use keccak_hash::{keccak, KECCAK_EMPTY}; use parking_lot::{Condvar, Mutex}; use primitives::{ @@ -17,7 +14,7 @@ use primitives::{ }; use rlp::Rlp; use std::{ - collections::{BTreeMap, HashMap, HashSet}, + collections::{BTreeMap, HashMap}, ops::Deref, sync::Arc, thread, @@ -32,6 +29,10 @@ pub struct StateDumpConfig { pub no_storage: bool, } +// This method will read all data (k, v) from the Conflux state tree (including +// core space and espace accounts, code, storage, deposit, vote_list) into +// memory at once, then parse and assemble them and assemble all account states +// into a StateDump struct and return it pub fn dump_whole_state( conf: &mut Configuration, exit_cond_var: Arc<(Mutex, Condvar)>, config: &StateDumpConfig, @@ -52,6 +53,11 @@ pub fn dump_whole_state( Ok(state_dump) } +// This method will iterate through the entire state tree, storing each found +// account in a temporary map After iterating through all accounts, it will +// retrieve the code and storage for each account, then call the callback method +// Pass the AccountState as a 
parameter to the callback method, which will +// handle the AccountState pub fn iterate_dump_whole_state( conf: &mut Configuration, exit_cond_var: Arc<(Mutex, Condvar)>, config: &StateDumpConfig, callback: F, @@ -59,7 +65,7 @@ pub fn iterate_dump_whole_state( let (mut state_db, state_root) = prepare_state_db(conf, exit_cond_var, config)?; - export_space_accounts_with_iterator( + export_space_accounts_with_callback( &mut state_db, Space::Ethereum, config, @@ -74,7 +80,7 @@ fn prepare_state_db( conf: &mut Configuration, exit_cond_var: Arc<(Mutex, Condvar)>, config: &StateDumpConfig, ) -> Result<(StateDbGeneric, H256), String> { - println!("Preparing state..."); + println("Preparing state..."); let ( data_man, _, @@ -140,6 +146,7 @@ fn prepare_state_db( fn export_space_accounts( state: &mut StateDbGeneric, space: Space, config: &StateDumpConfig, ) -> Result, Box> { + println("Start to iterate state..."); let empty_key = StorageKey::EmptyKey.with_space(space); let kv_pairs = state.read_all(empty_key, None)?; @@ -156,7 +163,7 @@ fn export_space_accounts( match storage_key_with_space.key { StorageKey::AccountKey(address_bytes) => { let address = Address::from_slice(address_bytes); - println!("Find account: {:?}", address); + println(&format!("Find account: {:?}", address)); let account = Account::new_from_rlp(address, &Rlp::new(&value))?; accounts_map.insert(address, account); @@ -208,7 +215,7 @@ fn export_space_accounts( codes_map.get(&address).cloned() } else { if let Some(code) = codes_map.get(&address) { - println!("no-contract account have code: {:?}", code); + println(&format!("no-contract account have code: {:?}", code)); } None }; @@ -216,8 +223,8 @@ fn export_space_accounts( let storage = if is_contract { storage_map.get(&address).cloned() } else { - if let Some(storage) = storage_map.get(&address) { - println!("no-contract account have storage: {:?}", storage); + if let Some(_storage) = storage_map.get(&address) { + println(&format!("no-contract account have storage")); } None }; @@ -243,86 +250,61 @@ fn export_space_accounts( Ok(accounts) } -fn export_space_accounts_with_iterator( +pub fn export_space_accounts_with_callback( state: &mut StateDbGeneric, space: Space, config: &StateDumpConfig, callback: F, ) -> Result<(), Box> { - let empty_key = StorageKey::EmptyKey.with_space(space); - let (kvs, maybe_kv_iterator) = state.read_all_iterator(empty_key)?; - - let mut deleted_keys = HashSet::new(); + println("Start to iterate state..."); let mut found_accounts = 0; + let mut core_space_key_count: u64 = 0; + let mut total_key_count: u64 = 0; - // Iterate key value pairs from delta trie and intermediate trie - for (k, v) in kvs { - let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); - let key = storage_key.to_key_bytes(); - deleted_keys.insert(key.clone()); - - let storage_key_with_space = - StorageKeyWithSpace::from_key_bytes::(&key); - if storage_key_with_space.space != space { - continue; - } - - if let StorageKey::AccountKey(address_bytes) = - storage_key_with_space.key - { - let address = Address::from_slice(address_bytes); - println!("Find account: {:?}", address); - let account = Account::new_from_rlp(address, &Rlp::new(&v))?; - - let account_state = get_account_state(state, &account, config)?; - callback(account_state); - found_accounts += 1; - - if config.limit > 0 && found_accounts >= config.limit as usize { - break; - } - } else { - continue; - } - } - - let lower_bound_incl = empty_key.to_key_bytes(); - let upper_bound_excl = 
to_key_prefix_iter_upper_bound(&lower_bound_incl); + for i in 0..=255 { + let prefix = [i]; + let start_key = StorageKey::AddressPrefixKey(&prefix).with_space(space); - if let Some(mut kv_iterator) = maybe_kv_iterator { - let mut kvs = kv_iterator - .iter_range( - lower_bound_incl.as_slice(), - upper_bound_excl.as_ref().map(|v| &**v), - )? - .take(); - - while let Some((key, value)) = kvs.next()? { - if deleted_keys.contains(&key) { - continue; - } + let mut account_states = BTreeMap::new(); + let mut inner_callback = |(key, value): (Vec, Box<[u8]>)| { + total_key_count += 1; let storage_key_with_space = StorageKeyWithSpace::from_key_bytes::(&key); if storage_key_with_space.space != space { - continue; + core_space_key_count += 1; + return; + } + + if total_key_count % 10000 == 0 { + println(&format!( + "total_key_count: {}, core_space_key_count: {}", + total_key_count, core_space_key_count + )); } if let StorageKey::AccountKey(address_bytes) = storage_key_with_space.key { let address = Address::from_slice(address_bytes); - println!("Find account: {:?}", address); - let account = - Account::new_from_rlp(address, &Rlp::new(&value))?; + println(&format!("Find account: {:?}", address)); + let account = Account::new_from_rlp(address, &Rlp::new(&value)) + .expect("Failed to decode account"); - let account_state = get_account_state(state, &account, config)?; - callback(account_state); - found_accounts += 1; + account_states.insert(address, account); + } + }; - if config.limit > 0 && found_accounts >= config.limit as usize { - break; - } - } else { - continue; + state.read_all_with_callback(start_key, &mut inner_callback)?; + + if account_states.len() > 0 { + println("Start to read account code and storage data..."); + } + + for (_address, account) in account_states { + let account_state = get_account_state(state, &account, config)?; + callback(account_state); + found_accounts += 1; + if config.limit > 0 && found_accounts >= config.limit as usize { + break; } } } @@ -369,3 +351,7 @@ fn get_account_state( address_hash: Some(address_hash), }) } + +fn println(message: &str) { + println!("[{}] {}", Utc::now().format("%Y-%m-%d %H:%M:%S"), message); +} diff --git a/crates/dbs/statedb/src/lib.rs b/crates/dbs/statedb/src/lib.rs index e310d328de..4580384f9e 100644 --- a/crates/dbs/statedb/src/lib.rs +++ b/crates/dbs/statedb/src/lib.rs @@ -188,12 +188,12 @@ mod impls { self.delete_all::(key_prefix, debug_record) } - pub fn read_all_iterator( + pub fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, - ) -> Result<(Vec, Option>>)> - { + callback: &mut dyn FnMut(MptKeyValue), + ) -> Result<()> { self.storage - .read_all_iterator(access_key_prefix) + .read_all_with_callback(access_key_prefix, callback) .map_err(|err| err.into()) } @@ -544,7 +544,7 @@ mod impls { }; use cfx_storage::{ utils::{access_mode, to_key_prefix_iter_upper_bound}, - KvdbSqliteSharded, MptKeyValue, StorageStateTrait, + MptKeyValue, StorageStateTrait, }; use cfx_types::{ address_util::AddressUtil, Address, AddressWithSpace, Space, diff --git a/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs b/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs index 4297c5fe92..b691a93472 100644 --- a/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs +++ b/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs @@ -622,6 +622,57 @@ impl CowNodeRef { Ok(()) } + pub fn iterate_internal_with_callback( + &self, owned_node_set: &OwnedNodeSet, trie: &DeltaMpt, + guarded_trie_node: GuardedMaybeOwnedTrieNodeAsCowCallParam, + 
key_prefix: CompressedPathRaw, db: &mut DeltaDbOwnedReadTraitObj, + callback: &mut dyn FnMut(MptKeyValue), + ) -> Result<()> { + if guarded_trie_node.as_ref().as_ref().has_value() { + assert!(CompressedPathRaw::has_second_nibble( + key_prefix.path_mask() + )); + callback(( + key_prefix.path_slice().to_vec(), + guarded_trie_node.as_ref().as_ref().value_clone().unwrap(), + )); + } + + let children_table = + guarded_trie_node.as_ref().as_ref().children_table.clone(); + // Free the lock for trie_node. + // FIXME: try to share the lock. + drop(guarded_trie_node); + + let node_memory_manager = trie.get_node_memory_manager(); + let allocator = node_memory_manager.get_allocator(); + for (i, node_ref) in children_table.iter() { + let mut cow_child_node = + Self::new((*node_ref).into(), owned_node_set, self.mpt_id); + let child_node = cow_child_node.get_trie_node( + node_memory_manager, + &allocator, + db, + )?; + let key_prefix = CompressedPathRaw::join_connected_paths( + &key_prefix, + i, + &child_node.compressed_path_ref(), + ); + let child_node = GuardedValue::take(child_node); + cow_child_node.iterate_internal_with_callback( + owned_node_set, + trie, + child_node, + key_prefix, + db, + callback, + )?; + } + + Ok(()) + } + /// Recursively commit dirty nodes. pub fn commit_dirty_recursively< Transaction: BorrowMut, diff --git a/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs b/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs index 9cc3d8a4fd..50ddbfb2a1 100644 --- a/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs +++ b/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs @@ -633,6 +633,68 @@ impl<'trie, 'db: 'trie> SubTrieVisitor<'trie, 'db> { Ok(Some(values)) } + /// return all key/value pairs given the prefix + pub fn traversal_with_callback( + mut self, key: KeyPart, key_remaining: KeyPart, + callback: &mut dyn FnMut(MptKeyValue), + ) -> Result<()> { + let node_memory_manager = self.node_memory_manager(); + let allocator = node_memory_manager.get_allocator(); + let mut node_cow = self.root.take(); + + let trie_node_ref = node_cow.get_trie_node( + node_memory_manager, + &allocator, + &mut *self.db.get_mut().to_owned_read()?, + )?; + + let key_prefix: CompressedPathRaw; + match trie_node_ref.walk::(key_remaining) { + WalkStop::ChildNotFound { .. } => return Ok(()), + WalkStop::Arrived => { + // To enumerate the subtree. + key_prefix = key.into(); + } + WalkStop::PathDiverted { + key_child_index, + unmatched_child_index, + unmatched_path_remaining, + .. + } => { + if key_child_index.is_some() { + return Ok(()); + } + // To enumerate the subtree. + key_prefix = CompressedPathRaw::join_connected_paths( + &key, + unmatched_child_index, + &unmatched_path_remaining, + ); + } + WalkStop::Descent { + key_remaining, + child_node, + .. + } => { + drop(trie_node_ref); + self.new_visitor_for_subtree(child_node.clone().into()) + .traversal_with_callback(key, key_remaining, callback)?; + return Ok(()); + } + } + + let trie_node = GuardedValue::take(trie_node_ref); + node_cow.iterate_internal_with_callback( + self.owned_node_set.get_ref(), + self.get_trie_ref(), + trie_node, + key_prefix, + &mut *self.db.get_mut().to_owned_read()?, + callback, + )?; + Ok(()) + } + // In a method we visit node one or 2 times but borrow-checker prevent // holding and access other fields so it's visited multiple times. // FIXME: Check if we did something like this. 
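The two helpers added above, `iterate_internal_with_callback` and `traversal_with_callback`, switch the trie enumeration from collect-into-a-`Vec` to a push-style walk: each key/value pair is handed to a `FnMut` callback as soon as it is found, so a full-state scan no longer has to hold every pair in memory at once. A toy, self-contained model of that pattern (not the real `DeltaMpt`/`CowNodeRef` machinery) looks like this:

```rust
/// Toy in-memory trie node; the real code walks a DeltaMpt via CowNodeRef,
/// but the callback-driven shape of the traversal is the same.
struct Node {
    value: Option<Vec<u8>>,
    children: Vec<(u8, Node)>, // (edge byte, child)
}

/// Depth-first walk that pushes every (key, value) pair to the callback
/// instead of materializing the whole result set.
fn visit_with_callback(
    node: &Node,
    key_prefix: &mut Vec<u8>,
    callback: &mut dyn FnMut(Vec<u8>, &[u8]),
) {
    if let Some(v) = &node.value {
        callback(key_prefix.clone(), v);
    }
    for (edge, child) in &node.children {
        key_prefix.push(*edge);
        visit_with_callback(child, key_prefix, callback);
        key_prefix.pop();
    }
}

fn main() {
    let trie = Node {
        value: None,
        children: vec![(
            0x81,
            Node { value: Some(b"account-rlp".to_vec()), children: vec![] },
        )],
    };
    let mut count = 0u64;
    visit_with_callback(&trie, &mut Vec::new(), &mut |key, value| {
        count += 1;
        println!("key {:02x?} -> {} bytes", key, value.len());
    });
    println!("visited {} leaves", count);
}
```

Later commits in this series hook their filters into the same recursion by returning early before descending into a subtree, which is what keeps the filtered scans cheap.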
diff --git a/crates/dbs/storage/src/impls/recording_storage.rs b/crates/dbs/storage/src/impls/recording_storage.rs index e6adfe3d32..100edbd7be 100644 --- a/crates/dbs/storage/src/impls/recording_storage.rs +++ b/crates/dbs/storage/src/impls/recording_storage.rs @@ -54,7 +54,7 @@ impl StateTrait fn compute_state_root(&mut self) -> Result; fn get_state_root(&self) -> Result; fn commit(&mut self, epoch_id: EpochId) -> Result; - fn read_all_iterator(&mut self, access_key_prefix: StorageKeyWithSpace) -> Result<(Vec, Option>>)>; + fn read_all_with_callback(&mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue)) -> Result<()>; } } @@ -95,7 +95,6 @@ impl StateTrait use crate::{ impls::{ errors::*, merkle_patricia_trie::MptKeyValue, state_proof::StateProof, - storage_db::kvdb_sqlite_sharded::KvdbSqliteSharded, }, state::*, StateProofMerger, diff --git a/crates/dbs/storage/src/impls/replicated_state.rs b/crates/dbs/storage/src/impls/replicated_state.rs index 0dfc12a494..2b9f41bb5f 100644 --- a/crates/dbs/storage/src/impls/replicated_state.rs +++ b/crates/dbs/storage/src/impls/replicated_state.rs @@ -1,7 +1,6 @@ use crate::{ - impls::{errors::*, storage_db::kvdb_sqlite_sharded::KvdbSqliteSharded}, - state::StateTrait, - MptKeyValue, NodeMerkleProof, StateProof, StorageStateTraitExt, + impls::errors::*, state::StateTrait, MptKeyValue, NodeMerkleProof, + StateProof, StorageStateTraitExt, }; use cfx_internal_common::StateRootWithAuxInfo; use cfx_types::Space; @@ -162,6 +161,7 @@ enum OwnedStorageKey { DepositListKey(Vec), VoteListKey(Vec), EmptyKey, + AddressPrefixKey(Vec), } impl OwnedStorageKey { @@ -197,6 +197,9 @@ impl OwnedStorageKey { StorageKey::VoteListKey(k.as_slice()) } OwnedStorageKey::EmptyKey => StorageKey::EmptyKey, + OwnedStorageKey::AddressPrefixKey(k) => { + StorageKey::AddressPrefixKey(k.as_slice()) + } } } } @@ -249,6 +252,9 @@ impl<'a> From> for OwnedStorageKey { OwnedStorageKey::VoteListKey(k.to_vec()) } StorageKey::EmptyKey => OwnedStorageKey::EmptyKey, + StorageKey::AddressPrefixKey(k) => { + OwnedStorageKey::AddressPrefixKey(k.to_vec()) + } } } } @@ -307,10 +313,12 @@ impl StateTrait for ReplicatedState
{ self.state.read_all(access_key_prefix) } - fn read_all_iterator( + fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, - ) -> Result<(Vec, Option>>)> { - self.state.read_all_iterator(access_key_prefix) + callback: &mut dyn FnMut(MptKeyValue), + ) -> Result<()> { + self.state + .read_all_with_callback(access_key_prefix, callback) } fn compute_state_root(&mut self) -> Result { diff --git a/crates/dbs/storage/src/impls/single_mpt_state.rs b/crates/dbs/storage/src/impls/single_mpt_state.rs index e3513a15b7..e09ec8264a 100644 --- a/crates/dbs/storage/src/impls/single_mpt_state.rs +++ b/crates/dbs/storage/src/impls/single_mpt_state.rs @@ -1,8 +1,5 @@ use crate::{ - impls::{ - errors::*, state::ChildrenMerkleMap, - storage_db::kvdb_sqlite_sharded::KvdbSqliteSharded, - }, + impls::{errors::*, state::ChildrenMerkleMap}, state::StateTrait, utils::access_mode, CowNodeRef, DeltaMpt, MptKeyValue, NodeRefDeltaMpt, OwnedNodeSet, @@ -13,6 +10,7 @@ use primitives::{ EpochId, MerkleHash, MptValue, StateRoot, StorageKeyWithSpace, MERKLE_NULL_NODE, }; +use rustc_hex::ToHex; use std::{cell::UnsafeCell, sync::Arc}; pub struct SingleMptState { @@ -270,7 +268,7 @@ impl SingleMptState { self.pre_modification(); } - // Retrieve and delete key/value pairs from delta trie + // Retrieve and delete key/value pairs from single mpt trie let trie_kvs = { let key_prefix = access_key_prefix.to_key_bytes(); let deleted = if AM::READ_ONLY { @@ -297,8 +295,6 @@ impl SingleMptState { let mut result = Vec::new(); if let Some(kvs) = trie_kvs { for (k, v) in kvs { - let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); - let k = storage_key.to_key_bytes(); if v.len() > 0 { result.push((k, v)); } @@ -311,35 +307,42 @@ impl SingleMptState { } } - fn read_all_iterator_impl( + fn read_all_with_callback_impl( &mut self, access_key_prefix: StorageKeyWithSpace, - ) -> Result<(Vec, Option>>)> { + callback: &mut dyn FnMut(MptKeyValue), + ) -> Result<()> { self.ensure_temp_slab_for_db_load(); - // Retrieve and delete key/value pairs from delta trie - let trie_kvs = { - let key_prefix = access_key_prefix.to_key_bytes(); - let deleted = SubTrieVisitor::new( - &self.trie, - self.trie_root.clone(), - &mut self.owned_node_set, - )? - .traversal(&key_prefix, &key_prefix)?; - deleted - }; + let mut total_key_count: u64 = 0; - let mut result = Vec::new(); - if let Some(kvs) = trie_kvs { - for (k, v) in kvs { - let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); - let k = storage_key.to_key_bytes(); - if v.len() > 0 { - result.push((k, v)); - } + let mut inner_callback = |(k, v): MptKeyValue| { + total_key_count += 1; + if total_key_count % 10000 == 0 { + println!( + "read_all_with_callback_impl -> total_key_count: {} {}", + total_key_count, + k.to_hex::() + ); } - } + if v.len() > 0 { + callback((k, v)); + } + }; - Ok((result, None)) + // Retrieve and delete key/value pairs from delta trie + let key_prefix = access_key_prefix.to_key_bytes(); + SubTrieVisitor::new( + &self.trie, + self.trie_root.clone(), + &mut self.owned_node_set, + )? 
+ .traversal_with_callback( + &key_prefix, + &key_prefix, + &mut inner_callback, + )?; + + Ok(()) } } @@ -404,10 +407,11 @@ impl StateTrait for SingleMptState { self.delete_all_impl::(access_key_prefix) } - fn read_all_iterator( + fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, - ) -> Result<(Vec, Option>>)> { - self.read_all_iterator_impl(access_key_prefix) + callback: &mut dyn FnMut(MptKeyValue), + ) -> Result<()> { + self.read_all_with_callback_impl(access_key_prefix, callback) } fn compute_state_root(&mut self) -> Result { diff --git a/crates/dbs/storage/src/impls/state.rs b/crates/dbs/storage/src/impls/state.rs index 071335608b..b1ad971ed0 100644 --- a/crates/dbs/storage/src/impls/state.rs +++ b/crates/dbs/storage/src/impls/state.rs @@ -309,10 +309,11 @@ impl StateTrait for State { self.delete_all_impl::(access_key_prefix) } - fn read_all_iterator( + fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, - ) -> Result<(Vec, Option>>)> { - self.read_all_iterator_impl(access_key_prefix) + callback: &mut dyn FnMut(MptKeyValue), + ) -> Result<()> { + self.read_all_with_callback_impl(access_key_prefix, callback) } fn compute_state_root(&mut self) -> Result { @@ -908,6 +909,14 @@ impl State { snapshot_kvs.push((key, value)); } + let is_address_search_prefix = + if let StorageKey::AddressPrefixKey(prefix) = access_key_prefix.key + { + Some(prefix) + } else { + None + }; + let mut result = Vec::new(); // This is used to keep track of the deleted keys. let mut deleted_keys = HashSet::new(); @@ -915,6 +924,15 @@ impl State { for (k, v) in kvs { let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); let k = storage_key.to_key_bytes(); + + // If it's an address search prefix, and k is not start with + // prefix, skip the key. + if let Some(prefix) = is_address_search_prefix { + if !k.starts_with(prefix) { + continue; + } + } + deleted_keys.insert(k.clone()); if v.len() > 0 { result.push((k, v)); @@ -925,11 +943,21 @@ impl State { if let Some(kvs) = intermediate_trie_kvs { for (k, v) in kvs { let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); + let k = storage_key.to_key_bytes(); + + // If it's an address search prefix, and k is not start with + // prefix, skip the key. + if let Some(prefix) = is_address_search_prefix { + if !k.starts_with(prefix) { + continue; + } + } + // Only delete non-empty keys. if v.len() > 0 && !AM::READ_ONLY { self.delete(storage_key)?; } - let k = storage_key.to_key_bytes(); + if !deleted_keys.contains(&k) { deleted_keys.insert(k.clone()); if v.len() > 0 { @@ -959,107 +987,115 @@ impl State { } } - /// Read all key/value pairs with access_key_prefix as prefix. - /// It will return data from delta trie, intermediate trie as a vector, - /// and data from snapshot as a iterator. - /// To use the iterator, you need to call `take()` on it. - /// ```rust - /// let (kvs, kv_iterator) = state.read_all_impl(access_key_prefix)?; - /// let lower_bound_incl = access_key_prefix.to_key_bytes(); - /// let upper_bound_excl = to_key_prefix_iter_upper_bound(&lower_bound_incl); - /// let mut kvs = kv_iterator - /// .iter_range( - /// lower_bound_incl.as_slice(), - /// upper_bound_excl.as_ref().map(|v| &**v), - /// )? - /// .take(); - /// - /// // use it as a iterator - /// let mut snapshot_kvs = Vec::new(); - /// while let Some((key, value)) = kvs.next()? { - /// snapshot_kvs.push((key, value)); - /// } - /// ``` - /// Note: In the iterator, the keys may be duplicated with the keys in the - /// vector. 
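An aside on the `AddressPrefixKey` handling introduced in this hunk: since the dump now scans accounts one one-byte address prefix at a time (`for i in 0..=255` in state_dump.rs), `read_all_impl` keeps only keys whose byte form starts with the requested prefix. The sketch below is a hypothetical, self-contained model of that shard filter, not the crate's code:

```rust
/// Hypothetical model of the prefix-sharded account scan: the address space
/// is split into 256 shards by the first byte, and a key survives the scan
/// only if it begins with the shard's prefix.
fn in_shard(shard_prefix: &[u8], key: &[u8]) -> bool {
    key.starts_with(shard_prefix)
}

fn main() {
    // Three fake account keys, identified here only by their leading bytes.
    let keys: Vec<Vec<u8>> = vec![
        vec![0x01, 0xaa, 0xbb],
        vec![0xc6, 0x11, 0x22],
        vec![0xc6, 0xde, 0xad],
    ];
    for i in 0u8..=255 {
        let prefix = [i];
        let hits = keys.iter().filter(|k| in_shard(&prefix, k)).count();
        if hits > 0 {
            println!("prefix 0x{:02x}: {} account key(s)", i, hits);
        }
    }
}
```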
- pub fn read_all_iterator_impl( + pub fn read_all_with_callback_impl( &mut self, access_key_prefix: StorageKeyWithSpace, - ) -> Result<(Vec, Option>>)> { + callback: &mut dyn FnMut(MptKeyValue), + ) -> Result<()> { self.ensure_temp_slab_for_db_load(); - // Retrieve and delete key/value pairs from delta trie - let delta_trie_kvs = match &self.delta_trie_root { - None => None, - Some(old_root_node) => { - let delta_mpt_key_prefix = access_key_prefix - .to_delta_mpt_key_bytes(&self.delta_trie_key_padding); - let deleted = SubTrieVisitor::new( - &self.delta_trie, - old_root_node.clone(), - &mut self.owned_node_set, - )? - .traversal(&delta_mpt_key_prefix, &delta_mpt_key_prefix)?; - deleted - } - }; - - // Retrieve key/value pairs from intermediate trie - let intermediate_trie_kvs = match &self.intermediate_trie_root { - None => None, - Some(root_node) => { - if self.maybe_intermediate_trie_key_padding.is_some() - && self.maybe_intermediate_trie.is_some() - { - let intermediate_trie_key_padding = self - .maybe_intermediate_trie_key_padding - .as_ref() - .unwrap(); - let intermediate_mpt_key_prefix = access_key_prefix - .to_delta_mpt_key_bytes(intermediate_trie_key_padding); - let values = SubTrieVisitor::new( - self.maybe_intermediate_trie.as_ref().unwrap(), - root_node.clone(), - &mut self.owned_node_set, - )? - .traversal( - &intermediate_mpt_key_prefix, - &intermediate_mpt_key_prefix, - )?; - - values - } else { - None - } - } - }; + let is_address_search_prefix = + if let StorageKey::AddressPrefixKey(prefix) = access_key_prefix.key + { + Some(prefix) + } else { + None + }; - let mut result = Vec::new(); // This is used to keep track of the deleted keys. let mut deleted_keys = HashSet::new(); - if let Some(kvs) = delta_trie_kvs { - for (k, v) in kvs { + + // Retrieve and delete key/value pairs from delta trie + if let Some(old_root_node) = &self.delta_trie_root { + let mut inner_callback = |(k, v): MptKeyValue| { + let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); + let k = storage_key.to_key_bytes(); + + // If it's an address search prefix, and k is not start with + // prefix, skip the key. + if let Some(prefix) = is_address_search_prefix { + if !k.starts_with(prefix) { + return; + } + } deleted_keys.insert(k.clone()); if v.len() > 0 { - result.push((k, v)); + callback((k, v)); + } + }; + let delta_mpt_key_prefix = access_key_prefix + .to_delta_mpt_key_bytes(&self.delta_trie_key_padding); + SubTrieVisitor::new( + &self.delta_trie, + old_root_node.clone(), + &mut self.owned_node_set, + )? + .traversal_with_callback( + &delta_mpt_key_prefix, + &delta_mpt_key_prefix, + &mut inner_callback, + )?; + }; + + // Retrieve key/value pairs from intermediate trie + if let Some(root_node) = &self.intermediate_trie_root { + let mut inner_callback = |(k, v): MptKeyValue| { + let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); + let k = storage_key.to_key_bytes(); + + // If it's an address search prefix, and k is not start with + // prefix, skip the key. 
+ if let Some(prefix) = is_address_search_prefix { + if !k.starts_with(prefix) { + return; + } } - } - } - if let Some(kvs) = intermediate_trie_kvs { - for (k, v) in kvs { if !deleted_keys.contains(&k) { deleted_keys.insert(k.clone()); if v.len() > 0 { - result.push((k, v)); + callback((k, v)); } } + }; + if self.maybe_intermediate_trie_key_padding.is_some() + && self.maybe_intermediate_trie.is_some() + { + let intermediate_trie_key_padding = + self.maybe_intermediate_trie_key_padding.as_ref().unwrap(); + let intermediate_mpt_key_prefix = access_key_prefix + .to_delta_mpt_key_bytes(intermediate_trie_key_padding); + SubTrieVisitor::new( + self.maybe_intermediate_trie.as_ref().unwrap(), + root_node.clone(), + &mut self.owned_node_set, + )? + .traversal_with_callback( + &intermediate_mpt_key_prefix, + &intermediate_mpt_key_prefix, + &mut inner_callback, + )?; } } // Retrieve key/value pairs from snapshot - let kv_iterator = self.snapshot_db.snapshot_kv_iterator()?.take(); + let mut kv_iterator = self.snapshot_db.snapshot_kv_iterator()?.take(); + let lower_bound_incl = access_key_prefix.to_key_bytes(); + let upper_bound_excl = + to_key_prefix_iter_upper_bound(&lower_bound_incl); + let mut kvs = kv_iterator + .iter_range( + lower_bound_incl.as_slice(), + upper_bound_excl.as_ref().map(|v| &**v), + )? + .take(); - Ok((result, Some(kv_iterator))) + while let Some((k, v)) = kvs.next()? { + if !deleted_keys.contains(&k) { + callback((k, v)); + } + } + + Ok(()) } } @@ -1074,7 +1110,6 @@ use crate::{ node_merkle_proof::NodeMerkleProof, state_manager::*, state_proof::StateProof, - storage_db::kvdb_sqlite_sharded::KvdbSqliteSharded, }, state::*, storage_db::*, diff --git a/crates/dbs/storage/src/state.rs b/crates/dbs/storage/src/state.rs index 4f0f91295f..4881c728d5 100644 --- a/crates/dbs/storage/src/state.rs +++ b/crates/dbs/storage/src/state.rs @@ -39,9 +39,10 @@ pub trait StateTrait: Sync + Send { &mut self, access_key_prefix: StorageKeyWithSpace, ) -> Result>>; - fn read_all_iterator( + fn read_all_with_callback( &mut self, _access_key_prefix: StorageKeyWithSpace, - ) -> Result<(Vec, Option>>)> { + _callback: &mut dyn FnMut(MptKeyValue), + ) -> Result<()> { Err(Error::Msg("Not implemented".into())) } @@ -84,7 +85,6 @@ pub trait StateDbGetOriginalMethods { use super::{ impls::{ errors::*, node_merkle_proof::NodeMerkleProof, state_proof::StateProof, - storage_db::kvdb_sqlite_sharded::KvdbSqliteSharded, }, MptKeyValue, StateRootWithAuxInfo, }; diff --git a/crates/primitives/src/storage_key.rs b/crates/primitives/src/storage_key.rs index f55ea534ea..148e3fcdf0 100644 --- a/crates/primitives/src/storage_key.rs +++ b/crates/primitives/src/storage_key.rs @@ -60,6 +60,9 @@ pub enum StorageKey<'a> { VoteListKey(&'a [u8]), // Empty key is used to traverse all key and value pairs. 
EmptyKey, + // Address prefix key is used to search all keys with the same address + // prefix, eg [1, 2](0x0102) will search all keys with prefix 0x0102 + AddressPrefixKey(&'a [u8]), } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -200,6 +203,13 @@ impl<'a> StorageKeyWithSpace<'a> { StorageKey::EmptyKey => { return vec![]; } + StorageKey::AddressPrefixKey(_address_bytes) => { + // delta mpt trie does not support address prefix key search + // so we search all keys and filter them by address prefix + // due to delta mpt trie won't be very big, so the performance + // impact is not very big + return vec![]; + } }; return if self.space == Space::Native { @@ -292,6 +302,11 @@ impl<'a> StorageKeyWithSpace<'a> { StorageKey::EmptyKey => { return vec![]; } + StorageKey::AddressPrefixKey(address_bytes) => { + let mut key = Vec::with_capacity(address_bytes.len()); + key.extend_from_slice(address_bytes); + return key; + } }; return if self.space == Space::Native { diff --git a/docs/commands/readme.md b/docs/commands/readme.md index 57f73be6dd..96be6fd6b4 100644 --- a/docs/commands/readme.md +++ b/docs/commands/readme.md @@ -89,6 +89,17 @@ Note: 1. Conflux contract data storage differs significantly from Ethereum, as it is not stored in separate MPT form, therefore the storage root cannot be obtained. The exported data's `account.root` is fixed as `0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421` 2. The exported root information is the full state root of Conflux dual-space (Core Space, eSpace), only for maintaining data format consistency, not the state root of all eSpace accounts. -3. When exporting mainnet state data, due to the large number of accounts, high machine configuration is required, and the runtime will be quite long; if you want to export the state at a certain height in history, it needs to be performed on a fullstate node data. +3. When exporting mainnet state data, due to the large number of accounts, high machine configuration is required, and the runtime will be quite long; if you want to export the state at a certain height in history, it needs to be performed on a `fullstate node` data. 4. When performing state export, please stop the node program first, then execute the export operation in the node directory. -5. Please use the binary corresponding to the network and execute the export operation in the corresponding network data directory; do not use testnet or master code compiled binary to execute export operations on mainnet data. \ No newline at end of file +5. Please use the binary corresponding to the network and execute the export operation in the corresponding network data directory; `do not` use `testnet or master` code compiled binary to execute export operations on `mainnet data`. +6. If the state is very big, recommand export state into multi file, through arg `--multifile` +7. When this command start, it will first sync to latest block(if your node is not fully synced, this step will take long time), and then do the state export operation +8. 
When running this command on mainnnet or testnet, recommend open below configs + ```toml + storage_delta_mpts_cache_size=10_000_0000 + storage_delta_mpts_cache_start_size=1_000_0000 + storage_delta_mpts_slab_idle_size=5_000_0000 + storage_single_mpt_cache_start_size=2_000_0000 + storage_single_mpt_slab_idle_size=1_000_000_00 + storage_single_mpt_cache_size=20_000_0000 + ``` \ No newline at end of file From 95af03c0ee726f7d67ab8342a80b86e4ac216e53 Mon Sep 17 00:00:00 2001 From: Pana Date: Mon, 1 Sep 2025 17:07:41 +0800 Subject: [PATCH 5/9] add logic to filter core space storage keys --- crates/client/src/state_dump.rs | 9 ++- crates/dbs/statedb/src/lib.rs | 9 ++- .../src/impls/delta_mpt/cow_node_ref.rs | 13 ++++- .../src/impls/delta_mpt/subtrie_visitor.rs | 15 ++++- .../storage/src/impls/recording_storage.rs | 6 +- .../dbs/storage/src/impls/replicated_state.rs | 11 +++- .../dbs/storage/src/impls/single_mpt_state.rs | 14 ++++- crates/dbs/storage/src/impls/state.rs | 16 +++++- crates/dbs/storage/src/state.rs | 8 ++- crates/primitives/src/storage_key.rs | 56 +++++++++++++++++++ 10 files changed, 136 insertions(+), 21 deletions(-) diff --git a/crates/client/src/state_dump.rs b/crates/client/src/state_dump.rs index c49eada067..f67f819efd 100644 --- a/crates/client/src/state_dump.rs +++ b/crates/client/src/state_dump.rs @@ -10,7 +10,8 @@ use chrono::Utc; use keccak_hash::{keccak, KECCAK_EMPTY}; use parking_lot::{Condvar, Mutex}; use primitives::{ - Account, SkipInputCheck, StorageKey, StorageKeyWithSpace, StorageValue, + Account, SkipInputCheck, SpaceStorageFilter, StorageKey, + StorageKeyWithSpace, StorageValue, }; use rlp::Rlp; use std::{ @@ -293,7 +294,11 @@ pub fn export_space_accounts_with_callback( } }; - state.read_all_with_callback(start_key, &mut inner_callback)?; + state.read_all_with_callback( + start_key, + &mut inner_callback, + Some(SpaceStorageFilter::from(space)), + )?; if account_states.len() > 0 { println("Start to read account code and storage data..."); diff --git a/crates/dbs/statedb/src/lib.rs b/crates/dbs/statedb/src/lib.rs index 4580384f9e..52928df385 100644 --- a/crates/dbs/statedb/src/lib.rs +++ b/crates/dbs/statedb/src/lib.rs @@ -12,7 +12,7 @@ pub mod global_params; mod in_memory_storage; mod statedb_ext; use cfx_types::H256; -use primitives::StorageValue; +use primitives::{SpaceStorageFilter, StorageValue}; use cfx_db_errors::statedb as error; @@ -191,9 +191,14 @@ mod impls { pub fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue), + space_storage_filter: Option, ) -> Result<()> { self.storage - .read_all_with_callback(access_key_prefix, callback) + .read_all_with_callback( + access_key_prefix, + callback, + space_storage_filter, + ) .map_err(|err| err.into()) } diff --git a/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs b/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs index b691a93472..d65921e6fb 100644 --- a/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs +++ b/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs @@ -622,12 +622,19 @@ impl CowNodeRef { Ok(()) } + // space_storage_filter can be used to filter the wanted space storage keys pub fn iterate_internal_with_callback( &self, owned_node_set: &OwnedNodeSet, trie: &DeltaMpt, guarded_trie_node: GuardedMaybeOwnedTrieNodeAsCowCallParam, key_prefix: CompressedPathRaw, db: &mut DeltaDbOwnedReadTraitObj, - callback: &mut dyn FnMut(MptKeyValue), + callback: &mut dyn FnMut(MptKeyValue), is_delta_mpt: bool, + space_storage_filter: 
Option, ) -> Result<()> { + if let Some(filter) = space_storage_filter { + if filter.is_filtered(is_delta_mpt, key_prefix.path_slice()) { + return Ok(()); + } + } if guarded_trie_node.as_ref().as_ref().has_value() { assert!(CompressedPathRaw::has_second_nibble( key_prefix.path_mask() @@ -667,6 +674,8 @@ impl CowNodeRef { key_prefix, db, callback, + is_delta_mpt, + space_storage_filter, )?; } @@ -961,7 +970,7 @@ use super::{ AtomicCommitTransaction, DeltaMpt, *, }; use parking_lot::MutexGuard; -use primitives::{MerkleHash, MptValue, MERKLE_NULL_NODE}; +use primitives::{MerkleHash, MptValue, SpaceStorageFilter, MERKLE_NULL_NODE}; use rlp::*; use std::{ borrow::BorrowMut, cell::Cell, convert::TryInto, ops::Deref, diff --git a/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs b/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs index 50ddbfb2a1..39648b8b74 100644 --- a/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs +++ b/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs @@ -636,7 +636,8 @@ impl<'trie, 'db: 'trie> SubTrieVisitor<'trie, 'db> { /// return all key/value pairs given the prefix pub fn traversal_with_callback( mut self, key: KeyPart, key_remaining: KeyPart, - callback: &mut dyn FnMut(MptKeyValue), + callback: &mut dyn FnMut(MptKeyValue), is_delta_mpt: bool, + space_storage_filter: Option, ) -> Result<()> { let node_memory_manager = self.node_memory_manager(); let allocator = node_memory_manager.get_allocator(); @@ -678,7 +679,13 @@ impl<'trie, 'db: 'trie> SubTrieVisitor<'trie, 'db> { } => { drop(trie_node_ref); self.new_visitor_for_subtree(child_node.clone().into()) - .traversal_with_callback(key, key_remaining, callback)?; + .traversal_with_callback( + key, + key_remaining, + callback, + is_delta_mpt, + space_storage_filter, + )?; return Ok(()); } } @@ -691,6 +698,8 @@ impl<'trie, 'db: 'trie> SubTrieVisitor<'trie, 'db> { key_prefix, &mut *self.db.get_mut().to_owned_read()?, callback, + is_delta_mpt, + space_storage_filter, )?; Ok(()) } @@ -919,5 +928,5 @@ use super::{ ChildrenTableDeltaMpt, DeltaMpt, *, }; use parking_lot::MutexGuard; -use primitives::{MerkleHash, MptValue, MERKLE_NULL_NODE}; +use primitives::{MerkleHash, MptValue, SpaceStorageFilter, MERKLE_NULL_NODE}; use std::marker::PhantomData; diff --git a/crates/dbs/storage/src/impls/recording_storage.rs b/crates/dbs/storage/src/impls/recording_storage.rs index 100edbd7be..52f2b81554 100644 --- a/crates/dbs/storage/src/impls/recording_storage.rs +++ b/crates/dbs/storage/src/impls/recording_storage.rs @@ -54,7 +54,7 @@ impl StateTrait fn compute_state_root(&mut self) -> Result; fn get_state_root(&self) -> Result; fn commit(&mut self, epoch_id: EpochId) -> Result; - fn read_all_with_callback(&mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue)) -> Result<()>; + fn read_all_with_callback(&mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue), space_storage_filter: Option) -> Result<()>; } } @@ -102,4 +102,6 @@ use crate::{ use cfx_internal_common::StateRootWithAuxInfo; use delegate::delegate; use parking_lot::Mutex; -use primitives::{CheckInput, EpochId, StorageKeyWithSpace}; +use primitives::{ + CheckInput, EpochId, SpaceStorageFilter, StorageKeyWithSpace, +}; diff --git a/crates/dbs/storage/src/impls/replicated_state.rs b/crates/dbs/storage/src/impls/replicated_state.rs index 2b9f41bb5f..b49cdbf3b0 100644 --- a/crates/dbs/storage/src/impls/replicated_state.rs +++ b/crates/dbs/storage/src/impls/replicated_state.rs @@ -6,7 
+6,8 @@ use cfx_internal_common::StateRootWithAuxInfo; use cfx_types::Space; use parking_lot::Mutex; use primitives::{ - EpochId, NodeMerkleTriplet, StaticBool, StorageKey, StorageKeyWithSpace, + EpochId, NodeMerkleTriplet, SpaceStorageFilter, StaticBool, StorageKey, + StorageKeyWithSpace, }; use std::{ sync::mpsc::{channel, Sender}, @@ -316,9 +317,13 @@ impl StateTrait for ReplicatedState
{ fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue), + space_storage_filter: Option, ) -> Result<()> { - self.state - .read_all_with_callback(access_key_prefix, callback) + self.state.read_all_with_callback( + access_key_prefix, + callback, + space_storage_filter, + ) } fn compute_state_root(&mut self) -> Result { diff --git a/crates/dbs/storage/src/impls/single_mpt_state.rs b/crates/dbs/storage/src/impls/single_mpt_state.rs index e09ec8264a..36ec03f6e2 100644 --- a/crates/dbs/storage/src/impls/single_mpt_state.rs +++ b/crates/dbs/storage/src/impls/single_mpt_state.rs @@ -7,8 +7,8 @@ use crate::{ }; use cfx_internal_common::{StateRootAuxInfo, StateRootWithAuxInfo}; use primitives::{ - EpochId, MerkleHash, MptValue, StateRoot, StorageKeyWithSpace, - MERKLE_NULL_NODE, + EpochId, MerkleHash, MptValue, SpaceStorageFilter, StateRoot, + StorageKeyWithSpace, MERKLE_NULL_NODE, }; use rustc_hex::ToHex; use std::{cell::UnsafeCell, sync::Arc}; @@ -310,6 +310,7 @@ impl SingleMptState { fn read_all_with_callback_impl( &mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue), + space_storage_filter: Option, ) -> Result<()> { self.ensure_temp_slab_for_db_load(); @@ -340,6 +341,8 @@ impl SingleMptState { &key_prefix, &key_prefix, &mut inner_callback, + false, + space_storage_filter, )?; Ok(()) @@ -410,8 +413,13 @@ impl StateTrait for SingleMptState { fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue), + space_storage_filter: Option, ) -> Result<()> { - self.read_all_with_callback_impl(access_key_prefix, callback) + self.read_all_with_callback_impl( + access_key_prefix, + callback, + space_storage_filter, + ) } fn compute_state_root(&mut self) -> Result { diff --git a/crates/dbs/storage/src/impls/state.rs b/crates/dbs/storage/src/impls/state.rs index b1ad971ed0..fd5b407996 100644 --- a/crates/dbs/storage/src/impls/state.rs +++ b/crates/dbs/storage/src/impls/state.rs @@ -312,8 +312,13 @@ impl StateTrait for State { fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue), + space_storage_filter: Option, ) -> Result<()> { - self.read_all_with_callback_impl(access_key_prefix, callback) + self.read_all_with_callback_impl( + access_key_prefix, + callback, + space_storage_filter, + ) } fn compute_state_root(&mut self) -> Result { @@ -990,6 +995,7 @@ impl State { pub fn read_all_with_callback_impl( &mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue), + space_storage_filter: Option, ) -> Result<()> { self.ensure_temp_slab_for_db_load(); @@ -1033,6 +1039,8 @@ impl State { &delta_mpt_key_prefix, &delta_mpt_key_prefix, &mut inner_callback, + true, + space_storage_filter, )?; }; @@ -1073,6 +1081,8 @@ impl State { &intermediate_mpt_key_prefix, &intermediate_mpt_key_prefix, &mut inner_callback, + true, + space_storage_filter, )?; } } @@ -1121,8 +1131,8 @@ use cfx_types::AddressWithSpace; use fallible_iterator::FallibleIterator; use primitives::{ DeltaMptKeyPadding, EpochId, MerkleHash, MptValue, NodeMerkleTriplet, - SkipInputCheck, StateRoot, StaticBool, StorageKey, StorageKeyWithSpace, - StorageRoot, MERKLE_NULL_NODE, NULL_EPOCH, + SkipInputCheck, SpaceStorageFilter, StateRoot, StaticBool, StorageKey, + StorageKeyWithSpace, StorageRoot, MERKLE_NULL_NODE, NULL_EPOCH, }; use rustc_hex::ToHex; use std::{ diff --git a/crates/dbs/storage/src/state.rs 
b/crates/dbs/storage/src/state.rs index 4881c728d5..223eaf8aca 100644 --- a/crates/dbs/storage/src/state.rs +++ b/crates/dbs/storage/src/state.rs @@ -39,9 +39,14 @@ pub trait StateTrait: Sync + Send { &mut self, access_key_prefix: StorageKeyWithSpace, ) -> Result>>; + /// space_storage_filter can be used to filter only the wanted contract + /// storage key if you specify SpaceStorageFilter::Native, it will + /// filter out the ethereum space storage key/value, by this way, we can + /// speedup space key/value traversal fn read_all_with_callback( &mut self, _access_key_prefix: StorageKeyWithSpace, _callback: &mut dyn FnMut(MptKeyValue), + _space_storage_filter: Option, ) -> Result<()> { Err(Error::Msg("Not implemented".into())) } @@ -90,5 +95,6 @@ use super::{ }; use crate::StorageRootProof; use primitives::{ - EpochId, NodeMerkleTriplet, StaticBool, StorageKeyWithSpace, StorageRoot, + EpochId, NodeMerkleTriplet, SpaceStorageFilter, StaticBool, + StorageKeyWithSpace, StorageRoot, }; diff --git a/crates/primitives/src/storage_key.rs b/crates/primitives/src/storage_key.rs index 148e3fcdf0..dce0e3196d 100644 --- a/crates/primitives/src/storage_key.rs +++ b/crates/primitives/src/storage_key.rs @@ -739,6 +739,62 @@ mod delta_mpt_storage_key { } } +// This enum is used to filter only the wanted contract storage key when +// traversal the trie for example, when traverse eth space key/value, we can +// only filter the eSpace storage key/value to accelerate the traversal +// speed +// Native means filter(keep) the native space storage key/value +// Ethereum means filter(keep) the ethereum space storage key/value +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct SpaceStorageFilter(pub Space); + +impl From for SpaceStorageFilter { + fn from(space: Space) -> Self { SpaceStorageFilter(space) } +} + +impl From for Space { + fn from(filter: SpaceStorageFilter) -> Self { filter.0 } +} + +impl SpaceStorageFilter { + pub fn is_native(&self) -> bool { matches!(self.0, Space::Native) } + + pub fn is_ethereum(&self) -> bool { matches!(self.0, Space::Ethereum) } + + // return the flag index according the trie type + // if is_delta_mpt is true, then the space flag is at the 32th index + // otherwise, the space flag is at the 20th index + fn flag_index(&self, is_delta_mpt: bool) -> usize { + if is_delta_mpt { + delta_mpt_storage_key::KEY_PADDING_BYTES + } else { + StorageKeyWithSpace::ACCOUNT_BYTES + } + } + + // return true if the key is filtered out + pub fn is_filtered(&self, is_delta_mpt: bool, key: &[u8]) -> bool { + let flag_index = self.flag_index(is_delta_mpt); + if key.len() > flag_index { + match self.0 { + Space::Native => { + if key[flag_index] == StorageKeyWithSpace::EVM_SPACE_TYPE[0] + { + return true; + } + } + Space::Ethereum => { + if key[flag_index] != StorageKeyWithSpace::EVM_SPACE_TYPE[0] + { + return true; + } + } + } + } + false + } +} + use super::{MerkleHash, MERKLE_NULL_NODE}; use crate::{ hash::keccak, storage_key::delta_mpt_storage_key::ACCOUNT_KEYPART_BYTES, From 355829fb25b80217ab6f8105b03dcbb30e4b95a3 Mon Sep 17 00:00:00 2001 From: Pana Date: Tue, 2 Sep 2025 10:32:51 +0800 Subject: [PATCH 6/9] pretty log --- crates/client/src/state_dump.rs | 13 +++++++------ crates/dbs/storage/src/impls/single_mpt_state.rs | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/crates/client/src/state_dump.rs b/crates/client/src/state_dump.rs index f67f819efd..a70a6df183 100644 --- a/crates/client/src/state_dump.rs +++ b/crates/client/src/state_dump.rs @@ -268,12 +268,6 @@ pub fn 
export_space_accounts_with_callback( let mut inner_callback = |(key, value): (Vec, Box<[u8]>)| { total_key_count += 1; - let storage_key_with_space = - StorageKeyWithSpace::from_key_bytes::(&key); - if storage_key_with_space.space != space { - core_space_key_count += 1; - return; - } if total_key_count % 10000 == 0 { println(&format!( @@ -282,6 +276,13 @@ pub fn export_space_accounts_with_callback( )); } + let storage_key_with_space = + StorageKeyWithSpace::from_key_bytes::(&key); + if storage_key_with_space.space != space { + core_space_key_count += 1; + return; + } + if let StorageKey::AccountKey(address_bytes) = storage_key_with_space.key { diff --git a/crates/dbs/storage/src/impls/single_mpt_state.rs b/crates/dbs/storage/src/impls/single_mpt_state.rs index 36ec03f6e2..fc5d1ba642 100644 --- a/crates/dbs/storage/src/impls/single_mpt_state.rs +++ b/crates/dbs/storage/src/impls/single_mpt_state.rs @@ -320,7 +320,7 @@ impl SingleMptState { total_key_count += 1; if total_key_count % 10000 == 0 { println!( - "read_all_with_callback_impl -> total_key_count: {} {}", + "read_all_with_callback_impl: total_key_count {} key {}", total_key_count, k.to_hex::() ); From 51915dff34d99407c4ed035e516302a487325f2c Mon Sep 17 00:00:00 2001 From: Pana Date: Tue, 2 Sep 2025 18:27:41 +0800 Subject: [PATCH 7/9] optimize contract storage read method --- crates/client/src/state_dump.rs | 49 ++++++++++++++----- crates/dbs/statedb/src/lib.rs | 7 ++- .../src/impls/delta_mpt/cow_node_ref.rs | 13 +++-- .../src/impls/delta_mpt/subtrie_visitor.rs | 8 +-- .../storage/src/impls/recording_storage.rs | 6 +-- .../dbs/storage/src/impls/replicated_state.rs | 8 ++- .../dbs/storage/src/impls/single_mpt_state.rs | 14 +++--- crates/dbs/storage/src/impls/state.rs | 16 +++--- crates/dbs/storage/src/state.rs | 10 +--- crates/primitives/src/storage_key.rs | 4 +- 10 files changed, 75 insertions(+), 60 deletions(-) diff --git a/crates/client/src/state_dump.rs b/crates/client/src/state_dump.rs index a70a6df183..99c5729b92 100644 --- a/crates/client/src/state_dump.rs +++ b/crates/client/src/state_dump.rs @@ -4,14 +4,13 @@ use cfx_rpc_eth_types::{AccountState, StateDump, EOA_STORAGE_ROOT_H256}; use cfx_rpc_primitives::Bytes; use cfx_statedb::{StateDbExt, StateDbGeneric}; use cfx_storage::state_manager::StateManagerTrait; -use cfx_types::{Address, Space, H256}; +use cfx_types::{Address, Space, H256, U256}; use cfxcore::NodeType; use chrono::Utc; use keccak_hash::{keccak, KECCAK_EMPTY}; use parking_lot::{Condvar, Mutex}; use primitives::{ - Account, SkipInputCheck, SpaceStorageFilter, StorageKey, - StorageKeyWithSpace, StorageValue, + Account, SkipInputCheck, StorageKey, StorageKeyWithSpace, StorageValue, }; use rlp::Rlp; use std::{ @@ -260,7 +259,7 @@ pub fn export_space_accounts_with_callback( let mut core_space_key_count: u64 = 0; let mut total_key_count: u64 = 0; - for i in 0..=255 { + for i in 198..=255 { let prefix = [i]; let start_key = StorageKey::AddressPrefixKey(&prefix).with_space(space); @@ -295,18 +294,15 @@ pub fn export_space_accounts_with_callback( } }; - state.read_all_with_callback( - start_key, - &mut inner_callback, - Some(SpaceStorageFilter::from(space)), - )?; + state.read_all_with_callback(start_key, &mut inner_callback, true)?; if account_states.len() > 0 { println("Start to read account code and storage data..."); } for (_address, account) in account_states { - let account_state = get_account_state(state, &account, config)?; + let account_state = + get_account_state(state, &account, config, space)?; 
callback(account_state); found_accounts += 1; if config.limit > 0 && found_accounts >= config.limit as usize { @@ -321,6 +317,7 @@ pub fn export_space_accounts_with_callback( #[allow(unused)] fn get_account_state( state: &mut StateDbGeneric, account: &Account, config: &StateDumpConfig, + space: Space, ) -> Result> { let address = account.address(); @@ -335,7 +332,7 @@ fn get_account_state( }; let storage = if is_contract && !config.no_storage { - let storage = state.get_account_storage_entries(&address, None)?; + let storage = get_contract_storage(state, &address.address, space)?; Some(storage) } else { None @@ -358,6 +355,36 @@ fn get_account_state( }) } +fn get_contract_storage( + state: &mut StateDbGeneric, address: &Address, space: Space, +) -> Result, Box> { + let mut storage: BTreeMap = Default::default(); + + let mut inner_callback = |(key, value): (Vec, Box<[u8]>)| { + let storage_key_with_space = + StorageKeyWithSpace::from_key_bytes::(&key); + if storage_key_with_space.space != space { + return; + } + + if let StorageKey::StorageKey { + address_bytes: _, + storage_key, + } = storage_key_with_space.key + { + let h256_storage_key = H256::from_slice(storage_key); + let storage_value_with_owner: StorageValue = + rlp::decode(&value).expect("Failed to decode storage value"); + storage.insert(h256_storage_key, storage_value_with_owner.value); + }; + }; + + let start_key = StorageKey::new_storage_root_key(address).with_space(space); + state.read_all_with_callback(start_key, &mut inner_callback, false)?; + + Ok(storage) +} + fn println(message: &str) { println!("[{}] {}", Utc::now().format("%Y-%m-%d %H:%M:%S"), message); } diff --git a/crates/dbs/statedb/src/lib.rs b/crates/dbs/statedb/src/lib.rs index 52928df385..740c1456d5 100644 --- a/crates/dbs/statedb/src/lib.rs +++ b/crates/dbs/statedb/src/lib.rs @@ -12,7 +12,7 @@ pub mod global_params; mod in_memory_storage; mod statedb_ext; use cfx_types::H256; -use primitives::{SpaceStorageFilter, StorageValue}; +use primitives::StorageValue; use cfx_db_errors::statedb as error; @@ -190,14 +190,13 @@ mod impls { pub fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, - callback: &mut dyn FnMut(MptKeyValue), - space_storage_filter: Option, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, ) -> Result<()> { self.storage .read_all_with_callback( access_key_prefix, callback, - space_storage_filter, + only_account_key, ) .map_err(|err| err.into()) } diff --git a/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs b/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs index d65921e6fb..eb67f95284 100644 --- a/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs +++ b/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs @@ -622,16 +622,19 @@ impl CowNodeRef { Ok(()) } - // space_storage_filter can be used to filter the wanted space storage keys + // only_account_key can be used to filter the only account key pub fn iterate_internal_with_callback( &self, owned_node_set: &OwnedNodeSet, trie: &DeltaMpt, guarded_trie_node: GuardedMaybeOwnedTrieNodeAsCowCallParam, key_prefix: CompressedPathRaw, db: &mut DeltaDbOwnedReadTraitObj, callback: &mut dyn FnMut(MptKeyValue), is_delta_mpt: bool, - space_storage_filter: Option, + only_account_key: bool, ) -> Result<()> { - if let Some(filter) = space_storage_filter { - if filter.is_filtered(is_delta_mpt, key_prefix.path_slice()) { + // filter out all the key that is longer than the account key + if only_account_key { + let key_len = + 
SpaceStorageFilter::space_flag_index(is_delta_mpt) + 1; + if key_prefix.path_slice().len() > key_len { return Ok(()); } } @@ -675,7 +678,7 @@ impl CowNodeRef { db, callback, is_delta_mpt, - space_storage_filter, + only_account_key, )?; } diff --git a/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs b/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs index 39648b8b74..c20bcf4e32 100644 --- a/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs +++ b/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs @@ -637,7 +637,7 @@ impl<'trie, 'db: 'trie> SubTrieVisitor<'trie, 'db> { pub fn traversal_with_callback( mut self, key: KeyPart, key_remaining: KeyPart, callback: &mut dyn FnMut(MptKeyValue), is_delta_mpt: bool, - space_storage_filter: Option, + only_account_key: bool, ) -> Result<()> { let node_memory_manager = self.node_memory_manager(); let allocator = node_memory_manager.get_allocator(); @@ -684,7 +684,7 @@ impl<'trie, 'db: 'trie> SubTrieVisitor<'trie, 'db> { key_remaining, callback, is_delta_mpt, - space_storage_filter, + only_account_key, )?; return Ok(()); } @@ -699,7 +699,7 @@ impl<'trie, 'db: 'trie> SubTrieVisitor<'trie, 'db> { &mut *self.db.get_mut().to_owned_read()?, callback, is_delta_mpt, - space_storage_filter, + only_account_key, )?; Ok(()) } @@ -928,5 +928,5 @@ use super::{ ChildrenTableDeltaMpt, DeltaMpt, *, }; use parking_lot::MutexGuard; -use primitives::{MerkleHash, MptValue, SpaceStorageFilter, MERKLE_NULL_NODE}; +use primitives::{MerkleHash, MptValue, MERKLE_NULL_NODE}; use std::marker::PhantomData; diff --git a/crates/dbs/storage/src/impls/recording_storage.rs b/crates/dbs/storage/src/impls/recording_storage.rs index 52f2b81554..695adb48e2 100644 --- a/crates/dbs/storage/src/impls/recording_storage.rs +++ b/crates/dbs/storage/src/impls/recording_storage.rs @@ -54,7 +54,7 @@ impl StateTrait fn compute_state_root(&mut self) -> Result; fn get_state_root(&self) -> Result; fn commit(&mut self, epoch_id: EpochId) -> Result; - fn read_all_with_callback(&mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue), space_storage_filter: Option) -> Result<()>; + fn read_all_with_callback(&mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool) -> Result<()>; } } @@ -102,6 +102,4 @@ use crate::{ use cfx_internal_common::StateRootWithAuxInfo; use delegate::delegate; use parking_lot::Mutex; -use primitives::{ - CheckInput, EpochId, SpaceStorageFilter, StorageKeyWithSpace, -}; +use primitives::{CheckInput, EpochId, StorageKeyWithSpace}; diff --git a/crates/dbs/storage/src/impls/replicated_state.rs b/crates/dbs/storage/src/impls/replicated_state.rs index b49cdbf3b0..068b80ceff 100644 --- a/crates/dbs/storage/src/impls/replicated_state.rs +++ b/crates/dbs/storage/src/impls/replicated_state.rs @@ -6,8 +6,7 @@ use cfx_internal_common::StateRootWithAuxInfo; use cfx_types::Space; use parking_lot::Mutex; use primitives::{ - EpochId, NodeMerkleTriplet, SpaceStorageFilter, StaticBool, StorageKey, - StorageKeyWithSpace, + EpochId, NodeMerkleTriplet, StaticBool, StorageKey, StorageKeyWithSpace, }; use std::{ sync::mpsc::{channel, Sender}, @@ -316,13 +315,12 @@ impl StateTrait for ReplicatedState
{ fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, - callback: &mut dyn FnMut(MptKeyValue), - space_storage_filter: Option, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, ) -> Result<()> { self.state.read_all_with_callback( access_key_prefix, callback, - space_storage_filter, + only_account_key, ) } diff --git a/crates/dbs/storage/src/impls/single_mpt_state.rs b/crates/dbs/storage/src/impls/single_mpt_state.rs index fc5d1ba642..526a964a3c 100644 --- a/crates/dbs/storage/src/impls/single_mpt_state.rs +++ b/crates/dbs/storage/src/impls/single_mpt_state.rs @@ -7,8 +7,8 @@ use crate::{ }; use cfx_internal_common::{StateRootAuxInfo, StateRootWithAuxInfo}; use primitives::{ - EpochId, MerkleHash, MptValue, SpaceStorageFilter, StateRoot, - StorageKeyWithSpace, MERKLE_NULL_NODE, + EpochId, MerkleHash, MptValue, StateRoot, StorageKeyWithSpace, + MERKLE_NULL_NODE, }; use rustc_hex::ToHex; use std::{cell::UnsafeCell, sync::Arc}; @@ -309,8 +309,7 @@ impl SingleMptState { fn read_all_with_callback_impl( &mut self, access_key_prefix: StorageKeyWithSpace, - callback: &mut dyn FnMut(MptKeyValue), - space_storage_filter: Option, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, ) -> Result<()> { self.ensure_temp_slab_for_db_load(); @@ -342,7 +341,7 @@ impl SingleMptState { &key_prefix, &mut inner_callback, false, - space_storage_filter, + only_account_key, )?; Ok(()) @@ -412,13 +411,12 @@ impl StateTrait for SingleMptState { fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, - callback: &mut dyn FnMut(MptKeyValue), - space_storage_filter: Option, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, ) -> Result<()> { self.read_all_with_callback_impl( access_key_prefix, callback, - space_storage_filter, + only_account_key, ) } diff --git a/crates/dbs/storage/src/impls/state.rs b/crates/dbs/storage/src/impls/state.rs index fd5b407996..db1ac768ec 100644 --- a/crates/dbs/storage/src/impls/state.rs +++ b/crates/dbs/storage/src/impls/state.rs @@ -311,13 +311,12 @@ impl StateTrait for State { fn read_all_with_callback( &mut self, access_key_prefix: StorageKeyWithSpace, - callback: &mut dyn FnMut(MptKeyValue), - space_storage_filter: Option, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, ) -> Result<()> { self.read_all_with_callback_impl( access_key_prefix, callback, - space_storage_filter, + only_account_key, ) } @@ -994,8 +993,7 @@ impl State { pub fn read_all_with_callback_impl( &mut self, access_key_prefix: StorageKeyWithSpace, - callback: &mut dyn FnMut(MptKeyValue), - space_storage_filter: Option, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, ) -> Result<()> { self.ensure_temp_slab_for_db_load(); @@ -1040,7 +1038,7 @@ impl State { &delta_mpt_key_prefix, &mut inner_callback, true, - space_storage_filter, + only_account_key, )?; }; @@ -1082,7 +1080,7 @@ impl State { &intermediate_mpt_key_prefix, &mut inner_callback, true, - space_storage_filter, + only_account_key, )?; } } @@ -1131,8 +1129,8 @@ use cfx_types::AddressWithSpace; use fallible_iterator::FallibleIterator; use primitives::{ DeltaMptKeyPadding, EpochId, MerkleHash, MptValue, NodeMerkleTriplet, - SkipInputCheck, SpaceStorageFilter, StateRoot, StaticBool, StorageKey, - StorageKeyWithSpace, StorageRoot, MERKLE_NULL_NODE, NULL_EPOCH, + SkipInputCheck, StateRoot, StaticBool, StorageKey, StorageKeyWithSpace, + StorageRoot, MERKLE_NULL_NODE, NULL_EPOCH, }; use rustc_hex::ToHex; use std::{ diff --git 
a/crates/dbs/storage/src/state.rs b/crates/dbs/storage/src/state.rs index 223eaf8aca..96abb65728 100644 --- a/crates/dbs/storage/src/state.rs +++ b/crates/dbs/storage/src/state.rs @@ -39,14 +39,9 @@ pub trait StateTrait: Sync + Send { &mut self, access_key_prefix: StorageKeyWithSpace, ) -> Result>>; - /// space_storage_filter can be used to filter only the wanted contract - /// storage key if you specify SpaceStorageFilter::Native, it will - /// filter out the ethereum space storage key/value, by this way, we can - /// speedup space key/value traversal fn read_all_with_callback( &mut self, _access_key_prefix: StorageKeyWithSpace, - _callback: &mut dyn FnMut(MptKeyValue), - _space_storage_filter: Option, + _callback: &mut dyn FnMut(MptKeyValue), _only_account_key: bool, ) -> Result<()> { Err(Error::Msg("Not implemented".into())) } @@ -95,6 +90,5 @@ use super::{ }; use crate::StorageRootProof; use primitives::{ - EpochId, NodeMerkleTriplet, SpaceStorageFilter, StaticBool, - StorageKeyWithSpace, StorageRoot, + EpochId, NodeMerkleTriplet, StaticBool, StorageKeyWithSpace, StorageRoot, }; diff --git a/crates/primitives/src/storage_key.rs b/crates/primitives/src/storage_key.rs index dce0e3196d..6878f7a92d 100644 --- a/crates/primitives/src/storage_key.rs +++ b/crates/primitives/src/storage_key.rs @@ -764,7 +764,7 @@ impl SpaceStorageFilter { // return the flag index according the trie type // if is_delta_mpt is true, then the space flag is at the 32th index // otherwise, the space flag is at the 20th index - fn flag_index(&self, is_delta_mpt: bool) -> usize { + pub fn space_flag_index(is_delta_mpt: bool) -> usize { if is_delta_mpt { delta_mpt_storage_key::KEY_PADDING_BYTES } else { @@ -774,7 +774,7 @@ impl SpaceStorageFilter { // return true if the key is filtered out pub fn is_filtered(&self, is_delta_mpt: bool, key: &[u8]) -> bool { - let flag_index = self.flag_index(is_delta_mpt); + let flag_index = Self::space_flag_index(is_delta_mpt); if key.len() > flag_index { match self.0 { Space::Native => { From 66858fde19da20d5cffe5df1bd635f2b7a5a1100 Mon Sep 17 00:00:00 2001 From: Pana Date: Wed, 3 Sep 2025 12:54:55 +0800 Subject: [PATCH 8/9] chunk file if contract storage is two big --- bins/conflux/src/command/dump.rs | 7 +++++-- crates/client/src/state_dump.rs | 19 ++++++++++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/bins/conflux/src/command/dump.rs b/bins/conflux/src/command/dump.rs index e95a693efd..49f4c90f6f 100644 --- a/bins/conflux/src/command/dump.rs +++ b/bins/conflux/src/command/dump.rs @@ -67,7 +67,9 @@ impl DumpCommand { }) } - fn get_state_dump_config(&self) -> Result { + fn get_state_dump_config( + &self, output_path: &str, + ) -> Result { let start_address = parse_hex_string(&self.start) .map_err(|e| format!("Invalid address: {}", e))?; Ok(StateDumpConfig { @@ -76,6 +78,7 @@ impl DumpCommand { block: self.block, no_code: self.no_code, no_storage: self.no_storage, + out_put_path: output_path.to_string(), }) } @@ -96,7 +99,7 @@ impl DumpCommand { } let exit = Arc::new((Mutex::new(false), Condvar::new())); - let config = self.get_state_dump_config()?; + let config = self.get_state_dump_config(output_path)?; let _total_accounts = if self.multi_file { // Write to multiple files diff --git a/crates/client/src/state_dump.rs b/crates/client/src/state_dump.rs index 99c5729b92..a251b6c1bd 100644 --- a/crates/client/src/state_dump.rs +++ b/crates/client/src/state_dump.rs @@ -15,7 +15,9 @@ use primitives::{ use rlp::Rlp; use std::{ collections::{BTreeMap, HashMap}, + 
fs, ops::Deref, + path::Path, sync::Arc, thread, time::Duration, @@ -27,6 +29,7 @@ pub struct StateDumpConfig { pub block: Option, pub no_code: bool, pub no_storage: bool, + pub out_put_path: String, } // This method will read all data (k, v) from the Conflux state tree (including @@ -332,7 +335,8 @@ fn get_account_state( }; let storage = if is_contract && !config.no_storage { - let storage = get_contract_storage(state, &address.address, space)?; + let storage = + get_contract_storage(state, &address.address, space, config)?; Some(storage) } else { None @@ -357,8 +361,10 @@ fn get_account_state( fn get_contract_storage( state: &mut StateDbGeneric, address: &Address, space: Space, + config: &StateDumpConfig, ) -> Result, Box> { let mut storage: BTreeMap = Default::default(); + let mut chunk_count = 0; let mut inner_callback = |(key, value): (Vec, Box<[u8]>)| { let storage_key_with_space = @@ -376,6 +382,17 @@ fn get_contract_storage( let storage_value_with_owner: StorageValue = rlp::decode(&value).expect("Failed to decode storage value"); storage.insert(h256_storage_key, storage_value_with_owner.value); + + if storage.len() == 5000_0000 { + chunk_count += 1; + let name = format!("{:?}-chunk{}.json", address, chunk_count); + let file_path = Path::new(&config.out_put_path).join(&name); + let json_content = serde_json::to_string_pretty(&storage) + .expect("Failed to serialize storage"); + fs::write(&file_path, json_content) + .expect("Failed to write storage file"); + storage.clear(); + } }; }; From 0fe75d8aff589314b59cf8d8f79777e712a7a0d4 Mon Sep 17 00:00:00 2001 From: Pana Date: Wed, 3 Sep 2025 14:20:24 +0800 Subject: [PATCH 9/9] update parameter --- crates/client/src/state_dump.rs | 4 ++-- docs/commands/readme.md | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/client/src/state_dump.rs b/crates/client/src/state_dump.rs index a251b6c1bd..958608f8f4 100644 --- a/crates/client/src/state_dump.rs +++ b/crates/client/src/state_dump.rs @@ -262,7 +262,7 @@ pub fn export_space_accounts_with_callback( let mut core_space_key_count: u64 = 0; let mut total_key_count: u64 = 0; - for i in 198..=255 { + for i in 0..=255 { let prefix = [i]; let start_key = StorageKey::AddressPrefixKey(&prefix).with_space(space); @@ -383,7 +383,7 @@ fn get_contract_storage( rlp::decode(&value).expect("Failed to decode storage value"); storage.insert(h256_storage_key, storage_value_with_owner.value); - if storage.len() == 5000_0000 { + if storage.len() == 5000_000 { chunk_count += 1; let name = format!("{:?}-chunk{}.json", address, chunk_count); let file_path = Path::new(&config.out_put_path).join(&name); diff --git a/docs/commands/readme.md b/docs/commands/readme.md index 96be6fd6b4..6018af256d 100644 --- a/docs/commands/readme.md +++ b/docs/commands/readme.md @@ -94,7 +94,8 @@ Note: 5. Please use the binary corresponding to the network and execute the export operation in the corresponding network data directory; `do not` use `testnet or master` code compiled binary to execute export operations on `mainnet data`. 6. If the state is very big, recommand export state into multi file, through arg `--multifile` 7. When this command start, it will first sync to latest block(if your node is not fully synced, this step will take long time), and then do the state export operation -8. When running this command on mainnnet or testnet, recommend open below configs +8. 
If a contract has a very large number of storage keys, its storage data will be split into multiple files, e.g. `0xc6e865c213c89ca42a622c5572d19f00d84d7a16-chunk1.json`, each holding up to 5,000,000 keys
+9. When running this command on mainnet or testnet, it is recommended to enable the configs below
```toml
storage_delta_mpts_cache_size=10_000_0000
storage_delta_mpts_cache_start_size=1_000_0000
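
The chunked dump described in note 8 is produced by `get_contract_storage` above: once the in-memory map reaches the chunk limit it is serialized with `serde_json::to_string_pretty` and flushed to `<address>-chunk<N>.json`. As a minimal sketch (not part of this patch), the snippet below shows one way a consumer could stitch those chunk files back together; the helper name `merge_storage_chunks` is invented for illustration, the file-name pattern follows the dump code, and treating each chunk as a JSON object of key/value pairs (with `serde_json` available) is an assumption.

```rust
use std::{collections::BTreeMap, fs, path::Path};

/// Merge `<address>-chunk1.json`, `<address>-chunk2.json`, ... from `dir`
/// into a single map. Chunk numbering starts at 1, matching the dump code;
/// iteration stops at the first missing chunk file.
fn merge_storage_chunks(
    dir: &Path, address: &str,
) -> std::io::Result<BTreeMap<String, serde_json::Value>> {
    let mut merged = BTreeMap::new();
    for i in 1.. {
        let path = dir.join(format!("{}-chunk{}.json", address, i));
        if !path.exists() {
            break;
        }
        let content = fs::read_to_string(&path)?;
        // Assumption: each chunk is a JSON object mapping storage keys to values.
        let chunk: BTreeMap<String, serde_json::Value> =
            serde_json::from_str(&content).expect("malformed chunk file");
        // Keys never repeat across chunks (the dump clears the map after each
        // flush), so a plain extend rebuilds the full storage map.
        merged.extend(chunk);
    }
    Ok(merged)
}
```

Holding the merged map in memory can itself be costly for the largest contracts; processing each chunk file independently is an alternative when only aggregate statistics are needed.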