diff --git a/Cargo.lock b/Cargo.lock index 79e5c93d8d..6b1e33c33e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1593,6 +1593,7 @@ dependencies = [ "rlp 0.4.6", "serde", "serde_json", + "serde_with", "similar-asserts", "thiserror 2.0.11", ] @@ -2044,6 +2045,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-targets 0.52.6", ] @@ -2185,6 +2187,7 @@ dependencies = [ "cfxcore-types", "cfxkey", "cfxstore", + "chrono", "consensus-types", "criterion", "ctrlc", @@ -2194,6 +2197,7 @@ dependencies = [ "diem-crypto", "diem-types", "dir", + "fallible-iterator", "futures 0.3.30", "itertools 0.14.0", "jsonrpc-core", @@ -2696,6 +2700,41 @@ dependencies = [ "hibitset", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.96", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.96", +] + [[package]] name = "dashmap" version = "6.1.0" @@ -4662,6 +4701,12 @@ dependencies = [ "syn 2.0.96", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.3" @@ -8439,6 +8484,34 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "3.14.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" +dependencies = [ + "base64 0.22.1", + "chrono", + "hex", + "serde", + "serde_derive", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.96", +] + [[package]] name = "serde_yaml" version = "0.8.26" diff --git a/Cargo.toml b/Cargo.toml index 4bf8bcdf3a..679c06c1a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -272,6 +272,7 @@ serde = { version = "1.0", features = [ ], default-features = false } serde_json = { version = "1.0", default-features = false, features = ["alloc"] } serde_derive = { version = "1.0", default-features = false } +serde_with = { version = "3", default-features = false, features = ["macros"] } hex = "0.4" rustc-hex = "2.1" hex-literal = "1.0" diff --git a/bins/conflux/src/cli.rs b/bins/conflux/src/cli.rs index 15ec87e631..08f2454212 100644 --- a/bins/conflux/src/cli.rs +++ b/bins/conflux/src/cli.rs @@ -1,3 +1,4 @@ +use crate::command::dump::DumpCommand; use clap::{Args, Parser, Subcommand, ValueEnum}; /// Conflux client @@ -267,6 +268,9 @@ pub enum Commands { /// Manage accounts #[command(subcommand_required = true, arg_required_else_help = true)] Account(AccountSubcommands), + /// Dump eSpace account state at a given block number + #[command(subcommand_required = false, arg_required_else_help = false)] + Dump(DumpCommand), /// RPC based subcommands to query blockchain information and send /// transactions #[command(subcommand_required = true, arg_required_else_help = true)] diff --git a/bins/conflux/src/command/dump.rs b/bins/conflux/src/command/dump.rs new file mode 100644 index 0000000000..49f4c90f6f --- /dev/null +++ 
b/bins/conflux/src/command/dump.rs @@ -0,0 +1,167 @@ +use cfx_types::parse_hex_string; +use clap::{ArgMatches, Args}; +use client::{ + configuration::Configuration, + state_dump::{dump_whole_state, iterate_dump_whole_state, StateDumpConfig}, +}; +use parking_lot::{Condvar, Mutex}; +use serde_json; +use std::{collections::HashMap, fs, path::Path, sync::Arc}; + +#[derive(Args, Debug)] +pub struct DumpCommand { + /// Include accounts for which we don't have the address (missing preimage) + // #[arg(id = "incompletes", long = "incompletes")] + // incompletes: bool, + /// Print streaming JSON iteratively, delimited by newlines + // #[arg(id = "iterative", long = "iterative", default_value = "true")] + // iterative: bool, + /// Max number of elements (0 = no limit) + #[arg( + id = "limit", + long = "limit", + value_name = "NUM", + default_value = "0" + )] + limit: u64, + /// Target block number, if not specified, the latest block will be used + #[arg(id = "block", long = "block", value_name = "NUM")] + block: Option, + /// Exclude contract code (save db lookups) + #[arg(id = "nocode", long = "nocode")] + no_code: bool, + /// Exclude storage entries (save db lookups) + #[arg(id = "nostorage", long = "nostorage")] + no_storage: bool, + /// Start position address + #[arg( + id = "start", + long = "start", + value_name = "String", + default_value = "0x0000000000000000000000000000000000000000" + )] + start: String, + /// Path to the output folder (default: ./dump) + #[arg(id = "output", long = "output", value_name = "PATH")] + output: Option, + /// Multi file mode + #[arg(id = "multifile", long = "multifile")] + multi_file: bool, +} + +impl DumpCommand { + pub fn parse(matches: &ArgMatches) -> Result { + let output = matches.get_one::("output").cloned(); + Ok(Self { + block: matches.get_one::("block").cloned(), + // incompletes: matches.get_flag("incompletes"), + // iterative: matches.get_flag("iterative"), + limit: matches.get_one::("limit").cloned().unwrap_or(0), + no_code: 
matches.get_flag("nocode"), + no_storage: matches.get_flag("nostorage"), + start: matches.get_one::("start").cloned().unwrap_or( + "0x0000000000000000000000000000000000000000".to_string(), + ), + output, + multi_file: matches.get_flag("multifile"), + }) + } + + fn get_state_dump_config( + &self, output_path: &str, + ) -> Result { + let start_address = parse_hex_string(&self.start) + .map_err(|e| format!("Invalid address: {}", e))?; + Ok(StateDumpConfig { + start_address, + limit: self.limit, + block: self.block, + no_code: self.no_code, + no_storage: self.no_storage, + out_put_path: output_path.to_string(), + }) + } + + pub fn execute(&self, conf: &mut Configuration) -> Result { + // Determine output directory + let output_path = match self.output { + Some(ref path) => path, + None => { + "./dump" // Default to "./dump" if no output specified + } + }; + + // Ensure the directory exists + if !Path::new(output_path).exists() { + fs::create_dir_all(output_path).map_err(|e| { + format!("Failed to create output directory: {}", e) + })?; + } + + let exit = Arc::new((Mutex::new(false), Condvar::new())); + let config = self.get_state_dump_config(output_path)?; + + let _total_accounts = if self.multi_file { + // Write to multiple files + let state_root = iterate_dump_whole_state( + conf, + exit, + &config, + |account_state| { + let address = + account_state.address.expect("address is not set"); + let filename = format!("{:?}.json", address); + let file_path = Path::new(output_path).join(&filename); + + // Serialize account_state to JSON + let json_content = + serde_json::to_string_pretty(&account_state) + .map_err(|e| { + format!( + "Failed to serialize account state for {}: {}", + address, e + ) + }) + .expect("Failed to serialize account state"); + + // Write to file + fs::write(&file_path, json_content) + .map_err(|e| { + format!( + "Failed to write file {}: {}", + file_path.display(), + e + ) + }) + .expect("Failed to write file"); + }, + )?; + + // Write meta info + 
let mut meta_info = HashMap::new(); + meta_info.insert("root".to_string(), state_root); + let meta_file_path = Path::new(output_path).join("meta.json"); + let meta_content = serde_json::to_string_pretty(&meta_info) + .map_err(|e| format!("Failed to serialize state: {}", e))?; + fs::write(&meta_file_path, meta_content) + .map_err(|e| format!("Failed to write meta file: {}", e))?; + 0 + } else { + let state = dump_whole_state(conf, exit, &config)?; + let total_accounts = state.accounts.len(); + // Write to a single file + let file_path = Path::new(output_path).join("state.json"); + let json_content = serde_json::to_string_pretty(&state) + .map_err(|e| format!("Failed to serialize state: {}", e))?; + fs::write(&file_path, json_content).map_err(|e| { + format!("Failed to write file {}: {}", file_path.display(), e) + })?; + total_accounts + }; + + Ok(format!( + "Dumped account state to output directory: {}", + output_path + )) + } +} diff --git a/bins/conflux/src/command/mod.rs b/bins/conflux/src/command/mod.rs index f908b8674e..fc391d2412 100644 --- a/bins/conflux/src/command/mod.rs +++ b/bins/conflux/src/command/mod.rs @@ -3,5 +3,6 @@ // See http://www.gnu.org/licenses/ pub mod account; +pub mod dump; pub mod helpers; pub mod rpc; diff --git a/bins/conflux/src/main.rs b/bins/conflux/src/main.rs index 0f779d556f..4625869883 100644 --- a/bins/conflux/src/main.rs +++ b/bins/conflux/src/main.rs @@ -30,7 +30,10 @@ use client::{ full::FullClient, light::LightClient, }; -use command::account::{AccountCmd, ImportAccounts, ListAccounts, NewAccount}; +use command::{ + account::{AccountCmd, ImportAccounts, ListAccounts, NewAccount}, + dump::DumpCommand, +}; use log::{info, LevelFilter}; use log4rs::{ append::{console::ConsoleAppender, file::FileAppender}, @@ -155,6 +158,16 @@ fn handle_sub_command(matches: &ArgMatches) -> Result, String> { return Ok(Some(execute_output)); } + // dump sub-commands + if let Some(("dump", dump_matches)) = matches.subcommand() { + let dump_cmd = 
DumpCommand::parse(dump_matches).map_err(|e| { + format!("Failed to parse dump command arguments: {}", e) + })?; + let mut conf = Configuration::parse(&matches)?; + let execute_output = dump_cmd.execute(&mut conf)?; + return Ok(Some(execute_output)); + } + // general RPC commands let mut subcmd_matches = matches; while let Some(m) = subcmd_matches.subcommand() { diff --git a/crates/cfx_types/src/lib.rs b/crates/cfx_types/src/lib.rs index 3324fbf8bf..ea5f3d1258 100644 --- a/crates/cfx_types/src/lib.rs +++ b/crates/cfx_types/src/lib.rs @@ -7,6 +7,9 @@ pub use ethereum_types::{ H160, H256, H512, H520, H64, U128, U256, U512, U64, }; +pub type StorageKey = H256; +pub type StorageValue = U256; + mod space; pub use space::{Space, SpaceMap}; diff --git a/crates/client/Cargo.toml b/crates/client/Cargo.toml index 7d40cf1cb5..587af95a0d 100644 --- a/crates/client/Cargo.toml +++ b/crates/client/Cargo.toml @@ -79,6 +79,8 @@ cfx-parity-trace-types = { workspace = true } cfx-tasks = { workspace = true } cfx-config = { workspace = true } cfxcore-types = { workspace = true } +fallible-iterator = { workspace = true } +chrono = { workspace = true } cfx-mallocator-utils = { workspace = true } [dev-dependencies] diff --git a/crates/client/src/lib.rs b/crates/client/src/lib.rs index 12e21d175c..ea9caa24aa 100644 --- a/crates/client/src/lib.rs +++ b/crates/client/src/lib.rs @@ -13,3 +13,4 @@ mod node_types; pub mod rpc; pub use cfx_config as configuration; pub use node_types::{archive, full, light}; +pub mod state_dump; diff --git a/crates/client/src/state_dump.rs b/crates/client/src/state_dump.rs new file mode 100644 index 0000000000..958608f8f4 --- /dev/null +++ b/crates/client/src/state_dump.rs @@ -0,0 +1,407 @@ +use crate::common::initialize_not_light_node_modules; +use cfx_config::Configuration; +use cfx_rpc_eth_types::{AccountState, StateDump, EOA_STORAGE_ROOT_H256}; +use cfx_rpc_primitives::Bytes; +use cfx_statedb::{StateDbExt, StateDbGeneric}; +use 
cfx_storage::state_manager::StateManagerTrait; +use cfx_types::{Address, Space, H256, U256}; +use cfxcore::NodeType; +use chrono::Utc; +use keccak_hash::{keccak, KECCAK_EMPTY}; +use parking_lot::{Condvar, Mutex}; +use primitives::{ + Account, SkipInputCheck, StorageKey, StorageKeyWithSpace, StorageValue, +}; +use rlp::Rlp; +use std::{ + collections::{BTreeMap, HashMap}, + fs, + ops::Deref, + path::Path, + sync::Arc, + thread, + time::Duration, +}; + +pub struct StateDumpConfig { + pub start_address: Address, + pub limit: u64, + pub block: Option, + pub no_code: bool, + pub no_storage: bool, + pub out_put_path: String, +} + +// This method will read all data (k, v) from the Conflux state tree (including +// core space and espace accounts, code, storage, deposit, vote_list) into +// memory at once, then parse and assemble them and assemble all account states +// into a StateDump struct and return it +pub fn dump_whole_state( + conf: &mut Configuration, exit_cond_var: Arc<(Mutex, Condvar)>, + config: &StateDumpConfig, +) -> Result { + let (mut state_db, state_root) = + prepare_state_db(conf, exit_cond_var, config)?; + + let accounts = + export_space_accounts(&mut state_db, Space::Ethereum, config) + .map_err(|e| e.to_string())?; + + let state_dump = StateDump { + root: state_root, + accounts, + next: None, + }; + + Ok(state_dump) +} + +// This method will iterate through the entire state tree, storing each found +// account in a temporary map After iterating through all accounts, it will +// retrieve the code and storage for each account, then call the callback method +// Pass the AccountState as a parameter to the callback method, which will +// handle the AccountState +pub fn iterate_dump_whole_state( + conf: &mut Configuration, exit_cond_var: Arc<(Mutex, Condvar)>, + config: &StateDumpConfig, callback: F, +) -> Result { + let (mut state_db, state_root) = + prepare_state_db(conf, exit_cond_var, config)?; + + export_space_accounts_with_callback( + &mut state_db, + 
Space::Ethereum, + config, + callback, + ) + .map_err(|e| e.to_string())?; + + Ok(state_root) +} + +fn prepare_state_db( + conf: &mut Configuration, exit_cond_var: Arc<(Mutex, Condvar)>, + config: &StateDumpConfig, +) -> Result<(StateDbGeneric, H256), String> { + println("Preparing state..."); + let ( + data_man, + _, + _, + consensus, + sync_service, + _, + _, + _, + _, + _, + _, + _, + _, + _, + _, + _, + ) = initialize_not_light_node_modules( + conf, + exit_cond_var, + NodeType::Archive, + )?; + + while sync_service.catch_up_mode() { + thread::sleep(Duration::from_secs(1)); + } + + /* + 1. Get the state at the target epoch, or the latest state if target_epoch is None + 2. Iterate through the state, and dump the account state + */ + + let state_manager = data_man.storage_manager.clone(); + let target_height = match config.block { + Some(epoch) => epoch, + None => consensus.latest_confirmed_epoch_number(), + }; + + let epoch_hash = consensus + .get_hash_from_epoch_number(target_height.into()) + .map_err(|e| e.to_string())?; + + let block = consensus + .get_phantom_block_by_hash(&epoch_hash, false)? + .expect("Failed to get block"); + + let state_root = block.pivot_header.deferred_state_root(); + + let state_index = data_man + .get_state_readonly_index(&epoch_hash) + .ok_or("Failed to get state index")?; + + let state = state_manager + .get_state_no_commit(state_index, true, Some(Space::Ethereum)) + .map_err(|e| e.to_string())? 
+ .ok_or("Failed to get state")?; + + let state_db = StateDbGeneric::new(state); + + Ok((state_db, state_root.clone())) +} + +fn export_space_accounts( + state: &mut StateDbGeneric, space: Space, config: &StateDumpConfig, +) -> Result, Box> { + println("Start to iterate state..."); + let empty_key = StorageKey::EmptyKey.with_space(space); + let kv_pairs = state.read_all(empty_key, None)?; + + let mut accounts_map = BTreeMap::new(); + let mut codes_map = HashMap::new(); + let mut storage_map = HashMap::new(); + + for (key, value) in kv_pairs { + let storage_key_with_space = + StorageKeyWithSpace::from_key_bytes::(&key); + if storage_key_with_space.space != space { + continue; + } + match storage_key_with_space.key { + StorageKey::AccountKey(address_bytes) => { + let address = Address::from_slice(address_bytes); + println(&format!("Find account: {:?}", address)); + let account = + Account::new_from_rlp(address, &Rlp::new(&value))?; + accounts_map.insert(address, account); + } + StorageKey::CodeKey { + address_bytes, + code_hash_bytes: _, + } => { + if config.no_code { + continue; + } + + let address = Address::from_slice(address_bytes); + let code = Bytes(value.to_vec()); + codes_map.insert(address, code); + } + StorageKey::StorageKey { + address_bytes, + storage_key, + } => { + if config.no_storage { + continue; + } + + let address = Address::from_slice(address_bytes); + let h256_storage_key = H256::from_slice(storage_key); + let storage_value_with_owner: StorageValue = + rlp::decode(&value)?; + let account_storage_map = + storage_map.entry(address).or_insert(BTreeMap::new()); + account_storage_map + .insert(h256_storage_key, storage_value_with_owner.value); + } + _ => { + continue; + } + } + } + + let mut accounts = BTreeMap::new(); + + for (address, account) in accounts_map { + let is_contract = account.code_hash != KECCAK_EMPTY; + // conflux state tree don't have storage root, so we use a fixed value + let root = EOA_STORAGE_ROOT_H256; + let address_hash = 
keccak(address); + + let code = if is_contract { + codes_map.get(&address).cloned() + } else { + if let Some(code) = codes_map.get(&address) { + println(&format!("no-contract account have code: {:?}", code)); + } + None + }; + + let storage = if is_contract { + storage_map.get(&address).cloned() + } else { + if let Some(_storage) = storage_map.get(&address) { + println(&format!("no-contract account have storage")); + } + None + }; + + let account_state = AccountState { + balance: account.balance, + nonce: account.nonce.as_u64(), + root, + code_hash: account.code_hash, + code, + storage, + address: Some(address), + address_hash: Some(address_hash), + }; + + accounts.insert(address, account_state); + + if config.limit > 0 && accounts.len() >= config.limit as usize { + break; + } + } + + Ok(accounts) +} + +pub fn export_space_accounts_with_callback( + state: &mut StateDbGeneric, space: Space, config: &StateDumpConfig, + callback: F, +) -> Result<(), Box> { + println("Start to iterate state..."); + let mut found_accounts = 0; + let mut core_space_key_count: u64 = 0; + let mut total_key_count: u64 = 0; + + for i in 0..=255 { + let prefix = [i]; + let start_key = StorageKey::AddressPrefixKey(&prefix).with_space(space); + + let mut account_states = BTreeMap::new(); + + let mut inner_callback = |(key, value): (Vec, Box<[u8]>)| { + total_key_count += 1; + + if total_key_count % 10000 == 0 { + println(&format!( + "total_key_count: {}, core_space_key_count: {}", + total_key_count, core_space_key_count + )); + } + + let storage_key_with_space = + StorageKeyWithSpace::from_key_bytes::(&key); + if storage_key_with_space.space != space { + core_space_key_count += 1; + return; + } + + if let StorageKey::AccountKey(address_bytes) = + storage_key_with_space.key + { + let address = Address::from_slice(address_bytes); + println(&format!("Find account: {:?}", address)); + let account = Account::new_from_rlp(address, &Rlp::new(&value)) + .expect("Failed to decode account"); + + 
account_states.insert(address, account); + } + }; + + state.read_all_with_callback(start_key, &mut inner_callback, true)?; + + if account_states.len() > 0 { + println("Start to read account code and storage data..."); + } + + for (_address, account) in account_states { + let account_state = + get_account_state(state, &account, config, space)?; + callback(account_state); + found_accounts += 1; + if config.limit > 0 && found_accounts >= config.limit as usize { + break; + } + } + } + + Ok(()) +} + +#[allow(unused)] +fn get_account_state( + state: &mut StateDbGeneric, account: &Account, config: &StateDumpConfig, + space: Space, +) -> Result> { + let address = account.address(); + + let is_contract = account.code_hash != KECCAK_EMPTY; + // get code + let code = if is_contract && !config.no_code { + state + .get_code(address, &account.code_hash)? + .map(|code_info| Bytes(code_info.code.deref().to_vec())) + } else { + None + }; + + let storage = if is_contract && !config.no_storage { + let storage = + get_contract_storage(state, &address.address, space, config)?; + Some(storage) + } else { + None + }; + + // conflux state tree don't have storage root, so we use a fixed value + let root = EOA_STORAGE_ROOT_H256; + + let address_hash = keccak(address.address); + + Ok(AccountState { + balance: account.balance, + nonce: account.nonce.as_u64(), + root, + code_hash: account.code_hash, + code, + storage, + address: Some(address.address), + address_hash: Some(address_hash), + }) +} + +fn get_contract_storage( + state: &mut StateDbGeneric, address: &Address, space: Space, + config: &StateDumpConfig, +) -> Result, Box> { + let mut storage: BTreeMap = Default::default(); + let mut chunk_count = 0; + + let mut inner_callback = |(key, value): (Vec, Box<[u8]>)| { + let storage_key_with_space = + StorageKeyWithSpace::from_key_bytes::(&key); + if storage_key_with_space.space != space { + return; + } + + if let StorageKey::StorageKey { + address_bytes: _, + storage_key, + } = 
storage_key_with_space.key + { + let h256_storage_key = H256::from_slice(storage_key); + let storage_value_with_owner: StorageValue = + rlp::decode(&value).expect("Failed to decode storage value"); + storage.insert(h256_storage_key, storage_value_with_owner.value); + + if storage.len() == 5000_000 { + chunk_count += 1; + let name = format!("{:?}-chunk{}.json", address, chunk_count); + let file_path = Path::new(&config.out_put_path).join(&name); + let json_content = serde_json::to_string_pretty(&storage) + .expect("Failed to serialize storage"); + fs::write(&file_path, json_content) + .expect("Failed to write storage file"); + storage.clear(); + } + }; + }; + + let start_key = StorageKey::new_storage_root_key(address).with_space(space); + state.read_all_with_callback(start_key, &mut inner_callback, false)?; + + Ok(storage) +} + +fn println(message: &str) { + println!("[{}] {}", Utc::now().format("%Y-%m-%d %H:%M:%S"), message); +} diff --git a/crates/dbs/statedb/src/lib.rs b/crates/dbs/statedb/src/lib.rs index 6fde0ed607..6c13e8b121 100644 --- a/crates/dbs/statedb/src/lib.rs +++ b/crates/dbs/statedb/src/lib.rs @@ -11,6 +11,8 @@ pub mod global_params; #[cfg(feature = "testonly_code")] mod in_memory_storage; mod statedb_ext; +use cfx_types::H256; +use primitives::StorageValue; use cfx_db_errors::statedb as error; @@ -175,6 +177,26 @@ mod impls { self.modify_single_value(key, None) } + pub fn read_all( + &mut self, key_prefix: StorageKeyWithSpace, + debug_record: Option<&mut ComputeEpochDebugRecord>, + ) -> Result> { + self.delete_all::(key_prefix, debug_record) + } + + pub fn read_all_with_callback( + &mut self, access_key_prefix: StorageKeyWithSpace, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, + ) -> Result<()> { + self.storage + .read_all_with_callback( + access_key_prefix, + callback, + only_account_key, + ) + .map_err(|err| err.into()) + } + pub fn delete_all( &mut self, key_prefix: StorageKeyWithSpace, debug_record: Option<&mut 
ComputeEpochDebugRecord>, @@ -249,6 +271,42 @@ mod impls { Ok(deleted_kvs) } + pub fn get_account_storage_entries( + &mut self, address: &AddressWithSpace, + debug_record: Option<&mut ComputeEpochDebugRecord>, + ) -> Result> + { + let mut storage = BTreeMap::new(); + + let storage_prefix = + StorageKey::new_storage_root_key(&address.address) + .with_space(address.space); + + let kv_pairs = self.read_all(storage_prefix, debug_record)?; + for (key, value) in kv_pairs { + let storage_key_with_space = + StorageKeyWithSpace::from_key_bytes::(&key); + if let StorageKey::StorageKey { + address_bytes: _, + storage_key, + } = storage_key_with_space.key + { + let h256_storage_key = H256::from_slice(storage_key); + let storage_value_with_owner: StorageValue = + rlp::decode(&value)?; + storage.insert( + h256_storage_key, + storage_value_with_owner.value, + ); + } else { + trace!("Not an storage key: {:?}", storage_key_with_space); + continue; + } + } + + Ok(storage) + } + /// Load the storage layout for state commits. /// Modification to storage layout is the same as modification of /// any other key-values. 
But as required by MPT structure we diff --git a/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs b/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs index 4297c5fe92..eb67f95284 100644 --- a/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs +++ b/crates/dbs/storage/src/impls/delta_mpt/cow_node_ref.rs @@ -622,6 +622,69 @@ impl CowNodeRef { Ok(()) } + // only_account_key can be used to filter the only account key + pub fn iterate_internal_with_callback( + &self, owned_node_set: &OwnedNodeSet, trie: &DeltaMpt, + guarded_trie_node: GuardedMaybeOwnedTrieNodeAsCowCallParam, + key_prefix: CompressedPathRaw, db: &mut DeltaDbOwnedReadTraitObj, + callback: &mut dyn FnMut(MptKeyValue), is_delta_mpt: bool, + only_account_key: bool, + ) -> Result<()> { + // filter out all the key that is longer than the account key + if only_account_key { + let key_len = + SpaceStorageFilter::space_flag_index(is_delta_mpt) + 1; + if key_prefix.path_slice().len() > key_len { + return Ok(()); + } + } + if guarded_trie_node.as_ref().as_ref().has_value() { + assert!(CompressedPathRaw::has_second_nibble( + key_prefix.path_mask() + )); + callback(( + key_prefix.path_slice().to_vec(), + guarded_trie_node.as_ref().as_ref().value_clone().unwrap(), + )); + } + + let children_table = + guarded_trie_node.as_ref().as_ref().children_table.clone(); + // Free the lock for trie_node. + // FIXME: try to share the lock. 
+ drop(guarded_trie_node); + + let node_memory_manager = trie.get_node_memory_manager(); + let allocator = node_memory_manager.get_allocator(); + for (i, node_ref) in children_table.iter() { + let mut cow_child_node = + Self::new((*node_ref).into(), owned_node_set, self.mpt_id); + let child_node = cow_child_node.get_trie_node( + node_memory_manager, + &allocator, + db, + )?; + let key_prefix = CompressedPathRaw::join_connected_paths( + &key_prefix, + i, + &child_node.compressed_path_ref(), + ); + let child_node = GuardedValue::take(child_node); + cow_child_node.iterate_internal_with_callback( + owned_node_set, + trie, + child_node, + key_prefix, + db, + callback, + is_delta_mpt, + only_account_key, + )?; + } + + Ok(()) + } + /// Recursively commit dirty nodes. pub fn commit_dirty_recursively< Transaction: BorrowMut, @@ -910,7 +973,7 @@ use super::{ AtomicCommitTransaction, DeltaMpt, *, }; use parking_lot::MutexGuard; -use primitives::{MerkleHash, MptValue, MERKLE_NULL_NODE}; +use primitives::{MerkleHash, MptValue, SpaceStorageFilter, MERKLE_NULL_NODE}; use rlp::*; use std::{ borrow::BorrowMut, cell::Cell, convert::TryInto, ops::Deref, diff --git a/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs b/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs index 9cc3d8a4fd..c20bcf4e32 100644 --- a/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs +++ b/crates/dbs/storage/src/impls/delta_mpt/subtrie_visitor.rs @@ -633,6 +633,77 @@ impl<'trie, 'db: 'trie> SubTrieVisitor<'trie, 'db> { Ok(Some(values)) } + /// return all key/value pairs given the prefix + pub fn traversal_with_callback( + mut self, key: KeyPart, key_remaining: KeyPart, + callback: &mut dyn FnMut(MptKeyValue), is_delta_mpt: bool, + only_account_key: bool, + ) -> Result<()> { + let node_memory_manager = self.node_memory_manager(); + let allocator = node_memory_manager.get_allocator(); + let mut node_cow = self.root.take(); + + let trie_node_ref = node_cow.get_trie_node( + 
node_memory_manager, + &allocator, + &mut *self.db.get_mut().to_owned_read()?, + )?; + + let key_prefix: CompressedPathRaw; + match trie_node_ref.walk::(key_remaining) { + WalkStop::ChildNotFound { .. } => return Ok(()), + WalkStop::Arrived => { + // To enumerate the subtree. + key_prefix = key.into(); + } + WalkStop::PathDiverted { + key_child_index, + unmatched_child_index, + unmatched_path_remaining, + .. + } => { + if key_child_index.is_some() { + return Ok(()); + } + // To enumerate the subtree. + key_prefix = CompressedPathRaw::join_connected_paths( + &key, + unmatched_child_index, + &unmatched_path_remaining, + ); + } + WalkStop::Descent { + key_remaining, + child_node, + .. + } => { + drop(trie_node_ref); + self.new_visitor_for_subtree(child_node.clone().into()) + .traversal_with_callback( + key, + key_remaining, + callback, + is_delta_mpt, + only_account_key, + )?; + return Ok(()); + } + } + + let trie_node = GuardedValue::take(trie_node_ref); + node_cow.iterate_internal_with_callback( + self.owned_node_set.get_ref(), + self.get_trie_ref(), + trie_node, + key_prefix, + &mut *self.db.get_mut().to_owned_read()?, + callback, + is_delta_mpt, + only_account_key, + )?; + Ok(()) + } + // In a method we visit node one or 2 times but borrow-checker prevent // holding and access other fields so it's visited multiple times. // FIXME: Check if we did something like this. 
diff --git a/crates/dbs/storage/src/impls/recording_storage.rs b/crates/dbs/storage/src/impls/recording_storage.rs index e13252c952..695adb48e2 100644 --- a/crates/dbs/storage/src/impls/recording_storage.rs +++ b/crates/dbs/storage/src/impls/recording_storage.rs @@ -54,6 +54,7 @@ impl StateTrait fn compute_state_root(&mut self) -> Result; fn get_state_root(&self) -> Result; fn commit(&mut self, epoch_id: EpochId) -> Result; + fn read_all_with_callback(&mut self, access_key_prefix: StorageKeyWithSpace, callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool) -> Result<()>; } } diff --git a/crates/dbs/storage/src/impls/replicated_state.rs b/crates/dbs/storage/src/impls/replicated_state.rs index ed8ce9ab01..068b80ceff 100644 --- a/crates/dbs/storage/src/impls/replicated_state.rs +++ b/crates/dbs/storage/src/impls/replicated_state.rs @@ -160,6 +160,8 @@ enum OwnedStorageKey { }, DepositListKey(Vec), VoteListKey(Vec), + EmptyKey, + AddressPrefixKey(Vec), } impl OwnedStorageKey { @@ -194,6 +196,10 @@ impl OwnedStorageKey { OwnedStorageKey::VoteListKey(k) => { StorageKey::VoteListKey(k.as_slice()) } + OwnedStorageKey::EmptyKey => StorageKey::EmptyKey, + OwnedStorageKey::AddressPrefixKey(k) => { + StorageKey::AddressPrefixKey(k.as_slice()) + } } } } @@ -245,6 +251,10 @@ impl<'a> From> for OwnedStorageKey { StorageKey::VoteListKey(k) => { OwnedStorageKey::VoteListKey(k.to_vec()) } + StorageKey::EmptyKey => OwnedStorageKey::EmptyKey, + StorageKey::AddressPrefixKey(k) => { + OwnedStorageKey::AddressPrefixKey(k.to_vec()) + } } } } @@ -303,6 +313,17 @@ impl StateTrait for ReplicatedState
{ self.state.read_all(access_key_prefix) } + fn read_all_with_callback( + &mut self, access_key_prefix: StorageKeyWithSpace, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, + ) -> Result<()> { + self.state.read_all_with_callback( + access_key_prefix, + callback, + only_account_key, + ) + } + fn compute_state_root(&mut self) -> Result { self.replication_handler .send_op(StateOperation::ComputeStateRoot); diff --git a/crates/dbs/storage/src/impls/single_mpt_state.rs b/crates/dbs/storage/src/impls/single_mpt_state.rs index 7afc84b950..526a964a3c 100644 --- a/crates/dbs/storage/src/impls/single_mpt_state.rs +++ b/crates/dbs/storage/src/impls/single_mpt_state.rs @@ -10,6 +10,7 @@ use primitives::{ EpochId, MerkleHash, MptValue, StateRoot, StorageKeyWithSpace, MERKLE_NULL_NODE, }; +use rustc_hex::ToHex; use std::{cell::UnsafeCell, sync::Arc}; pub struct SingleMptState { @@ -305,6 +306,46 @@ impl SingleMptState { Ok(Some(result)) } } + + fn read_all_with_callback_impl( + &mut self, access_key_prefix: StorageKeyWithSpace, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, + ) -> Result<()> { + self.ensure_temp_slab_for_db_load(); + + let mut total_key_count: u64 = 0; + + let mut inner_callback = |(k, v): MptKeyValue| { + total_key_count += 1; + if total_key_count % 10000 == 0 { + println!( + "read_all_with_callback_impl: total_key_count {} key {}", + total_key_count, + k.to_hex::() + ); + } + if v.len() > 0 { + callback((k, v)); + } + }; + + // Retrieve and delete key/value pairs from delta trie + let key_prefix = access_key_prefix.to_key_bytes(); + SubTrieVisitor::new( + &self.trie, + self.trie_root.clone(), + &mut self.owned_node_set, + )? 
+ .traversal_with_callback( + &key_prefix, + &key_prefix, + &mut inner_callback, + false, + only_account_key, + )?; + + Ok(()) + } } impl StateTrait for SingleMptState { @@ -368,6 +409,17 @@ impl StateTrait for SingleMptState { self.delete_all_impl::(access_key_prefix) } + fn read_all_with_callback( + &mut self, access_key_prefix: StorageKeyWithSpace, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, + ) -> Result<()> { + self.read_all_with_callback_impl( + access_key_prefix, + callback, + only_account_key, + ) + } + fn compute_state_root(&mut self) -> Result { self.ensure_temp_slab_for_db_load(); diff --git a/crates/dbs/storage/src/impls/state.rs b/crates/dbs/storage/src/impls/state.rs index 3627e3dbc3..db1ac768ec 100644 --- a/crates/dbs/storage/src/impls/state.rs +++ b/crates/dbs/storage/src/impls/state.rs @@ -309,6 +309,17 @@ impl StateTrait for State { self.delete_all_impl::(access_key_prefix) } + fn read_all_with_callback( + &mut self, access_key_prefix: StorageKeyWithSpace, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, + ) -> Result<()> { + self.read_all_with_callback_impl( + access_key_prefix, + callback, + only_account_key, + ) + } + fn compute_state_root(&mut self) -> Result { self.ensure_temp_slab_for_db_load(); @@ -902,6 +913,14 @@ impl State { snapshot_kvs.push((key, value)); } + let is_address_search_prefix = + if let StorageKey::AddressPrefixKey(prefix) = access_key_prefix.key + { + Some(prefix) + } else { + None + }; + let mut result = Vec::new(); // This is used to keep track of the deleted keys. let mut deleted_keys = HashSet::new(); @@ -909,6 +928,15 @@ impl State { for (k, v) in kvs { let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); let k = storage_key.to_key_bytes(); + + // If it's an address search prefix, and k is not start with + // prefix, skip the key. 
+ if let Some(prefix) = is_address_search_prefix { + if !k.starts_with(prefix) { + continue; + } + } + deleted_keys.insert(k.clone()); if v.len() > 0 { result.push((k, v)); @@ -919,11 +947,21 @@ impl State { if let Some(kvs) = intermediate_trie_kvs { for (k, v) in kvs { let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); + let k = storage_key.to_key_bytes(); + + // If it's an address search prefix, and k is not start with + // prefix, skip the key. + if let Some(prefix) = is_address_search_prefix { + if !k.starts_with(prefix) { + continue; + } + } + // Only delete non-empty keys. if v.len() > 0 && !AM::READ_ONLY { self.delete(storage_key)?; } - let k = storage_key.to_key_bytes(); + if !deleted_keys.contains(&k) { deleted_keys.insert(k.clone()); if v.len() > 0 { @@ -952,6 +990,121 @@ impl State { Ok(Some(result)) } } + + pub fn read_all_with_callback_impl( + &mut self, access_key_prefix: StorageKeyWithSpace, + callback: &mut dyn FnMut(MptKeyValue), only_account_key: bool, + ) -> Result<()> { + self.ensure_temp_slab_for_db_load(); + + let is_address_search_prefix = + if let StorageKey::AddressPrefixKey(prefix) = access_key_prefix.key + { + Some(prefix) + } else { + None + }; + + // This is used to keep track of the deleted keys. + let mut deleted_keys = HashSet::new(); + + // Retrieve and delete key/value pairs from delta trie + if let Some(old_root_node) = &self.delta_trie_root { + let mut inner_callback = |(k, v): MptKeyValue| { + let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); + let k = storage_key.to_key_bytes(); + + // If it's an address search prefix, and k is not start with + // prefix, skip the key. 
+ if let Some(prefix) = is_address_search_prefix { + if !k.starts_with(prefix) { + return; + } + } + deleted_keys.insert(k.clone()); + if v.len() > 0 { + callback((k, v)); + } + }; + let delta_mpt_key_prefix = access_key_prefix + .to_delta_mpt_key_bytes(&self.delta_trie_key_padding); + SubTrieVisitor::new( + &self.delta_trie, + old_root_node.clone(), + &mut self.owned_node_set, + )? + .traversal_with_callback( + &delta_mpt_key_prefix, + &delta_mpt_key_prefix, + &mut inner_callback, + true, + only_account_key, + )?; + }; + + // Retrieve key/value pairs from intermediate trie + if let Some(root_node) = &self.intermediate_trie_root { + let mut inner_callback = |(k, v): MptKeyValue| { + let storage_key = StorageKeyWithSpace::from_delta_mpt_key(&k); + let k = storage_key.to_key_bytes(); + + // If it's an address search prefix, and k is not start with + // prefix, skip the key. + if let Some(prefix) = is_address_search_prefix { + if !k.starts_with(prefix) { + return; + } + } + + if !deleted_keys.contains(&k) { + deleted_keys.insert(k.clone()); + if v.len() > 0 { + callback((k, v)); + } + } + }; + if self.maybe_intermediate_trie_key_padding.is_some() + && self.maybe_intermediate_trie.is_some() + { + let intermediate_trie_key_padding = + self.maybe_intermediate_trie_key_padding.as_ref().unwrap(); + let intermediate_mpt_key_prefix = access_key_prefix + .to_delta_mpt_key_bytes(intermediate_trie_key_padding); + SubTrieVisitor::new( + self.maybe_intermediate_trie.as_ref().unwrap(), + root_node.clone(), + &mut self.owned_node_set, + )? 
+ .traversal_with_callback( + &intermediate_mpt_key_prefix, + &intermediate_mpt_key_prefix, + &mut inner_callback, + true, + only_account_key, + )?; + } + } + + // Retrieve key/value pairs from snapshot + let mut kv_iterator = self.snapshot_db.snapshot_kv_iterator()?.take(); + let lower_bound_incl = access_key_prefix.to_key_bytes(); + let upper_bound_excl = + to_key_prefix_iter_upper_bound(&lower_bound_incl); + let mut kvs = kv_iterator + .iter_range( + lower_bound_incl.as_slice(), + upper_bound_excl.as_ref().map(|v| &**v), + )? + .take(); + + while let Some((k, v)) = kvs.next()? { + if !deleted_keys.contains(&k) { + callback((k, v)); + } + } + + Ok(()) + } } use crate::{ diff --git a/crates/dbs/storage/src/lib.rs b/crates/dbs/storage/src/lib.rs index ecd4f758cf..c9ddedd784 100644 --- a/crates/dbs/storage/src/lib.rs +++ b/crates/dbs/storage/src/lib.rs @@ -213,6 +213,7 @@ pub use self::{ storage_db::{ kvdb_rocksdb::KvdbRocksdb, kvdb_sqlite::{KvdbSqlite, KvdbSqliteStatements}, + kvdb_sqlite_sharded::KvdbSqliteSharded, snapshot_db_manager_sqlite::SnapshotDbManagerSqlite, sqlite::SqliteConnection, }, @@ -226,7 +227,7 @@ pub use self::{ StateIndex, StateManager as StorageManager, StateManagerTrait as StorageManagerTrait, }, - storage_db::KeyValueDbTrait, + storage_db::{KeyValueDbIterableTrait, KeyValueDbTrait}, }; #[cfg(any(test, feature = "testonly_code"))] diff --git a/crates/dbs/storage/src/state.rs b/crates/dbs/storage/src/state.rs index 9b6a06183e..96abb65728 100644 --- a/crates/dbs/storage/src/state.rs +++ b/crates/dbs/storage/src/state.rs @@ -39,6 +39,13 @@ pub trait StateTrait: Sync + Send { &mut self, access_key_prefix: StorageKeyWithSpace, ) -> Result>>; + fn read_all_with_callback( + &mut self, _access_key_prefix: StorageKeyWithSpace, + _callback: &mut dyn FnMut(MptKeyValue), _only_account_key: bool, + ) -> Result<()> { + Err(Error::Msg("Not implemented".into())) + } + // Finalize /// It's costly to compute state root however it's only necessary to compute 
/// state root once before committing. diff --git a/crates/primitives/src/storage_key.rs b/crates/primitives/src/storage_key.rs index 43cd9073be..6878f7a92d 100644 --- a/crates/primitives/src/storage_key.rs +++ b/crates/primitives/src/storage_key.rs @@ -58,6 +58,11 @@ pub enum StorageKey<'a> { }, DepositListKey(&'a [u8]), VoteListKey(&'a [u8]), + // Empty key is used to traverse all key and value pairs. + EmptyKey, + // Address prefix key is used to search all keys with the same address + // prefix, eg [1, 2](0x0102) will search all keys with prefix 0x0102 + AddressPrefixKey(&'a [u8]), } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -195,6 +200,16 @@ impl<'a> StorageKeyWithSpace<'a> { StorageKey::VoteListKey(address_bytes) => { delta_mpt_storage_key::new_vote_list_key(address_bytes, padding) } + StorageKey::EmptyKey => { + return vec![]; + } + StorageKey::AddressPrefixKey(_address_bytes) => { + // delta mpt trie does not support address prefix key search + // so we search all keys and filter them by address prefix + // due to delta mpt trie won't be very big, so the performance + // impact is not very big + return vec![]; + } }; return if self.space == Space::Native { @@ -284,6 +299,14 @@ impl<'a> StorageKeyWithSpace<'a> { key } + StorageKey::EmptyKey => { + return vec![]; + } + StorageKey::AddressPrefixKey(address_bytes) => { + let mut key = Vec::with_capacity(address_bytes.len()); + key.extend_from_slice(address_bytes); + return key; + } }; return if self.space == Space::Native { @@ -716,6 +739,62 @@ mod delta_mpt_storage_key { } } +// This enum is used to filter only the wanted contract storage key when +// traversal the trie for example, when traverse eth space key/value, we can +// only filter the eSpace storage key/value to accelerate the traversal +// speed +// Native means filter(keep) the native space storage key/value +// Ethereum means filter(keep) the ethereum space storage key/value +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct 
SpaceStorageFilter(pub Space); + +impl From for SpaceStorageFilter { + fn from(space: Space) -> Self { SpaceStorageFilter(space) } +} + +impl From for Space { + fn from(filter: SpaceStorageFilter) -> Self { filter.0 } +} + +impl SpaceStorageFilter { + pub fn is_native(&self) -> bool { matches!(self.0, Space::Native) } + + pub fn is_ethereum(&self) -> bool { matches!(self.0, Space::Ethereum) } + + // return the flag index according the trie type + // if is_delta_mpt is true, then the space flag is at the 32th index + // otherwise, the space flag is at the 20th index + pub fn space_flag_index(is_delta_mpt: bool) -> usize { + if is_delta_mpt { + delta_mpt_storage_key::KEY_PADDING_BYTES + } else { + StorageKeyWithSpace::ACCOUNT_BYTES + } + } + + // return true if the key is filtered out + pub fn is_filtered(&self, is_delta_mpt: bool, key: &[u8]) -> bool { + let flag_index = Self::space_flag_index(is_delta_mpt); + if key.len() > flag_index { + match self.0 { + Space::Native => { + if key[flag_index] == StorageKeyWithSpace::EVM_SPACE_TYPE[0] + { + return true; + } + } + Space::Ethereum => { + if key[flag_index] != StorageKeyWithSpace::EVM_SPACE_TYPE[0] + { + return true; + } + } + } + } + false + } +} + use super::{MerkleHash, MERKLE_NULL_NODE}; use crate::{ hash::keccak, storage_key::delta_mpt_storage_key::ACCOUNT_KEYPART_BYTES, diff --git a/crates/rpc/rpc-eth-types/Cargo.toml b/crates/rpc/rpc-eth-types/Cargo.toml index 1e3562271f..548dfcda95 100644 --- a/crates/rpc/rpc-eth-types/Cargo.toml +++ b/crates/rpc/rpc-eth-types/Cargo.toml @@ -15,6 +15,7 @@ license-file.workspace = true [dependencies] serde = { workspace = true } serde_json = { workspace = true } +serde_with = { workspace = true, features = ["base64"] } cfx-types = { workspace = true } cfx-bytes = { workspace = true } thiserror = { workspace = true } diff --git a/crates/rpc/rpc-eth-types/src/lib.rs b/crates/rpc/rpc-eth-types/src/lib.rs index 5e43e96489..ecc58bb7a8 100644 --- 
a/crates/rpc/rpc-eth-types/src/lib.rs +++ b/crates/rpc/rpc-eth-types/src/lib.rs @@ -11,6 +11,7 @@ mod log; mod receipt; mod simulate; mod state; +mod state_dump; mod sync; pub mod trace; pub mod trace_filter; @@ -35,6 +36,7 @@ pub use state::{ AccountOverride, AccountStateOverrideMode, EvmOverrides, RpcAccountOverride, RpcStateOverride, StateOverride, }; +pub use state_dump::*; pub use sync::{SyncInfo, SyncStatus}; pub use trace::*; pub use trace_filter::TraceFilter; diff --git a/crates/rpc/rpc-eth-types/src/state_dump.rs b/crates/rpc/rpc-eth-types/src/state_dump.rs new file mode 100644 index 0000000000..17b46ff323 --- /dev/null +++ b/crates/rpc/rpc-eth-types/src/state_dump.rs @@ -0,0 +1,112 @@ +use cfx_rpc_primitives::Bytes; +use cfx_types::{Address, StorageKey, StorageValue, H256, U256}; +use serde::{Deserialize, Serialize}; +use serde_with::{base64::Base64, serde_as}; +use std::collections::BTreeMap; + +// Empty storage trie root +// 0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421 +pub const EOA_STORAGE_ROOT_H256: H256 = H256([ + 0x56, 0xe8, 0x1f, 0x17, 0x1b, 0xcc, 0x55, 0xa6, 0xff, 0x83, 0x45, 0xe6, + 0x92, 0xc0, 0xf8, 0x6e, 0x5b, 0x48, 0xe0, 0x1b, 0x99, 0x6c, 0xad, 0xc0, + 0x01, 0x62, 0x2f, 0xb5, 0xe3, 0x63, 0xb4, 0x21, +]); + +/// Represents the state of an account in the Ethereum state trie. 
+#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct AccountState { + /// The balance of the account + pub balance: U256, + /// The nonce of the account + pub nonce: u64, + /// The root hash of the account + pub root: H256, + /// The code hash of the account + pub code_hash: H256, + /// The code of the account + #[serde(skip_serializing_if = "Option::is_none")] + pub code: Option, + /// A map of storage slots, indexed by storage key + #[serde(skip_serializing_if = "Option::is_none")] + pub storage: Option>, + /// Address only present in iterative (line-by-line) mode + #[serde(skip_serializing_if = "Option::is_none")] + pub address: Option
, + /// If we don't have address, we can output the key + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "key")] + pub address_hash: Option, +} + +/// Represents a state dump, which includes the root hash of the state trie, +/// Note: There are some differences in JSON serialization compared to geth's +/// output, such as: +/// - The root field in geth doesn't have a 0x prefix, while here it does +/// - The balance field of accounts in geth is a decimal string, while here it's +/// a hexadecimal string +/// - The value field of storage in geth doesn't have a 0x prefix, while here it +/// does +#[serde_as] +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct StateDump { + /// The root hash of the state trie + pub root: H256, + /// A map of accounts, indexed by address + pub accounts: BTreeMap, + /// Next can be set to represent that this dump is only partial, and Next + /// is where an iterator should be positioned in order to continue the + /// dump. 
+ #[serde(skip_serializing_if = "Option::is_none")] + #[serde_as(as = "Option")] + pub next: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn test_state_dump_serialization() { + let json_input = json!({ + "root": "0x5a1f70040e967bef6a32ee65e7fa2c3ea580e277e42cf3e3daf60a677ef18127", + "accounts": { + "0x000baa01f2a21d29dce20b88032752b990dac124": { + "balance": "0x10000000000000000000", + "nonce": 0, + "root": "0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421", + "codeHash": "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470", + "address": "0x000baa01f2a21d29dce20b88032752b990dac124", + "key": "0x000108a52c8b050f1098144f89e0b8e7e41310ea139f020b690b56e424508f4c" + }, + "0x201d43c399f2495e19a591eab93fa3384ec6c72e": { + "balance": "0x0", + "nonce": 1, + "root": "0x297c068574a50ffef03843dda4075c3b6b5790be78b30e3c9df4e02e4ba9125c", + "codeHash": "0xbe6e2f7cdf118a0b2092927e0a0cf4a54316165ac5172bcda327939e04c9818f", + "code": "0x36602c57343d527f9e4ac34f21c619cefc926c8bd93b54bf5a39c7ab2127a895af1cc0691d7e3dff593da1005b363d3d373d3d3d3d610076806062363936013d732efa42b7d7591cbf436cce4973f900d8314c86dd5af43d3d93803e606057fd5bf34ad30ecfb92b9311a853d296c515fb0d6505d89c68db32372fd77e57b0879f97224bb89dac59e267486b38ee20309c8cc1acfb854eb9303a31c50a42f48a8fcc63b84d60abf8c5408ea569569af66c0cc3a76f6e00000000000000000000000000000000000000000000000000000000000af9ac0076", + "storage": { + "0x0000000000000000000000000000000000000000000000000000000000000000": "0x100000000000000000000000000686f559c", + "0x0000000000000000000000000000000000000000000000000000000000000002": "0x1", + "0x0000000000000000000000000000000000000000000000000000000000000008": "0xdead000000000000000000000000000000000000000000000000000000000000", + "0x000000000000000000000000000000000000000000000000000000000000000a": "0x1", + "0x405787fa12a823e0f2b7631cc41b3ba8828b3321ca811111fa75cd3aa3bb5ace": "0xffffffff", + 
"0x405787fa12a823e0f2b7631cc41b3ba8828b3321ca811111fa75cd3aa3bb5acf": "0x4ad30ecfb92b9311a853d296c515fb0d6505d89c", + "0x405787fa12a823e0f2b7631cc41b3ba8828b3321ca811111fa75cd3aa3bb5ad1": "0x68db32372fd77e57b0879f97224bb89dac59e267486b38ee20309c8cc1acfb85", + "0x405787fa12a823e0f2b7631cc41b3ba8828b3321ca811111fa75cd3aa3bb5ad2": "0x686f559c00000000000000000000000000000001" + }, + "address": "0x201d43c399f2495e19a591eab93fa3384ec6c72e", + "key": "0x0000e65fdfaa2681656a211a55bc6fdcfe918f34cc037407ba12874c16cd7da9" + } + }, + "next": "AAEx7TCXUlkysLMMJcS/W974Ue7bbhgSK3EUHVNFCtQ=" + }); + + let parsed: StateDump = + serde_json::from_value(json_input.clone()).unwrap(); + let output = serde_json::to_value(&parsed).unwrap(); + assert_eq!(json_input, output); + } +} diff --git a/crates/rpc/rpc-primitives/src/bytes.rs b/crates/rpc/rpc-primitives/src/bytes.rs index 79bf4c7a21..cbf8b79792 100644 --- a/crates/rpc/rpc-primitives/src/bytes.rs +++ b/crates/rpc/rpc-primitives/src/bytes.rs @@ -20,6 +20,10 @@ //! 
Serializable wrapper around vector of bytes +use core::{ + borrow::Borrow, + ops::{Deref, DerefMut}, +}; use rustc_hex::{FromHex, ToHex}; use serde::{ de::{Error, Visitor}, @@ -49,6 +53,28 @@ impl Into> for Bytes { fn into(self) -> Vec { self.0 } } +impl Deref for Bytes { + type Target = Vec; + + #[inline] + fn deref(&self) -> &Self::Target { &self.0 } +} + +impl DerefMut for Bytes { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } +} + +impl AsRef<[u8]> for Bytes { + #[inline] + fn as_ref(&self) -> &[u8] { self.0.as_ref() } +} + +impl Borrow<[u8]> for Bytes { + #[inline] + fn borrow(&self) -> &[u8] { self.as_ref() } +} + impl Serialize for Bytes { fn serialize(&self, serializer: S) -> Result where S: Serializer { diff --git a/docs/commands/readme.md b/docs/commands/readme.md index 7431824d5d..6018af256d 100644 --- a/docs/commands/readme.md +++ b/docs/commands/readme.md @@ -6,34 +6,26 @@ Currently Available Subcommands: - `account`:Account Management - `rpc`:RPC-based subcommands, used for querying blockchain information and sending transactions +- `dump`: Dump eSpace account state at a given block number - `help`:Print help message ```sh -./conflux -h -conflux conflux-rust/v2.4.0-82500ad-20250418/x86_64-linux-gnu/rustc1.77.2 -The Conflux Team -Conflux client. +Conflux client -USAGE: - conflux [FLAGS] [OPTIONS] [SUBCOMMAND] +Usage: conflux [OPTIONS] [COMMAND] -FLAGS: - --archive - --full - -h, --help Prints help information - --light - --tg_archive - -V, --version Prints version information - -OPTIONS: - -c, --config Sets a custom config file. - ... 
- +Commands: + account Manage accounts + dump Dump eSpace account state at a given block number + rpc RPC based subcommands to query blockchain information and send transactions + help Print this message or the help of the given subcommand(s) -SUBCOMMANDS: - account Manage accounts - help Prints this message or the help of the given subcommand(s) - rpc RPC based subcommands to query blockchain information and send transactions +Options: + --mode + Use the preset testing configurations. dev or test + -p, --port + Specify the port for P2P connections + ... ``` Each command-line tool comes with its own help information. Users can view the help message by using the -h or --help flag. @@ -57,4 +49,58 @@ SUBCOMMANDS: import Import accounts from JSON UTC keystore files to the specified --chain (default conflux) list List existing accounts of the given --chain (default conflux). new Create a new account (and its associated key) for the given --chain (default conflux). -``` \ No newline at end of file +``` + +## dump subcommand + +This command can be used to export all account states at a certain block height in eSpace to JSON files, facilitating development and debugging. 
The exported data structure example is as follows + +```sh +$ ./conflux --config devnode.toml dump --block 1000 # export state at height 1000 +{ + "root": "0xdd606752e465cb6a1e2f0df718057536ab00cd66d9c6fa46085309145823d3c0", + "accounts": { + "0x004e322e7ea7e63547d25639d8e8ed282318eec9": { + "balance": "0x152cfd9872b245dcbcae", + "nonce": 210, + "root": "0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421", + "codeHash": "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470", + "address": "0x004e322e7ea7e63547d25639d8e8ed282318eec9", + "key": "0x0c1bad9586421be5b0d8eda4446cac4ce7692d67301d07146a87455e7bc9d30e" + }, + "0x0c80d6926edc73977dce4c97ff8966abf04fe80e": { + "balance": "0x0", + "nonce": 2, + "root": "0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421", + "codeHash": "0xe79d1e04e3004c8d97ad51f5f08cfd1a79e6cdcce2a3a6d59676a9858bccd173", + "code": "0xf90338b903206080604052600436106100385760003.....", + "storage": { + "0x0000000000000000000000000000000000000000000000000000000000000000": "0xc", + "0x0000000000000000000000000000000000000000000000000000000000000001": "0x27e26b9234ec81a0247a6083edf8b329fb1ccde9" + }, + "address": "0x0c80d6926edc73977dce4c97ff8966abf04fe80e", + "key": "0x691460d9548cee180ba8cd9f0960fee74fed16501d80cdb3182aa0f41b160e54" + } + } +} +``` + +Note: + +1. Conflux contract data storage differs significantly from Ethereum, as it is not stored in separate MPT form, therefore the storage root cannot be obtained. The exported data's `account.root` is fixed as `0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421` +2. The exported root information is the full state root of Conflux dual-space (Core Space, eSpace), only for maintaining data format consistency, not the state root of all eSpace accounts. +3. 
When exporting mainnet state data, due to the large number of accounts, a high machine configuration is required, and the runtime will be quite long; if you want to export the state at a certain height in history, it needs to be performed on a `fullstate node`'s data. +4. When performing a state export, please stop the node program first, then execute the export operation in the node directory. +5. Please use the binary corresponding to the network and execute the export operation in the corresponding network data directory; `do not` use a `testnet or master` code compiled binary to execute export operations on `mainnet data`. +6. If the state is very big, it is recommended to export the state into multiple files via the `--multifile` argument. +7. When this command starts, it will first sync to the latest block (if your node is not fully synced, this step will take a long time), and then perform the state export operation. +8. If a contract has a very large number of storage keys, its storage data will be split into multiple files, e.g. `0xc6e865c213c89ca42a622c5572d19f00d84d7a16-chunk1.json`, each with 5,000,000 keys. +9. 
When running this command on mainnnet or testnet, recommend open below configs + ```toml + storage_delta_mpts_cache_size=10_000_0000 + storage_delta_mpts_cache_start_size=1_000_0000 + storage_delta_mpts_slab_idle_size=5_000_0000 + storage_single_mpt_cache_start_size=2_000_0000 + storage_single_mpt_slab_idle_size=1_000_000_00 + storage_single_mpt_cache_size=20_000_0000 + ``` \ No newline at end of file diff --git a/tools/consensus_bench/Cargo.lock b/tools/consensus_bench/Cargo.lock index c0a9839f38..2d427ec260 100644 --- a/tools/consensus_bench/Cargo.lock +++ b/tools/consensus_bench/Cargo.lock @@ -632,6 +632,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "base64ct" version = "1.1.1" @@ -1175,6 +1181,7 @@ dependencies = [ "rlp 0.4.6", "serde", "serde_json", + "serde_with", "similar-asserts", "thiserror 2.0.11", ] @@ -1533,6 +1540,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-targets 0.52.6", ] @@ -1935,6 +1943,41 @@ dependencies = [ "hibitset", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.96", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.96", +] + [[package]] name = "db" version = "0.1.0" @@ -3449,6 +3492,12 @@ dependencies = [ "syn 2.0.96", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.3" @@ -5953,6 +6002,34 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" +dependencies = [ + "base64 0.22.1", + "chrono", + "hex", + "serde", + "serde_derive", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.96", +] + [[package]] name = "serde_yaml" version = "0.8.26" diff --git a/tools/evm-spec-tester/Cargo.lock b/tools/evm-spec-tester/Cargo.lock index f4e0feabb7..9d4a7e88dd 100644 --- a/tools/evm-spec-tester/Cargo.lock +++ b/tools/evm-spec-tester/Cargo.lock @@ -1340,6 +1340,7 @@ dependencies = [ "rlp 0.4.6", "serde", "serde_json", + "serde_with", "similar-asserts", "thiserror 2.0.11", ] @@ -1718,6 +1719,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-targets 0.52.6", ] @@ -2150,6 +2152,41 @@ dependencies = [ "hibitset", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" 
+dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.96", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.96", +] + [[package]] name = "db" version = "0.1.0" @@ -3799,6 +3836,12 @@ dependencies = [ "syn 2.0.96", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.3" @@ -6608,6 +6651,34 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" +dependencies = [ + "base64 0.22.1", + "chrono", + "hex", + "serde", + "serde_derive", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.96", +] + [[package]] name = "serde_yaml" version = "0.8.26"