diff --git a/CHANGELOG.md b/CHANGELOG.md index db8263f6e34..64b6a64c97b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ ## Perf ### 2025-11-03 + +- Switch to binary fuse filter for added performance on trie layers [#5159](https://github.com/lambdaclass/ethrex/pull/5159) + +### 2025-10-31 + - Merge execution with some post-execution validations [#5170](https://github.com/lambdaclass/ethrex/pull/5170) ### 2025-10-31 diff --git a/Cargo.lock b/Cargo.lock index 06f830f89e9..e45435a871f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3767,7 +3767,6 @@ dependencies = [ "ethrex-trie", "hex", "hex-literal", - "qfilter", "rayon", "rocksdb", "rustc-hash 2.1.1", @@ -3778,6 +3777,7 @@ dependencies = [ "thiserror 2.0.17", "tokio", "tracing", + "xorfilter-rs", ] [[package]] @@ -7756,15 +7756,6 @@ dependencies = [ "parking_lot", ] -[[package]] -name = "qfilter" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "746341cd2357c9a4df2d951522b4a8dd1ef553e543119899ad7bf87e938c8fbe" -dependencies = [ - "xxhash-rust", -] - [[package]] name = "quick-error" version = "1.2.3" @@ -12242,10 +12233,10 @@ dependencies = [ ] [[package]] -name = "xxhash-rust" -version = "0.8.15" +name = "xorfilter-rs" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" +checksum = "47f9da296a88b6bc150b896d17770a62d4dc6f63ecf0ed10a9c08a1cb3d12f24" [[package]] name = "yaml-rust2" diff --git a/crates/l2/tee/quote-gen/Cargo.lock b/crates/l2/tee/quote-gen/Cargo.lock index 77b00c85eb5..ba71994ee6b 100644 --- a/crates/l2/tee/quote-gen/Cargo.lock +++ b/crates/l2/tee/quote-gen/Cargo.lock @@ -2075,12 +2075,14 @@ name = "ethrex-blockchain" version = "5.0.0" dependencies = [ "bytes", - "cfg-if 1.0.3", "ethrex-common", "ethrex-metrics", "ethrex-rlp", "ethrex-storage", + "ethrex-trie", "ethrex-vm", + "hex", + "rustc-hash", "secp256k1", "sha3", "thiserror 
2.0.16", @@ -2094,17 +2096,19 @@ name = "ethrex-common" version = "5.0.0" dependencies = [ "bytes", - "c-kzg", "crc32fast", "ethereum-types 0.15.1", + "ethrex-crypto", "ethrex-rlp", "ethrex-trie", "hex", "kzg-rs", "lazy_static", + "libc", "once_cell", "rayon", "rkyv", + "rustc-hash", "secp256k1", "serde 1.0.228", "serde_json", @@ -2163,7 +2167,6 @@ dependencies = [ "axum", "bincode", "bytes", - "cfg-if 1.0.3", "chrono", "clap", "color-eyre", @@ -2284,6 +2287,7 @@ dependencies = [ "malachite", "p256", "ripemd", + "rustc-hash", "secp256k1", "serde 1.0.228", "serde_json", @@ -2366,7 +2370,6 @@ dependencies = [ "axum", "axum-extra", "bytes", - "cfg-if 1.0.3", "envy", "ethereum-types 0.15.1", "ethrex-blockchain", @@ -2440,14 +2443,13 @@ dependencies = [ "ethrex-rlp", "ethrex-trie", "hex", - "qfilter", - "rayon", "rustc-hash", "serde 1.0.228", "serde_json", "sha3", "thiserror 2.0.16", "tracing", + "xorfilter-rs", ] [[package]] @@ -2503,7 +2505,6 @@ version = "5.0.0" dependencies = [ "bincode", "bytes", - "cfg-if 1.0.3", "derive_more 1.0.0", "dyn-clone", "ethereum-types 0.15.1", @@ -2882,6 +2883,7 @@ dependencies = [ "bytes", "ethrex-blockchain", "ethrex-common", + "ethrex-crypto", "ethrex-l2-common", "ethrex-rlp", "ethrex-storage", @@ -4747,15 +4749,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "qfilter" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "746341cd2357c9a4df2d951522b4a8dd1ef553e543119899ad7bf87e938c8fbe" -dependencies = [ - "xxhash-rust", -] - [[package]] name = "quote" version = "0.3.15" @@ -7479,10 +7472,10 @@ dependencies = [ ] [[package]] -name = "xxhash-rust" -version = "0.8.15" +name = "xorfilter-rs" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" +checksum = "47f9da296a88b6bc150b896d17770a62d4dc6f63ecf0ed10a9c08a1cb3d12f24" [[package]] name = "yansi" diff --git 
a/crates/storage/Cargo.toml b/crates/storage/Cargo.toml index c95898bae78..a088d82b339 100644 --- a/crates/storage/Cargo.toml +++ b/crates/storage/Cargo.toml @@ -26,7 +26,7 @@ rocksdb = { workspace = true, optional = true } rustc-hash.workspace = true tokio = { workspace = true, optional = true, features = ["rt"] } bincode = "1.3.3" -qfilter = "0.2.5" +xorfilter-rs = "0.5.1" rayon.workspace = true [features] diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index ae52f3b7294..7ac9983c2ba 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -1,6 +1,8 @@ use ethrex_common::H256; use rayon::iter::{ParallelBridge, ParallelIterator}; -use rustc_hash::FxHashMap; +use rayon::slice::ParallelSliceMut; +use rustc_hash::{FxBuildHasher, FxHashMap}; +use std::hash::BuildHasher; use std::sync::Arc; use ethrex_trie::{Nibbles, TrieDB, TrieError}; @@ -10,9 +12,11 @@ struct TrieLayer { nodes: Arc, Vec>>, parent: H256, id: usize, + // pre-computed 64-bit digests to recreate the global bloom filter + bloom_digests: Arc>, } -#[derive(Clone, Debug)] +#[derive(Clone, Default)] pub struct TrieLayerCache { /// Monotonically increasing ID for layers, starting at 1. /// TODO: this implementation panics on overflow @@ -26,26 +30,23 @@ pub struct TrieLayerCache { /// In case a bloom filter insert or merge fails, we need to mark the bloom filter as poisoned /// so we never use it again, because if we don't we may be misled into believing a key is not present /// on a diff layer when it is (i.e. a false negative), leading to wrong executions. - bloom: Option, + bloom: Option>>, } -impl Default for TrieLayerCache { - fn default() -> Self { - // Try to create the bloom filter, if it fails use poison mode. 
- let bloom = Self::create_filter().ok(); - Self { - bloom, - last_id: 0, - layers: Default::default(), - } +impl std::fmt::Debug for TrieLayerCache { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TrieLayerCache") + .field("last_id", &self.last_id) + .field("layers", &self.layers) + // bloom doesn't implement Debug + .finish_non_exhaustive() } } impl TrieLayerCache { // TODO: tune this - fn create_filter() -> Result { - qfilter::Filter::new_resizeable(1_000_000, 100_000_000, 0.02) - .inspect_err(|e| tracing::warn!("could not create trie layering bloom filter {e}")) + fn create_filter() -> xorfilter::Fuse8 { + xorfilter::Fuse8::new(1_000_000) } pub fn get(&self, state_root: H256, key: Nibbles) -> Option> { @@ -110,70 +111,53 @@ impl TrieLayerCache { return; } - // add this new bloom to the global one. - if let Some(filter) = &mut self.bloom { - for (p, _) in &key_values { - if let Err(qfilter::Error::CapacityExceeded) = filter.insert(p.as_ref()) { - tracing::warn!("TrieLayerCache: put_batch capacity exceeded"); - self.bloom = None; - break; - } - } - } - let nodes: FxHashMap, Vec> = key_values .into_iter() .map(|(path, value)| (path.into_vec(), value)) .collect(); self.last_id += 1; + + let key_hashes: Vec = nodes + .keys() + .par_bridge() + .map(|key| FxBuildHasher.hash_one(key)) + .collect(); + let entry = TrieLayer { nodes: Arc::new(nodes), parent, id: self.last_id, + bloom_digests: Arc::new(key_hashes), }; + self.layers.insert(state_root, Arc::new(entry)); + // We need to rebuild the filter, with xorfilter we can't simply add the layer since it's static. + self.rebuild_bloom(); } /// Rebuilds the global bloom filter accruing all current existing layers. pub fn rebuild_bloom(&mut self) { - let mut blooms: Vec<_> = self + let mut bloom = Self::create_filter(); + + // Parallelize key hashing ourselves because populate from xorfilter doesn't. 
+ let mut bloom_digests: Vec<u64> = self .layers .values() - .par_bridge() - .map(|entry| { - let Ok(mut bloom) = Self::create_filter() else { - tracing::warn!("TrieLayerCache: rebuild_bloom could not create filter"); - return None; - }; - for (p, _) in entry.nodes.iter() { - if let Err(qfilter::Error::CapacityExceeded) = bloom.insert(p) { - tracing::warn!("TrieLayerCache: rebuild_bloom capacity exceeded"); - return None; - } - } - Some(bloom) - }) + .flat_map(|x| x.bloom_digests.iter().copied()) .collect(); - let Some(mut ret) = blooms.pop().flatten() else { - tracing::warn!("TrieLayerCache: rebuild_bloom no valid bloom found"); + // xorfilter needs "few" or no duplicate keys, so we sort and dedup the digests first. + bloom_digests.par_sort_unstable(); + bloom_digests.dedup(); + + if let Err(e) = bloom.build_keys(&bloom_digests) { + tracing::warn!("TrieLayerCache: rebuild_bloom error: {e}"); self.bloom = None; return; - }; - for bloom in blooms.iter() { - let Some(bloom) = bloom else { - tracing::warn!("TrieLayerCache: rebuild_bloom no valid bloom found"); - self.bloom = None; - return; - }; - if let Err(qfilter::Error::CapacityExceeded) = ret.merge(false, bloom) { - tracing::warn!("TrieLayerCache: rebuild_bloom capacity exceeded"); - self.bloom = None; - return; - } } - self.bloom = Some(ret); + + self.bloom = Some(Arc::new(bloom)); } pub fn commit(&mut self, state_root: H256) -> Option<Vec<(Vec<u8>, Vec<u8>)>> {