From e9252af1068b5670e8a65c51c976b5854b28556e Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 11:02:38 +0100 Subject: [PATCH 01/13] Use xorfilter for trie layers --- Cargo.lock | 18 ++---- crates/storage/Cargo.toml | 3 +- crates/storage/trie_db/layering.rs | 90 +++++++++++++++--------------- 3 files changed, 49 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 06f830f89e9..2314907d220 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3767,8 +3767,6 @@ dependencies = [ "ethrex-trie", "hex", "hex-literal", - "qfilter", - "rayon", "rocksdb", "rustc-hash 2.1.1", "serde", @@ -3778,6 +3776,7 @@ dependencies = [ "thiserror 2.0.17", "tokio", "tracing", + "xorfilter-rs", ] [[package]] @@ -7756,15 +7755,6 @@ dependencies = [ "parking_lot", ] -[[package]] -name = "qfilter" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "746341cd2357c9a4df2d951522b4a8dd1ef553e543119899ad7bf87e938c8fbe" -dependencies = [ - "xxhash-rust", -] - [[package]] name = "quick-error" version = "1.2.3" @@ -12242,10 +12232,10 @@ dependencies = [ ] [[package]] -name = "xxhash-rust" -version = "0.8.15" +name = "xorfilter-rs" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" +checksum = "47f9da296a88b6bc150b896d17770a62d4dc6f63ecf0ed10a9c08a1cb3d12f24" [[package]] name = "yaml-rust2" diff --git a/crates/storage/Cargo.toml b/crates/storage/Cargo.toml index c95898bae78..e3468d67ef7 100644 --- a/crates/storage/Cargo.toml +++ b/crates/storage/Cargo.toml @@ -26,8 +26,7 @@ rocksdb = { workspace = true, optional = true } rustc-hash.workspace = true tokio = { workspace = true, optional = true, features = ["rt"] } bincode = "1.3.3" -qfilter = "0.2.5" -rayon.workspace = true +xorfilter-rs = "0.5.1" [features] default = [] diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index 75d67757f38..16eb6eec966 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -1,5 +1,4 @@ use ethrex_common::H256; -use rayon::iter::{ParallelBridge, ParallelIterator}; use rustc_hash::FxHashMap; use std::sync::Arc; @@ -12,7 +11,6 @@ struct TrieLayer { id: usize, } -#[derive(Clone, Debug)] pub struct TrieLayerCache { /// Monotonically increasing ID for layers, starting at 1. /// TODO: this implementation panics on overflow @@ -26,26 +24,49 @@ pub struct TrieLayerCache { /// In case a bloom filter insert or merge fails, we need to mark the bloom filter as poisoned /// so we never use it again, because if we don't we may be misled into believing a key is not present /// on a diff layer when it is (i.e. a false negative), leading to wrong executions. - bloom: Option, + bloom: Option, } impl Default for TrieLayerCache { fn default() -> Self { // Try to create the bloom filter, if it fails use poison mode. - let bloom = Self::create_filter().ok(); Self { - bloom, + bloom: Some(Self::create_filter()), last_id: 0, layers: Default::default(), } } } +impl Clone for TrieLayerCache { + fn clone(&self) -> Self { + let mut trie = Self { + last_id: self.last_id, + layers: self.layers.clone(), + bloom: None, + }; + + // Fuse8 is not Clone. + trie.rebuild_bloom(); + + trie + } +} + +impl std::fmt::Debug for TrieLayerCache { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TrieLayerCache") + .field("last_id", &self.last_id) + .field("layers", &self.layers) + // bloom doesn't implement Debug + .finish_non_exhaustive() + } +} + impl TrieLayerCache { // TODO: tune this - fn create_filter() -> Result { - qfilter::Filter::new_resizeable(100_000, 100_000_000, 0.02) - .inspect_err(|e| tracing::warn!("could not create trie layering bloom filter {e}")) + fn create_filter() -> xorfilter::Fuse8 { + xorfilter::Fuse8::new(10_000_000) } pub fn get(&self, state_root: H256, key: Nibbles) -> Option> { @@ -113,11 +134,12 @@ impl TrieLayerCache { // add this new bloom to the global one. if let Some(filter) = &mut self.bloom { for (p, _) in &key_values { - if let Err(qfilter::Error::CapacityExceeded) = filter.insert(p.as_ref()) { - tracing::warn!("TrieLayerCache: put_batch capacity exceeded"); - self.bloom = None; - break; - } + filter.insert(p.as_ref()); + } + + if let Err(e) = filter.build() { + tracing::warn!("TrieLayerCache: rebuild_bloom error: {e}"); + self.bloom = None; } } @@ -137,43 +159,19 @@ impl TrieLayerCache { /// Rebuilds the global bloom filter accruing all current existing layers. pub fn rebuild_bloom(&mut self) { - let mut blooms: Vec<_> = self - .layers - .values() - .par_bridge() - .map(|entry| { - let Ok(mut bloom) = Self::create_filter() else { - tracing::warn!("TrieLayerCache: rebuild_bloom could not create filter"); - return None; - }; - for (p, _) in entry.nodes.iter() { - if let Err(qfilter::Error::CapacityExceeded) = bloom.insert(p) { - tracing::warn!("TrieLayerCache: rebuild_bloom capacity exceeded"); - return None; - } - } - Some(bloom) - }) - .collect(); + let mut bloom = Self::create_filter(); + + for key in self.layers.values().flat_map(|x| x.nodes.keys()) { + bloom.populate(key); + } - let Some(mut ret) = blooms.pop().flatten() else { - tracing::warn!("TrieLayerCache: rebuild_bloom no valid bloom found"); + if let Err(e) = bloom.build() { + tracing::warn!("TrieLayerCache: rebuild_bloom error: {e}"); self.bloom = None; return; - }; - for bloom in blooms.iter() { - let Some(bloom) = bloom else { - tracing::warn!("TrieLayerCache: rebuild_bloom no valid bloom found"); - self.bloom = None; - return; - }; - if let Err(qfilter::Error::CapacityExceeded) = ret.merge(false, bloom) { - tracing::warn!("TrieLayerCache: rebuild_bloom capacity exceeded"); - self.bloom = None; - return; - } } - self.bloom = Some(ret); + + self.bloom = Some(bloom); } pub fn commit(&mut self, state_root: H256) -> Option, Vec)>> { From a256593c9d2d648d8041272f79df3a3a20b266b4 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 11:11:29 +0100 Subject: [PATCH 02/13] adjust size --- crates/storage/trie_db/layering.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index 16eb6eec966..a873e0ff897 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -66,7 +66,7 @@ impl std::fmt::Debug for TrieLayerCache { impl TrieLayerCache { // TODO: tune this fn create_filter() -> xorfilter::Fuse8 { - xorfilter::Fuse8::new(10_000_000) + xorfilter::Fuse8::new(1_000_000) } pub fn get(&self, state_root: H256, key: Nibbles) -> Option> { From 1d219cd0b73d61736451028ec20817dd335270eb Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 11:12:01 +0100 Subject: [PATCH 03/13] lockfile --- crates/l2/tee/quote-gen/Cargo.lock | 31 ++++++++++++------------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/crates/l2/tee/quote-gen/Cargo.lock b/crates/l2/tee/quote-gen/Cargo.lock index 77b00c85eb5..ba71994ee6b 100644 --- a/crates/l2/tee/quote-gen/Cargo.lock +++ b/crates/l2/tee/quote-gen/Cargo.lock @@ -2075,12 +2075,14 @@ name = "ethrex-blockchain" version = "5.0.0" dependencies = [ "bytes", - "cfg-if 1.0.3", "ethrex-common", "ethrex-metrics", "ethrex-rlp", "ethrex-storage", + "ethrex-trie", "ethrex-vm", + "hex", + "rustc-hash", "secp256k1", "sha3", "thiserror 2.0.16", @@ -2094,17 +2096,19 @@ name = "ethrex-common" version = "5.0.0" dependencies = [ "bytes", - "c-kzg", "crc32fast", "ethereum-types 0.15.1", + "ethrex-crypto", "ethrex-rlp", "ethrex-trie", "hex", "kzg-rs", "lazy_static", + "libc", "once_cell", "rayon", "rkyv", + "rustc-hash", "secp256k1", "serde 1.0.228", "serde_json", @@ -2163,7 +2167,6 @@ dependencies = [ "axum", "bincode", "bytes", - "cfg-if 1.0.3", "chrono", "clap", "color-eyre", @@ -2284,6 +2287,7 @@ dependencies = [ "malachite", "p256", "ripemd", + "rustc-hash", "secp256k1", "serde 1.0.228", "serde_json", @@ -2366,7 +2370,6 @@ dependencies = [ "axum", "axum-extra", "bytes", - "cfg-if 1.0.3", "envy", "ethereum-types 0.15.1", "ethrex-blockchain", @@ -2440,14 +2443,13 @@ dependencies = [ "ethrex-rlp", "ethrex-trie", "hex", - "qfilter", - "rayon", "rustc-hash", "serde 1.0.228", "serde_json", "sha3", "thiserror 2.0.16", "tracing", + "xorfilter-rs", ] [[package]] @@ -2503,7 +2505,6 @@ version = "5.0.0" dependencies = [ "bincode", "bytes", - "cfg-if 1.0.3", "derive_more 1.0.0", "dyn-clone", "ethereum-types 0.15.1", @@ -2882,6 +2883,7 @@ dependencies = [ "bytes", "ethrex-blockchain", "ethrex-common", + "ethrex-crypto", "ethrex-l2-common", "ethrex-rlp", "ethrex-storage", @@ -4747,15 +4749,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "qfilter" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "746341cd2357c9a4df2d951522b4a8dd1ef553e543119899ad7bf87e938c8fbe" -dependencies = [ - "xxhash-rust", -] - [[package]] name = "quote" version = "0.3.15" @@ -7479,10 +7472,10 @@ dependencies = [ ] [[package]] -name = "xxhash-rust" -version = "0.8.15" +name = "xorfilter-rs" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" +checksum = "47f9da296a88b6bc150b896d17770a62d4dc6f63ecf0ed10a9c08a1cb3d12f24" [[package]] name = "yansi" From 2fcf04cbc437d9d642539f487b23f9a704fdf478 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 11:12:45 +0100 Subject: [PATCH 04/13] changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b1338c18ae..694fc72ce74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Perf +### 2025-11-03 + +- Switch to binary fuse filter for added performance on trie layers [#5159](https://github.com/lambdaclass/ethrex/pull/5159) + ### 2025-10-31 - Improved discovery and peer initialization [#5147](https://github.com/lambdaclass/ethrex/pull/5147) From aab862e710fdc61c06abce1786f9f8dbf47746e8 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 11:44:24 +0100 Subject: [PATCH 05/13] fix --- Cargo.lock | 1 + crates/storage/Cargo.toml | 1 + crates/storage/trie_db/layering.rs | 29 +++++++++++++++-------------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2314907d220..e45435a871f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3767,6 +3767,7 @@ dependencies = [ "ethrex-trie", "hex", "hex-literal", + "rayon", "rocksdb", "rustc-hash 2.1.1", "serde", diff --git a/crates/storage/Cargo.toml b/crates/storage/Cargo.toml index e3468d67ef7..a088d82b339 100644 --- a/crates/storage/Cargo.toml +++ b/crates/storage/Cargo.toml @@ -27,6 +27,7 @@ rustc-hash.workspace = true tokio = { workspace = true, optional = true, features = ["rt"] } bincode = "1.3.3" xorfilter-rs = "0.5.1" +rayon.workspace = true [features] default = [] diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index a873e0ff897..14d84f62782 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -1,5 +1,7 @@ use ethrex_common::H256; +use rayon::iter::{ParallelBridge, ParallelIterator}; use rustc_hash::FxHashMap; +use std::hash::BuildHasher; use std::sync::Arc; use ethrex_trie::{Nibbles, TrieDB, TrieError}; @@ -131,18 +133,6 @@ impl TrieLayerCache { return; } - // add this new bloom to the global one. - if let Some(filter) = &mut self.bloom { - for (p, _) in &key_values { - filter.insert(p.as_ref()); - } - - if let Err(e) = filter.build() { - tracing::warn!("TrieLayerCache: rebuild_bloom error: {e}"); - self.bloom = None; - } - } - let nodes: FxHashMap, Vec> = key_values .into_iter() .map(|(path, value)| (path.into_vec(), value)) @@ -155,14 +145,25 @@ impl TrieLayerCache { id: self.last_id, }; self.layers.insert(state_root, Arc::new(entry)); + // We need to rebuild the filter, with xorfilter we can't simply add the layer since it's static. + self.rebuild_bloom(); } /// Rebuilds the global bloom filter accruing all current existing layers. pub fn rebuild_bloom(&mut self) { let mut bloom = Self::create_filter(); - for key in self.layers.values().flat_map(|x| x.nodes.keys()) { - bloom.populate(key); + // Parallelize key hashing ourselves because populate from xorfilter doesn't. + let key_hashes = self + .layers + .values() + .flat_map(|x| x.nodes.keys()) + .par_bridge() + .map(|key| bloom.hash_builder.hash_one(key)) + .collect_vec_list(); + + for keys in key_hashes { + bloom.populate_keys(&keys); } if let Err(e) = bloom.build() { From cd30d8aefb43600082a04fc760037f967d43cde1 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 11:50:26 +0100 Subject: [PATCH 06/13] dedup --- crates/storage/trie_db/layering.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index 14d84f62782..8901511ab0b 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -1,8 +1,9 @@ use ethrex_common::H256; use rayon::iter::{ParallelBridge, ParallelIterator}; +use rayon::slice::ParallelSliceMut; use rustc_hash::FxHashMap; -use std::hash::BuildHasher; use std::sync::Arc; +use std::{collections::HashSet, hash::BuildHasher}; use ethrex_trie::{Nibbles, TrieDB, TrieError}; @@ -154,17 +155,19 @@ impl TrieLayerCache { let mut bloom = Self::create_filter(); // Parallelize key hashing ourselves because populate from xorfilter doesn't. - let key_hashes = self + let mut key_hashes: Vec = self .layers .values() .flat_map(|x| x.nodes.keys()) .par_bridge() .map(|key| bloom.hash_builder.hash_one(key)) - .collect_vec_list(); + .collect(); - for keys in key_hashes { - bloom.populate_keys(&keys); - } + // xorfilter needs "few" or no unique keys, so we need to do this. + key_hashes.par_sort_unstable(); + key_hashes.dedup(); + + bloom.populate_keys(&key_hashes); if let Err(e) = bloom.build() { tracing::warn!("TrieLayerCache: rebuild_bloom error: {e}"); From 9f857780a4f396c09bf6d126a191f3ad9559ed97 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 11:54:11 +0100 Subject: [PATCH 07/13] lint --- crates/storage/trie_db/layering.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index 8901511ab0b..6bd55cf40d0 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -2,8 +2,8 @@ use ethrex_common::H256; use rayon::iter::{ParallelBridge, ParallelIterator}; use rayon::slice::ParallelSliceMut; use rustc_hash::FxHashMap; +use std::hash::BuildHasher; use std::sync::Arc; -use std::{collections::HashSet, hash::BuildHasher}; use ethrex_trie::{Nibbles, TrieDB, TrieError}; From 8bc2c773707b6c2fa406af92b2315088d7250f65 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 12:01:19 +0100 Subject: [PATCH 08/13] avoid rebuild bloom on clone --- crates/storage/trie_db/layering.rs | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index 6bd55cf40d0..57e9a32b441 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -14,6 +14,7 @@ struct TrieLayer { id: usize, } +#[derive(Clone)] pub struct TrieLayerCache { /// Monotonically increasing ID for layers, starting at 1. /// TODO: this implementation panics on overflow @@ -27,35 +28,20 @@ pub struct TrieLayerCache { /// In case a bloom filter insert or merge fails, we need to mark the bloom filter as poisoned /// so we never use it again, because if we don't we may be misled into believing a key is not present /// on a diff layer when it is (i.e. a false negative), leading to wrong executions. - bloom: Option, + bloom: Option>, } impl Default for TrieLayerCache { fn default() -> Self { // Try to create the bloom filter, if it fails use poison mode. Self { - bloom: Some(Self::create_filter()), + bloom: None, last_id: 0, layers: Default::default(), } } } -impl Clone for TrieLayerCache { - fn clone(&self) -> Self { - let mut trie = Self { - last_id: self.last_id, - layers: self.layers.clone(), - bloom: None, - }; - - // Fuse8 is not Clone. - trie.rebuild_bloom(); - - trie - } -} - impl std::fmt::Debug for TrieLayerCache { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("TrieLayerCache") @@ -175,7 +161,7 @@ impl TrieLayerCache { return; } - self.bloom = Some(bloom); + self.bloom = Some(Arc::new(bloom)); } pub fn commit(&mut self, state_root: H256) -> Option, Vec)>> { From b26012d0763bd545f66aeb3454cc7ecd39e70217 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 12:09:11 +0100 Subject: [PATCH 09/13] improve hash --- crates/storage/trie_db/layering.rs | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index 57e9a32b441..d4b9aa8a2f3 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -2,7 +2,8 @@ use ethrex_common::H256; use rayon::iter::{ParallelBridge, ParallelIterator}; use rayon::slice::ParallelSliceMut; use rustc_hash::FxHashMap; -use std::hash::BuildHasher; +use std::hash::Hasher; +use std::hash::{DefaultHasher, Hash}; use std::sync::Arc; use ethrex_trie::{Nibbles, TrieDB, TrieError}; @@ -12,6 +13,8 @@ struct TrieLayer { nodes: Arc, Vec>>, parent: H256, id: usize, + // Hashed keys for the filter + keys: Arc>, } #[derive(Clone)] @@ -126,11 +129,24 @@ impl TrieLayerCache { .collect(); self.last_id += 1; + + let key_hashes: Vec = nodes + .keys() + .par_bridge() + .map(|key| { + let mut h = DefaultHasher::new(); + key.hash(&mut h); + h.finish() + }) + .collect(); + let entry = TrieLayer { nodes: Arc::new(nodes), parent, id: self.last_id, + keys: Arc::new(key_hashes), }; + self.layers.insert(state_root, Arc::new(entry)); // We need to rebuild the filter, with xorfilter we can't simply add the layer since it's static. self.rebuild_bloom(); @@ -144,18 +160,14 @@ impl TrieLayerCache { let mut key_hashes: Vec = self .layers .values() - .flat_map(|x| x.nodes.keys()) - .par_bridge() - .map(|key| bloom.hash_builder.hash_one(key)) + .flat_map(|x| x.keys.iter().copied()) .collect(); // xorfilter needs "few" or no unique keys, so we need to do this. key_hashes.par_sort_unstable(); key_hashes.dedup(); - bloom.populate_keys(&key_hashes); - - if let Err(e) = bloom.build() { + if let Err(e) = bloom.build_keys(&key_hashes) { tracing::warn!("TrieLayerCache: rebuild_bloom error: {e}"); self.bloom = None; return; From 59164136e138085718b8cf9076cf99d1d82775a7 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 12:13:13 +0100 Subject: [PATCH 10/13] use fxhash --- crates/storage/trie_db/layering.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index d4b9aa8a2f3..85b98240ceb 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -1,7 +1,7 @@ use ethrex_common::H256; use rayon::iter::{ParallelBridge, ParallelIterator}; use rayon::slice::ParallelSliceMut; -use rustc_hash::FxHashMap; +use rustc_hash::{FxHashMap, FxHasher}; use std::hash::Hasher; use std::hash::{DefaultHasher, Hash}; use std::sync::Arc; @@ -134,7 +134,7 @@ impl TrieLayerCache { .keys() .par_bridge() .map(|key| { - let mut h = DefaultHasher::new(); + let mut h = FxHasher::default(); key.hash(&mut h); h.finish() }) From 48fb2d435762cdf619319e5e988a3d38b112eb11 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 13:30:50 +0100 Subject: [PATCH 11/13] fix --- crates/storage/trie_db/layering.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index 85b98240ceb..30e20b878b3 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -2,8 +2,8 @@ use ethrex_common::H256; use rayon::iter::{ParallelBridge, ParallelIterator}; use rayon::slice::ParallelSliceMut; use rustc_hash::{FxHashMap, FxHasher}; +use std::hash::Hash; use std::hash::Hasher; -use std::hash::{DefaultHasher, Hash}; use std::sync::Arc; use ethrex_trie::{Nibbles, TrieDB, TrieError}; @@ -67,7 +67,11 @@ impl TrieLayerCache { // Fast check to know if any layer may contains the given key. // We can only be certain it doesn't exist, but if it returns true it may or not exist (false positive). if let Some(filter) = &self.bloom - && !filter.contains(key) + && !filter.contains_key({ + let mut s = FxHasher::default(); + key.hash(&mut s); + s.finish() + }) { // TrieWrapper goes to db when returning None. return None; From 57a0544d94a1082f59947d11d188cddf66233c1d Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 13:34:27 +0100 Subject: [PATCH 12/13] fix --- crates/storage/trie_db/layering.rs | 37 +++++++++++------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index 30e20b878b3..1c05e531d2e 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -1,9 +1,8 @@ use ethrex_common::H256; use rayon::iter::{ParallelBridge, ParallelIterator}; use rayon::slice::ParallelSliceMut; -use rustc_hash::{FxHashMap, FxHasher}; -use std::hash::Hash; -use std::hash::Hasher; +use rustc_hash::{FxBuildHasher, FxHashMap}; +use std::hash::BuildHasher; use std::sync::Arc; use ethrex_trie::{Nibbles, TrieDB, TrieError}; @@ -13,8 +12,8 @@ struct TrieLayer { nodes: Arc, Vec>>, parent: H256, id: usize, - // Hashed keys for the filter - keys: Arc>, + // pre-computed 64-bit digests to recreate the global bloom filter + bloom_digests: Arc>, } #[derive(Clone)] @@ -31,7 +30,7 @@ pub struct TrieLayerCache { /// In case a bloom filter insert or merge fails, we need to mark the bloom filter as poisoned /// so we never use it again, because if we don't we may be misled into believing a key is not present /// on a diff layer when it is (i.e. a false negative), leading to wrong executions. - bloom: Option>, + bloom: Option>>, } impl Default for TrieLayerCache { @@ -57,7 +56,7 @@ impl std::fmt::Debug for TrieLayerCache { impl TrieLayerCache { // TODO: tune this - fn create_filter() -> xorfilter::Fuse8 { + fn create_filter() -> xorfilter::Fuse8 { xorfilter::Fuse8::new(1_000_000) } @@ -67,11 +66,7 @@ impl TrieLayerCache { // Fast check to know if any layer may contains the given key. // We can only be certain it doesn't exist, but if it returns true it may or not exist (false positive). if let Some(filter) = &self.bloom - && !filter.contains_key({ - let mut s = FxHasher::default(); - key.hash(&mut s); - s.finish() - }) + && !filter.contains(key) { // TrieWrapper goes to db when returning None. return None; @@ -137,18 +132,14 @@ impl TrieLayerCache { let key_hashes: Vec = nodes .keys() .par_bridge() - .map(|key| { - let mut h = FxHasher::default(); - key.hash(&mut h); - h.finish() - }) + .map(|key| FxBuildHasher::default().hash_one(key)) .collect(); let entry = TrieLayer { nodes: Arc::new(nodes), parent, id: self.last_id, - keys: Arc::new(key_hashes), + bloom_digests: Arc::new(key_hashes), }; self.layers.insert(state_root, Arc::new(entry)); @@ -161,17 +152,17 @@ impl TrieLayerCache { let mut bloom = Self::create_filter(); // Parallelize key hashing ourselves because populate from xorfilter doesn't. - let mut key_hashes: Vec = self + let mut bloom_digests: Vec = self .layers .values() - .flat_map(|x| x.keys.iter().copied()) + .flat_map(|x| x.bloom_digests.iter().copied()) .collect(); // xorfilter needs "few" or no unique keys, so we need to do this. - key_hashes.par_sort_unstable(); - key_hashes.dedup(); + bloom_digests.par_sort_unstable(); + bloom_digests.dedup(); - if let Err(e) = bloom.build_keys(&key_hashes) { + if let Err(e) = bloom.build_keys(&bloom_digests) { tracing::warn!("TrieLayerCache: rebuild_bloom error: {e}"); self.bloom = None; return; From 4c1a89e988cb2dbe2a99b88bc15bd040b81236c2 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 3 Nov 2025 13:49:19 +0100 Subject: [PATCH 13/13] lint --- crates/storage/trie_db/layering.rs | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index 1c05e531d2e..7ac9983c2ba 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -16,7 +16,7 @@ struct TrieLayer { bloom_digests: Arc>, } -#[derive(Clone)] +#[derive(Clone, Default)] pub struct TrieLayerCache { /// Monotonically increasing ID for layers, starting at 1. /// TODO: this implementation panics on overflow @@ -33,17 +33,6 @@ pub struct TrieLayerCache { bloom: Option>>, } -impl Default for TrieLayerCache { - fn default() -> Self { - // Try to create the bloom filter, if it fails use poison mode. - Self { - bloom: None, - last_id: 0, - layers: Default::default(), - } - } -} - impl std::fmt::Debug for TrieLayerCache { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("TrieLayerCache") @@ -132,7 +121,7 @@ impl TrieLayerCache { let key_hashes: Vec = nodes .keys() .par_bridge() - .map(|key| FxBuildHasher::default().hash_one(key)) + .map(|key| FxBuildHasher.hash_one(key)) .collect(); let entry = TrieLayer {