Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Perf

### 2025-11-04

- Avoid unnecessary work on bloom by keeping a per-layer bloom [#5176](https://github.com/lambdaclass/ethrex/pull/5176)

### 2025-11-03
- Merge execution with some post-execution validations [#5170](https://github.com/lambdaclass/ethrex/pull/5170)

Expand Down
42 changes: 24 additions & 18 deletions crates/storage/trie_db/layering.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ struct TrieLayer {
nodes: Arc<FxHashMap<Vec<u8>, Vec<u8>>>,
parent: H256,
id: usize,
/// Per-layer bloom filter, None if the size was exceeded (exceedingly rare).
/// Having a bloom per layer avoids the cost of rehashing each key every time we rebuild the global bloom,
/// since merge simply uses the u64 hashed keys instead of rehashing.
bloom: Option<qfilter::Filter>,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't change it right now, but I think we should start referring to this as "query filter", as we're not necessarily relying on the Bloom implementation (approximate query filter is the "generic" name, but too long).

}

#[derive(Clone, Debug)]
Expand Down Expand Up @@ -110,17 +114,30 @@ impl TrieLayerCache {
return;
}

// add this new bloom to the global one.
if let Some(filter) = &mut self.bloom {
let mut bloom = Self::create_filter().ok();

// create the layer bloom, this is the only place where hashing of keys happens.
if let Some(filter) = &mut bloom {
for (p, _) in &key_values {
if let Err(qfilter::Error::CapacityExceeded) = filter.insert(p.as_ref()) {
tracing::warn!("TrieLayerCache: put_batch capacity exceeded");
self.bloom = None;
tracing::warn!("TrieLayerCache: put_batch per layer capacity exceeded");
bloom = None;
break;
}
}
}

// add this new bloom to the global one via merge
if let Some(filter) = &mut self.bloom
&& let Some(new_filter) = &bloom
{
if let Err(qfilter::Error::CapacityExceeded) = filter.merge(false, new_filter) {
tracing::warn!("TrieLayerCache: put_batch merge capacity exceeded");
self.bloom = None;
bloom = None;
}
}

let nodes: FxHashMap<Vec<u8>, Vec<u8>> = key_values
.into_iter()
.map(|(path, value)| (path.into_vec(), value))
Expand All @@ -131,6 +148,7 @@ impl TrieLayerCache {
nodes: Arc::new(nodes),
parent,
id: self.last_id,
bloom,
};
self.layers.insert(state_root, Arc::new(entry));
}
Expand All @@ -141,22 +159,10 @@ impl TrieLayerCache {
.layers
.values()
.par_bridge()
.map(|entry| {
let Ok(mut bloom) = Self::create_filter() else {
tracing::warn!("TrieLayerCache: rebuild_bloom could not create filter");
return None;
};
for (p, _) in entry.nodes.iter() {
if let Err(qfilter::Error::CapacityExceeded) = bloom.insert(p) {
tracing::warn!("TrieLayerCache: rebuild_bloom capacity exceeded");
return None;
}
}
Some(bloom)
})
.map(|entry| entry.bloom.as_ref())
.collect();

let Some(mut ret) = blooms.pop().flatten() else {
let Some(mut ret) = blooms.pop().flatten().cloned() else {
tracing::warn!("TrieLayerCache: rebuild_bloom no valid bloom found");
self.bloom = None;
return;
Expand Down
Loading