From 04b2d38cd0cedcb80bd3842b6b87c087d3f5387a Mon Sep 17 00:00:00 2001
From: Andrurachi <andruvrch@gmail.com>
Date: Wed, 3 Dec 2025 17:01:00 -0500
Subject: [PATCH 1/2] chore: add micro-benchmarks for sorting threshold
 optimization

---
 Cargo.lock                                |   3 +
 crates/trie/db/Cargo.toml                 |   8 ++
 crates/trie/db/benches/sorting_par_exp.rs | 113 ++++++++++++++++++++++
 3 files changed, 124 insertions(+)
 create mode 100644 crates/trie/db/benches/sorting_par_exp.rs
diff --git a/Cargo.lock b/Cargo.lock
index 8427f237ffc..3bb30725089 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -10917,8 +10917,11 @@ dependencies = [
  "alloy-consensus",
  "alloy-primitives",
  "alloy-rlp",
+ "codspeed-criterion-compat",
  "proptest",
  "proptest-arbitrary-interop",
+ "rand 0.9.2",
+ "rayon",
  "reth-chainspec",
  "reth-db",
  "reth-db-api",
diff --git a/crates/trie/db/Cargo.toml b/crates/trie/db/Cargo.toml
index 09ccd301192..119760891e1 100644
--- a/crates/trie/db/Cargo.toml
+++ b/crates/trie/db/Cargo.toml
@@ -47,6 +47,10 @@ proptest-arbitrary-interop.workspace = true
 serde_json.workspace = true
 similar-asserts.workspace = true
 
+criterion.workspace = true
+rayon.workspace = true
+rand.workspace = true
+
 [features]
 metrics = ["reth-trie/metrics"]
 serde = [
@@ -68,3 +72,7 @@ test-utils = [
     "reth-provider/test-utils",
     "reth-trie/test-utils",
 ]
+
+[[bench]]
+name = "sorting_par_exp"
+harness = false
\ No newline at end of file
diff --git a/crates/trie/db/benches/sorting_par_exp.rs b/crates/trie/db/benches/sorting_par_exp.rs
new file mode 100644
index 00000000000..e4e36a87a0f
--- /dev/null
+++ b/crates/trie/db/benches/sorting_par_exp.rs
@@ -0,0 +1,113 @@
+#![allow(missing_docs, unreachable_pub)]
+
+use alloy_primitives::{Address, B256, U256};
+use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput};
+use rand::{rngs::StdRng, Rng, SeedableRng};
+use rayon::prelude::*;
+
+/// Synthetic storage data: a list of accounts, each with a list of slots.
+type MockStorage = Vec<(Address, Vec<(B256, U256)>)>;
+
+#[derive(Clone, Copy, Debug)]
+enum Distribution {
+    Uniform(usize),
+    Skewed { light: usize, heavy: usize },
+}
+
+/// Generate random synthetic data with a fixed seed for reproducibility.
+fn generate_data(num_accounts: usize, dist: Distribution) -> MockStorage {
+    let mut rng = StdRng::seed_from_u64(42);
+
+    (0..num_accounts)
+        .map(|_| {
+            let address = Address::random();
+            let slot_count = match dist {
+                Distribution::Uniform(n) => n,
+                Distribution::Skewed { light, heavy } => {
+                    // 5% chance of being a heavy account
+                    if rng.random_bool(0.05) {
+                        heavy
+                    } else {
+                        light
+                    }
+                }
+            };
+
+            let slots = (0..slot_count)
+                .map(|_| (B256::random(), U256::from(rng.random::<u64>())))
+                .collect();
+            (address, slots)
+        })
+        .collect()
+}
+
+/// Loop sequentially, sort slots sequentially.
+fn process_sequential(mut data: MockStorage) {
+    for (_addr, slots) in &mut data {
+        slots.sort_unstable_by_key(|(k, _)| *k);
+    }
+}
+
+/// Accounts in parallel, sort slots sequentially.
+fn process_par_iter_accounts(mut data: MockStorage) {
+    data.par_iter_mut().for_each(|(_addr, slots)| {
+        slots.sort_unstable_by_key(|(k, _)| *k);
+    });
+}
+
+/// Accounts sequentially, sort slots parallel.
+fn process_par_sort_slots(mut data: MockStorage) {
+    for (_addr, slots) in &mut data {
+        slots.par_sort_unstable_by_key(|(k, _)| *k);
+    }
+}
+
+fn bench_storage_sort(c: &mut Criterion) {
+    let mut group = c.benchmark_group("sorting_par_exp");
+
+    let scenarios = vec![
+        // Finding Account Threshold.
+        ("Acc_Low", 100, Distribution::Uniform(4)),
+        ("Acc_Med", 1_000, Distribution::Uniform(4)),
+        ("Acc_Med_High", 2_500, Distribution::Uniform(4)),
+        ("Acc_High", 10_000, Distribution::Uniform(4)),
+        // Finding Slot Threshold.
+        ("Slots_Med", 10, Distribution::Uniform(100)),
+        ("Slots_High", 10, Distribution::Uniform(5_000)),
+        ("Slots_Massive", 10, Distribution::Uniform(50_000)),
+        // 10k accounts. Most have 4 slots, 5% have 2k slots.
+        ("Skewed_2.5k", 2_500, Distribution::Skewed { light: 4, heavy: 2_000 }),
+        ("Skewed_10k", 10_000, Distribution::Skewed { light: 4, heavy: 2_000 }),
+    ];
+
+    for (name, accounts, dist) in scenarios {
+        let input = generate_data(accounts, dist);
+
+        let total_slots: usize = input.iter().map(|(_, s)| s.len()).sum();
+        group.throughput(Throughput::Elements(total_slots as u64));
+
+        // Sequential
+        group.bench_with_input(BenchmarkId::new("Sequential", name), &input, |b, data| {
+            b.iter_batched(|| data.clone(), process_sequential, BatchSize::LargeInput)
+        });
+
+        // Parallel Accounts
+        group.bench_with_input(BenchmarkId::new("Par_Accounts", name), &input, |b, data| {
+            b.iter_batched(|| data.clone(), process_par_iter_accounts, BatchSize::LargeInput)
+        });
+
+        // Parallel Slots
+        if let Distribution::Uniform(s) = dist &&
+            s >= 100
+        {
+            group.bench_with_input(BenchmarkId::new("Par_Inner_Slots", name), &input, |b, data| {
+                b.iter_batched(|| data.clone(), process_par_sort_slots, BatchSize::LargeInput)
+            });
+        }
+    }
+
+    group.finish();
+}
+
+criterion_group!(sorting_par_exp, bench_storage_sort);
+criterion_main!(sorting_par_exp);

From 1843db5988ac322edac1bcd867cb9f95896ad402 Mon Sep 17 00:00:00 2001
From: Andrurachi <andruvrch@gmail.com>
Date: Thu, 4 Dec 2025 13:03:46 -0500
Subject: [PATCH 2/2] perf(trie): parallelize
 HashedPostStateSorted::from_reverts

Parallelizes the hashing/sorting step using Rayon when account count
exceeds a threshold (2500). This alleviates CPU bottlenecks during large
state reverts or deep reorgs.

Closes #20049
---
 crates/trie/db/Cargo.toml                   |  14 ++-
 crates/trie/db/benches/integration_bench.rs | 110 ++++++++++++++++++++
 crates/trie/db/src/state.rs                 |  42 ++++++--
 3 files changed, 156 insertions(+), 10 deletions(-)
 create mode 100644 crates/trie/db/benches/integration_bench.rs

diff --git a/crates/trie/db/Cargo.toml b/crates/trie/db/Cargo.toml
index 119760891e1..7766fa18bed 100644
--- a/crates/trie/db/Cargo.toml
+++ b/crates/trie/db/Cargo.toml
@@ -24,6 +24,9 @@ alloy-primitives.workspace = true
 # tracing
 tracing.workspace = true
 
+# rayon
+rayon = { workspace = true, optional = true }
+
 [dev-dependencies]
 # reth
 reth-chainspec.workspace = true
@@ -48,7 +51,6 @@ serde_json.workspace = true
 similar-asserts.workspace = true
 
 criterion.workspace = true
-rayon.workspace = true
 rand.workspace = true
 
 [features]
@@ -62,6 +64,7 @@ serde = [
     "reth-primitives-traits/serde",
     "revm-database/serde",
     "revm/serde",
+    "rand/serde",
 ]
 test-utils = [
     "reth-trie-common/test-utils",
@@ -73,6 +76,13 @@ test-utils = [
     "reth-trie/test-utils",
 ]
 
+parallel-from-reverts = ["dep:rayon"]
+
 [[bench]]
 name = "sorting_par_exp"
-harness = false
\ No newline at end of file
+harness = false
+required-features = ["parallel-from-reverts"]
+
+[[bench]]
+name = "integration_bench"
+harness = false
diff --git a/crates/trie/db/benches/integration_bench.rs b/crates/trie/db/benches/integration_bench.rs
new file mode 100644
index 00000000000..ab4a2858d86
--- /dev/null
+++ b/crates/trie/db/benches/integration_bench.rs
@@ -0,0 +1,110 @@
+#![allow(missing_docs, unreachable_pub)]
+
+use alloy_primitives::{Address, B256, U256};
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use rand::{rngs::StdRng, Rng, SeedableRng};
+use reth_db::{
+    test_utils::create_test_rw_db,
+    transaction::{DbTx, DbTxMut},
+    Database, DatabaseEnv,
+};
+use reth_db_api::{
+    models::{AccountBeforeTx, BlockNumberAddress},
+    tables,
+};
+use reth_primitives_traits::{Account, StorageEntry};
+use reth_trie::{HashedPostStateSorted, KeccakKeyHasher};
+use reth_trie_db::DatabaseHashedPostState;
+use std::sync::Arc;
+
+// Populate a temporary database with synthetic revert data.
+type TestDb = Arc<reth_db::test_utils::TempDatabase<DatabaseEnv>>;
+
+#[derive(Clone, Copy, Debug)]
+enum Distribution {
+    Uniform(usize),
+    Skewed { light: usize, heavy: usize },
+}
+
+fn seed_db(num_accounts: usize, dist: Distribution) -> TestDb {
+    let db = create_test_rw_db();
+    let tx = db.tx_mut().expect("failed to create rw tx");
+
+    let mut rng = StdRng::seed_from_u64(12345);
+    let block = 1;
+
+    for idx in 0..num_accounts {
+        let address = Address::random();
+
+        let account =
+            Account { nonce: idx as u64, balance: U256::from(idx as u64), bytecode_hash: None };
+
+        tx.put::<tables::AccountChangeSets>(
+            block,
+            AccountBeforeTx { address, info: Some(account) },
+        )
+        .expect("failed to insert account changeset");
+
+        let slots_count = match dist {
+            Distribution::Uniform(n) => n,
+            Distribution::Skewed { light, heavy } => {
+                if rng.random_bool(0.05) {
+                    heavy
+                } else {
+                    light
+                }
+            }
+        };
+
+        for slot_idx in 0..slots_count {
+            let slot_key = B256::random();
+            let value = U256::from(slot_idx);
+
+            tx.put::<tables::StorageChangeSets>(
+                BlockNumberAddress((block, address)),
+                StorageEntry { key: slot_key, value },
+            )
+            .expect("failed to insert storage changeset");
+        }
+    }
+
+    tx.commit().expect("failed to commit seeded db");
+    db
+}
+
+fn bench_db_execution(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Integration_DB_Sort");
+    group.measurement_time(std::time::Duration::from_secs(10));
+
+    // Scenarios to test:
+    // - Below Threshold: Should be identical (Sequential)
+    // - Above Threshold: Should show some speedup with feature flag
+    // - Skewed: Should show speedup with feature flag
+    let scenarios = vec![
+        ("Small_Under_Threshold", 1_000, Distribution::Uniform(50)),
+        ("Large_Uniform", 10_000, Distribution::Uniform(20)),
+        ("Large_Skewed", 10_000, Distribution::Skewed { light: 4, heavy: 2_000 }),
+    ];
+
+    for (name, accounts, dist) in scenarios {
+        let db = seed_db(accounts, dist);
+        let range = 1..=1;
+
+        group.bench_function(BenchmarkId::new("from_reverts", name), |b| {
+            b.iter(|| {
+                let tx = db.tx().expect("failed to create ro tx");
+
+                let state =
+                    HashedPostStateSorted::from_reverts::<KeccakKeyHasher>(&tx, range.clone())
+                        .expect("failed to calculate state");
+
+                black_box(state);
+            });
+        });
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_db_execution);
+criterion_main!(benches);
diff --git a/crates/trie/db/src/state.rs b/crates/trie/db/src/state.rs
index ecd50a18f77..b59e6aa911b 100644
--- a/crates/trie/db/src/state.rs
+++ b/crates/trie/db/src/state.rs
@@ -19,6 +19,9 @@ use std::{
 };
 use tracing::{debug, instrument};
 
+#[cfg(feature = "parallel-from-reverts")]
+use rayon::iter::{IntoParallelIterator, ParallelIterator};
+
 /// Extends [`StateRoot`] with operations specific for working with a database transaction.
 pub trait DatabaseStateRoot<'a, TX>: Sized {
     /// Create a new [`StateRoot`] instance.
@@ -280,14 +283,37 @@ impl<TX: DbTx> DatabaseHashedPostState<TX> for HashedPostStateSorted {
             }
         }
 
-        // Sort storage slots and convert to HashedStorageSorted
-        let hashed_storages = storages
-            .into_iter()
-            .map(|(address, mut slots)| {
-                slots.sort_unstable_by_key(|(slot, _)| *slot);
-                (address, HashedStorageSorted { storage_slots: slots, wiped: false })
-            })
-            .collect();
+        // Threshold based on benchmark
+        const PARALLEL_THRESHOLD: usize = 2_500;
+
+        // Check Feature Flag AND Threshold
+        let use_parallel =
+            cfg!(feature = "parallel-from-reverts") && storages.len() >= PARALLEL_THRESHOLD;
+
+        let hashed_storages = if use_parallel {
+            #[cfg(feature = "parallel-from-reverts")]
+            {
+                storages
+                    .into_par_iter()
+                    .map(|(address, mut slots)| {
+                        slots.sort_unstable_by_key(|(slot, _)| *slot);
+                        (address, HashedStorageSorted { storage_slots: slots, wiped: false })
+                    })
+                    .collect()
+            }
+            #[cfg(not(feature = "parallel-from-reverts"))]
+            {
+                unreachable!()
+            }
+        } else {
+            storages
+                .into_iter()
+                .map(|(address, mut slots)| {
+                    slots.sort_unstable_by_key(|(slot, _)| *slot);
+                    (address, HashedStorageSorted { storage_slots: slots, wiped: false })
+                })
+                .collect()
+        };
 
         Ok(Self::new(accounts, hashed_storages))
     }