
Commit bb81e8d

feat: add benchmarking system using anvil dump files
1 parent: 089d5af

10 files changed: 631 additions & 70 deletions

.github/workflows/benchmarks.yml

Lines changed: 12 additions & 16 deletions
@@ -46,14 +46,6 @@ jobs:
         with:
           egress-policy: audit
 
-      - name: Free up disk space
-        run: |
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /opt/ghc
-          sudo rm -rf "/usr/local/share/boost"
-          sudo rm -rf /usr/local/lib/android
-          df -h
-
       - name: Checkout repository
         uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
 

@@ -66,6 +58,12 @@ jobs:
       - name: Install Bencher CLI
         uses: bencherdev/bencher@2f1532643adc0e69e52acaec936d227ff14da24f # v0.5.9
 
+      - name: Decompress benchmark state dumps
+        run: |
+          echo "Decompressing Anvil state dumps..."
+          gunzip -k benches/dumps/*.json.gz
+          ls -lh benches/dumps/
+
       - name: Run historic benchmarks and track with Bencher
         env:
           BENCHER_API_TOKEN: ${{ secrets.BENCHER_API_TOKEN }}

@@ -99,14 +97,6 @@ jobs:
         with:
          egress-policy: audit
 
-      - name: Free up disk space
-        run: |
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /opt/ghc
-          sudo rm -rf "/usr/local/share/boost"
-          sudo rm -rf /usr/local/lib/android
-          df -h
-
       - name: Checkout repository
         uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
 

@@ -119,6 +109,12 @@ jobs:
       - name: Install Bencher CLI
         uses: bencherdev/bencher@2f1532643adc0e69e52acaec936d227ff14da24f # v0.5.9
 
+      - name: Decompress benchmark state dumps
+        run: |
+          echo "Decompressing Anvil state dumps..."
+          gunzip -k benches/dumps/*.json.gz
+          ls -lh benches/dumps/
+
       - name: Run latest events benchmarks and track with Bencher
         env:
           BENCHER_API_TOKEN: ${{ secrets.BENCHER_API_TOKEN }}
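
Note: the workflow only decompresses the dumps; how the benchmark setup consumes the restored state is defined in the bench library, which is not shown in this diff. Purely as an illustrative, hypothetical sketch (assuming the setup boots a local node via alloy's node bindings and Anvil's own --load-state flag), it could look roughly like:

// Hypothetical sketch, not code from this commit.
use alloy::node_bindings::{Anvil, AnvilInstance};

/// Spawns a local Anvil node restored from a previously dumped state file.
fn spawn_anvil_from_state(state_json_path: &str) -> AnvilInstance {
    Anvil::new()
        .arg("--load-state")   // Anvil CLI flag for restoring a dumped state
        .arg(state_json_path)  // e.g. a decompressed benches/dumps/*.json file
        .spawn()
}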

.gitignore

Lines changed: 4 additions & 0 deletions
@@ -1,3 +1,7 @@
 /target
 /examples/**/target
 .DS_Store
+
+# Benchmark dumps - only commit compressed files
+benches/dumps/*.json
+!benches/dumps/*.metadata.json

Cargo.lock

Lines changed: 53 additions & 3 deletions
Some generated files are not rendered by default.

benches/Cargo.toml

Lines changed: 7 additions & 0 deletions
@@ -14,6 +14,9 @@ tokio.workspace = true
 tokio-stream.workspace = true
 futures.workspace = true
 anyhow.workspace = true
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+flate2 = "1.1"
 
 [dev-dependencies]
 criterion.workspace = true

@@ -28,3 +31,7 @@ harness = false
 [[bench]]
 name = "latest_events_scanning"
 harness = false
+
+[[bin]]
+name = "generate_dump"
+path = "src/bin/generate_dump.rs"
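
The new serde, serde_json, and flate2 dependencies, together with the generate_dump binary target, suggest the crate writes and reads gzipped JSON state dumps. The library code behind setup_from_dump and src/bin/generate_dump.rs is not part of this excerpt, so the following is only a minimal sketch of reading a *.json.gz dump with these crates; the helper name read_gzipped_dump is invented for illustration.

// Hypothetical sketch, not code from this commit: decode a gzipped JSON state dump.
use std::fs::File;
use std::io::BufReader;
use std::path::Path;

use anyhow::Result;
use flate2::read::GzDecoder;

fn read_gzipped_dump(path: &Path) -> Result<serde_json::Value> {
    let file = File::open(path)?;                       // e.g. dumps/state_100000.json.gz
    let decoder = GzDecoder::new(BufReader::new(file)); // streaming gzip decompression
    Ok(serde_json::from_reader(decoder)?)               // parse the decompressed JSON
}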

benches/benches/historic_scanning.rs

Lines changed: 52 additions & 28 deletions
@@ -1,35 +1,45 @@
 //! Benchmarks for historic scanning mode.
 //!
-//! Heavy load tests that measure the time to fetch all expected events.
+//! Heavy load tests that measure the time to fetch events from different block ranges.
+//! Uses pre-generated Anvil state dumps for fast, reproducible setup.
+//!
+//! Benchmarks three block ranges from a 100k event dump:
+//! - First 1/10 of blocks (~10k events)
+//! - First 1/2 of blocks (~50k events)
+//! - All blocks (100k events)
 
+use std::path::{Path, PathBuf};
 use std::sync::OnceLock;
 
 use anyhow::{Result, bail};
 use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
 use event_scanner::{EventFilter, EventScannerBuilder, Message};
-use event_scanner_benches::{
-    BenchConfig, BenchEnvironment, count_increased_signature, setup_environment,
-};
+use event_scanner_benches::{BenchEnvironment, count_increased_signature, setup_from_dump};
 use tokio_stream::StreamExt;
 
+/// Returns the path to the dump file, resolved from the crate's manifest directory.
+fn dump_path() -> PathBuf {
+    Path::new(env!("CARGO_MANIFEST_DIR")).join("dumps/state_100000.json.gz")
+}
+
 static RUNTIME: OnceLock<tokio::runtime::Runtime> = OnceLock::new();
 
 fn get_runtime() -> &'static tokio::runtime::Runtime {
     RUNTIME.get_or_init(|| tokio::runtime::Runtime::new().expect("failed to create tokio runtime"))
 }
 
-/// Runs a single historic scan.
+/// Runs a historic scan for a specific block range.
 ///
-/// This fetches ALL events from block 0 to latest.
-async fn run_historic_scan(env: &BenchEnvironment) -> Result<()> {
+/// Fetches events from block 0 to `to_block`.
+async fn run_historic_scan(env: &BenchEnvironment, to_block: u64) -> Result<()> {
     let filter = EventFilter::new()
         .contract_address(env.contract_address)
         .event(count_increased_signature());
 
     let mut scanner = EventScannerBuilder::historic()
         .max_block_range(100)
         .from_block(0)
-        .to_block(alloy::eips::BlockNumberOrTag::Latest)
+        .to_block(to_block)
         .connect(env.provider.clone())
         .await?;
 

@@ -58,26 +68,40 @@ fn historic_scanning_benchmark(c: &mut Criterion) {
 
     // Configure for heavy load tests
     group.warm_up_time(std::time::Duration::from_secs(5));
-    group.measurement_time(std::time::Duration::from_secs(120));
-
-    // Heavy load test: 100,000 events
-    // Also include smaller sizes for regression comparison
-    for event_count in [10_000, 50_000, 100_000] {
-        println!("Setting up environment with {event_count} events...");
-
-        // Setup environment once per event count (events are pre-generated)
-        let env: BenchEnvironment = rt.block_on(async {
-            let config = BenchConfig::new(event_count);
-            setup_environment(config).await.expect("failed to setup benchmark environment")
-        });
-
-        println!("Environment ready. Starting benchmark...");
-
-        group.throughput(Throughput::Elements(event_count as u64));
-
-        group.bench_with_input(BenchmarkId::new("events", event_count), &env, |b, env| {
-            b.to_async(&rt)
-                .iter(|| async { run_historic_scan(env).await.expect("historic scan failed") });
+    group.measurement_time(std::time::Duration::from_secs(140));
+
+    // Load environment from pre-generated dump (100k events)
+    println!("Loading benchmark environment from dump file...");
+    let env: BenchEnvironment = rt.block_on(async {
+        setup_from_dump(&dump_path()).await.expect("failed to load benchmark environment from dump")
+    });
+    println!(
+        "Environment ready: {} events across {} blocks at contract {}",
+        env.event_count, env.block_number, env.contract_address
+    );
+
+    // Calculate block ranges:
+    // - 1/10 of blocks: ~10k events
+    // - 1/2 of blocks: ~50k events
+    // - All blocks: 100k events
+    let total_blocks = env.block_number;
+    let block_ranges = [
+        (total_blocks / 10, "1/10 blocks (~10k events)"),
+        (total_blocks / 2, "1/2 blocks (~50k events)"),
+        (total_blocks, "all blocks (100k events)"),
+    ];
+
+    for (to_block, description) in block_ranges {
+        println!("Benchmarking historic scan: {description} (to block {to_block})...");
+
+        // Estimate events based on block ratio (events are roughly evenly distributed)
+        let estimated_events = (env.event_count as u64 * to_block) / total_blocks;
+        group.throughput(Throughput::Elements(estimated_events));
+
+        group.bench_with_input(BenchmarkId::new("blocks", to_block), &to_block, |b, &to_block| {
+            b.to_async(rt).iter(|| async {
+                run_historic_scan(&env, to_block).await.expect("historic scan failed");
+            });
         });
     }
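
As a quick sanity check on the throughput configuration: with the 100k-event dump, to_block = total_blocks / 10 yields an estimated_events of roughly 10,000, so Criterion reports elements-per-second against ~10k events for that range, matching the "1/10 blocks (~10k events)" label.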

benches/benches/latest_events_scanning.rs

Lines changed: 27 additions & 23 deletions
@@ -2,17 +2,27 @@
 //!
 //! Heavy load tests that measure the time to fetch the N most recent events
 //! from a large pool of pre-generated events.
+//! Uses pre-generated Anvil state dumps for fast, reproducible setup.
+//!
+//! Benchmarks fetching latest events from a 100k event pool:
+//! - 10,000 latest events
+//! - 50,000 latest events
+//! - 100,000 latest events (all)
 
+use std::path::{Path, PathBuf};
 use std::sync::OnceLock;
 
 use anyhow::{Result, bail, ensure};
 use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
 use event_scanner::{EventFilter, EventScannerBuilder, Message};
-use event_scanner_benches::{
-    BenchConfig, BenchEnvironment, count_increased_signature, setup_environment,
-};
+use event_scanner_benches::{BenchEnvironment, count_increased_signature, setup_from_dump};
 use tokio_stream::StreamExt;
 
+/// Returns the path to the dump file, resolved from the crate's manifest directory.
+fn dump_path() -> PathBuf {
+    Path::new(env!("CARGO_MANIFEST_DIR")).join("dumps/state_100000.json.gz")
+}
+
 static RUNTIME: OnceLock<tokio::runtime::Runtime> = OnceLock::new();
 
 fn get_runtime() -> &'static tokio::runtime::Runtime {

@@ -60,29 +70,23 @@ fn latest_events_scanning_benchmark(c: &mut Criterion) {
 
     // Configure for heavy load tests
     group.warm_up_time(std::time::Duration::from_secs(5));
-    group.measurement_time(std::time::Duration::from_secs(120));
-
-    // Generate a pool of events once
-    // We'll benchmark fetching different "latest N" counts from this pool
-    // Using 50K total
-    let total_events = 50_000;
-
-    println!("Setting up environment with {total_events} total events...");
+    group.measurement_time(std::time::Duration::from_secs(140));
 
+    // Load environment from pre-generated dump (100k events)
+    println!("Loading benchmark environment from dump file...");
     let env: BenchEnvironment = rt.block_on(async {
-        let config = BenchConfig::new(total_events);
-        setup_environment(config).await.expect("failed to setup benchmark environment")
+        setup_from_dump(&dump_path()).await.expect("failed to load benchmark environment from dump")
     });
+    println!(
+        "Environment ready: {} events across {} blocks at contract {}",
+        env.event_count, env.block_number, env.contract_address
+    );
 
-    println!("Environment ready. Starting benchmarks...");
-
-    // Benchmark fetching different "latest N" counts
-    // Trying to replicate realistic use cases:
-    // - 100: Quick recent activity check
-    // - 1,000: Moderate history lookup
+    // Benchmark fetching latest N events from the 100k event pool:
     // - 10,000: Substantial history fetch
-    // - 25,000: Heavy load retrieval
-    for latest_count in [100, 1_000, 10_000, 25_000] {
+    // - 50,000: Heavy load retrieval
+    // - 100,000: All events (full scan)
+    for latest_count in [10_000, 50_000, 100_000] {
         println!("Benchmarking latest {latest_count} events...");
 
         group.throughput(Throughput::Elements(latest_count as u64));

@@ -91,8 +95,8 @@ fn latest_events_scanning_benchmark(c: &mut Criterion) {
             BenchmarkId::new("latest", latest_count),
             &latest_count,
             |b, &count| {
-                b.to_async(&rt).iter(|| async {
-                    run_latest_events_scan(&env, count).await.expect("latest events scan failed")
+                b.to_async(rt).iter(|| async {
+                    run_latest_events_scan(&env, count).await.expect("latest events scan failed");
                 });
             },
         );

benches/dumps/state_100000.json.gz

24.2 MB
Binary file not shown.
