Skip to content

Commit 6bcbcd3

Browse files
committed
add derivation pipeline cache
1 parent c7c25fa commit 6bcbcd3

File tree

8 files changed

+155
-22
lines changed

8 files changed

+155
-22
lines changed

Cargo.lock

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ futures = { version = "0.3", default-features = false }
223223
lru = "0.13.0"
224224
metrics = "0.24.0"
225225
metrics-derive = "0.1"
226+
moka = "0.12.11"
226227
parking_lot = "0.12"
227228
rand = { version = "0.9" }
228229
rayon = "1.7"

crates/database/db/src/db.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,14 @@ impl DatabaseReadOperations for Database {
643643
)
644644
}
645645

646+
async fn get_max_block_data_hint_block_number(&self) -> Result<u64, DatabaseError> {
647+
metered!(
648+
DatabaseOperation::GetMaxBlockDataHintBlockNumber,
649+
self,
650+
tx(|tx| async move { tx.get_max_block_data_hint_block_number().await })
651+
)
652+
}
653+
646654
async fn get_l2_block_and_batch_info_by_hash(
647655
&self,
648656
block_hash: B256,

crates/database/db/src/metrics.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ pub(crate) enum DatabaseOperation {
6464
GetL2HeadBlockNumber,
6565
GetNL1Messages,
6666
GetNL2BlockDataHint,
67+
GetMaxBlockDataHintBlockNumber,
6768
GetL2BlockAndBatchInfoByHash,
6869
GetL2BlockInfoByNumber,
6970
GetLatestSafeL2Info,
@@ -132,6 +133,7 @@ impl DatabaseOperation {
132133
Self::GetL2HeadBlockNumber => "get_l2_head_block_number",
133134
Self::GetNL1Messages => "get_n_l1_messages",
134135
Self::GetNL2BlockDataHint => "get_n_l2_block_data_hint",
136+
Self::GetMaxBlockDataHintBlockNumber => "get_max_block_data_hint_block_number",
135137
Self::GetL2BlockAndBatchInfoByHash => "get_l2_block_and_batch_info_by_hash",
136138
Self::GetL2BlockInfoByNumber => "get_l2_block_info_by_number",
137139
Self::GetLatestSafeL2Info => "get_latest_safe_l2_info",

crates/database/db/src/operations.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -996,6 +996,9 @@ pub trait DatabaseReadOperations {
996996
n: usize,
997997
) -> Result<Vec<L1MessageEnvelope>, DatabaseError>;
998998

999+
/// Get the maximum block number for which we have stored extra data hints.
1000+
async fn get_max_block_data_hint_block_number(&self) -> Result<u64, DatabaseError>;
1001+
9991002
/// Get the extra data hints for `n` blocks, starting at the provided block number.
10001003
async fn get_n_l2_block_data_hint(
10011004
&self,
@@ -1374,6 +1377,18 @@ impl<T: ReadConnectionProvider + Sync + ?Sized> DatabaseReadOperations for T {
13741377
.collect())
13751378
}
13761379

1380+
async fn get_max_block_data_hint_block_number(&self) -> Result<u64, DatabaseError> {
1381+
Ok(models::block_data::Entity::find()
1382+
.select_only()
1383+
.column_as(models::block_data::Column::Number.max(), "max_number")
1384+
.into_tuple::<Option<i64>>()
1385+
.one(self.get_connection())
1386+
.await?
1387+
.flatten()
1388+
.map(|n| n as u64)
1389+
.unwrap_or(0))
1390+
}
1391+
13771392
async fn get_l2_block_and_batch_info_by_hash(
13781393
&self,
13791394
block_hash: B256,

crates/derivation-pipeline/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ scroll-db.workspace = true
2929
futures.workspace = true
3030
metrics.workspace = true
3131
metrics-derive.workspace = true
32+
moka = { workspace = true, features = ["future"] }
3233
tokio.workspace = true
3334
thiserror.workspace = true
3435
tracing.workspace = true
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
use moka::future::Cache;
2+
use scroll_alloy_rpc_types_engine::BlockDataHint;
3+
use scroll_db::{Database, DatabaseReadOperations};
4+
use std::{sync::Arc, time::Duration};
5+
6+
use crate::DerivationPipelineError;
7+
8+
/// The default size of the block data hint pre-fetch cache (number of entries).
9+
pub(crate) const DEFAULT_CACHE_SIZE: usize = 80_000;
10+
11+
/// The default number of block data hints to pre-fetch.
12+
pub(crate) const DEFAULT_PREFETCH_COUNT: usize = 60_000;
13+
14+
/// The default time-to-live (TTL) for cache entries.
15+
pub(crate) const DEFAULT_CACHE_TTL: Duration = Duration::from_secs(120 * 60); // 120 minutes
16+
17+
#[derive(Debug, Clone)]
18+
pub struct PreFetchCache {
19+
db: Arc<Database>,
20+
hint_cache: Cache<u64, BlockDataHint>,
21+
// TODO: Add a cache for batches.
22+
max_block_data_hint_block_number: u64,
23+
pre_fetch_count: usize,
24+
}
25+
26+
impl PreFetchCache {
27+
/// Creates a new block data hint pre-fetch cache with default settings.
28+
pub(crate) async fn new(
29+
db: Arc<Database>,
30+
size: usize,
31+
ttl: Duration,
32+
pre_fetch_count: usize,
33+
) -> Result<Self, DerivationPipelineError> {
34+
let max_block_data_hint_block_number = db.get_max_block_data_hint_block_number().await?;
35+
Ok(Self {
36+
db,
37+
hint_cache: Cache::builder().max_capacity(size as u64).time_to_live(ttl).build(),
38+
max_block_data_hint_block_number,
39+
pre_fetch_count,
40+
})
41+
}
42+
43+
/// Fetches the block data hint for the given block number, using the cache if possible.
44+
pub(crate) async fn get(
45+
&self,
46+
block_number: u64,
47+
) -> Result<Option<BlockDataHint>, DerivationPipelineError> {
48+
if block_number > self.max_block_data_hint_block_number {
49+
return Ok(None);
50+
}
51+
52+
if let Some(cached_hint) = self.hint_cache.get(&block_number).await {
53+
return Ok(Some(cached_hint));
54+
}
55+
56+
let hints = self.db.get_n_l2_block_data_hint(block_number, self.pre_fetch_count).await?;
57+
let requested = hints.first().cloned();
58+
for (idx, hint) in hints.into_iter().enumerate() {
59+
self.hint_cache.insert(block_number + idx as u64, hint.clone()).await;
60+
}
61+
62+
Ok(requested)
63+
}
64+
}

crates/derivation-pipeline/src/lib.rs

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ use scroll_codec::{decoding::payload::PayloadData, Codec, CodecError, DecodingEr
1414
use scroll_db::{Database, DatabaseReadOperations, L1MessageKey};
1515
use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
1616

17+
mod cache;
18+
use cache::{PreFetchCache, DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL, DEFAULT_PREFETCH_COUNT};
19+
1720
mod data_source;
1821

1922
mod error;
@@ -108,6 +111,8 @@ pub struct DerivationPipelineWorker<P> {
108111
futures: FuturesOrdered<DerivationPipelineFuture>,
109112
/// A reference to the database.
110113
database: Arc<Database>,
114+
/// A cache for pre-fetching derivation pipeline data from the database.
115+
cache: PreFetchCache,
111116
/// A L1 provider.
112117
l1_provider: P,
113118
/// The L1 message queue index at which the V2 L1 message queue was enabled.
@@ -118,22 +123,30 @@ pub struct DerivationPipelineWorker<P> {
118123

119124
impl<P> DerivationPipelineWorker<P> {
120125
/// Returns a new instance of the [`DerivationPipeline`].
121-
pub fn new(
126+
pub async fn new(
122127
l1_provider: P,
123128
database: Arc<Database>,
124129
l1_v2_message_queue_start_index: u64,
125130
batch_receiver: UnboundedReceiver<Arc<BatchDerivationRequest>>,
126131
result_sender: UnboundedSender<BatchDerivationResult>,
127-
) -> Self {
128-
Self {
132+
) -> Result<Self, DerivationPipelineError> {
133+
let cache = PreFetchCache::new(
134+
database.clone(),
135+
DEFAULT_CACHE_SIZE,
136+
DEFAULT_CACHE_TTL,
137+
DEFAULT_PREFETCH_COUNT,
138+
)
139+
.await?;
140+
Ok(Self {
129141
batch_receiver,
130142
result_sender,
131143
futures: FuturesOrdered::new(),
132144
database,
145+
cache,
133146
l1_provider,
134147
l1_v2_message_queue_start_index,
135148
metrics: DerivationPipelineMetrics::default(),
136-
}
149+
})
137150
}
138151
}
139152

@@ -158,7 +171,9 @@ where
158171
l1_v2_message_queue_start_index,
159172
batch_receiver,
160173
result_sender,
161-
);
174+
)
175+
.await
176+
.expect("Failed to create derivation pipeline worker");
162177

163178
worker.run().await;
164179
});
@@ -207,6 +222,7 @@ where
207222

208223
fn derivation_future(&self, request: Arc<BatchDerivationRequest>) -> DerivationPipelineFuture {
209224
let db = self.database.clone();
225+
let cache = self.cache.clone();
210226
let metrics = self.metrics.clone();
211227
let provider = self.l1_provider.clone();
212228
let l1_v2_message_queue_start_index = self.l1_v2_message_queue_start_index;
@@ -228,7 +244,7 @@ where
228244

229245
// derive the attributes and attach the corresponding batch info.
230246
let result =
231-
derive(batch, target_status, provider, db, l1_v2_message_queue_start_index)
247+
derive(batch, target_status, provider, cache, l1_v2_message_queue_start_index)
232248
.await
233249
.map_err(|err| (request.clone(), err))?;
234250

@@ -286,11 +302,11 @@ type DerivationPipelineFuture = Pin<
286302

287303
/// Returns a [`BatchDerivationResult`] from the [`BatchCommitData`] by deriving the payload
288304
/// attributes for each L2 block in the batch.
289-
pub async fn derive<L1P: L1Provider + Sync + Send, DB: DatabaseReadOperations>(
305+
pub async fn derive<L1P: L1Provider + Sync + Send>(
290306
batch: BatchCommitData,
291307
target_status: BatchStatus,
292308
l1_provider: L1P,
293-
db: DB,
309+
cache: PreFetchCache,
294310
l1_v2_message_queue_start_index: u64,
295311
) -> Result<BatchDerivationResult, DerivationPipelineError> {
296312
// fetch the blob then decode the input batch.
@@ -323,14 +339,7 @@ pub async fn derive<L1P: L1Provider + Sync + Send, DB: DatabaseReadOperations>(
323339
let blocks = decoded.data.into_l2_blocks();
324340
let mut attributes = Vec::with_capacity(blocks.len());
325341

326-
let start = blocks.first().map(|b| b.context.number);
327-
let block_data = if let Some(start) = start {
328-
db.get_n_l2_block_data_hint(start, blocks.len()).await?
329-
} else {
330-
vec![]
331-
};
332-
333-
for (i, mut block) in blocks.into_iter().enumerate() {
342+
for mut block in blocks {
334343
// query the appropriate amount of l1 messages.
335344
let mut txs = Vec::with_capacity(block.context.num_transactions as usize);
336345
for _ in 0..block.context.num_l1_messages {
@@ -369,7 +378,10 @@ pub async fn derive<L1P: L1Provider + Sync + Send, DB: DatabaseReadOperations>(
369378
},
370379
transactions: Some(txs),
371380
no_tx_pool: true,
372-
block_data_hint: block_data.get(i).cloned().unwrap_or_else(BlockDataHint::none),
381+
block_data_hint: cache
382+
.get(block.context.number)
383+
.await?
384+
.unwrap_or_else(BlockDataHint::none),
373385
gas_limit: Some(block.context.gas_limit),
374386
},
375387
};
@@ -446,7 +458,7 @@ async fn iter_l1_messages_from_payload<L1P: L1Provider>(
446458
#[cfg(test)]
447459
mod tests {
448460
use super::*;
449-
use std::sync::Arc;
461+
use std::{sync::Arc, time::Duration};
450462

451463
use alloy_eips::Decodable2718;
452464
use alloy_primitives::{address, b256, bytes, U256};
@@ -643,10 +655,12 @@ mod tests {
643655
for message in l1_messages {
644656
db.insert_l1_message(message).await?;
645657
}
658+
let cache = PreFetchCache::new(db.clone(), 100, Duration::from_secs(60), 10).await?;
646659

647660
let l1_provider = MockL1Provider { db: db.clone(), blobs: HashMap::new() };
648661

649-
let result = derive(batch_data, BatchStatus::Committed, l1_provider, db, u64::MAX).await?;
662+
let result =
663+
derive(batch_data, BatchStatus::Committed, l1_provider, cache, u64::MAX).await?;
650664
let attribute = result
651665
.attributes
652666
.iter()
@@ -745,10 +759,11 @@ mod tests {
745759
}
746760

747761
let l1_provider = MockL1Provider { db: db.clone(), blobs: HashMap::new() };
762+
let cache = PreFetchCache::new(db.clone(), 100, Duration::from_secs(60), 10).await?;
748763

749764
// derive attributes and extract l1 messages.
750765
let attributes =
751-
derive(batch_data, BatchStatus::Committed, l1_provider, db, u64::MAX).await?;
766+
derive(batch_data, BatchStatus::Committed, l1_provider, cache, u64::MAX).await?;
752767
let derived_l1_messages: Vec<_> = attributes
753768
.attributes
754769
.into_iter()
@@ -800,10 +815,11 @@ mod tests {
800815
}
801816

802817
let l1_provider = MockL1Provider { db: db.clone(), blobs: HashMap::new() };
818+
let cache = PreFetchCache::new(db.clone(), 100, Duration::from_secs(60), 10).await?;
803819

804820
// derive attributes and extract l1 messages.
805821
let attributes =
806-
derive(batch_data, BatchStatus::Committed, l1_provider, db, u64::MAX).await?;
822+
derive(batch_data, BatchStatus::Committed, l1_provider, cache, u64::MAX).await?;
807823
let derived_l1_messages: Vec<_> = attributes
808824
.attributes
809825
.into_iter()
@@ -916,8 +932,9 @@ mod tests {
916932
blob_path
917933
)]),
918934
};
935+
let cache = PreFetchCache::new(db.clone(), 100, Duration::from_secs(60), 10).await?;
919936

920-
let attributes = derive(batch_data, BatchStatus::Committed, l1_provider, db, u64::MAX).await?;
937+
let attributes = derive(batch_data, BatchStatus::Committed, l1_provider, cache, u64::MAX).await?;
921938

922939
let attribute = attributes.attributes.last().unwrap();
923940
let expected = ScrollPayloadAttributes {

0 commit comments

Comments
 (0)