diff --git a/crates/blockchain/README.md b/crates/blockchain/README.md new file mode 100644 index 00000000000..3d90bcd3396 --- /dev/null +++ b/crates/blockchain/README.md @@ -0,0 +1,30 @@ +# ethrex-blockchain + +Core blockchain logic for the ethrex Ethereum client. + +For detailed API documentation, see the rustdocs: +```bash +cargo doc --package ethrex-blockchain --open +``` + +## Quick Start + +```rust +use ethrex_blockchain::Blockchain; + +let blockchain = Blockchain::new(store, BlockchainOptions::default()); + +// Add a block +blockchain.add_block(&block)?; + +// Add transaction to mempool +blockchain.add_transaction_to_mempool(tx).await?; +``` + +## Features + +- `metrics`: Enable Prometheus metrics collection + +## Notes + +ethrex is a post-merge client and does not support pre-merge (PoW) forks. diff --git a/crates/blockchain/blockchain.rs b/crates/blockchain/blockchain.rs index 593ca6fb5be..6268fe376be 100644 --- a/crates/blockchain/blockchain.rs +++ b/crates/blockchain/blockchain.rs @@ -1,3 +1,47 @@ +//! # ethrex Blockchain +//! +//! Core blockchain logic for the ethrex Ethereum client. +//! +//! ## Overview +//! +//! This module implements the blockchain layer, which is responsible for: +//! - Block validation and execution +//! - State management and transitions +//! - Fork choice rule implementation +//! - Transaction mempool management +//! - Payload building for block production +//! +//! ## Key Components +//! +//! - [`Blockchain`]: Main interface for blockchain operations +//! - [`Mempool`]: Transaction pool for pending transactions +//! - [`fork_choice`]: Fork choice rule implementation +//! - [`payload`]: Block payload building for consensus +//! +//! ## Block Execution Flow +//! +//! ```text +//! 1. Receive block from consensus/P2P +//! 2. Validate block header (parent, timestamp, gas limit, etc.) +//! 3. Execute transactions in EVM +//! 4. Verify state root matches header +//! 5. Store block and update canonical chain +//! ``` +//! +//! 
## Usage +//! +//! ```ignore +//! use ethrex_blockchain::Blockchain; +//! +//! let blockchain = Blockchain::new(store, BlockchainOptions::default()); +//! +//! // Add a block +//! blockchain.add_block(&block)?; +//! +//! // Add transaction to mempool +//! blockchain.add_transaction_to_mempool(tx).await?; +//! ``` + pub mod constants; pub mod error; pub mod fork_choice; @@ -67,38 +111,79 @@ type StoreUpdatesMap = FxHashMap, FxHashMap>, } +/// Core blockchain implementation for block validation and execution. +/// +/// The `Blockchain` struct is the main entry point for all blockchain operations: +/// - Adding and validating blocks +/// - Managing the transaction mempool +/// - Building payloads for block production +/// - Handling fork choice updates +/// +/// # Thread Safety +/// +/// `Blockchain` uses interior mutability for thread-safe access to shared state. +/// The mempool and payload storage are protected by appropriate synchronization primitives. +/// +/// # Example +/// +/// ```ignore +/// let blockchain = Blockchain::new(store, BlockchainOptions::default()); +/// +/// // Validate and add a block +/// blockchain.add_block(&block)?; +/// +/// // Check sync status +/// if blockchain.is_synced() { +/// // Process transactions from mempool +/// } +/// ``` #[derive(Debug)] pub struct Blockchain { + /// Underlying storage for blocks and state. storage: Store, + /// Transaction mempool for pending transactions. pub mempool: Mempool, - /// Whether the node's chain is in or out of sync with the current chain - /// This will be set to true once the initial sync has taken place and wont be set to false after - /// This does not reflect whether there is an ongoing sync process + /// Whether the node has completed initial sync. + /// + /// Set to true after initial sync completes, never reset to false. + /// Does not reflect whether an ongoing sync is in progress. is_synced: AtomicBool, + /// Configuration options for blockchain behavior. 
pub options: BlockchainOptions, - /// Mapping from a payload id to either a complete payload or a payload build task - /// We need to keep completed payloads around in case consensus requests them twice + /// Cache of recently built payloads. + /// + /// Maps payload IDs to either completed payloads or in-progress build tasks. + /// Kept around in case consensus requests the same payload twice. pub payloads: Arc>>, } +/// Configuration options for the blockchain. #[derive(Debug, Clone)] pub struct BlockchainOptions { + /// Maximum number of transactions in the mempool. pub max_mempool_size: usize, - /// Whether performance logs should be emitted + /// Whether to emit performance logging. pub perf_logs_enabled: bool, + /// Blockchain type (L1 or L2). pub r#type: BlockchainType, } diff --git a/crates/networking/p2p/README.md b/crates/networking/p2p/README.md new file mode 100644 index 00000000000..ffcc8372e70 --- /dev/null +++ b/crates/networking/p2p/README.md @@ -0,0 +1,20 @@ +# ethrex-p2p + +Peer-to-peer networking layer for the ethrex Ethereum client. + +For detailed API documentation, see the rustdocs: +```bash +cargo doc --package ethrex-p2p --open +``` + +## Protocols + +- **DiscV4**: Node discovery +- **RLPx**: Encrypted transport +- **eth/68**: Block and transaction propagation +- **snap/1**: Snap sync for state synchronization + +## Features + +- `experimental-discv5`: Enable discv5 node discovery (experimental) +- `sync-test`: Testing utilities for sync diff --git a/crates/networking/p2p/p2p.rs b/crates/networking/p2p/p2p.rs index c2ca66b6592..80a64bc485c 100644 --- a/crates/networking/p2p/p2p.rs +++ b/crates/networking/p2p/p2p.rs @@ -1,3 +1,71 @@ +//! # ethrex P2P Networking +//! +//! Peer-to-peer networking layer for the ethrex Ethereum client. +//! +//! ## Overview +//! +//! This crate implements the Ethereum P2P networking stack: +//! - **Discovery**: Node discovery using discv4 (and experimental discv5) +//! 
- **RLPx**: Encrypted transport protocol for peer communication +//! - **eth Protocol**: Block and transaction propagation +//! - **snap Protocol**: Fast state synchronization +//! +//! ## Architecture +//! +//! ```text +//! ┌─────────────────────────────────────────────────────────────┐ +//! │ Network Layer │ +//! │ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │ +//! │ │ discv4 │ │ RLPx │ │ Peer Handler │ │ +//! │ │ (Discovery) │ │ (Transport) │ │ (Messages) │ │ +//! │ └─────────────┘ └─────────────┘ └─────────────────────┘ │ +//! └─────────────────────────────────────────────────────────────┘ +//! │ +//! ┌──────────────────┼──────────────────┐ +//! ▼ ▼ ▼ +//! ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +//! │ Sync Manager │ │ TX Broadcaster │ │ Snap Sync │ +//! └─────────────────┘ └─────────────────┘ └─────────────────┘ +//! ``` +//! +//! ## Key Components +//! +//! - [`network`]: Network initialization and peer management +//! - [`peer_handler`]: Message handling for connected peers +//! - [`sync_manager`]: Block synchronization coordination +//! - [`sync`]: Full and snap sync implementations +//! - [`tx_broadcaster`]: Transaction pool broadcasting +//! - [`discv4`]: Node discovery protocol v4 +//! - [`rlpx`]: RLPx encrypted transport +//! +//! ## Usage +//! +//! ```ignore +//! use ethrex_p2p::{start_network, SyncManager}; +//! +//! // Start the P2P network +//! let (sync_manager, peer_handler) = start_network( +//! udp_addr, +//! tcp_addr, +//! bootnodes, +//! signer, +//! storage, +//! blockchain, +//! ).await?; +//! +//! // Start synchronization +//! sync_manager.start_sync().await?; +//! ``` +//! +//! ## Protocols +//! +//! - **eth/68**: Block and transaction exchange +//! - **snap/1**: State snapshot synchronization +//! +//! ## Features +//! +//! 
- `experimental-discv5`: Enable discv5 node discovery (experimental) + pub mod discv4; #[cfg(feature = "experimental-discv5")] pub mod discv5; diff --git a/crates/networking/rpc/README.md b/crates/networking/rpc/README.md new file mode 100644 index 00000000000..b13ec63c6f8 --- /dev/null +++ b/crates/networking/rpc/README.md @@ -0,0 +1,18 @@ +# ethrex-rpc + +JSON-RPC API implementation for the ethrex Ethereum client. + +For detailed API documentation, see the rustdocs: +```bash +cargo doc --package ethrex-rpc --open +``` + +## Supported Namespaces + +- `eth_*`: Standard Ethereum methods +- `engine_*`: Consensus client communication +- `debug_*`: Debugging and tracing +- `net_*`: Network information +- `admin_*`: Node administration +- `web3_*`: Web3 utilities +- `txpool_*`: Transaction pool inspection diff --git a/crates/networking/rpc/eth/gas_price.rs b/crates/networking/rpc/eth/gas_price.rs index a79af5229ce..c293b76840d 100644 --- a/crates/networking/rpc/eth/gas_price.rs +++ b/crates/networking/rpc/eth/gas_price.rs @@ -1,13 +1,29 @@ +//! Gas price estimation for `eth_gasPrice` RPC method. +//! +//! This module implements the gas price oracle that estimates a reasonable +//! gas price based on recent block history and network conditions. + use crate::rpc::{RpcApiContext, RpcHandler}; use crate::utils::RpcErr; use ethrex_blockchain::BlockchainType; use serde_json::Value; -// TODO: This does not need a struct, -// but I'm leaving it like this for consistency -// with the other RPC endpoints. -// The handle function could simply be -// a function called 'estimate'. +/// Handler for the `eth_gasPrice` RPC method. +/// +/// Returns the current gas price in wei as a hexadecimal string. +/// The price is calculated as: `base_fee + estimated_priority_fee + operator_fee (L2 only)`. +/// +/// # Algorithm +/// +/// 1. Gets the base fee from the latest block header +/// 2. Estimates a reasonable priority fee (gas tip) by analyzing recent transactions +/// 3. 
For L2 nodes, adds the operator fee if configured +/// +/// # Example Response +/// +/// ```json +/// "0x3b9aca00" // 1 Gwei in hexadecimal +/// ``` #[derive(Debug, Clone)] pub struct GasPrice; diff --git a/crates/networking/rpc/lib.rs b/crates/networking/rpc/lib.rs index 7e8ccb221ed..2b7f0246673 100644 --- a/crates/networking/rpc/lib.rs +++ b/crates/networking/rpc/lib.rs @@ -1,3 +1,61 @@ +//! # ethrex RPC +//! +//! This crate implements the Ethereum JSON-RPC API for the ethrex node. +//! +//! ## Overview +//! +//! The RPC server provides three interfaces: +//! - **HTTP API**: Public JSON-RPC endpoint for client requests (`eth_*`, `debug_*`, `net_*`, etc.) +//! - **WebSocket API**: Optional WebSocket endpoint for subscriptions and real-time updates +//! - **Auth RPC API**: Authenticated endpoint for consensus client communication (`engine_*` methods) +//! +//! ## Supported Namespaces +//! +//! - `eth`: Standard Ethereum methods (blocks, transactions, accounts, gas estimation) +//! - `engine`: Consensus layer methods for block building and fork choice +//! - `debug`: Debugging methods (raw blocks, execution witnesses, tracing) +//! - `net`: Network information methods +//! - `admin`: Node administration methods +//! - `web3`: Web3 utility methods +//! - `txpool`: Transaction pool inspection methods +//! +//! ## Usage +//! +//! ```ignore +//! use ethrex_rpc::{start_api, RpcApiContext}; +//! +//! // Start the RPC server +//! start_api( +//! http_addr, +//! ws_addr, +//! authrpc_addr, +//! storage, +//! blockchain, +//! jwt_secret, +//! // ... other parameters +//! ).await?; +//! ``` +//! +//! ## Implementing Custom RPC Handlers +//! +//! Implement the [`RpcHandler`] trait to create custom RPC endpoints: +//! +//! ```ignore +//! use ethrex_rpc::{RpcHandler, RpcApiContext, RpcErr}; +//! +//! struct MyHandler { /* fields */ } +//! +//! impl RpcHandler for MyHandler { +//! fn parse(params: &Option>) -> Result { +//! // Parse JSON-RPC parameters +//! } +//! +//! 
async fn handle(&self, context: RpcApiContext) -> Result { +//! // Handle the request +//! } +//! } +//! ``` + // This is added because otherwise some tests would fail due to reaching the recursion limit #![recursion_limit = "400"] diff --git a/crates/networking/rpc/rpc.rs b/crates/networking/rpc/rpc.rs index 5529093de69..9d7daff07f6 100644 --- a/crates/networking/rpc/rpc.rs +++ b/crates/networking/rpc/rpc.rs @@ -155,42 +155,107 @@ pub async fn handle_get_heap_flamegraph() -> Result<(), (StatusCode, String)> { )) } +/// Wrapper for JSON-RPC requests that can be either single or batched. +/// +/// According to the JSON-RPC 2.0 specification, clients may send either a single +/// request object or an array of request objects (batch request). #[derive(Deserialize)] #[serde(untagged)] pub enum RpcRequestWrapper { + /// A single JSON-RPC request. Single(RpcRequest), + /// A batch of JSON-RPC requests to be processed together. Multiple(Vec), } +/// Shared context passed to all RPC request handlers. +/// +/// This struct contains all the dependencies that RPC handlers need to process requests, +/// including storage access, blockchain state, P2P networking, and configuration. +/// +/// The context is cloned for each request, with most fields being cheap `Arc` references. #[derive(Debug, Clone)] pub struct RpcApiContext { + /// Database storage for blocks, transactions, and state. pub storage: Store, + /// Blockchain instance for block validation and execution. pub blockchain: Arc, + /// Active log filters for `eth_newFilter` / `eth_getFilterChanges` endpoints. pub active_filters: ActiveFilters, - // L2 nodes don't need to initialize the syncer + /// Sync manager for coordinating block synchronization (None for L2 nodes). pub syncer: Option>, - // L2 nodes don't need to initialize the peer handler + /// Peer handler for P2P network operations (None for L2 nodes). pub peer_handler: Option, + /// Node identity and configuration data. 
pub node_data: NodeData, + /// Gas tip estimator for `eth_gasPrice` and `eth_maxPriorityFeePerGas`. pub gas_tip_estimator: Arc>, + /// Handler for dynamically changing log filter levels via `admin_setLogLevel`. pub log_filter_handler: Option>, + /// Maximum gas limit for blocks (used in payload building). pub gas_ceil: u64, + /// Channel for sending blocks to the block executor worker thread. pub block_worker_channel: UnboundedSender<(oneshot::Sender>, Block)>, } +/// Node identity and configuration information. +/// +/// Contains the node's cryptographic identity, network endpoints, and metadata +/// used for P2P discovery and RPC responses. #[derive(Debug, Clone)] pub struct NodeData { + /// JWT secret for authenticating Engine API requests from consensus clients. pub jwt_secret: Bytes, + /// Local P2P node identity (public key and address). pub local_p2p_node: Node, + /// ENR (Ethereum Node Record) for node discovery. pub local_node_record: NodeRecord, + /// Client version string (e.g., "ethrex/0.1.0"). pub client_version: String, + /// Extra data included in mined blocks. pub extra_data: Bytes, } +/// Trait for implementing JSON-RPC method handlers. +/// +/// Each RPC method (e.g., `eth_getBalance`, `engine_newPayloadV3`) is implemented +/// as a struct that implements this trait. The trait provides a standard pattern +/// for parsing parameters and handling requests. 
+/// +/// # Example +/// +/// ```ignore +/// struct GetBalanceRequest { +/// address: Address, +/// block: BlockId, +/// } +/// +/// impl RpcHandler for GetBalanceRequest { +/// fn parse(params: &Option<Vec<Value>>) -> Result<Self, RpcErr> { +/// let params = params.as_ref().ok_or(RpcErr::MissingParam("params"))?; +/// Ok(Self { +/// address: serde_json::from_value(params[0].clone())?, +/// block: serde_json::from_value(params[1].clone())?, +/// }) +/// } +/// +/// async fn handle(&self, context: RpcApiContext) -> Result<Value, RpcErr> { +/// let balance = context.storage.get_balance(self.address, self.block)?; +/// Ok(serde_json::to_value(balance)?) +/// } +/// } +/// ``` #[allow(async_fn_in_trait)] pub trait RpcHandler: Sized { + /// Parse JSON-RPC parameters into the handler struct. + /// + /// Returns an error if required parameters are missing or have invalid types. fn parse(params: &Option<Vec<Value>>) -> Result<Self, RpcErr>; + /// Entry point for handling an RPC request. + /// + /// This method parses the request, records metrics, and delegates to `handle()`. + /// Most implementations should not override this method. async fn call(req: &RpcRequest, context: RpcApiContext) -> Result<Value, RpcErr> { let request = Self::parse(&req.params)?; let namespace = match req.namespace() { @@ -216,6 +281,9 @@ pub trait RpcHandler: Sized { result } + /// Handle the RPC request and return a JSON response. + /// + /// This is where the actual business logic for the RPC method lives. async fn handle(&self, context: RpcApiContext) -> Result<Value, RpcErr>; } @@ -239,6 +307,11 @@ fn get_error_kind(err: &RpcErr) -> &'static str { } } +/// Duration after which inactive filters are cleaned up. +/// +/// Filters created via `eth_newFilter` are automatically removed if not +/// accessed within this duration. In tests, this is set to 1 second for +/// faster test execution. 
pub const FILTER_DURATION: Duration = { if cfg!(test) { Duration::from_secs(1) @@ -247,6 +320,21 @@ pub const FILTER_DURATION: Duration = { } }; +/// Spawns a dedicated thread for sequential block execution. +/// +/// Blocks received from the consensus client via `engine_newPayload` are sent +/// to this worker thread for execution. This ensures blocks are processed +/// sequentially and prevents the async runtime from being blocked by CPU-intensive +/// block execution. +/// +/// # Returns +/// +/// An unbounded channel sender for submitting blocks. Each submission includes +/// a oneshot channel for receiving the execution result. +/// +/// # Panics +/// +/// Panics if the worker thread cannot be spawned. pub fn start_block_executor( blockchain: Arc, ) -> UnboundedSender<(oneshot::Sender>, Block)> { @@ -265,6 +353,43 @@ pub fn start_block_executor( block_worker_channel } +/// Starts the JSON-RPC API servers. +/// +/// This function initializes and runs three server endpoints: +/// +/// 1. **HTTP Server** (`http_addr`): Public JSON-RPC endpoint for standard Ethereum +/// methods (`eth_*`, `debug_*`, `net_*`, `admin_*`, `web3_*`, `txpool_*`). +/// +/// 2. **WebSocket Server** (`ws_addr`): Optional WebSocket endpoint for the same +/// methods as HTTP, enabling persistent connections. +/// +/// 3. **Auth RPC Server** (`authrpc_addr`): JWT-authenticated endpoint for Engine API +/// methods (`engine_*`) used by consensus clients. 
+/// +/// # Arguments +/// +/// * `http_addr` - Socket address for the HTTP server (e.g., `127.0.0.1:8545`) +/// * `ws_addr` - Optional socket address for WebSocket server +/// * `authrpc_addr` - Socket address for authenticated Engine API (e.g., `127.0.0.1:8551`) +/// * `storage` - Database storage instance +/// * `blockchain` - Blockchain instance for block operations +/// * `jwt_secret` - JWT secret for Engine API authentication +/// * `local_p2p_node` - Local node identity for P2P networking +/// * `local_node_record` - ENR for node discovery +/// * `syncer` - Sync manager for block synchronization +/// * `peer_handler` - Handler for P2P peer operations +/// * `client_version` - Client version string for `web3_clientVersion` +/// * `log_filter_handler` - Optional handler for dynamic log level changes +/// * `gas_ceil` - Maximum gas limit for payload building +/// * `extra_data` - Extra data to include in mined blocks +/// +/// # Errors +/// +/// Returns an error if any server fails to bind to its address. +/// +/// # Shutdown +/// +/// All servers shut down gracefully on SIGINT (Ctrl+C). #[allow(clippy::too_many_arguments)] pub async fn start_api( http_addr: SocketAddr, @@ -397,6 +522,9 @@ pub async fn start_api( Ok(()) } +/// Returns a future that completes when SIGINT (Ctrl+C) is received. +/// +/// Used to implement graceful shutdown for all RPC servers. pub async fn shutdown_signal() { tokio::signal::ctrl_c() .await @@ -511,6 +639,15 @@ pub async fn map_authrpc_requests( } } +/// Routes `eth_*` namespace requests to their handlers. 
+/// +/// Handles all standard Ethereum JSON-RPC methods including: +/// - Account queries: `eth_getBalance`, `eth_getCode`, `eth_getStorageAt`, `eth_getTransactionCount` +/// - Block queries: `eth_getBlockByNumber`, `eth_getBlockByHash`, `eth_blockNumber` +/// - Transaction operations: `eth_sendRawTransaction`, `eth_getTransactionByHash`, `eth_getTransactionReceipt` +/// - Gas estimation: `eth_estimateGas`, `eth_gasPrice`, `eth_maxPriorityFeePerGas`, `eth_feeHistory` +/// - Filters: `eth_newFilter`, `eth_getFilterChanges`, `eth_uninstallFilter`, `eth_getLogs` +/// - Misc: `eth_chainId`, `eth_syncing`, `eth_createAccessList`, `eth_getProof` pub async fn map_eth_requests(req: &RpcRequest, context: RpcApiContext) -> Result { match req.method.as_str() { "eth_chainId" => ChainId::call(req, context).await, @@ -563,6 +700,12 @@ pub async fn map_eth_requests(req: &RpcRequest, context: RpcApiContext) -> Resul } } +/// Routes `debug_*` namespace requests to their handlers. +/// +/// Handles debugging and introspection methods: +/// - Raw data: `debug_getRawHeader`, `debug_getRawBlock`, `debug_getRawTransaction`, `debug_getRawReceipts` +/// - Execution witness: `debug_executionWitness` (for stateless validation) +/// - Tracing: `debug_traceTransaction`, `debug_traceBlockByNumber` pub async fn map_debug_requests(req: &RpcRequest, context: RpcApiContext) -> Result { match req.method.as_str() { "debug_getRawHeader" => GetRawHeaderRequest::call(req, context).await, @@ -576,6 +719,18 @@ pub async fn map_debug_requests(req: &RpcRequest, context: RpcApiContext) -> Res } } +/// Routes `engine_*` namespace requests to their handlers. +/// +/// These are Engine API methods used by consensus clients (e.g., Lighthouse, Prysm) +/// to communicate with the execution layer. All methods require JWT authentication. 
+/// +/// Handles: +/// - Fork choice: `engine_forkchoiceUpdatedV1/V2/V3` +/// - Payload submission: `engine_newPayloadV1/V2/V3/V4` +/// - Payload retrieval: `engine_getPayloadV1/V2/V3/V4/V5` +/// - Payload bodies: `engine_getPayloadBodiesByHashV1`, `engine_getPayloadBodiesByRangeV1` +/// - Blob retrieval: `engine_getBlobsV1/V2/V3` +/// - Capabilities: `engine_exchangeCapabilities`, `engine_exchangeTransitionConfigurationV1` pub async fn map_engine_requests( req: &RpcRequest, context: RpcApiContext, @@ -647,6 +802,19 @@ pub fn map_mempool_requests(req: &RpcRequest, contex: RpcApiContext) -> Result(id: RpcRequestId, res: Result) -> Result where E: Into, diff --git a/crates/networking/rpc/utils.rs b/crates/networking/rpc/utils.rs index 726a03c7cc1..860412500b3 100644 --- a/crates/networking/rpc/utils.rs +++ b/crates/networking/rpc/utils.rs @@ -1,3 +1,11 @@ +//! Utility types and error handling for JSON-RPC. +//! +//! This module provides common types used across all RPC handlers: +//! - [`RpcErr`]: Error type for RPC failures with proper JSON-RPC error codes +//! - [`RpcRequest`]: Parsed JSON-RPC request +//! - [`RpcNamespace`]: RPC method namespace (eth, engine, debug, etc.) +//! - Response types for success and error cases + use ethrex_common::U256; use ethrex_storage::error::StoreError; use ethrex_vm::EvmError; @@ -7,6 +15,15 @@ use serde_json::Value; use crate::{authentication::AuthenticationError, clients::EthClientError}; use ethrex_blockchain::error::MempoolError; +/// Error type for JSON-RPC method failures. 
+/// +/// Each variant maps to a specific JSON-RPC error code when serialized: +/// - `-32601`: Method not found +/// - `-32602`: Invalid params +/// - `-32603`: Internal error +/// - `-32000`: Generic server error +/// - `-38001` to `-38005`: Engine API specific errors +/// - `3`: Execution reverted/halted #[derive(Debug, thiserror::Error)] pub enum RpcErr { #[error("Method not found: {0}")] @@ -167,28 +184,61 @@ impl From for RpcErr { } } +/// JSON-RPC method namespace. +/// +/// Methods are namespaced by prefix (e.g., `eth_getBalance` is in the `Eth` namespace). +/// Different namespaces may have different authentication requirements. pub enum RpcNamespace { + /// Engine API methods for consensus client communication (requires JWT auth). Engine, + /// Standard Ethereum methods for querying state and sending transactions. Eth, + /// Node administration methods. Admin, + /// Debugging and tracing methods. Debug, + /// Web3 utility methods. Web3, + /// Network information methods. Net, + /// Transaction pool inspection methods (exposed as `txpool_*`). Mempool, } +/// JSON-RPC request identifier. +/// +/// Per the JSON-RPC 2.0 spec, request IDs can be either numbers or strings. +/// The same ID must be returned in the response. #[derive(Debug, Serialize, Deserialize)] #[serde(untagged)] pub enum RpcRequestId { + /// Numeric request ID. Number(u64), + /// String request ID. String(String), } +/// A parsed JSON-RPC 2.0 request. +/// +/// # Example +/// +/// ```json +/// { +/// "jsonrpc": "2.0", +/// "id": 1, +/// "method": "eth_getBalance", +/// "params": ["0x...", "latest"] +/// } +/// ``` #[derive(Serialize, Deserialize, Debug)] pub struct RpcRequest { + /// Request identifier, echoed back in the response. pub id: RpcRequestId, + /// JSON-RPC version, must be "2.0". pub jsonrpc: String, + /// Method name (e.g., "eth_getBalance"). pub method: String, + /// Optional array of method parameters. 
pub params: Option>, } @@ -236,25 +286,40 @@ impl Default for RpcRequest { } } +/// Error metadata for JSON-RPC error responses. +/// +/// Contains the error code, message, and optional additional data. +/// Error codes follow the JSON-RPC 2.0 and Ethereum conventions. #[derive(Serialize, Deserialize, Debug)] pub struct RpcErrorMetadata { + /// Numeric error code (negative for standard errors). pub code: i32, + /// Optional additional error data (e.g., revert reason). #[serde(skip_serializing_if = "Option::is_none")] pub data: Option, + /// Human-readable error message. pub message: String, } +/// A successful JSON-RPC 2.0 response. #[derive(Serialize, Deserialize, Debug)] pub struct RpcSuccessResponse { + /// Request identifier from the original request. pub id: RpcRequestId, + /// JSON-RPC version, always "2.0". pub jsonrpc: String, + /// The result value returned by the method. pub result: Value, } +/// An error JSON-RPC 2.0 response. #[derive(Serialize, Deserialize, Debug)] pub struct RpcErrorResponse { + /// Request identifier from the original request. pub id: RpcRequestId, + /// JSON-RPC version, always "2.0". pub jsonrpc: String, + /// Error details including code and message. pub error: RpcErrorMetadata, } diff --git a/crates/storage/README.md b/crates/storage/README.md new file mode 100644 index 00000000000..88409a38748 --- /dev/null +++ b/crates/storage/README.md @@ -0,0 +1,27 @@ +# ethrex-storage + +Persistent storage layer for the ethrex Ethereum client. 
+ +For detailed API documentation, see the rustdocs: +```bash +cargo doc --package ethrex-storage --open +``` + +## Quick Start + +```rust +use ethrex_storage::{Store, EngineType}; + +// Create with RocksDB backend +let store = Store::new("./data", EngineType::RocksDB)?; + +// Add a block +store.add_block(block).await?; + +// Query account +let info = store.get_account_info(block_number, address)?; +``` + +## Features + +- `rocksdb`: Enable RocksDB backend for persistent storage (default is in-memory) diff --git a/crates/storage/lib.rs b/crates/storage/lib.rs index c5378ab5f61..ee45d5b41a0 100644 --- a/crates/storage/lib.rs +++ b/crates/storage/lib.rs @@ -1,4 +1,69 @@ -// New unified storage interface +//! # ethrex Storage +//! +//! This crate provides persistent storage for the ethrex Ethereum client. +//! +//! ## Overview +//! +//! The storage layer handles: +//! - Block storage (headers, bodies, receipts) +//! - State storage (accounts, code, storage slots) +//! - Merkle Patricia Trie management +//! - Transaction indexing +//! - Chain configuration +//! +//! ## Architecture +//! +//! ```text +//! ┌─────────────────────────────────────────────────┐ +//! │ Store │ +//! │ (High-level API for blockchain operations) │ +//! └─────────────────────────────────────────────────┘ +//! │ +//! ┌────────────┴────────────┐ +//! ▼ ▼ +//! ┌─────────────────┐ ┌─────────────────┐ +//! │ InMemoryBackend │ │ RocksDBBackend │ +//! │ (Testing) │ │ (Production) │ +//! └─────────────────┘ └─────────────────┘ +//! ``` +//! +//! ## Storage Backends +//! +//! - **InMemory**: Fast, non-persistent storage for testing +//! - **RocksDB**: Production-grade persistent storage (requires `rocksdb` feature) +//! +//! ## Usage +//! +//! ```ignore +//! use ethrex_storage::{Store, EngineType}; +//! +//! // Create a new store with RocksDB backend +//! let store = Store::new("./data", EngineType::RocksDB)?; +//! +//! // Or from a genesis file +//! let store = Store::new_from_genesis( +//! 
Path::new("./data"), +//! EngineType::RocksDB, +//! "genesis.json" +//! ).await?; +//! +//! // Add a block +//! store.add_block(block).await?; +//! +//! // Query state +//! let balance = store.get_account_info(block_number, address)?.map(|a| a.balance); +//! ``` +//! +//! ## State Management +//! +//! State is stored using Merkle Patricia Tries for efficient verification: +//! - **State Trie**: Maps account addresses to account data +//! - **Storage Tries**: Maps storage keys to values for each contract +//! - **Code Storage**: Separate storage for contract bytecode +//! +//! The store maintains a cache layer (`TrieLayerCache`) for efficient state access +//! without requiring full trie traversal for recent blocks. + pub mod api; pub mod backend; pub mod error; @@ -11,9 +76,14 @@ pub mod utils; pub use layering::apply_prefix; pub use store::{AccountUpdatesList, EngineType, Store, UpdateBatch, hash_address, hash_key}; -/// Store Schema Version, must be updated on any breaking change -/// An upgrade to a newer schema version invalidates currently stored data, requiring a re-sync. +/// Store Schema Version, must be updated on any breaking change. +/// +/// An upgrade to a newer schema version invalidates currently stored data, +/// requiring a re-sync from genesis or a snapshot. pub const STORE_SCHEMA_VERSION: u64 = 1; -/// Name of the file storing the metadata about the database +/// Name of the file storing the metadata about the database. +/// +/// This file contains version information and is used to detect +/// incompatible database formats on startup. pub const STORE_METADATA_FILENAME: &str = "metadata.json"; diff --git a/crates/storage/store.rs b/crates/storage/store.rs index bfbe3de6007..d4e328e642e 100644 --- a/crates/storage/store.rs +++ b/crates/storage/store.rs @@ -119,27 +119,66 @@ impl CodeCache { } } +/// Main storage interface for the ethrex client. 
+/// +/// The `Store` provides a high-level API for all blockchain data operations: +/// - Block storage and retrieval +/// - State trie management +/// - Account and storage queries +/// - Transaction indexing +/// +/// # Thread Safety +/// +/// `Store` is `Clone` and thread-safe. All clones share the same underlying +/// database connection and caches via `Arc`. +/// +/// # Caching +/// +/// The store maintains several caches for performance: +/// - **Trie Layer Cache**: Recent trie nodes for fast state access +/// - **Code Cache**: LRU cache for contract bytecode (64MB default) +/// - **Latest Block Cache**: Cached latest block header for RPC +/// +/// # Example +/// +/// ```ignore +/// let store = Store::new("./data", EngineType::RocksDB)?; +/// +/// // Add a block +/// store.add_block(block).await?; +/// +/// // Query account balance +/// let info = store.get_account_info(block_number, address)?; +/// let balance = info.map(|a| a.balance).unwrap_or_default(); +/// ``` #[derive(Debug, Clone)] pub struct Store { + /// Path to the database directory. db_path: PathBuf, + /// Storage backend (InMemory or RocksDB). backend: Arc, + /// Chain configuration (fork schedule, chain ID, etc.). chain_config: ChainConfig, + /// Cache for trie nodes from recent blocks. trie_cache: Arc>>, + /// Channel for controlling the FlatKeyValue generator background task. flatkeyvalue_control_tx: std::sync::mpsc::SyncSender, + /// Channel for sending trie updates to the background worker. 
trie_update_worker_tx: std::sync::mpsc::SyncSender, - /// Keeps the latest canonical block header - /// It's wrapped in an Arc to allow for cheap reads with infrequent writes - /// Reading an out-of-date value is acceptable, since it's only used as: - /// - a cache of the (frequently requested) header - /// - a Latest tag for RPC, where a small extra delay before the newest block is expected - /// - sync-related operations, which must be idempotent in order to handle reorgs + /// Cached latest canonical block header. + /// + /// Wrapped in Arc for cheap reads with infrequent writes. + /// May be slightly out of date, which is acceptable for: + /// - Caching frequently requested headers + /// - RPC "latest" block queries (small delay acceptable) + /// - Sync operations (must be idempotent anyway) latest_block_header: LatestBlockHeaderCache, + /// Last computed FlatKeyValue for incremental updates. last_computed_flatkeyvalue: Arc>>, - - /// Cache for account bytecodes, keyed by the bytecode hash. - /// Note that we don't remove entries on account code changes, since - /// those changes already affect the code hash stored in the account, and only - /// may result in this cache having useless data. + /// LRU cache for contract bytecode, keyed by code hash. + /// + /// Entries are not removed on code changes since the code hash + /// in the account already reflects the change. account_code_cache: Arc, background_threads: Arc, @@ -158,34 +197,56 @@ impl Drop for ThreadList { } } +/// Storage trie nodes grouped by account address hash. +/// +/// Each entry contains the hashed account address and the trie nodes +/// for that account's storage trie. pub type StorageTrieNodes = Vec<(H256, Vec<(Nibbles, Vec)>)>; +/// Storage backend type selection. +/// +/// Used when creating a new [`Store`] to specify which backend to use. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum EngineType { + /// In-memory storage, non-persistent. Suitable for testing. 
InMemory, + /// RocksDB storage, persistent. Suitable for production. #[cfg(feature = "rocksdb")] RocksDB, } +/// Batch of updates to apply to the store atomically. +/// +/// Used during block execution to collect all state changes before +/// committing them to the database in a single transaction. pub struct UpdateBatch { - /// Nodes to be added to the state trie + /// New nodes to add to the state trie. pub account_updates: Vec, - /// Storage tries updated and their new nodes + /// Storage trie updates per account (keyed by hashed address). pub storage_updates: Vec<(H256, Vec)>, - /// Blocks to be added + /// Blocks to store. pub blocks: Vec, - /// Receipts added per block + /// Receipts to store, grouped by block hash. pub receipts: Vec<(H256, Vec)>, - /// Code updates + /// Contract code updates (code hash -> bytecode). pub code_updates: Vec<(H256, Code)>, } +/// Storage trie updates grouped by account address hash. pub type StorageUpdates = Vec<(H256, Vec<(Nibbles, Vec)>)>; +/// Collection of account state changes from block execution. +/// +/// Contains all the data needed to update the state trie after +/// executing a block: account updates, storage updates, and code deployments. pub struct AccountUpdatesList { + /// Root hash of the state trie after applying these updates. pub state_trie_hash: H256, + /// State trie node updates (path -> RLP-encoded node). pub state_updates: Vec<(Nibbles, Vec)>, + /// Storage trie updates per account. pub storage_updates: StorageUpdates, + /// New contract bytecode deployments. pub code_updates: Vec<(H256, Code)>, } diff --git a/crates/vm/levm/src/lib.rs b/crates/vm/levm/src/lib.rs index 5105016fe0b..dd68f07045d 100644 --- a/crates/vm/levm/src/lib.rs +++ b/crates/vm/levm/src/lib.rs @@ -1,3 +1,69 @@ +//! # LEVM - Lambda EVM +//! +//! A pure Rust implementation of the Ethereum Virtual Machine. +//! +//! ## Overview +//! +//! LEVM (Lambda EVM) is ethrex's native EVM implementation, designed for: +//! 
- **Correctness**: Full compatibility with Ethereum consensus tests +//! - **Performance**: Optimized opcode execution and memory management +//! - **Readability**: Clean, well-documented Rust code +//! - **Extensibility**: Modular design for easy feature additions +//! +//! ## Architecture +//! +//! ```text +//! ┌─────────────────────────────────────────────────────────────┐ +//! │ VM │ +//! │ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │ +//! │ │ CallFrame │ │ Memory │ │ Stack │ │ +//! │ └─────────────┘ └─────────────┘ └─────────────────────┘ │ +//! │ │ +//! │ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │ +//! │ │ Substate │ │ Precompiles │ │ Environment │ │ +//! │ └─────────────┘ └─────────────┘ └─────────────────────┘ │ +//! └─────────────────────────────────────────────────────────────┘ +//! │ +//! ▼ +//! ┌─────────────────────────────────────────────────────────────┐ +//! │ GeneralizedDatabase │ +//! │ (Account state, storage, code) │ +//! └─────────────────────────────────────────────────────────────┘ +//! ``` +//! +//! ## Key Components +//! +//! - [`vm::VM`]: Main EVM execution engine +//! - [`call_frame::CallFrame`]: Execution context for each call +//! - [`memory::Memory`]: EVM memory with expansion tracking +//! - [`environment::Environment`]: Block and transaction context +//! - [`precompiles`]: Native implementations of precompiled contracts +//! - [`hooks`]: Execution hooks for pre/post-execution logic and L2-specific behavior +//! +//! ## Supported Forks +//! +//! LEVM supports post-merge Ethereum forks: +//! - Paris (The Merge), Shanghai, Cancun, Prague, Osaka +//! +//! Note: ethrex is a post-merge client and does not support pre-merge forks. +//! +//! ## Usage +//! +//! ```ignore +//! use levm::{VM, Environment}; +//! +//! // Create VM with database and environment +//! let mut vm = VM::new(env, db, &tx, tracer, debug_mode, vm_type); +//! +//! // Execute the transaction +//! let report = vm.execute()?; +//! +//! 
// Check execution result +//! if report.is_success() { +//! println!("Gas used: {}", report.gas_used); +//! } +//! ``` + pub mod call_frame; pub mod constants; pub mod db; diff --git a/crates/vm/levm/src/vm.rs b/crates/vm/levm/src/vm.rs index 31fdac76828..aadf73a3739 100644 --- a/crates/vm/levm/src/vm.rs +++ b/crates/vm/levm/src/vm.rs @@ -29,28 +29,55 @@ use std::{ rc::Rc, }; +/// Storage mapping from slot key to value. pub type Storage = HashMap; +/// Specifies whether the VM operates in L1 or L2 mode. #[derive(Debug, Clone, Copy, Default)] pub enum VMType { + /// Standard Ethereum L1 execution. #[default] L1, + /// L2 rollup execution with additional fee handling. L2(FeeConfig), } -/// Information that changes during transaction execution. -// Most fields are private by design. The backup mechanism (`parent` field) will only work properly -// if data is append-only. +/// Execution substate that tracks changes during transaction execution. +/// +/// The substate maintains all information that may need to be reverted if a +/// call fails, including: +/// - Self-destructed accounts +/// - Accessed addresses and storage slots (for EIP-2929 gas accounting) +/// - Created accounts +/// - Gas refunds +/// - Transient storage (EIP-1153) +/// - Event logs +/// +/// # Backup Mechanism +/// +/// The substate supports checkpointing via [`push_backup`] and restoration via +/// [`revert_backup`] or commitment via [`commit_backup`]. This is used to handle +/// nested calls where inner calls may fail and need to be reverted. +/// +/// Most fields are private by design. The backup mechanism only works correctly +/// if data modifications are append-only. #[derive(Debug, Default)] pub struct Substate { + /// Parent checkpoint for reverting on failure. parent: Option>, - + /// Accounts marked for self-destruction (deleted at end of transaction). selfdestruct_set: HashSet
, + /// Addresses accessed during execution (for EIP-2929 warm/cold gas costs). accessed_addresses: HashSet
, + /// Storage slots accessed per address (for EIP-2929 warm/cold gas costs). accessed_storage_slots: BTreeMap>, + /// Accounts created during this transaction. created_accounts: HashSet
, + /// Accumulated gas refund (e.g., from storage clears). pub refunded_gas: u64, + /// Transient storage (EIP-1153), cleared at end of transaction. transient_storage: TransientStorage, + /// Event logs emitted during execution. logs: Vec, } @@ -303,28 +330,67 @@ impl Substate { } } +/// The LEVM (Lambda EVM) execution engine. +/// +/// The VM executes Ethereum transactions by processing EVM bytecode. It maintains +/// a call stack, memory, and tracks all state changes during execution. +/// +/// # Execution Model +/// +/// 1. Transaction is validated (nonce, balance, gas limit) +/// 2. Initial call frame is created with transaction data +/// 3. Opcodes are executed sequentially until completion or error +/// 4. State changes are committed or reverted based on success +/// +/// # Call Stack +/// +/// Nested calls (CALL, DELEGATECALL, etc.) push new frames onto `call_frames`. +/// Each frame has its own memory, stack, and execution context. The `current_call_frame` +/// is always the active frame being executed. +/// +/// # Hooks +/// +/// The VM supports hooks for extending functionality (e.g., tracing, debugging). +/// Hooks are called at various points during execution and implement pre/post-execution +/// logic. L2-specific behavior (such as fee handling) is implemented via hooks. +/// +/// # Example +/// +/// ```ignore +/// let mut vm = VM::new(env, db, &tx, tracer, debug_mode, vm_type); +/// let report = vm.execute()?; +/// if report.is_success() { +/// println!("Gas used: {}, Output: {:?}", report.gas_used, report.output); +/// } else { +/// println!("Transaction reverted"); +/// } +/// ``` pub struct VM<'a> { - /// Parent callframes. + /// Stack of parent call frames (for nested calls). pub call_frames: Vec, - /// The current call frame. + /// The currently executing call frame. pub current_call_frame: CallFrame, + /// Block and transaction environment. pub env: Environment, + /// Execution substate (accessed addresses, logs, refunds, etc.). 
pub substate: Substate, + /// Database for reading/writing account state. pub db: &'a mut GeneralizedDatabase, + /// The transaction being executed. pub tx: Transaction, + /// Execution hooks for tracing and debugging. pub hooks: Vec>>, - /// Original storage values before the transaction. Used for gas calculations in SSTORE. + /// Original storage values before transaction (for SSTORE gas calculation). pub storage_original_values: BTreeMap<(Address, H256), U256>, - /// When enabled, it "logs" relevant information during execution + /// Call tracer for execution tracing. pub tracer: LevmCallTracer, - /// Mode for printing some useful stuff, only used in development! + /// Debug mode for development diagnostics. pub debug_mode: DebugMode, - /// A pool of stacks to avoid reallocating too much when creating new call frames. + /// Pool of reusable stacks to reduce allocations. pub stack_pool: Vec, + /// VM type (L1 or L2 with fee config). pub vm_type: VMType, - - /// The opcode table mapping opcodes to opcode handlers for fast lookup. - /// Build dynamically according to the given fork config. + /// Opcode dispatch table, built dynamically per fork. 
pub(crate) opcode_table: [OpCodeFn<'a>; 256], } diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 02a20e8a3cd..a06a706022c 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -17,6 +17,11 @@ - [Node startup](./l1/running/startup.md) - [Configuration](./l1/running/configuration.md) - [Monitoring and metrics](./l1/running/monitoring.md) +- [Architecture](./l1/architecture/README.md) + - [System Overview](./l1/architecture/overview.md) + - [Block Execution Pipeline](./l1/architecture/block_execution.md) + - [Sync State Machine](./l1/architecture/sync_state_machine.md) + - [Crate Map](./l1/architecture/crate_map.md) - [Fundamentals](./l1/fundamentals/README.md) - [Metrics]() - [Logs]() diff --git a/docs/l1/architecture/README.md b/docs/l1/architecture/README.md new file mode 100644 index 00000000000..33db67542df --- /dev/null +++ b/docs/l1/architecture/README.md @@ -0,0 +1,8 @@ +# L1 Architecture + +This section covers the internal architecture of ethrex as an Ethereum L1 execution client. It explains how the different components interact, how blocks flow through the system, and the design decisions behind the implementation. + +- [System Overview](./overview.md) - High-level architecture and component interactions +- [Block Execution Pipeline](./block_execution.md) - How blocks are validated and executed +- [Sync State Machine](./sync_state_machine.md) - Full sync and snap sync algorithms +- [Crate Map](./crate_map.md) - Overview of all crates and their responsibilities diff --git a/docs/l1/architecture/block_execution.md b/docs/l1/architecture/block_execution.md new file mode 100644 index 00000000000..a33e6695b79 --- /dev/null +++ b/docs/l1/architecture/block_execution.md @@ -0,0 +1,303 @@ +# Block Execution Pipeline + +This document describes how ethrex validates and executes blocks, from receiving a block to committing state changes. + +## Overview + +Block execution in ethrex follows the Ethereum specification closely. The pipeline handles: + +1. 
Block header validation +2. System-level operations (beacon root contract, block hash storage) +3. Transaction execution +4. Withdrawal processing +5. Request extraction (post-Prague) +6. State root verification + +## Entry Points + +Blocks enter the execution pipeline through two main paths: + +### 1. P2P Sync (`Syncer`) + +During synchronization, blocks are fetched from peers and executed in batches: + +```rust +// crates/networking/p2p/sync.rs +Syncer::add_blocks() → Blockchain::add_blocks_in_batch() → execute each block +``` + +### 2. Engine API (`engine_newPayloadV{1,2,3}`) + +Post-Merge, the consensus client sends new blocks via the Engine API: + +```rust +// crates/networking/rpc/engine/payload.rs +NewPayloadV3::handle() → Blockchain::add_block() → execute block +``` + +## Block Header Validation + +Before executing a block, its header is validated: + +```rust +// crates/blockchain/blockchain.rs +fn validate_header(header: &BlockHeader, parent: &BlockHeader) -> Result<()> +``` + +### Validation Checks + +| Check | Description | +|-------|-------------| +| Parent hash | Must match parent block's hash | +| Block number | Must be parent.number + 1 | +| Timestamp | Must be > parent.timestamp | +| Gas limit | Must be within bounds of parent (EIP-1559) | +| Base fee | Must match calculated value (EIP-1559) | +| Difficulty | Must be 0 (post-Merge) | +| Nonce | Must be 0 (post-Merge) | +| Ommers hash | Must be empty hash (post-Merge) | +| Withdrawals root | Must match if Shanghai activated | +| Blob gas fields | Must be present if Cancun activated | +| Requests hash | Must match if Prague activated | + +## Execution Flow + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ Block Execution │ +├─────────────────────────────────────────────────────────────────────┤ +│ │ +│ 1. 
┌────────────────────────────────────────────────────────────┐ │ +│ │ System Operations (post-Cancun) │ │ +│ │ • Store beacon block root (EIP-4788) │ │ +│ │ • Store parent block hash (EIP-2935) │ │ +│ └────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ 2. ┌────────────────────────────────────────────────────────────┐ │ +│ │ Transaction Execution │ │ +│ │ For each transaction: │ │ +│ │ • Validate signature and nonce │ │ +│ │ • Check sender balance │ │ +│ │ • Execute in EVM │ │ +│ │ • Apply gas refunds │ │ +│ │ • Update account states │ │ +│ │ • Generate receipt │ │ +│ └────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ 3. ┌────────────────────────────────────────────────────────────┐ │ +│ │ Withdrawal Processing (post-Shanghai) │ │ +│ │ For each withdrawal: │ │ +│ │ • Credit validator address with withdrawal amount │ │ +│ └────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ 4. ┌────────────────────────────────────────────────────────────┐ │ +│ │ Request Extraction (post-Prague) │ │ +│ │ • Deposit requests from logs │ │ +│ │ • Withdrawal requests from system contract │ │ +│ │ • Consolidation requests from system contract │ │ +│ └────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ 5. ┌────────────────────────────────────────────────────────────┐ │ +│ │ State Finalization │ │ +│ │ • Compute state root from account updates │ │ +│ │ • Verify against header.state_root │ │ +│ │ • Commit changes to storage │ │ +│ └────────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## Transaction Execution + +Each transaction goes through the following steps: + +### 1. 
Pre-Execution Validation + +```rust +// crates/blockchain/validate.rs +fn validate_transaction(tx: &Transaction, header: &BlockHeader) -> Result<()> +``` + +- Signature recovery and validation +- Nonce check (must match account nonce) +- Gas limit check (must be <= block gas remaining) +- Balance check (must cover `gas_limit * gas_price + value`) +- Intrinsic gas calculation +- EIP-2930 access list validation +- EIP-4844 blob validation (if applicable) + +### 2. EVM Execution + +```rust +// crates/vm/levm/src/vm.rs +VM::execute() → Result +``` + +The EVM executes the transaction bytecode: + +1. **Contract Call**: Execute target contract code +2. **Contract Creation**: Deploy new contract, execute constructor +3. **Transfer**: Simple value transfer (no code execution) + +During execution: +- Opcodes are decoded and executed +- Gas is consumed for each operation +- State changes are tracked (but not committed) +- Logs are collected +- Errors revert all changes + +### 3. Post-Execution + +After EVM execution: + +```rust +// crates/vm/levm/src/vm.rs +fn finalize_transaction() -> Receipt +``` + +- Calculate gas refund (max 1/5 of gas used, post-London) +- Credit coinbase with priority fee +- Generate receipt with logs and status +- Update cumulative gas used + +## State Management + +### Account Updates + +State changes are tracked as `AccountUpdate` structs: + +```rust +pub struct AccountUpdate { + pub address: Address, + pub removed: bool, + pub info: Option, // balance, nonce, code_hash + pub code: Option, // bytecode if changed + pub added_storage: HashMap, +} +``` + +### State Root Computation + +After all transactions execute: + +```rust +// crates/storage/store.rs +Store::apply_account_updates_batch(parent_hash, updates) -> StateTrieHash +``` + +This is one of the two merkelization backends (the other is used by `add_block_pipeline`): + +1. Load parent state trie +2. Apply each account update to the trie +3. 
For accounts with storage changes, update storage tries +4. Compute new state root +5. Verify it matches `header.state_root` + +## Payload Building + +When ethrex acts as a block producer (validator), it builds payloads: + +```rust +// crates/blockchain/payload.rs +Blockchain::build_payload(template: Block) -> PayloadBuildResult +``` + +### Building Process + +1. **Fetch transactions** from mempool, filtered by: + - Base fee (must afford current base fee) + - Blob fee (for EIP-4844 transactions) + - Nonce ordering (consecutive nonces per sender) + +2. **Order transactions** by effective tip (highest first) + +3. **Execute transactions** until: + - Block gas limit reached + - No more valid transactions + - Blob limit reached (for blob transactions) + +4. **Finalize block**: + - Apply withdrawals + - Extract requests + - Compute state root + - Compute receipts root + - Generate logs bloom + +### Payload Rebuilding + +Payloads are rebuilt continuously until requested: + +```rust +// crates/blockchain/payload.rs +Blockchain::build_payload_loop(payload, cancel_token) +``` + +This maximizes the block's fee revenue by including the most profitable transactions available in the mempool at the time the payload is requested.
+ +## Error Handling + +Block execution can fail for various reasons: + +| Error | Cause | Recovery | +|-------|-------|----------| +| `InvalidBlock::InvalidStateRoot` | Computed state root doesn't match header | Reject block | +| `InvalidBlock::InvalidGasUsed` | Gas used doesn't match header | Reject block | +| `InvalidBlock::InvalidTransaction` | Transaction validation failed | Reject block | +| `EvmError::OutOfGas` | Transaction ran out of gas | Revert transaction, continue block | +| `EvmError::InvalidOpcode` | Unknown opcode encountered | Revert transaction, continue block | + +## Performance Considerations + +### Batch Execution + +During sync, blocks are executed in batches (default 1024 blocks): + +```rust +// crates/networking/p2p/sync.rs +const EXECUTE_BATCH_SIZE: usize = 1024; +``` + +This reduces database commits and improves throughput. + +### Parallel Trie Operations + +Storage trie updates can be parallelized across accounts: + +```rust +// Uses rayon for parallel iteration +account_updates.par_iter().map(|update| update_storage_trie(update)) +``` + +### State Caching + +The EVM maintains a cache of accessed accounts and storage slots to minimize database reads during execution. + +## Hard Fork Handling + +Block execution adapts based on the active hard fork: + +```rust +// crates/common/types/chain_config.rs +impl ChainConfig { + pub fn fork(&self, timestamp: u64) -> Fork { ... } + pub fn is_cancun_activated(&self, timestamp: u64) -> bool { ... } + pub fn is_prague_activated(&self, timestamp: u64) -> bool { ... 
} +} +``` + +Each fork may introduce: +- New opcodes (e.g., `PUSH0` in Shanghai) +- New precompiles (e.g., point evaluation in Cancun) +- New system contracts (e.g., beacon root contract in Cancun) +- Changed gas costs +- New transaction types + +## Related Documentation + +- [LEVM Documentation](../../vm/levm/debug.md) - EVM implementation details +- [Sync State Machine](./sync_state_machine.md) - How blocks flow during sync +- [Crate Map](./crate_map.md) - Overview of involved crates diff --git a/docs/l1/architecture/crate_map.md b/docs/l1/architecture/crate_map.md new file mode 100644 index 00000000000..31fbdd186ac --- /dev/null +++ b/docs/l1/architecture/crate_map.md @@ -0,0 +1,317 @@ +# Crate Map + +This document provides an overview of all crates in the ethrex monorepo and their responsibilities. + +## Crate Dependency Graph + +``` + ┌─────────────────────────────────────┐ + │ cmd/ethrex │ + │ (Main binary entry point) │ + └───────────────┬─────────────────────┘ + │ + ┌─────────────────────────┼─────────────────────────┐ + │ │ │ + ▼ ▼ ▼ + ┌───────────────────┐ ┌───────────────────┐ ┌───────────────────┐ + │ networking/rpc │ │ networking/p2p │ │ blockchain │ + │ (JSON-RPC API) │ │ (P2P networking) │ │ (Chain management)│ + └─────────┬─────────┘ └─────────┬─────────┘ └─────────┬─────────┘ + │ │ │ + │ │ │ + └─────────────────────────┼─────────────────────────┘ + │ + ▼ + ┌─────────────────────────────┐ + │ vm/levm │ + │ (EVM implementation) │ + └─────────────┬───────────────┘ + │ + ▼ + ┌─────────────────────────────┐ + │ storage │ + │ (Data persistence) │ + └─────────────┬───────────────┘ + │ + ┌───────────────────────┼───────────────────────┐ + │ │ │ + ▼ ▼ ▼ + ┌───────────────────┐ ┌───────────────────┐ ┌───────────────────┐ + │ common/trie │ │ common/rlp │ │ common/types │ + │ (Merkle Patricia) │ │ (RLP encoding) │ │ (Core data types) │ + └───────────────────┘ └───────────────────┘ └───────────────────┘ +``` + +## Core Crates + +### `ethrex-common` + 
+**Purpose:** Core data types and utilities shared across all crates. + +**Key Modules:** +- `types/` - Block, Transaction, Receipt, Account types +- `trie/` - Merkle Patricia Trie implementation +- `rlp/` - RLP encoding/decoding +- `crypto/` - Keccak hashing, signature recovery + +**Notable Types:** +```rust +pub struct Block { header: BlockHeader, body: BlockBody } +pub struct Transaction { /* variants for Legacy, EIP-2930, EIP-1559, EIP-4844, EIP-7702 */ } +pub struct AccountState { nonce: u64, balance: U256, storage_root: H256, code_hash: H256 } +``` + +--- + +### `ethrex-storage` + +**Purpose:** Persistent storage layer with multiple backend support. + +**Key Components:** +- `Store` - High-level API for all blockchain data +- `StoreEngine` trait - Backend abstraction +- `InMemoryStore` - Testing backend +- `RocksDBStore` - Production backend + +**Stored Data:** +| Table | Contents | +|-------|----------| +| `block_numbers` | Block hash → block number | +| `canonical_block_hashes` | Block number → canonical hash | +| `headers` | Block hash → BlockHeader | +| `bodies` | Block hash → BlockBody | +| `receipts` | Block hash + index → Receipt | +| `account_trie_nodes` | Node hash → trie node data | +| `storage_trie_nodes` | Node hash → trie node data | +| `account_codes` | Code hash → bytecode | +| `account_flatkeyvalue` | Account flat key-value store | +| `storage_flatkeyvalue` | Storage flat key-value store | + +--- + +### `ethrex-blockchain` + +**Purpose:** Chain management, block validation, and mempool. 
+ +**Key Components:** +- `Blockchain` - Main orchestrator for chain operations +- `Mempool` - Pending transaction pool +- `fork_choice` - Fork choice rule implementation +- `payload` - Block building for validators +- `validate` - Block and transaction validation + +**Public API:** +```rust +impl Blockchain { + pub fn add_block(&self, block: Block) -> Result<(), ChainError> + pub fn add_block_pipeline(&self, block: Block) -> Result<(), ChainError> + pub fn validate_transaction(&self, tx: &Transaction) -> Result<(), MempoolError> + pub fn build_payload(&self, template: Block) -> Result + pub fn get_payload(&self, id: u64) -> Result +} +``` + +--- + +### `ethrex-vm` / `levm` + +**Purpose:** Ethereum Virtual Machine implementation. + +**Key Components:** +- `VM` - Main EVM execution engine +- `Evm` trait - VM interface for different contexts +- Opcode handlers (one per EVM opcode) +- Precompiled contracts +- Gas metering + +**Execution Flow:** +```rust +impl VM { + pub fn execute(&mut self) -> Result + fn execute_opcode(&mut self, opcode: u8) -> Result<(), VMError> + fn call(&mut self, ...) -> Result + fn create(&mut self, ...) -> Result +} +``` + +--- + +### `ethrex-networking/rpc` + +**Purpose:** JSON-RPC API server. + +**Supported Namespaces:** +- `eth_*` - Standard Ethereum methods +- `debug_*` - Debugging and tracing +- `txpool_*` - Mempool inspection +- `admin_*` - Node administration +- `engine_*` - Consensus client communication +- `web3_*` - Web3 utilities + +**Architecture:** +```rust +pub trait RpcHandler: Send + Sync { + fn parse(params: &Option>) -> Result; + async fn handle(&self, context: RpcApiContext) -> Result; +} +``` + +--- + +### `ethrex-networking/p2p` + +**Purpose:** Peer-to-peer networking stack. + +**Protocol Layers:** +1. **DiscV4** - Node discovery +2. **RLPx** - Encrypted transport +3. **eth/68** - Ethereum wire protocol +4. 
**snap/1** - Snap sync protocol + +**Key Components:** +- `PeerHandler` - Manages peer connections +- `PeerTable` - Tracks known peers and their scores +- `Syncer` - Synchronization state machine +- `SyncManager` - Coordinates sync operations + +--- + +## Supporting Crates + +### `ethrex-common/trie` + +**Purpose:** Merkle Patricia Trie implementation. + +**Features:** +- Standard MPT operations (get, insert, delete) +- Proof generation and verification +- Sorted insertion for snap sync +- Flat key-value store integration + +--- + +### `ethrex-common/rlp` + +**Purpose:** Recursive Length Prefix encoding. + +**Traits:** +```rust +pub trait RLPEncode { + fn encode(&self, buf: &mut dyn BufMut); + fn encode_to_vec(&self) -> Vec<u8>; +} + +pub trait RLPDecode: Sized { + fn decode(rlp: &[u8]) -> Result<Self, RLPDecodeError>; + fn decode_unfinished(rlp: &[u8]) -> Result<(Self, &[u8]), RLPDecodeError>; +} +``` + +--- + +### `ethrex-metrics` + +**Purpose:** Prometheus metrics collection. + +**Metric Categories:** +- Block metrics (height, gas, execution time) +- Transaction metrics (types, counts, errors) +- P2P metrics (peers, messages, sync progress) +- RPC metrics (requests, latency) + +--- + +### `ethrex-crypto` + +**Purpose:** Cryptographic primitives. + +**Features:** +- Keccak-256 hashing +- ECDSA signature recovery +- BLS signatures (for beacon chain) + +--- + +## L2-Specific Crates + +### `ethrex-l2` + +**Purpose:** L2 sequencer and prover integration. + +**Components:** +- Sequencer logic +- State diff computation +- Prover interface +- L1 interaction (deposits, withdrawals) + +--- + +### `ethrex-prover` + +**Purpose:** Proof generation for L2 batches (zero-knowledge and TEE-based). + +**Supported Provers:** +- SP1 (Succinct) +- RISC0 +- TDX (Trusted Execution) + +--- + +## Test and Development Crates + +### `ef-tests` + +**Purpose:** Ethereum Foundation test runner. + +Runs official Ethereum tests to verify protocol compliance. + +--- + +### `ethrex-dev` + +**Purpose:** Development mode utilities.
+ +Features: +- Local development network +- Block import from files +- Test fixtures + +--- + +## Crate Features + +Many crates support feature flags: + +| Crate | Feature | Effect | +|-------|---------|--------| +| `ethrex-storage` | `rocksdb` | Enable RocksDB backend | +| `ethrex-blockchain` | `metrics` | Enable Prometheus metrics | +| `ethrex-networking/p2p` | `sync-test` | Testing utilities for sync | +| `ethrex-networking/p2p` | `experimental-discv5` | Enable discv5 node discovery (experimental) | + +## Adding New Functionality + +When adding new features, consider: + +1. **Where does it belong?** + - Pure data types → `ethrex-common` + - Database operations → `ethrex-storage` + - EVM changes → `ethrex-vm` + - Chain logic → `ethrex-blockchain` + - API endpoints → `ethrex-networking/rpc` + - P2P messages → `ethrex-networking/p2p` + +2. **Dependency direction** + - Lower-level crates should not depend on higher-level ones + - Common types flow down, behaviors flow up + +3. **Testing** + - Unit tests in the crate + - Integration tests in `tests/` directory + - EF tests for protocol compliance + +## Related Documentation + +- [System Overview](./overview.md) - How crates work together +- [Block Execution](./block_execution.md) - Execution flow across crates +- [Sync State Machine](./sync_state_machine.md) - Sync implementation details diff --git a/docs/l1/architecture/overview.md b/docs/l1/architecture/overview.md new file mode 100644 index 00000000000..9ad14e30345 --- /dev/null +++ b/docs/l1/architecture/overview.md @@ -0,0 +1,241 @@ +# System Overview + +This document provides a high-level overview of ethrex's L1 architecture as an Ethereum execution client. + +## Introduction + +ethrex is a Rust implementation of an Ethereum execution client. 
It implements the Ethereum protocol specification, including: + +- Block validation and execution +- State management via Merkle Patricia Tries +- P2P networking (devp2p stack) +- JSON-RPC API for external interaction +- Engine API for consensus client communication + +## High-Level Architecture + +``` + ┌─────────────────────┐ + │ Consensus Client │ + │ (Lighthouse, etc) │ + └──────────┬──────────┘ + │ Engine API + │ (JWT auth) + ▼ +┌──────────────────────────────────────────────────────────────────────────────┐ +│ ethrex Execution Client │ +│ │ +│ ┌─────────────┐ ┌──────────────┐ ┌────────────────────────────────┐ │ +│ │ JSON-RPC │ │ Engine API │ │ P2P Network │ │ +│ │ Server │ │ Handler │ │ ┌────────┐ ┌──────────────┐ │ │ +│ │ │ │ │ │ │DiscV4 │ │ RLPx │ │ │ +│ │ eth_* │ │ engine_* │ │ │ │ │ ┌────────┐ │ │ │ +│ │ debug_* │ │ forkchoice │ │ │ │ │ │ eth/68 │ │ │ │ +│ │ txpool_* │ │ newPayload │ │ │ │ │ │ snap/1 │ │ │ │ +│ │ admin_* │ │ getPayload │ │ │ │ │ └────────┘ │ │ │ +│ └──────┬──────┘ └──────┬───────┘ │ └────────┘ └──────────────┘ │ │ +│ │ │ └────────────────┬───────────────┘ │ +│ │ │ │ │ +│ └───────────────────┼──────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────────────────────────────────────────────────────────────┐ │ +│ │ Blockchain │ │ +│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ +│ │ │ Mempool │ │ Payload │ │ Fork Choice │ │ Block │ │ │ +│ │ │ │ │ Builder │ │ Update │ │ Pipeline │ │ │ +│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │ +│ └───────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────────────────────────────────────────────────────────────┐ │ +│ │ EVM (LEVM) │ │ +│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ +│ │ │Transaction │ │ Opcode │ │ Precompiled│ │ State │ │ │ +│ │ │ Execution │ │ Handler │ │ Contracts │ │ Transitions │ │ │ +│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ 
│ +│ └───────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────────────────────────────────────────────────────────────┐ │ +│ │ Storage │ │ +│ │ ┌───────────────────────────────────────────────────────────────┐ │ │ +│ │ │ Store (High-level API) │ │ │ +│ │ └───────────────────────────────────────────────────────────────┘ │ │ +│ │ │ │ │ │ +│ │ ┌──────────┴──────────┐ ┌─────────┴────────┐ │ │ +│ │ ▼ ▼ ▼ ▼ │ │ +│ │ ┌─────────────┐ ┌─────────────────┐ ┌───────────────┐ │ │ +│ │ │ InMemory │ │ RocksDB │ │ State Trie │ │ │ +│ │ │ (Testing) │ │ (Production) │ │ (MPT + Flat) │ │ │ +│ │ └─────────────┘ └─────────────────┘ └───────────────┘ │ │ +│ └───────────────────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +## Core Components + +### 1. Network Layer + +The network layer handles all external communication: + +**JSON-RPC Server** (`crates/networking/rpc`) +- Implements the Ethereum JSON-RPC specification +- Namespaces: `eth_*`, `debug_*`, `txpool_*`, `admin_*`, `web3_*` +- Validates and broadcasts incoming transactions + +**Engine API** (`crates/networking/rpc/engine`) +- Communication channel with the consensus client +- Handles `engine_forkchoiceUpdatedV{1,2,3}`, `engine_newPayloadV{1,2,3}`, `engine_getPayloadV{1,2,3}` +- JWT authentication for security +- Triggers sync when receiving unknown block hashes + +**P2P Network** (`crates/networking/p2p`) +- **DiscV4**: Node discovery protocol for finding peers +- **RLPx**: Encrypted transport layer for peer communication +- **eth/68**: Block and transaction propagation protocol +- **snap/1**: Snap sync protocol for fast state download + +### 2. 
Blockchain Layer + +The blockchain layer manages chain state and block processing: + +**Blockchain** (`crates/blockchain`) +- Orchestrates block validation and execution +- Manages the mempool for pending transactions +- Handles fork choice updates from the consensus layer +- Coordinates payload building for block production + +**Mempool** +- Stores pending transactions awaiting inclusion +- Filters transactions by gas price, nonce, and validity +- Supports transaction replacement (EIP-1559 and EIP-4844) +- Broadcasts new transactions to peers + +**Fork Choice** +- Implements Ethereum's fork choice rule +- Updates the canonical chain based on consensus client signals +- Handles chain reorganizations + +### 3. Execution Layer + +**LEVM (Lambda EVM)** (`crates/vm/levm`) +- Custom EVM implementation in Rust +- Executes smart contract bytecode +- Implements all EVM opcodes up to the latest hard fork +- Handles precompiled contracts + +**Block Execution Pipeline** +1. Validate block header +2. Apply system-level operations (beacon root, block hash storage) +3. Execute transactions in order +4. Process withdrawals (post-Shanghai) +5. Extract requests (post-Prague) +6. Compute state root and verify against header + +### 4. Storage Layer + +**Store** (`crates/storage`) +- High-level API for all blockchain data +- Supports multiple backends: InMemory (testing), RocksDB (production) +- Manages block headers, bodies, receipts, and state + +**State Trie** (`crates/common/trie`) +- Merkle Patricia Trie implementation +- Stores account states and contract storage +- Supports flat key-value storage for performance +- Handles trie node caching and persistence + +## Data Flow + +### Block Import (from P2P) + +``` +P2P Peer → Block Headers/Bodies → Syncer → Blockchain.add_block() → EVM.execute() → Store +``` + +1. Syncer requests headers from peers +2. Headers are validated (parent exists, timestamps, gas limits, etc.) +3. Bodies are requested and matched to headers +4.
Blocks are executed in batches +5. State is committed to storage + +### Block Import (from Consensus Client) + +``` +Consensus Client → engine_newPayloadV3 → Blockchain.add_block_pipeline() → EVM.execute() → Store + → engine_forkchoiceUpdated → Fork Choice Update → Canonical Chain Update +``` + +1. Consensus client sends new payload via Engine API +2. Block is validated and executed +3. Fork choice update makes the block canonical +4. Sync is triggered if the block's parent is unknown + +### Transaction Lifecycle + +``` +User → JSON-RPC (eth_sendRawTransaction) → Mempool → Broadcast to Peers + → Include in Block +``` + +1. Transaction arrives via JSON-RPC or P2P +2. Validated for signature, nonce, balance, gas +3. Added to mempool if valid +4. Broadcast to connected peers +5. Eventually included in a block by the payload builder + +## Sync Modes + +### Full Sync + +Downloads and executes every block from genesis (or a known checkpoint): + +1. Request block headers from peers +2. Request block bodies for each header +3. Execute blocks in batches (1024 blocks per batch) +4. Commit state after each batch +5. Update fork choice when sync head is reached + +### Snap Sync + +Downloads state directly instead of executing all historical blocks: + +1. Download block headers to find a recent "pivot" block +2. Download account state trie leaves via snap protocol +3. Download storage tries for accounts with storage +4. Heal any missing trie nodes (state may have changed during download) +5. Download bytecode for contract accounts +6. Execute recent blocks (post-pivot) to catch up + +See [Sync State Machine](./sync_state_machine.md) for detailed documentation. 
+ +## Concurrency Model + +ethrex uses Tokio for async I/O with the following patterns: + +- **Async tasks** for network I/O (RPC, P2P) +- **Blocking tasks** for CPU-intensive work (block execution, trie operations) +- **Channels** for inter-component communication (sync signals, mempool updates) +- **RwLock/Mutex** for shared state (mempool, peer table) + +## Configuration + +Key configuration options: + +| Option | Description | Default | +|--------|-------------|---------| +| `--network` | Network to connect to | `mainnet` | +| `--datadir` | Data directory for DB and keys | `~/.ethrex` | +| `--syncmode` | Sync mode (`full` or `snap`) | `snap` | +| `--authrpc.port` | Engine API port | `8551` | +| `--http.port` | JSON-RPC HTTP port | `8545` | +| `--discovery.port` | P2P discovery port | `30303` | + +See [Configuration](../running/configuration.md) for the complete reference. + +## Next Steps + +- [Block Execution Pipeline](./block_execution.md) - Deep dive into block processing +- [Sync State Machine](./sync_state_machine.md) - Detailed sync algorithm documentation +- [Crate Map](./crate_map.md) - Overview of all crates and dependencies diff --git a/docs/l1/architecture/sync_state_machine.md b/docs/l1/architecture/sync_state_machine.md new file mode 100644 index 00000000000..52b3eb8c1b0 --- /dev/null +++ b/docs/l1/architecture/sync_state_machine.md @@ -0,0 +1,424 @@ +# Sync State Machine + +This document describes the synchronization algorithms implemented in ethrex, including full sync and snap sync. 
+ +## Overview + +ethrex supports two synchronization modes: + +| Mode | Description | Use Case | +|------|-------------|----------| +| **Full Sync** | Downloads and executes every block | Maximum security, slower | +| **Snap Sync** | Downloads state directly, executes recent blocks | Faster initial sync | + +## Sync Manager Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ SyncManager │ +│ • Receives sync targets from Engine API / P2P │ +│ • Tracks current sync mode (Full / Snap) │ +│ • Coordinates Syncer for actual sync work │ +└──────────────────────────────┬──────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Syncer │ +│ • Executes sync cycles │ +│ • Manages peer connections via PeerHandler │ +│ • Handles both full and snap sync algorithms │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Sync Triggers + +Synchronization is triggered by: + +1. **Engine API**: `engine_forkchoiceUpdated` with unknown head hash +2. **P2P**: Receiving block announcements for unknown blocks +3. **Startup**: When local chain is behind network + +```rust +// crates/networking/rpc/engine/fork_choice.rs +match apply_fork_choice(...) { + Err(InvalidForkChoice::Syncing) => { + syncer.sync_to_head(fork_choice_state.head_block_hash); + // Return SYNCING status to consensus client + } +} +``` + +## Full Sync Algorithm + +Full sync downloads blocks from the network and executes each one to reconstruct the state. + +### State Machine + +``` + ┌─────────────────┐ + │ START SYNC │ + └────────┬────────┘ + │ + ▼ + ┌─────────────────┐ + ┌─────────│ Request Headers │◄─────────────┐ + │ └────────┬────────┘ │ + │ │ │ + │ ▼ │ + │ ┌─────────────────┐ │ + │ │ Validate Headers│ │ + │ └────────┬────────┘ │ + │ │ │ + │ ▼ │ + │ ┌─────────────────┐ │ + │ │ Found Canonical │──No──────────┘ + │ │ Ancestor? 
│ + │ └────────┬────────┘ + │ │ Yes + │ ▼ + │ ┌─────────────────┐ + │ │ Request Bodies │◄─────────────┐ + │ └────────┬────────┘ │ + │ │ │ + │ ▼ │ + │ ┌─────────────────┐ │ + │ │ Execute Batch │ │ + │ │ (1024 blocks) │ │ + │ └────────┬────────┘ │ + │ │ │ + │ ▼ │ + │ ┌─────────────────┐ │ + │ │ More Blocks? │──Yes─────────┘ + │ └────────┬────────┘ + │ │ No + │ ▼ + │ ┌─────────────────┐ + └─Error───│ SYNC DONE │ + └─────────────────┘ +``` + +### Algorithm Details + +```rust +// crates/networking/p2p/sync.rs +async fn sync_cycle_full(sync_head: H256, store: Store) -> Result<()> +``` + +1. **Find Chain Link** + - Request headers backwards from sync_head + - Stop when reaching a canonical block (already known) + - This identifies the fork point + +2. **Store Headers** + - Save all new headers to temporary storage + - Headers are stored in batches during download + +3. **Download Bodies** + - Request bodies for stored headers + - Match bodies to headers by hash + - Maximum 64 bodies per request + +4. **Execute Blocks** + - Execute in batches of 1024 blocks + - Each block is fully validated and executed + - State is committed after each batch + +5. **Update Fork Choice** + - After all blocks executed, update canonical chain + - Set new head, safe, and finalized blocks + +### Key Constants + +```rust +const EXECUTE_BATCH_SIZE: usize = 1024; // Blocks per execution batch +const MAX_BLOCK_BODIES_TO_REQUEST: usize = 64; // Bodies per request +``` + +## Snap Sync Algorithm + +Snap sync downloads state directly from peers instead of executing all historical blocks. 
+ +### State Machine + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ SNAP SYNC STATE MACHINE │ +└─────────────────────────────────────────────────────────────────────────────┘ + + ┌──────────────┐ + │ START SNAP │ + │ SYNC │ + └──────┬───────┘ + │ + ▼ + ┌──────────────┐ ┌─────────────────────────────────────────────────────┐ + │ Download │ │ Download headers to find sync head │ + │ Headers │────▶│ Store hashes for later body download │ + └──────┬───────┘ └─────────────────────────────────────────────────────┘ + │ + ▼ + ┌──────────────┐ ┌─────────────────────────────────────────────────────┐ + │ Select Pivot │────▶│ Choose recent block as pivot (must not be stale) │ + │ Block │ │ Pivot block is target for state download │ + └──────┬───────┘ └─────────────────────────────────────────────────────┘ + │ + ▼ + ┌──────────────┐ ┌─────────────────────────────────────────────────────┐ + │ Download │────▶│ Request account ranges via SNAP protocol │ + │ Accounts │ │ Store account states to disk as snapshots │ + └──────┬───────┘ └─────────────────────────────────────────────────────┘ + │ + ▼ + ┌──────────────┐ ┌─────────────────────────────────────────────────────┐ + │ Insert │────▶│ Build account trie from downloaded leaves │ + │ Accounts │ │ Identify accounts with non-empty storage │ + └──────┬───────┘ └─────────────────────────────────────────────────────┘ + │ + ▼ + ┌──────────────┐ ┌─────────────────────────────────────────────────────┐ + │ Download │────▶│ For each account with storage: │ + │ Storage │ │ Request storage ranges and build storage tries. │ + │ │ │ Includes a healing loop to fix state trie changes. 
│ + └──────┬───────┘ └─────────────────────────────────────────────────────┘ + │ + ▼ + ┌──────────────┐ ┌─────────────────────────────────────────────────────┐ + │ Heal │────▶│ Heal state trie (fill missing nodes) │ + │ Tries │ │ Heal storage tries for modified accounts │ + └──────┬───────┘ └─────────────────────────────────────────────────────┘ + │ + ▼ + ┌──────────────┐ ┌─────────────────────────────────────────────────────┐ + │ Download │────▶│ Download bytecode for all contract accounts │ + │ Bytecode │ │ Match by code hash │ + └──────┬───────┘ └─────────────────────────────────────────────────────┘ + │ + ▼ + ┌──────────────┐ + │ SNAP SYNC │ + │ COMPLETE │ + └──────┬───────┘ + │ + ▼ + ┌──────────────┐ ┌─────────────────────────────────────────────────────┐ + │ Switch to │────▶│ Execute recent blocks from pivot to head │ + │ Full Sync │ │ Continue with full sync for new blocks │ + └──────────────┘ └─────────────────────────────────────────────────────┘ +``` + +### Phase 1: Header Download + +Download all block headers from current head to sync target: + +```rust +// crates/networking/p2p/sync.rs +async fn sync_cycle_snap(sync_head: H256, store: Store) -> Result<()> +``` + +- Request headers in batches +- Store header hashes for later use +- Identify pivot block (recent block whose state we'll download) + +### Phase 2: Pivot Selection + +The pivot block must be: +- Recent enough to have state available on peers +- Not "stale" (a pivot is stale once it is older than SNAP_LIMIT * 12 seconds) + +```rust +// crates/networking/p2p/sync.rs +fn block_is_stale(header: &BlockHeader) -> bool { + calculate_staleness_timestamp(header.timestamp) < current_unix_time() +} + +const SNAP_LIMIT: usize = 128; // Blocks before pivot is considered stale +``` + +If the pivot becomes stale during sync, a new pivot is selected: + +```rust +async fn update_pivot(block_number: u64, ...)
-> Result +``` + +### Phase 3: Account Download + +Download all account states at the pivot block: + +```rust +// Uses SNAP protocol GetAccountRange messages +peers.request_account_range(start_hash, end_hash, snapshot_dir, pivot_header) +``` + +- Accounts are saved to disk as RLP-encoded snapshots +- Each snapshot file contains a batch of (hash, account_state) pairs +- Process tracks code hashes for later bytecode download + +### Phase 4: Account Trie Construction + +Build the account state trie from downloaded leaves: + +```rust +async fn insert_accounts(store, storage_accounts, snapshots_dir, ...) -> (H256, accounts_with_storage) +``` + +For RocksDB backend: +- Ingest snapshot files directly via SST ingestion +- Build trie using sorted insertion algorithm +- Track accounts with non-empty storage root + +### Phase 5: Storage Download + +For each account with storage, download storage slots: + +```rust +peers.request_storage_ranges(storage_accounts, snapshots_dir, chunk_index, pivot_header) +``` + +- Multiple accounts can be requested per message +- Large accounts are downloaded in chunks +- "Big accounts" (>4096 slots) are marked for healing instead + +### Phase 6: Trie Healing + +State may have changed while downloading. Healing fixes inconsistencies: + +**State Trie Healing:** +```rust +async fn heal_state_trie_wrap(state_root, store, peers, deadline, ...) -> bool +``` +- Walk trie from root +- Request missing nodes from peers +- Fill in gaps caused by state changes + +**Storage Trie Healing:** +```rust +async fn heal_storage_trie(state_root, accounts, peers, store, ...) 
-> bool +``` +- For each account marked for healing +- Request missing storage trie nodes +- Verify storage roots match account state + +### Phase 7: Bytecode Download + +Download contract bytecode: + +```rust +peers.request_bytecodes(&code_hashes) +``` + +- Code hashes collected during account download +- Bytecode downloaded in chunks (50,000 per batch) +- Verified by hashing and comparing to code_hash + +### Phase 8: Transition to Full Sync + +After snap sync completes: +1. Store pivot block body +2. Update fork choice to pivot +3. Switch sync mode to Full +4. Execute any remaining blocks normally + +## P2P Protocols Used + +### eth/68 Protocol + +Used for block header and body download: + +| Message | Purpose | +|---------|---------| +| `GetBlockHeaders` | Request headers by number or hash | +| `BlockHeaders` | Response with headers | +| `GetBlockBodies` | Request bodies by hash | +| `BlockBodies` | Response with bodies | + +### snap/1 Protocol + +Used for state download during snap sync: + +| Message | Purpose | +|---------|---------| +| `GetAccountRange` | Request accounts in hash range | +| `AccountRange` | Response with accounts and proof | +| `GetStorageRanges` | Request storage for accounts | +| `StorageRanges` | Response with storage and proofs | +| `GetByteCodes` | Request bytecode by hash | +| `ByteCodes` | Response with bytecode | +| `GetTrieNodes` | Request specific trie nodes | +| `TrieNodes` | Response with nodes | + +## Error Recovery + +### Recoverable Errors + +These errors cause sync to retry: +- Peer disconnection +- Invalid response from peer +- Timeout waiting for response +- Database errors (transient) + +### Non-Recoverable Errors + +These errors cause sync to abort with a warning: +- Snapshot file corruption +- Database corruption +- State root mismatch after healing + +```rust +// crates/networking/p2p/sync.rs +impl SyncError { + pub fn is_recoverable(&self) -> bool { + match self { + SyncError::Chain(_) | SyncError::Store(_) | ...
=> true, + SyncError::CorruptDB | SyncError::SnapshotDecodeError(_) | ... => false, + } + } +} +``` + +## Performance Optimizations + +### Parallel Operations + +- Account trie insertion uses Rayon for parallelism +- Storage tries built in parallel across accounts +- Bytecode downloads are batched + +### Disk I/O + +- Snapshot files written in batches to reduce writes +- RocksDB SST ingestion for fast account loading +- Temporary directories cleaned up after sync + +### Network + +- Multiple peers used concurrently +- Peer scoring based on response time and validity +- Automatic peer rotation for failed requests + +## Metrics + +Sync progress is tracked via metrics: + +```rust +// crates/networking/p2p/metrics.rs +METRICS.account_tries_inserted // Accounts added to trie +METRICS.storage_leaves_inserted // Storage slots added +METRICS.current_step // Current sync phase +METRICS.sync_head_hash // Current sync target +``` + +## Configuration + +| Option | Description | Default | +|--------|-------------|---------| +| `--syncmode` | Sync mode (`full` or `snap`) | `snap` | +| `EXECUTE_BATCH_SIZE` | Blocks per batch (env var) | 1024 | +| `MIN_FULL_BLOCKS` | Min blocks to full sync in snap mode | 10,000 | + +## Related Documentation + +- [Snap Sync Internals](../fundamentals/snap_sync.md) - Detailed snap sync documentation +- [Block Execution Pipeline](./block_execution.md) - How blocks are executed +- [Networking](../fundamentals/networking.md) - P2P protocol details + +> **Note:** For comprehensive snap sync documentation, see [Snap Sync Internals](../fundamentals/snap_sync.md).