ethereum · gballet · Oct 21, 2025 · Oct 13, 2025 · Oct 21, 2025 · gballet
@@ -25,10 +25,10 @@ import (
 )
 
 const (
-	indexBlockDescSize   = 14        // The size of index block descriptor
-	indexBlockEntriesCap = 4096      // The maximum number of entries can be grouped in a block
-	indexBlockRestartLen = 256       // The restart interval length of index block
-	historyIndexBatch    = 1_000_000 // The number of state history indexes for constructing or deleting as batch
+	indexBlockDescSize   = 14         // The size of index block descriptor
+	indexBlockEntriesCap = 4096       // The maximum number of entries can be grouped in a block
+	indexBlockRestartLen = 256        // The restart interval length of index block
+	historyIndexBatch    = 512 * 1024 // The number of state history indexes for constructing or deleting as batch
 )
 
 // indexBlockDesc represents a descriptor for an index block, which contains a

@@ -40,6 +40,11 @@ const (
 	stateHistoryIndexVersion    = stateHistoryIndexV0    // the current state index version
 	trienodeHistoryIndexV0      = uint8(0)               // initial version of trienode index structure
 	trienodeHistoryIndexVersion = trienodeHistoryIndexV0 // the current trienode index version
+
+	// estimations for calculating the batch size for atomic database commit
+	estimatedStateHistoryIndexSize    = 3  // The average size of each state history index entry is approximately 2–3 bytes
+	estimatedTrienodeHistoryIndexSize = 3  // The average size of each trienode history index entry is approximately 2-3 bytes
+	estimatedIndexBatchSizeFactor     = 32 // The factor counts for the write amplification for each entry
 )
 
 // indexVersion returns the latest index version for the given history type.
@@ -150,6 +155,22 @@ func (b *batchIndexer) process(h history, id uint64) error {
 	return b.finish(false)
 }
 
+// makeBatch constructs a database batch based on the number of pending entries.
+// The batch size is roughly estimated to minimize repeated resizing rounds,
+// as accurately predicting the exact size is technically challenging.
+func (b *batchIndexer) makeBatch() ethdb.Batch {
+	var size int
+	switch b.typ {
+	case typeStateHistory:
+		size = estimatedStateHistoryIndexSize
+	case typeTrienodeHistory:
+		size = estimatedTrienodeHistoryIndexSize
+	default:
+		panic(fmt.Sprintf("unknown history type %d", b.typ))
+	}
+	return b.db.NewBatchWithSize(size * estimatedIndexBatchSizeFactor * b.pending)
+}
+
 // finish writes the accumulated state indexes into the disk if either the
 // memory limitation is reached or it's requested forcibly.
 func (b *batchIndexer) finish(force bool) error {
@@ -160,7 +181,7 @@ func (b *batchIndexer) finish(force bool) error {
 		return nil
 	}
 	var (
-		batch   = b.db.NewBatch()
+		batch   = b.makeBatch()
 		batchMu sync.RWMutex
 		start   = time.Now()
 		eg      errgroup.Group