From d3c7146d3e932bdebda6c89609f20c8df14707a5 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 13 Aug 2025 07:44:53 +0000 Subject: [PATCH 01/43] Initial kafka committer --- cmd/root.go | 26 + configs/config.go | 9 + internal/common/block.go | 9 +- internal/orchestrator/committer_test.go | 1 + internal/orchestrator/failure_recoverer.go | 1 + internal/orchestrator/poller.go | 1 + internal/orchestrator/reorg_handler.go | 1 + internal/storage/clickhouse.go | 2 + internal/storage/connector.go | 2 + internal/storage/kafka_postgres.go | 620 +++++++++++++++++++++ internal/storage/kafka_publisher.go | 183 ++++++ 11 files changed, 851 insertions(+), 4 deletions(-) create mode 100644 internal/storage/kafka_postgres.go create mode 100644 internal/storage/kafka_publisher.go diff --git a/cmd/root.go b/cmd/root.go index 6ba9702..88b8428 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -124,6 +124,19 @@ func init() { rootCmd.PersistentFlags().Int("storage-staging-postgres-maxIdleConns", 25, "PostgreSQL max idle connections for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for staging storage") + // Kafka storage flags - only for main storage (where blockchain data is committed) + rootCmd.PersistentFlags().Bool("storage-main-kafka-enabled", false, "Enable Kafka storage for main storage") + rootCmd.PersistentFlags().String("storage-main-kafka-brokers", "", "Kafka brokers for main storage") + rootCmd.PersistentFlags().String("storage-main-kafka-username", "", "Kafka username for main storage") + rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") + rootCmd.PersistentFlags().String("storage-main-kafka-postgres-host", "", "PostgreSQL host for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-port", 5432, "PostgreSQL port for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().String("storage-main-kafka-postgres-username", "", "PostgreSQL username for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().String("storage-main-kafka-postgres-password", "", "PostgreSQL password for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().String("storage-main-kafka-postgres-database", "", "PostgreSQL database for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().String("storage-main-kafka-postgres-sslMode", "require", "PostgreSQL SSL mode for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-maxOpenConns", 25, "PostgreSQL max open connections for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-maxIdleConns", 10, "PostgreSQL max idle connections for Kafka main storage bookkeeping") rootCmd.PersistentFlags().String("api-host", "localhost:3000", "API host") rootCmd.PersistentFlags().String("api-basicAuth-username", "", "API basic auth username") rootCmd.PersistentFlags().String("api-basicAuth-password", "", "API basic auth password") @@ -240,6 +253,19 @@ func init() { viper.BindPFlag("storage.staging.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxIdleConns")) viper.BindPFlag("storage.staging.postgres.maxConnLifetime", 
rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxConnLifetime")) viper.BindPFlag("storage.staging.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-connectTimeout")) + // Bind Kafka storage flags - only for main storage + viper.BindPFlag("storage.main.kafka.enabled", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enabled")) + viper.BindPFlag("storage.main.kafka.brokers", rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) + viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) + viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) + viper.BindPFlag("storage.main.kafka.postgres.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-host")) + viper.BindPFlag("storage.main.kafka.postgres.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-port")) + viper.BindPFlag("storage.main.kafka.postgres.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-username")) + viper.BindPFlag("storage.main.kafka.postgres.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-password")) + viper.BindPFlag("storage.main.kafka.postgres.database", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-database")) + viper.BindPFlag("storage.main.kafka.postgres.sslMode", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-sslMode")) + viper.BindPFlag("storage.main.kafka.postgres.maxOpenConns", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-maxOpenConns")) + viper.BindPFlag("storage.main.kafka.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-maxIdleConns")) viper.BindPFlag("api.host", rootCmd.PersistentFlags().Lookup("api-host")) viper.BindPFlag("api.basicAuth.username", rootCmd.PersistentFlags().Lookup("api-basicAuth-username")) viper.BindPFlag("api.basicAuth.password", rootCmd.PersistentFlags().Lookup("api-basicAuth-password")) diff --git a/configs/config.go b/configs/config.go index 0be0feb..10a824f 100644 --- a/configs/config.go +++ b/configs/config.go @@ -62,6 +62,7 @@ const ( type StorageConnectionConfig struct { Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` Postgres *PostgresConfig `mapstructure:"postgres"` + Kafka *KafkaConfig `mapstructure:"kafka"` } type TableConfig struct { @@ -100,6 +101,14 @@ type PostgresConfig struct { ConnectTimeout int `mapstructure:"connectTimeout"` } +type KafkaConfig struct { + Enabled bool `mapstructure:"enabled"` + Brokers string `mapstructure:"brokers"` + Username string `mapstructure:"username"` + Password string `mapstructure:"password"` + Postgres *PostgresConfig `mapstructure:"postgres"` +} + type RPCBatchRequestConfig struct { BlocksPerRequest int `mapstructure:"blocksPerRequest"` BatchDelay int `mapstructure:"batchDelay"` diff --git a/internal/common/block.go b/internal/common/block.go index 4c9e8dc..eacf1f1 100644 --- a/internal/common/block.go +++ b/internal/common/block.go @@ -59,10 +59,11 @@ type BlockModel struct { } type BlockData struct { - Block Block - Transactions []Transaction - Logs []Log - Traces []Trace + ChainId uint64 `json:"chain_id"` + Block Block `json:"block"` + Transactions []Transaction `json:"transactions"` + Logs []Log `json:"logs"` + Traces []Trace `json:"traces"` } type BlockHeader struct { diff --git a/internal/orchestrator/committer_test.go b/internal/orchestrator/committer_test.go index 0c39ba4..c6d5906 100644 --- 
a/internal/orchestrator/committer_test.go +++ b/internal/orchestrator/committer_test.go @@ -426,6 +426,7 @@ func TestHandleGap(t *testing.T) { mockRPC.EXPECT().GetBlocksPerRequest().Return(rpc.BlocksPerRequestConfig{ Blocks: 5, }) + mockRPC.EXPECT().GetChainID().Return(big.NewInt(1)) mockRPC.EXPECT().GetFullBlocks(context.Background(), []*big.Int{big.NewInt(100), big.NewInt(101), big.NewInt(102), big.NewInt(103), big.NewInt(104)}).Return([]rpc.GetFullBlockResult{ {BlockNumber: big.NewInt(100), Data: common.BlockData{Block: common.Block{Number: big.NewInt(100)}}}, {BlockNumber: big.NewInt(101), Data: common.BlockData{Block: common.Block{Number: big.NewInt(101)}}}, diff --git a/internal/orchestrator/failure_recoverer.go b/internal/orchestrator/failure_recoverer.go index da1ae91..a097034 100644 --- a/internal/orchestrator/failure_recoverer.go +++ b/internal/orchestrator/failure_recoverer.go @@ -110,6 +110,7 @@ func (fr *FailureRecoverer) handleWorkerResults(blockFailures []common.BlockFail }) } else { successfulResults = append(successfulResults, common.BlockData{ + ChainId: fr.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index a1cca21..5e3b313 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -262,6 +262,7 @@ func (p *Poller) convertPollResultsToBlockData(results []rpc.GetFullBlockResult) blockData := make([]common.BlockData, 0, len(successfulResults)) for _, result := range successfulResults { blockData = append(blockData, common.BlockData{ + ChainId: p.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff --git a/internal/orchestrator/reorg_handler.go b/internal/orchestrator/reorg_handler.go index 2de8b95..889801c 100644 --- a/internal/orchestrator/reorg_handler.go +++ b/internal/orchestrator/reorg_handler.go @@ -274,6 +274,7 @@ func (rh *ReorgHandler) handleReorg(ctx context.Context, reorgedBlockNumbers []* return fmt.Errorf("cannot fix reorg: failed block %s: %w", result.BlockNumber.String(), result.Error) } data = append(data, common.BlockData{ + ChainId: rh.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go index c61256b..517a201 100644 --- a/internal/storage/clickhouse.go +++ b/internal/storage/clickhouse.go @@ -1959,6 +1959,7 @@ func (c *ClickHouseConnector) GetValidationBlockData(chainId *big.Int, startBloc for i, block := range blocksResult.blocks { blockNum := block.Number.String() blockData[i] = common.BlockData{ + ChainId: chainId.Uint64(), Block: block, Logs: logsResult.logMap[blockNum], Transactions: txsResult.txMap[blockNum], @@ -2138,6 +2139,7 @@ func (c *ClickHouseConnector) GetFullBlockData(chainId *big.Int, blockNumbers [] for i, block := range blocksResult.blocks { blockNum := block.Number.String() blockData[i] = common.BlockData{ + ChainId: chainId.Uint64(), Block: block, Logs: logsResult.logMap[blockNum], Transactions: txsResult.txMap[blockNum], diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 1253213..e45db44 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -152,6 +152,8 @@ func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { conn, err = NewPostgresConnector(cfg.Postgres) } else if 
cfg.Clickhouse != nil { conn, err = NewClickHouseConnector(cfg.Clickhouse) + } else if cfg.Kafka != nil { + conn, err = NewKafkaPostgresConnector(cfg.Kafka) } else { return *new(T), fmt.Errorf("no storage driver configured") } diff --git a/internal/storage/kafka_postgres.go b/internal/storage/kafka_postgres.go new file mode 100644 index 0000000..74b8714 --- /dev/null +++ b/internal/storage/kafka_postgres.go @@ -0,0 +1,620 @@ +package storage + +import ( + "database/sql" + "encoding/json" + "fmt" + "math/big" + "strings" + "time" + + _ "github.com/lib/pq" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" +) + +// KafkaPostgresConnector uses PostgreSQL for metadata storage and Kafka for block data delivery +type KafkaPostgresConnector struct { + db *sql.DB + cfg *config.KafkaConfig + kafkaPublisher *KafkaPublisher +} + +func NewKafkaPostgresConnector(cfg *config.KafkaConfig) (*KafkaPostgresConnector, error) { + // Connect to PostgreSQL + connStr := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s", + cfg.Postgres.Host, cfg.Postgres.Port, cfg.Postgres.Username, cfg.Postgres.Password, cfg.Postgres.Database) + + // Default to "require" for security if SSL mode not specified + sslMode := cfg.Postgres.SSLMode + if sslMode == "" { + sslMode = "require" + log.Info().Msg("No SSL mode specified, defaulting to 'require' for secure connection") + } + connStr += fmt.Sprintf(" sslmode=%s", sslMode) + + if cfg.Postgres.ConnectTimeout > 0 { + connStr += fmt.Sprintf(" connect_timeout=%d", cfg.Postgres.ConnectTimeout) + } + + db, err := sql.Open("postgres", connStr) + if err != nil { + return nil, fmt.Errorf("failed to connect to postgres: %w", err) + } + + db.SetMaxOpenConns(cfg.Postgres.MaxOpenConns) + db.SetMaxIdleConns(cfg.Postgres.MaxIdleConns) + + if cfg.Postgres.MaxConnLifetime > 0 { + db.SetConnMaxLifetime(time.Duration(cfg.Postgres.MaxConnLifetime) * time.Second) + } + + if err := db.Ping(); err != nil { + return nil, fmt.Errorf("failed to ping postgres: %w", err) + } + + // Initialize Kafka publisher if enabled + var kafkaPublisher *KafkaPublisher + if cfg.Enabled && cfg.Brokers != "" { + kafkaPublisher, err = NewKafkaPublisher(cfg) + if err != nil { + log.Warn().Err(err).Msg("Failed to initialize Kafka publisher, continuing without publishing") + kafkaPublisher = nil + } + } + + return &KafkaPostgresConnector{ + db: db, + cfg: cfg, + kafkaPublisher: kafkaPublisher, + }, nil +} + +// Orchestrator Storage Implementation (PostgreSQL) + +func (kp *KafkaPostgresConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) { + query := `SELECT chain_id, block_number, last_error_timestamp, failure_count, reason + FROM block_failures WHERE 1=1` + + args := []interface{}{} + argCount := 0 + + if qf.ChainId != nil && qf.ChainId.Sign() > 0 { + argCount++ + query += fmt.Sprintf(" AND chain_id = $%d", argCount) + args = append(args, qf.ChainId.String()) + } + + if len(qf.BlockNumbers) > 0 { + placeholders := make([]string, len(qf.BlockNumbers)) + for i, bn := range qf.BlockNumbers { + argCount++ + placeholders[i] = fmt.Sprintf("$%d", argCount) + args = append(args, bn.String()) + } + query += fmt.Sprintf(" AND block_number IN (%s)", strings.Join(placeholders, ",")) + } + + if qf.SortBy != "" { + query += fmt.Sprintf(" ORDER BY %s", qf.SortBy) + if qf.SortOrder != "" { + query += " " + qf.SortOrder + } + } else { + query += " ORDER BY block_number DESC" + } + + if qf.Limit > 0 { + argCount++ + query += 
fmt.Sprintf(" LIMIT $%d", argCount) + args = append(args, qf.Limit) + } + + if qf.Offset > 0 { + argCount++ + query += fmt.Sprintf(" OFFSET $%d", argCount) + args = append(args, qf.Offset) + } + + rows, err := kp.db.Query(query, args...) + if err != nil { + return nil, err + } + defer func() { + if err := rows.Close(); err != nil { + log.Error().Err(err).Msg("Failed to close rows in GetBlockFailures") + } + }() + + var failures []common.BlockFailure + for rows.Next() { + var failure common.BlockFailure + var chainIdStr, blockNumberStr string + var timestamp int64 + var count int + + err := rows.Scan(&chainIdStr, &blockNumberStr, ×tamp, &count, &failure.FailureReason) + if err != nil { + return nil, fmt.Errorf("error scanning block failure: %w", err) + } + + var ok bool + failure.ChainId, ok = new(big.Int).SetString(chainIdStr, 10) + if !ok { + return nil, fmt.Errorf("failed to parse chain_id '%s' as big.Int", chainIdStr) + } + + failure.BlockNumber, ok = new(big.Int).SetString(blockNumberStr, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block_number '%s' as big.Int", blockNumberStr) + } + + failure.FailureTime = time.Unix(timestamp, 0) + failure.FailureCount = count + + failures = append(failures, failure) + } + + return failures, rows.Err() +} + +func (kp *KafkaPostgresConnector) StoreBlockFailures(failures []common.BlockFailure) error { + if len(failures) == 0 { + return nil + } + + valueStrings := make([]string, 0, len(failures)) + valueArgs := make([]interface{}, 0, len(failures)*5) + + for i, failure := range failures { + valueStrings = append(valueStrings, fmt.Sprintf("($%d, $%d, $%d, $%d, $%d)", + i*5+1, i*5+2, i*5+3, i*5+4, i*5+5)) + valueArgs = append(valueArgs, + failure.ChainId.String(), + failure.BlockNumber.String(), + failure.FailureTime.Unix(), + failure.FailureCount, + failure.FailureReason, + ) + } + + query := fmt.Sprintf(`INSERT INTO block_failures (chain_id, block_number, last_error_timestamp, failure_count, reason) + VALUES %s + ON CONFLICT (chain_id, block_number) + DO UPDATE SET + last_error_timestamp = EXCLUDED.last_error_timestamp, + failure_count = EXCLUDED.failure_count, + reason = EXCLUDED.reason, + updated_at = NOW()`, strings.Join(valueStrings, ",")) + + _, err := kp.db.Exec(query, valueArgs...) + return err +} + +func (kp *KafkaPostgresConnector) DeleteBlockFailures(failures []common.BlockFailure) error { + if len(failures) == 0 { + return nil + } + + tuples := make([]string, 0, len(failures)) + args := make([]interface{}, 0, len(failures)*2) + + for i, failure := range failures { + tuples = append(tuples, fmt.Sprintf("($%d, $%d)", i*2+1, i*2+2)) + args = append(args, failure.ChainId.String(), failure.BlockNumber.String()) + } + + query := fmt.Sprintf(`DELETE FROM block_failures + WHERE ctid IN ( + SELECT ctid + FROM block_failures + WHERE (chain_id, block_number) IN (%s) + FOR UPDATE SKIP LOCKED + )`, strings.Join(tuples, ",")) + + _, err := kp.db.Exec(query, args...) 
+ return err +} + +func (kp *KafkaPostgresConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { + query := `SELECT cursor_value FROM cursors + WHERE cursor_type = 'reorg' AND chain_id = $1` + + var blockNumberString string + err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + + return blockNumber, nil +} + +func (kp *KafkaPostgresConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) + VALUES ($1, 'reorg', $2) + ON CONFLICT (chain_id, cursor_type) + DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` + + _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) + return err +} + +// Staging Storage Implementation (PostgreSQL) + +func (kp *KafkaPostgresConnector) InsertStagingData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + valueStrings := make([]string, 0, len(data)) + valueArgs := make([]interface{}, 0, len(data)*3) + + for i, blockData := range data { + blockDataJSON, err := json.Marshal(blockData) + if err != nil { + return err + } + + valueStrings = append(valueStrings, fmt.Sprintf("($%d, $%d, $%d)", + i*3+1, i*3+2, i*3+3)) + valueArgs = append(valueArgs, + blockData.Block.ChainId.String(), + blockData.Block.Number.String(), + string(blockDataJSON), + ) + } + + query := fmt.Sprintf(`INSERT INTO block_data (chain_id, block_number, data) + VALUES %s + ON CONFLICT (chain_id, block_number) + DO UPDATE SET data = EXCLUDED.data, updated_at = NOW()`, strings.Join(valueStrings, ",")) + + _, err := kp.db.Exec(query, valueArgs...) + return err +} + +func (kp *KafkaPostgresConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { + query := `SELECT data FROM block_data WHERE 1=1` + + args := []interface{}{} + argCount := 0 + + if qf.ChainId != nil && qf.ChainId.Sign() > 0 { + argCount++ + query += fmt.Sprintf(" AND chain_id = $%d", argCount) + args = append(args, qf.ChainId.String()) + } + + if len(qf.BlockNumbers) > 0 { + placeholders := make([]string, len(qf.BlockNumbers)) + for i, bn := range qf.BlockNumbers { + argCount++ + placeholders[i] = fmt.Sprintf("$%d", argCount) + args = append(args, bn.String()) + } + query += fmt.Sprintf(" AND block_number IN (%s)", strings.Join(placeholders, ",")) + } else if qf.StartBlock != nil && qf.EndBlock != nil { + argCount++ + query += fmt.Sprintf(" AND block_number BETWEEN $%d AND $%d", argCount, argCount+1) + args = append(args, qf.StartBlock.String(), qf.EndBlock.String()) + argCount++ + } + + query += " ORDER BY block_number ASC" + + if qf.Limit > 0 { + argCount++ + query += fmt.Sprintf(" LIMIT $%d", argCount) + args = append(args, qf.Limit) + } + + rows, err := kp.db.Query(query, args...) 
+ if err != nil { + return nil, err + } + defer func() { + if err := rows.Close(); err != nil { + log.Error().Err(err).Msg("Failed to close rows in GetStagingData") + } + }() + + blockDataList := make([]common.BlockData, 0) + for rows.Next() { + var blockDataJson string + if err := rows.Scan(&blockDataJson); err != nil { + return nil, fmt.Errorf("error scanning block data: %w", err) + } + + var blockData common.BlockData + if err := json.Unmarshal([]byte(blockDataJson), &blockData); err != nil { + return nil, err + } + + blockDataList = append(blockDataList, blockData) + } + + return blockDataList, rows.Err() +} + +func (kp *KafkaPostgresConnector) DeleteStagingData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + tuples := make([]string, 0, len(data)) + args := make([]interface{}, 0, len(data)*2) + + for i, blockData := range data { + tuples = append(tuples, fmt.Sprintf("($%d, $%d)", i*2+1, i*2+2)) + args = append(args, blockData.Block.ChainId.String(), blockData.Block.Number.String()) + } + + query := fmt.Sprintf(`DELETE FROM block_data + WHERE ctid IN ( + SELECT ctid + FROM block_data + WHERE (chain_id, block_number) IN (%s) + FOR UPDATE SKIP LOCKED + )`, strings.Join(tuples, ",")) + + _, err := kp.db.Exec(query, args...) + return err +} + +func (kp *KafkaPostgresConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'publish' AND chain_id = $1` + + var blockNumberString string + err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + return blockNumber, nil +} + +func (kp *KafkaPostgresConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) + VALUES ($1, 'publish', $2) + ON CONFLICT (chain_id, cursor_type) + DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` + + _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) + return err +} + +func (kp *KafkaPostgresConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { + query := `SELECT MAX(block_number) FROM block_data WHERE 1=1` + + args := []interface{}{} + argCount := 0 + + if chainId != nil && chainId.Sign() > 0 { + argCount++ + query += fmt.Sprintf(" AND chain_id = $%d", argCount) + args = append(args, chainId.String()) + } + + if rangeStart != nil && rangeStart.Sign() > 0 { + argCount++ + query += fmt.Sprintf(" AND block_number >= $%d", argCount) + args = append(args, rangeStart.String()) + } + + if rangeEnd != nil && rangeEnd.Sign() > 0 { + argCount++ + query += fmt.Sprintf(" AND block_number <= $%d", argCount) + args = append(args, rangeEnd.String()) + } + + var blockNumberStr sql.NullString + err := kp.db.QueryRow(query, args...).Scan(&blockNumberStr) + if err != nil { + return nil, err + } + + if !blockNumberStr.Valid { + return big.NewInt(0), nil + } + + blockNumber, ok := new(big.Int).SetString(blockNumberStr.String, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberStr.String) + } + + return blockNumber, nil +} + +func (kp *KafkaPostgresConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { + 
query := `DELETE FROM block_data + WHERE ctid IN ( + SELECT ctid + FROM block_data + WHERE chain_id = $1 + AND block_number <= $2 + FOR UPDATE SKIP LOCKED + )` + _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) + return err +} + +// InsertBlockData publishes block data to Kafka instead of storing in database +func (kp *KafkaPostgresConnector) InsertBlockData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + // Publish to Kafka + if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { + return fmt.Errorf("failed to publish block data to kafka: %w", err) + } + log.Debug(). + Int("blocks", len(data)). + Msg("Published block data to Kafka") + + // Update cursor to track the highest block number published + if len(data) > 0 { + // Find the highest block number in the batch + var maxBlock *big.Int + for _, blockData := range data { + if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = blockData.Block.Number + } + } + if maxBlock != nil { + chainId := data[0].Block.ChainId + blockNumber := maxBlock + query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) + VALUES ($1, 'commit', $2) + ON CONFLICT (chain_id, cursor_type) + DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` + if _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()); err != nil { + return err + } + } + } + + return nil +} + +// ReplaceBlockData handles reorg by publishing both old and new data to Kafka +func (kp *KafkaPostgresConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { + if len(data) == 0 { + return nil, nil + } + + oldBlocks := []common.BlockData{} + + // Publish reorg event to Kafka + if kp.kafkaPublisher != nil { + // Publish new blocks (the reorg handler will mark old ones as reverted) + if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { + return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) + } + } + + // Update cursor to track the highest block number + if len(data) > 0 { + var maxBlock *big.Int + for _, blockData := range data { + if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = blockData.Block.Number + } + } + if maxBlock != nil { + if err := kp.SetLastPublishedBlockNumber(data[0].Block.ChainId, maxBlock); err != nil { + return nil, fmt.Errorf("failed to update published block cursor: %w", err) + } + } + } + + return oldBlocks, nil +} + +func (kp *KafkaPostgresConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { + query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'commit' AND chain_id = $1` + + var blockNumberString string + err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + return blockNumber, nil +} + +func (kp *KafkaPostgresConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + // Get the last published block number + lastPublished, err := kp.GetLastPublishedBlockNumber(chainId) + if err != nil { + return nil, err + } + + // Check if it's within the range + if lastPublished.Cmp(startBlock) >= 0 && lastPublished.Cmp(endBlock) <= 0 { + return lastPublished, nil + } + + // If outside range, return appropriate boundary + if 
lastPublished.Cmp(endBlock) > 0 { + return endBlock, nil + } + if lastPublished.Cmp(startBlock) < 0 { + return big.NewInt(0), nil + } + + return lastPublished, nil +} + +func (kp *KafkaPostgresConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { + return []common.BlockHeader{}, nil +} + +func (kp *KafkaPostgresConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { + return QueryResult[common.TokenBalance]{Data: []common.TokenBalance{}}, nil +} + +func (kp *KafkaPostgresConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { + return QueryResult[common.TokenTransfer]{Data: []common.TokenTransfer{}}, nil +} + +func (kp *KafkaPostgresConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { + return []common.BlockData{}, nil +} + +func (kp *KafkaPostgresConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { + return []*big.Int{}, nil +} + +func (kp *KafkaPostgresConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { + return []common.BlockData{}, nil +} + +// Query methods return empty results as this connector uses Kafka for data delivery +func (kp *KafkaPostgresConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { + return QueryResult[common.Block]{Data: []common.Block{}}, nil +} + +func (kp *KafkaPostgresConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { + return QueryResult[common.Transaction]{Data: []common.Transaction{}}, nil +} + +func (kp *KafkaPostgresConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { + return QueryResult[common.Log]{Data: []common.Log{}}, nil +} + +func (kp *KafkaPostgresConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { + return QueryResult[common.Trace]{Data: []common.Trace{}}, nil +} + +func (kp *KafkaPostgresConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { + return QueryResult[interface{}]{Aggregates: []map[string]interface{}{}}, nil +} + +// Close closes the database connection +func (kp *KafkaPostgresConnector) Close() error { + return kp.db.Close() +} diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go new file mode 100644 index 0000000..880ea57 --- /dev/null +++ b/internal/storage/kafka_publisher.go @@ -0,0 +1,183 @@ +package storage + +import ( + "context" + "crypto/tls" + "encoding/json" + "fmt" + "net" + "strings" + "sync" + "time" + + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" + "github.com/twmb/franz-go/pkg/kgo" + "github.com/twmb/franz-go/pkg/sasl/plain" +) + +type KafkaPublisher struct { + client *kgo.Client + mu sync.RWMutex +} + +type PublishableMessage[T common.BlockData] struct { + Data T `json:"data"` + Status string `json:"status"` +} + +// NewKafkaPublisher method for storage connector (public) +func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { + brokers := strings.Split(cfg.Brokers, ",") + opts := []kgo.Opt{ + kgo.SeedBrokers(brokers...), + kgo.AllowAutoTopicCreation(), + kgo.ProducerBatchCompression(kgo.SnappyCompression()), + 
kgo.ClientID(fmt.Sprintf("insight-indexer-kafka-storage-%s", config.Cfg.RPC.ChainID)), + kgo.MaxBufferedRecords(1_000_000), + kgo.ProducerBatchMaxBytes(16_000_000), + kgo.RecordPartitioner(kgo.UniformBytesPartitioner(1_000_000, false, false, nil)), + kgo.MetadataMaxAge(60 * time.Second), + kgo.DialTimeout(10 * time.Second), + } + + if cfg.Username != "" && cfg.Password != "" { + opts = append(opts, kgo.SASL(plain.Auth{ + User: cfg.Username, + Pass: cfg.Password, + }.AsMechanism())) + tlsDialer := &tls.Dialer{NetDialer: &net.Dialer{Timeout: 10 * time.Second}} + opts = append(opts, kgo.Dialer(tlsDialer.DialContext)) + } + + client, err := kgo.NewClient(opts...) + if err != nil { + return nil, fmt.Errorf("failed to create Kafka client: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := client.Ping(ctx); err != nil { + client.Close() + return nil, fmt.Errorf("failed to connect to Kafka: %v", err) + } + + publisher := &KafkaPublisher{ + client: client, + } + return publisher, nil +} + +func (p *KafkaPublisher) PublishBlockData(blockData []common.BlockData) error { + return p.publishBlockData(blockData, false) +} + +func (p *KafkaPublisher) PublishReorg(oldData []common.BlockData, newData []common.BlockData) error { + // TODO: need to revisit how reorg blocks get published to downstream + if err := p.publishBlockData(oldData, true); err != nil { + return fmt.Errorf("failed to publish old block data: %v", err) + } + + if err := p.publishBlockData(newData, false); err != nil { + return fmt.Errorf("failed to publish new block data: %v", err) + } + return nil +} + +func (p *KafkaPublisher) Close() error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.client != nil { + p.client.Close() + log.Debug().Msg("Publisher client closed") + } + return nil +} + +func (p *KafkaPublisher) publishMessages(ctx context.Context, messages []*kgo.Record) error { + if len(messages) == 0 { + return nil + } + + p.mu.RLock() + defer p.mu.RUnlock() + + if p.client == nil { + return nil // Skip if no client configured + } + + var wg sync.WaitGroup + wg.Add(len(messages)) + // Publish to all configured producers + for _, msg := range messages { + p.client.Produce(ctx, msg, func(_ *kgo.Record, err error) { + defer wg.Done() + if err != nil { + log.Error().Err(err).Msg("Failed to publish message to Kafka") + } + }) + } + wg.Wait() + + return nil +} + +func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg bool) error { + if p.client == nil || len(blockData) == 0 { + return nil + } + + publishStart := time.Now() + + // Prepare messages for blocks, events, transactions and traces + blockMessages := make([]*kgo.Record, len(blockData)) + + status := "new" + if isReorg { + status = "reverted" + } + + for i, data := range blockData { + // Block message + if blockMsg, err := p.createBlockDataMessage(data, status); err == nil { + blockMessages[i] = blockMsg + } else { + return fmt.Errorf("failed to create block message: %v", err) + } + } + + if err := p.publishMessages(context.Background(), blockMessages); err != nil { + return fmt.Errorf("failed to publish block messages: %v", err) + } + + log.Debug().Str("metric", "publish_duration").Msgf("Publisher.PublishBlockData duration: %f", time.Since(publishStart).Seconds()) + return nil +} + +func (p *KafkaPublisher) createBlockDataMessage(data common.BlockData, status string) (*kgo.Record, error) { + msg := PublishableMessage[common.BlockData]{ + Data: data, + Status: status, + } + msgJson, err 
:= json.Marshal(msg) + if err != nil { + return nil, fmt.Errorf("failed to marshal block data: %v", err) + } + return &kgo.Record{ + Topic: p.getTopicName("commit", data.ChainId), + Key: []byte(fmt.Sprintf("block-%s-%d-%s", status, data.ChainId, data.Block.Hash)), + Value: msgJson, + }, nil +} + +func (p *KafkaPublisher) getTopicName(entity string, chainId uint64) string { + switch entity { + case "commit": + return fmt.Sprintf("insight.commit.blocks.%d", chainId) + default: + panic(fmt.Errorf("unknown topic entity: %s", entity)) + } +} From cbbea0706f04c2253935300827822d1038e8523a Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 13 Aug 2025 16:03:52 +0000 Subject: [PATCH 02/43] Update config --- cmd/root.go | 3 --- configs/config.go | 1 - internal/storage/connector.go | 6 +++--- internal/storage/kafka_postgres.go | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 88b8428..b18d947 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -125,7 +125,6 @@ func init() { rootCmd.PersistentFlags().Int("storage-staging-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for staging storage") // Kafka storage flags - only for main storage (where blockchain data is committed) - rootCmd.PersistentFlags().Bool("storage-main-kafka-enabled", false, "Enable Kafka storage for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-brokers", "", "Kafka brokers for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-username", "", "Kafka username for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") @@ -253,8 +252,6 @@ func init() { viper.BindPFlag("storage.staging.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxIdleConns")) viper.BindPFlag("storage.staging.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxConnLifetime")) viper.BindPFlag("storage.staging.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-connectTimeout")) - // Bind Kafka storage flags - only for main storage - viper.BindPFlag("storage.main.kafka.enabled", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enabled")) viper.BindPFlag("storage.main.kafka.brokers", rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) diff --git a/configs/config.go b/configs/config.go index 10a824f..1aeb450 100644 --- a/configs/config.go +++ b/configs/config.go @@ -102,7 +102,6 @@ type PostgresConfig struct { } type KafkaConfig struct { - Enabled bool `mapstructure:"enabled"` Brokers string `mapstructure:"brokers"` Username string `mapstructure:"username"` Password string `mapstructure:"password"` diff --git a/internal/storage/connector.go b/internal/storage/connector.go index e45db44..9a90b16 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -148,12 +148,12 @@ func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { var conn interface{} var err error - if 
cfg.Postgres != nil { + if cfg.Kafka != nil { + conn, err = NewKafkaPostgresConnector(cfg.Kafka) + } else if cfg.Postgres != nil { conn, err = NewPostgresConnector(cfg.Postgres) } else if cfg.Clickhouse != nil { conn, err = NewClickHouseConnector(cfg.Clickhouse) - } else if cfg.Kafka != nil { - conn, err = NewKafkaPostgresConnector(cfg.Kafka) } else { return *new(T), fmt.Errorf("no storage driver configured") } diff --git a/internal/storage/kafka_postgres.go b/internal/storage/kafka_postgres.go index 74b8714..9621940 100644 --- a/internal/storage/kafka_postgres.go +++ b/internal/storage/kafka_postgres.go @@ -56,7 +56,7 @@ func NewKafkaPostgresConnector(cfg *config.KafkaConfig) (*KafkaPostgresConnector // Initialize Kafka publisher if enabled var kafkaPublisher *KafkaPublisher - if cfg.Enabled && cfg.Brokers != "" { + if cfg.Brokers != "" { kafkaPublisher, err = NewKafkaPublisher(cfg) if err != nil { log.Warn().Err(err).Msg("Failed to initialize Kafka publisher, continuing without publishing") From 4775d57abc3cfd1a11bce8d30b2a49c9735bda8f Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 13 Aug 2025 16:14:55 +0000 Subject: [PATCH 03/43] Error on uninitialize brokers --- internal/storage/kafka_postgres.go | 18 ++++++------------ internal/storage/kafka_publisher.go | 3 ++- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/internal/storage/kafka_postgres.go b/internal/storage/kafka_postgres.go index 9621940..23e7bfd 100644 --- a/internal/storage/kafka_postgres.go +++ b/internal/storage/kafka_postgres.go @@ -55,13 +55,9 @@ func NewKafkaPostgresConnector(cfg *config.KafkaConfig) (*KafkaPostgresConnector } // Initialize Kafka publisher if enabled - var kafkaPublisher *KafkaPublisher - if cfg.Brokers != "" { - kafkaPublisher, err = NewKafkaPublisher(cfg) - if err != nil { - log.Warn().Err(err).Msg("Failed to initialize Kafka publisher, continuing without publishing") - kafkaPublisher = nil - } + kafkaPublisher, err := NewKafkaPublisher(cfg) + if err != nil { + return nil, err } return &KafkaPostgresConnector{ @@ -502,11 +498,9 @@ func (kp *KafkaPostgresConnector) ReplaceBlockData(data []common.BlockData) ([]c oldBlocks := []common.BlockData{} // Publish reorg event to Kafka - if kp.kafkaPublisher != nil { - // Publish new blocks (the reorg handler will mark old ones as reverted) - if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { - return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) - } + // TODO: Publish new blocks (the reorg handler will mark old ones as reverted) + if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { + return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) } // Update cursor to track the highest block number diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index 880ea57..84aca54 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -126,7 +126,7 @@ func (p *KafkaPublisher) publishMessages(ctx context.Context, messages []*kgo.Re } func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg bool) error { - if p.client == nil || len(blockData) == 0 { + if len(blockData) == 0 { return nil } @@ -135,6 +135,7 @@ func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg // Prepare messages for blocks, events, transactions and traces blockMessages := make([]*kgo.Record, len(blockData)) + // TODO: handle reorg status := "new" if isReorg { 
status = "reverted" From 661b1507b8e2100bc4c55b699e958b0ab0e3f404 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 14 Aug 2025 08:23:05 +0000 Subject: [PATCH 04/43] Update queries --- ...> 0000_clickhouse_create_blocks_table.sql} | 8 +- ...clickhouse_create_block_failures_table.sql | 12 - ..._clickhouse_create_transactions_table.sql} | 46 ++-- .../0002_clickhouse_create_cursors_table.sql | 7 - ... => 0002_clickhouse_create_logs_table.sql} | 42 ++-- .../0003_clickhouse_create_staging_table.sql | 11 - ...> 0003_clickhouse_create_traces_table.sql} | 40 +++- ...4_clickhouse_create_insert_null_table.sql} | 12 +- ...0005_clickhouse_create_insert_data_mv.sql} | 16 +- .../0006_clickhouse_create_logs_transfer.sql | 62 +++++ ...007_clickhouse_create_logs_transfer_mv.sql | 145 ++++++++++++ .../0008_clickhouse_create_token_balance.sql | 44 ++++ ...009_clickhouse_create_token_balance_mv.sql | 157 +++++++++++++ ...09_clickhouse_create_token_balances_mv.sql | 117 ---------- ...0_clickhouse_create_token_transfers_mv.sql | 211 ------------------ 15 files changed, 522 insertions(+), 408 deletions(-) rename internal/tools/clickhouse/{0004_clickhouse_create_blocks_table.sql => 0000_clickhouse_create_blocks_table.sql} (90%) delete mode 100644 internal/tools/clickhouse/0001_clickhouse_create_block_failures_table.sql rename internal/tools/clickhouse/{0005_clickhouse_create_transactions_table.sql => 0001_clickhouse_create_transactions_table.sql} (70%) delete mode 100644 internal/tools/clickhouse/0002_clickhouse_create_cursors_table.sql rename internal/tools/clickhouse/{0006_clickhouse_create_logs_table.sql => 0002_clickhouse_create_logs_table.sql} (54%) delete mode 100644 internal/tools/clickhouse/0003_clickhouse_create_staging_table.sql rename internal/tools/clickhouse/{0007_clickhouse_create_traces_table.sql => 0003_clickhouse_create_traces_table.sql} (64%) rename internal/tools/clickhouse/{0000_clickhouse_create_insert_null_table.sql => 0004_clickhouse_create_insert_null_table.sql} (93%) rename internal/tools/clickhouse/{0008_clickhouse_create_insert_mvs.sql => 0005_clickhouse_create_insert_data_mv.sql} (87%) create mode 100644 internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql create mode 100644 internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql create mode 100644 internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql create mode 100644 internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql delete mode 100644 internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql delete mode 100644 internal/tools/clickhouse/0010_clickhouse_create_token_transfers_mv.sql diff --git a/internal/tools/clickhouse/0004_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql similarity index 90% rename from internal/tools/clickhouse/0004_clickhouse_create_blocks_table.sql rename to internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index 68bee0e..ada0c9d 100644 --- a/internal/tools/clickhouse/0004_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -21,10 +21,12 @@ CREATE TABLE IF NOT EXISTS blocks ( `gas_used` UInt256, `withdrawals_root` FixedString(66), `base_fee_per_gas` Nullable(UInt64), + `insert_timestamp` DateTime DEFAULT now(), `sign` Int8 DEFAULT 1, - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, - INDEX idx_hash hash TYPE bloom_filter GRANULARITY 3, + + INDEX 
idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number) -PARTITION BY chain_id; \ No newline at end of file +PARTITION BY toYYYYMM(block_timestamp); \ No newline at end of file diff --git a/internal/tools/clickhouse/0001_clickhouse_create_block_failures_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_block_failures_table.sql deleted file mode 100644 index 669842d..0000000 --- a/internal/tools/clickhouse/0001_clickhouse_create_block_failures_table.sql +++ /dev/null @@ -1,12 +0,0 @@ -CREATE TABLE IF NOT EXISTS block_failures ( - `chain_id` UInt256, - `block_number` UInt256, - `last_error_timestamp` UInt64 CODEC(Delta, ZSTD), - `count` UInt16, - `reason` String, - `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - INDEX idx_block_number block_number TYPE minmax GRANULARITY 1, -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, block_number) -SETTINGS allow_experimental_replacing_merge_with_cleanup = 1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0005_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql similarity index 70% rename from internal/tools/clickhouse/0005_clickhouse_create_transactions_table.sql rename to internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql index 48f1e0d..c8a31cc 100644 --- a/internal/tools/clickhouse/0005_clickhouse_create_transactions_table.sql +++ b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql @@ -31,26 +31,46 @@ CREATE TABLE IF NOT EXISTS transactions ( `blob_gas_price` Nullable(UInt256), `logs_bloom` Nullable(String), `status` Nullable(UInt64), + `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_hash hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 1, - INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 1, - INDEX idx_function_selector function_selector TYPE bloom_filter GRANULARITY 1, - PROJECTION txs_chainid_from_address + INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_function_selector function_selector TYPE bloom_filter GRANULARITY 2, + + PROJECTION from_address_projection ( - SELECT * + SELECT + chain_id, + block_number, + block_timestamp, + hash, + from_address, + to_address, + value, + data ORDER BY chain_id, from_address, - block_number + block_number, + hash ), - PROJECTION txs_chainid_to_address + PROJECTION to_address_projection ( - SELECT * - ORDER BY + SELECT + chain_id, + block_number, + block_timestamp, + hash, + from_address, + to_address, + value, + data + ORDER BY chain_id, to_address, block_number, @@ -58,5 +78,5 @@ CREATE TABLE IF NOT EXISTS transactions ( ) ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number, hash) -PARTITION BY chain_id -SETTINGS deduplicate_merge_projection_mode = 'drop', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file +PARTITION BY 
toYYYYMM(block_timestamp) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0002_clickhouse_create_cursors_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_cursors_table.sql deleted file mode 100644 index 6574a3b..0000000 --- a/internal/tools/clickhouse/0002_clickhouse_create_cursors_table.sql +++ /dev/null @@ -1,7 +0,0 @@ -CREATE TABLE IF NOT EXISTS cursors ( - `chain_id` UInt256, - `cursor_type` String, - `cursor_value` String, - `insert_timestamp` DateTime DEFAULT now(), -) ENGINE = ReplacingMergeTree(insert_timestamp) -ORDER BY (chain_id, cursor_type); diff --git a/internal/tools/clickhouse/0006_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql similarity index 54% rename from internal/tools/clickhouse/0006_clickhouse_create_logs_table.sql rename to internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql index b1d3db3..f93cb9d 100644 --- a/internal/tools/clickhouse/0006_clickhouse_create_logs_table.sql +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -12,28 +12,44 @@ CREATE TABLE IF NOT EXISTS logs ( `topic_1` String, `topic_2` String, `topic_3` String, - `insert_timestamp` DateTime DEFAULT now(), + `sign` Int8 DEFAULT 1, - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, + `insert_timestamp` DateTime DEFAULT now(), + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_address address TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic0 topic_0 TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic1 topic_1 TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic2 topic_2 TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic3 topic_3 TYPE bloom_filter GRANULARITY 1, - PROJECTION logs_chainid_topic0_address + INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 2, + INDEX idx_address address TYPE bloom_filter GRANULARITY 3, + INDEX idx_topic0 topic_0 TYPE bloom_filter GRANULARITY 3, + INDEX idx_topic1 topic_1 TYPE bloom_filter GRANULARITY 4, + INDEX idx_topic2 topic_2 TYPE bloom_filter GRANULARITY 4, + INDEX idx_topic3 topic_3 TYPE bloom_filter GRANULARITY 4, + + PROJECTION chain_address_topic0_projection ( - SELECT * + SELECT + * ORDER BY chain_id, - topic_0, address, + topic_0, block_number, transaction_index, log_index + ), + PROJECTION chain_topic0_projection + ( + SELECT + * + ORDER BY + chain_id, + topic_0, + block_number, + transaction_index, + log_index, + address ) ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number, transaction_hash, log_index) -PARTITION BY chain_id -SETTINGS deduplicate_merge_projection_mode = 'drop', lightweight_mutation_projection_mode = 'rebuild'; +PARTITION BY toYYYYMM(block_timestamp) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0003_clickhouse_create_staging_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_staging_table.sql deleted file mode 100644 index cd015ac..0000000 --- a/internal/tools/clickhouse/0003_clickhouse_create_staging_table.sql +++ /dev/null @@ -1,11 +0,0 @@ -CREATE TABLE IF NOT EXISTS block_data ( - `chain_id` UInt256, - `block_number` UInt256, - `data` String, - 
`insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - INDEX idx_block_number block_number TYPE minmax GRANULARITY 1, -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, block_number) -PARTITION BY chain_id -SETTINGS allow_experimental_replacing_merge_with_cleanup = 1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0007_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql similarity index 64% rename from internal/tools/clickhouse/0007_clickhouse_create_traces_table.sql rename to internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql index f3dc25d..b07ec88 100644 --- a/internal/tools/clickhouse/0007_clickhouse_create_traces_table.sql +++ b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql @@ -20,13 +20,39 @@ CREATE TABLE IF NOT EXISTS traces ( `author` Nullable(FixedString(42)), `reward_type` LowCardinality(Nullable(String)), `refund_address` Nullable(FixedString(42)), + `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, - INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 1, - INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 1, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 2, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 3, + + PROJECTION from_address_projection + ( + SELECT + * + ORDER BY + chain_id, + from_address, + block_number, + transaction_hash, + trace_address + ), + PROJECTION to_address_projection + ( + SELECT + * + ORDER BY + chain_id, + to_address, + block_number, + transaction_hash, + trace_address + ) + ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) -ORDER BY (chain_id, block_number, transaction_hash, trace_address) -PARTITION BY chain_id; \ No newline at end of file +ORDER BY (chain_id, transaction_hash, trace_address) +PARTITION BY toYYYYMM(block_timestamp) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0000_clickhouse_create_insert_null_table.sql b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql similarity index 93% rename from internal/tools/clickhouse/0000_clickhouse_create_insert_null_table.sql rename to internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql index 25fc90e..46f1541 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_insert_null_table.sql +++ b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql @@ -1,5 +1,6 @@ -CREATE TABLE IF NOT EXISTS inserts_null_table ( +CREATE TABLE IF NOT EXISTS insert_null_block_data ( chain_id UInt256, + block Tuple( block_number UInt256, block_timestamp DateTime, @@ -92,8 +93,7 @@ CREATE TABLE IF NOT EXISTS inserts_null_table ( reward_type LowCardinality(Nullable(String)), refund_address Nullable(FixedString(42)) )), - insert_timestamp DateTime DEFAULT now(), - sign Int8 DEFAULT 1 -) ENGINE = MergeTree -ORDER BY (chain_id, insert_timestamp) -PARTITION BY chain_id; + + sign Int8 DEFAULT 1, + insert_timestamp DateTime DEFAULT now() +) ENGINE = Null; diff --git 
a/internal/tools/clickhouse/0008_clickhouse_create_insert_mvs.sql b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql similarity index 87% rename from internal/tools/clickhouse/0008_clickhouse_create_insert_mvs.sql rename to internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql index 0ea2673..f7c7c46 100644 --- a/internal/tools/clickhouse/0008_clickhouse_create_insert_mvs.sql +++ b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_blocks_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_blocks_mv TO blocks AS SELECT @@ -26,9 +26,9 @@ SELECT block.21 AS base_fee_per_gas, insert_timestamp, sign -FROM inserts_null_table; +FROM insert_null_block_data; -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_transactions_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_transactions_mv TO transactions AS SELECT @@ -66,10 +66,10 @@ SELECT t.31 AS status, insert_timestamp, sign -FROM inserts_null_table +FROM insert_null_block_data ARRAY JOIN transactions AS t; -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_logs_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_logs_mv TO logs AS SELECT @@ -88,10 +88,10 @@ SELECT l.12 AS topic_3, insert_timestamp, sign -FROM inserts_null_table +FROM insert_null_block_data ARRAY JOIN logs AS l; -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_traces_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_traces_mv TO traces AS SELECT @@ -118,5 +118,5 @@ SELECT tr.20 AS refund_address, insert_timestamp, sign -FROM inserts_null_table +FROM insert_null_block_data ARRAY JOIN traces AS tr; diff --git a/internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql b/internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql new file mode 100644 index 0000000..2a985b0 --- /dev/null +++ b/internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql @@ -0,0 +1,62 @@ +CREATE TABLE IF NOT EXISTS logs_transfer +( + `chain_id` UInt256, + `token_type` LowCardinality(String), + `token_address` FixedString(42), + `token_id` UInt256, + `from_address` FixedString(42), + `to_address` FixedString(42), + `block_number` UInt256, + `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), + `transaction_hash` FixedString(66), + `transaction_index` UInt64, + `amount` UInt256, + `log_index` UInt64, + `batch_index` Nullable(UInt16) DEFAULT NULL, + + `sign` Int8 DEFAULT 1, + `insert_timestamp` DateTime DEFAULT now(), + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_token_address token_address TYPE bloom_filter GRANULARITY 2, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 4, + + PROJECTION from_address_projection ( + SELECT + * + ORDER BY + chain_id, + from_address, + block_number, + transaction_index, + log_index + ), + PROJECTION to_address_projection ( + SELECT + * + ORDER BY + chain_id, + to_address, + block_number, + transaction_index, + log_index + + ), + PROJECTION token_id_projection ( + SELECT + * + ORDER BY + chain_id, + token_address, + token_id, + block_number, + transaction_index, + log_index + ) +) +ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +PARTITION BY toYYYYMM(block_timestamp) +ORDER BY (chain_id, token_address, block_number, transaction_index, log_index) +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', 
deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql b/internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql new file mode 100644 index 0000000..9a9b6bc --- /dev/null +++ b/internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql @@ -0,0 +1,145 @@ +-- ERC20 +CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc20_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc20' AS token_type, + 0 AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' -- Transfer + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND topic_3 = '' + AND length(data) = 66; + +-- ERC721 +CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc721_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc721' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + toUInt8(1) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND length(topic_3) = 66 AND startsWith(topic_3, '0x') + AND length(data) = 2; + +-- ERC1155 (single) +CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc1155_single_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + toNullable(toUInt16(0)) AS batch_index, + sign, + insert_timestamp +FROM logs +WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' -- TransferSingle + AND length(topic_2) = 66 AND length(topic_3) = 66 + AND length(data) = (2 + 2*64); + +-- ERC1155 (batch) +CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc1155_batch_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(unhex(id_hex))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(amount_hex))) AS amount, + log_index, + toNullable(toUInt16(array_index - 1)) AS batch_index, + sign, + insert_timestamp +FROM ( + SELECT + chain_id, address, topic_2, topic_3, + block_number, block_timestamp, 
transaction_hash, transaction_index, log_index, sign, insert_timestamp, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64))))) AS ids_offset, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64))))) AS amounts_offset, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + ids_offset * 2, 64))))) AS ids_length, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + amounts_offset * 2, 64))))) AS amounts_length, + arrayMap(i -> substring(data, 3 + ids_offset * 2 + 64 + (i-1)*64, 64), range(1, least(ids_length, 10000) + 1)) AS ids_array, + arrayMap(i -> substring(data, 3 + amounts_offset * 2 + 64 + (i-1)*64, 64), range(1, least(amounts_length, 10000) + 1)) AS amounts_array + FROM logs + WHERE topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb' + AND length(topic_2) = 66 + AND length(topic_3) = 66 + AND ids_length = amounts_length +) +ARRAY JOIN + ids_array AS id_hex, + amounts_array AS amount_hex, + arrayEnumerate(ids_array) AS array_index; + +-- ERC6909 +CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc6909_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc6909' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM logs +WHERE topic_0 = '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' + AND length(topic_1) = 66 + AND length(topic_2) = 66 + AND length(data) == 2 + 128; \ No newline at end of file diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql new file mode 100644 index 0000000..e476123 --- /dev/null +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql @@ -0,0 +1,44 @@ +CREATE TABLE IF NOT EXISTS token_balance +( + `chain_id` UInt256, + `token_type` LowCardinality(String), + `token_address` FixedString(42), + `owner_address` FixedString(42), + `token_id` UInt256, + + `balance_state` AggregateFunction(sum, Int256), + `last_block_number_state` AggregateFunction(max, UInt256), + `last_block_timestamp_state` AggregateFunction(max, DateTime), + + INDEX idx_last_block_number (finalizeAggregation(last_block_number_state)) TYPE minmax GRANULARITY 1, + INDEX idx_last_block_timestamp (finalizeAggregation(last_block_timestamp_state)) TYPE minmax GRANULARITY 1, + + PROJECTION owner_balances_projection + ( + SELECT + chain_id, + owner_address, + token_address, + token_id, + sumMerge(balance_state) AS balance, + maxMerge(last_block_number_state) AS last_block_number, + maxMerge(last_block_timestamp_state) AS last_block_timestamp + GROUP BY chain_id, owner_address, token_address, token_id + ), + PROJECTION token_projection + ( + SELECT + chain_id, + token_address, + token_id, + owner_address, + balance_state, + last_block_number_state, + last_block_timestamp_state + ORDER BY chain_id, token_address, token_id, owner_address + ) +) +ENGINE = AggregatingMergeTree +PARTITION BY chain_id +ORDER BY (chain_id, owner_address, token_address, token_id) +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', 
deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql new file mode 100644 index 0000000..bb039fb --- /dev/null +++ b/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql @@ -0,0 +1,157 @@ +-- ERC20 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc20_mv +TO token_balance +AS +SELECT + chain_id, + token_type, + token_address, + owner_address, + token_id, + sumState(delta) AS balance_state, + maxState(block_number) AS last_block_number_state, + maxState(block_timestamp) AS last_block_timestamp_state +FROM +( + -- FROM side (negative) + SELECT + chain_id, + token_type, + token_address, + token_id, + from_address AS owner_address, + toInt256(amount) * (-1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc20' + UNION ALL + -- TO side (positive) + SELECT + chain_id, + token_type, + token_address, + token_id, + to_address AS owner_address, + toInt256(amount) * (+1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc20' +) +GROUP BY chain_id, token_type, token_address, owner_address, token_id; + +-- ERC721 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc721_mv +TO token_balance +AS +SELECT + chain_id, + token_type, + token_address, + owner_address, + token_id, + sumState(delta) AS balance_state, + maxState(block_number) AS last_block_number_state, + maxState(block_timestamp) AS last_block_timestamp_state +FROM +( + SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + toInt256(1) * (-1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc721' + UNION ALL + SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(1) * (+1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc721' +) +GROUP BY chain_id, token_type, token_address, owner_address, token_id; + +-- ERC1155 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc1155_mv +TO token_balance +AS +SELECT + chain_id, + token_type, + token_address, + owner_address, + token_id, + sumState(delta) AS balance_state, + maxState(block_number) AS last_block_number_state, + maxState(block_timestamp) AS last_block_timestamp_state +FROM +( + SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + toInt256(amount) * (-1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc1155' + UNION ALL + SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) * (+1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc1155' +) +GROUP BY chain_id, token_type, token_address, owner_address, token_id; + +-- ERC6909 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc6909_mv +TO token_balance +AS +SELECT + chain_id, + token_type, + token_address, + owner_address, + token_id, + sumState(delta) AS balance_state, + maxState(block_number) AS last_block_number_state, + maxState(block_timestamp) AS last_block_timestamp_state +FROM +( + SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + toInt256(amount) * (-1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer 
WHERE token_type = 'erc6909' + UNION ALL + SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) * (+1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc6909' +) +GROUP BY chain_id, token_type, token_address, owner_address, token_id; \ No newline at end of file diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql deleted file mode 100644 index c9e54cb..0000000 --- a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql +++ /dev/null @@ -1,117 +0,0 @@ -CREATE TABLE IF NOT EXISTS token_balances -( - `token_type` String, - `chain_id` UInt256, - `owner` FixedString(42), - `address` FixedString(42), - `token_id` UInt256, - `balance` Int256, - PROJECTION address_projection - ( - SELECT * - ORDER BY - token_type, - chain_id, - address, - token_id - ) -) -ENGINE = SummingMergeTree -ORDER BY (token_type, chain_id, owner, address, token_id) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; - -CREATE MATERIALIZED VIEW IF NOT EXISTS single_token_transfers_mv TO token_balances AS -SELECT chain_id, owner, address, token_type, token_id, sum(amount) as balance -FROM -( - SELECT - chain_id, - address, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' AND topic_3 = '') as is_erc20, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' AND topic_3 != '') as is_erc721, - (topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62') as is_erc1155, - if(is_erc1155, concat('0x', substring(topic_2, 27, 40)), concat('0x', substring(topic_1, 27, 40))) AS sender_address, -- ERC20 & ERC721 both have topic_1 as sender - if(is_erc1155, concat('0x', substring(topic_3, 27, 40)), concat('0x', substring(topic_2, 27, 40))) AS receiver_address, -- ERC20 & ERC721 both have topic_2 as receiver - multiIf(is_erc20, 'erc20', is_erc721, 'erc721', 'erc1155') as token_type, - multiIf( - is_erc1155, - reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))), - is_erc721, - reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))), - toUInt256(0) -- other - ) AS token_id, - multiIf( - is_erc20 AND length(data) = 66, - reinterpretAsInt256(reverse(unhex(substring(data, 3)))), - is_erc721, - toInt256(1), - is_erc1155, - if(length(data) = 130, reinterpretAsInt256(reverse(unhex(substring(data, 67, 64)))), toInt256(1)), - toInt256(0) -- unknown - ) AS transfer_amount, - (sign * transfer_amount) as amount - FROM logs - WHERE - topic_0 IN ( - '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', - '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' - ) -) -array join - [chain_id, chain_id] AS chain_id, - [sender_address, receiver_address] AS owner, - [-amount, amount] as amount, - [token_type, token_type] AS token_type, - [token_id, token_id] AS token_id, - [address, address] AS address -GROUP BY chain_id, owner, address, token_type, token_id; - -CREATE MATERIALIZED VIEW IF NOT EXISTS erc1155_batch_token_transfers_mv TO token_balances AS -SELECT chain_id, owner, address, token_type, token_id, sum(amount) as balance -FROM ( - WITH - metadata as ( - SELECT - *, - 3 + 2 * 64 as ids_length_idx, - ids_length_idx + 64 as ids_values_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, ids_length_idx, 64)))) AS ids_length, - 
ids_length_idx + 64 + (ids_length * 64) as amounts_length_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, amounts_length_idx, 64)))) AS amounts_length, - amounts_length_idx + 64 as amounts_values_idx - FROM logs - WHERE topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb' AND topic_2 != '' AND topic_3 != '' AND ids_length = amounts_length - ), - decoded AS ( - SELECT - *, - arrayMap( - x -> substring(data, ids_values_idx + (x - 1) * 64, 64), - range(1, ids_length + 1) - ) AS ids_hex, - arrayMap( - x -> substring(data, amounts_values_idx + (x - 1) * 64, 64), - range(1, amounts_length + 1) - ) AS amounts_hex - FROM metadata - ) - SELECT - chain_id, - address, - concat('0x', substring(topic_2, 27, 40)) AS sender_address, - concat('0x', substring(topic_3, 27, 40)) AS receiver_address, - 'erc1155' as token_type, - reinterpretAsUInt256(reverse(unhex(substring(hex_id, 1, 64)))) AS token_id, - reinterpretAsInt256(reverse(unhex(substring(hex_amount, 1, 64)))) AS transfer_amount, - (sign * transfer_amount) as amount - FROM decoded - ARRAY JOIN ids_hex AS hex_id, amounts_hex AS hex_amount -) -array join - [chain_id, chain_id] AS chain_id, - [sender_address, receiver_address] AS owner, - [-amount, amount] as amount, - [token_type, token_type] AS token_type, - [token_id, token_id] AS token_id, - [address, address] AS address -GROUP BY chain_id, owner, address, token_type, token_id; \ No newline at end of file diff --git a/internal/tools/clickhouse/0010_clickhouse_create_token_transfers_mv.sql b/internal/tools/clickhouse/0010_clickhouse_create_token_transfers_mv.sql deleted file mode 100644 index b87e35b..0000000 --- a/internal/tools/clickhouse/0010_clickhouse_create_token_transfers_mv.sql +++ /dev/null @@ -1,211 +0,0 @@ -CREATE TABLE IF NOT EXISTS token_transfers -( - `token_type` LowCardinality(String), - `chain_id` UInt256, - `token_address` FixedString(42), - `from_address` FixedString(42), - `to_address` FixedString(42), - `block_number` UInt256, - `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), - `transaction_hash` FixedString(66), - `token_id` UInt256, - `amount` UInt256, - `log_index` UInt64, - `sign` Int8 DEFAULT 1, - `insert_timestamp` DateTime DEFAULT now(), - - INDEX minmax_block_number block_number TYPE minmax GRANULARITY 16, - INDEX minmax_block_timestamp block_timestamp TYPE minmax GRANULARITY 16, - - PROJECTION from_address_projection - ( - SELECT * - ORDER BY - chain_id, - token_type, - from_address, - block_number, - log_index - ), - PROJECTION to_address_projection - ( - SELECT * - ORDER BY - chain_id, - token_type, - to_address, - block_number, - log_index - ), - PROJECTION transaction_hash_projection - ( - SELECT * - ORDER BY - chain_id, - token_type, - transaction_hash, - block_number, - log_index - ), - PROJECTION token_aggregation_projection - ( - SELECT - chain_id, - token_type, - max(block_number) AS max_block_number, - count() AS total_count - GROUP BY - chain_id, - token_type - ) -) -ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) -PARTITION BY chain_id -ORDER BY (chain_id, token_type, token_address, block_number, log_index) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; - -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_to_token_transfers TO token_transfers -( - `chain_id` UInt256, - `token_address` FixedString(42), - `from_address` String, - `to_address` String, - `token_type` String, - `block_number` UInt256, - `block_timestamp` DateTime, 
- `transaction_hash` FixedString(66), - `log_index` UInt64, - `sign` Int8, - `insert_timestamp` DateTime, - `token_id` UInt256, - `amount` UInt256 -) -AS WITH - transfer_logs AS - ( - SELECT - chain_id, - address AS token_address, - topic_0, - topic_1, - topic_2, - topic_3, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 = '') AS is_erc20, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 != '') AS is_erc721, - topic_0 IN ('0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AS is_erc1155, - multiIf(is_erc20, 'erc20', is_erc721, 'erc721', 'erc1155') AS token_type, - if(is_erc1155, concat('0x', substring(topic_2, 27, 40)), concat('0x', substring(topic_1, 27, 40))) AS from_address, - if(is_erc1155, concat('0x', substring(topic_3, 27, 40)), concat('0x', substring(topic_2, 27, 40))) AS to_address, - data, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp - FROM logs - WHERE topic_0 IN ('0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') - ), - batch_transfer_metadata AS - ( - SELECT - *, - 3 + (2 * 64) AS ids_length_idx, - ids_length_idx + 64 AS ids_values_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, ids_length_idx, 64)))) AS ids_length, - (ids_length_idx + 64) + (ids_length * 64) AS amounts_length_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, amounts_length_idx, 64)))) AS amounts_length, - amounts_length_idx + 64 AS amounts_values_idx - FROM transfer_logs - WHERE (topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AND (length(topic_1) = 66) AND (length(topic_2) = 66) AND (length(topic_3) = 66) AND (length(data) != (258 + ((ids_length + amounts_length) * 64))) AND (ids_length = amounts_length) - ), - batch_transfer_logs AS - ( - SELECT - *, - arrayMap(x -> substring(data, ids_values_idx + ((x - 1) * 64), 64), range(1, toInt32(ids_length) + 1)) AS ids_hex, - arrayMap(x -> substring(data, amounts_values_idx + ((x - 1) * 64), 64), range(1, toInt32(amounts_length) + 1)) AS amounts_hex - FROM batch_transfer_metadata - ) -SELECT - chain_id, - token_address, - from_address, - to_address, - token_type, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp, - multiIf(is_erc1155, reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))), is_erc721, reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))), toUInt256(0)) AS token_id, - multiIf(is_erc20 AND (length(data) = 66), reinterpretAsUInt256(reverse(unhex(substring(data, 3)))), is_erc721, toUInt256(1), is_erc1155, if(length(data) = 130, reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))), toUInt256(1)), toUInt256(0)) AS amount -FROM transfer_logs -WHERE topic_0 IN ('0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62') -UNION ALL -WITH - transfer_logs AS - ( - SELECT - chain_id, - address AS token_address, - topic_0, - topic_1, - topic_2, - topic_3, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 = '') AS is_erc20, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 != '') AS is_erc721, - 
topic_0 IN ('0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AS is_erc1155, - multiIf(is_erc20, 'erc20', is_erc721, 'erc721', 'erc1155') AS token_type, - if(is_erc1155, concat('0x', substring(topic_2, 27, 40)), concat('0x', substring(topic_1, 27, 40))) AS from_address, - if(is_erc1155, concat('0x', substring(topic_3, 27, 40)), concat('0x', substring(topic_2, 27, 40))) AS to_address, - data, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp - FROM logs - WHERE topic_0 IN ('0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') - ), - batch_transfer_metadata AS - ( - SELECT - *, - 3 + (2 * 64) AS ids_length_idx, - ids_length_idx + 64 AS ids_values_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, ids_length_idx, 64)))) AS ids_length, - (ids_length_idx + 64) + (ids_length * 64) AS amounts_length_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, amounts_length_idx, 64)))) AS amounts_length, - amounts_length_idx + 64 AS amounts_values_idx - FROM transfer_logs - WHERE (topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AND (length(topic_1) = 66) AND (length(topic_2) = 66) AND (length(topic_3) = 66) AND (length(data) != (258 + ((ids_length + amounts_length) * 64))) AND (ids_length = amounts_length) - ), - batch_transfer_logs AS - ( - SELECT - *, - arrayMap(x -> substring(data, ids_values_idx + ((x - 1) * 64), 64), range(1, toInt32(ids_length) + 1)) AS ids_hex, - arrayMap(x -> substring(data, amounts_values_idx + ((x - 1) * 64), 64), range(1, toInt32(amounts_length) + 1)) AS amounts_hex - FROM batch_transfer_metadata - ) -SELECT - chain_id, - token_address, - from_address, - to_address, - token_type, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp, - reinterpretAsUInt256(reverse(unhex(substring(hex_id, 1, 64)))) AS token_id, - reinterpretAsUInt256(reverse(unhex(substring(hex_amount, 1, 64)))) AS amount -FROM batch_transfer_logs -ARRAY JOIN - ids_hex AS hex_id, - amounts_hex AS hex_amount \ No newline at end of file From e152d5b9e82cca896214402760c3f71aa125e104 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 14 Aug 2025 08:28:00 +0000 Subject: [PATCH 05/43] Option to disable TLS for kafka --- cmd/root.go | 9 ++++++++- configs/config.go | 10 ++++++---- internal/publisher/publisher.go | 3 +++ internal/storage/kafka_publisher.go | 3 +++ 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index b18d947..8f798a6 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -124,10 +124,10 @@ func init() { rootCmd.PersistentFlags().Int("storage-staging-postgres-maxIdleConns", 25, "PostgreSQL max idle connections for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for staging storage") - // Kafka storage flags - only for main storage (where blockchain data is committed) rootCmd.PersistentFlags().String("storage-main-kafka-brokers", "", "Kafka brokers for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-username", 
"", "Kafka username for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") + rootCmd.PersistentFlags().Bool("storage-main-kafka-enable-tls", true, "Enable TLS for Kafka connection in main storage") rootCmd.PersistentFlags().String("storage-main-kafka-postgres-host", "", "PostgreSQL host for Kafka main storage bookkeeping") rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-port", 5432, "PostgreSQL port for Kafka main storage bookkeeping") rootCmd.PersistentFlags().String("storage-main-kafka-postgres-username", "", "PostgreSQL username for Kafka main storage bookkeeping") @@ -149,6 +149,9 @@ func init() { rootCmd.PersistentFlags().Bool("publisher-enabled", false, "Toggle publisher") rootCmd.PersistentFlags().String("publisher-mode", "default", "Publisher mode: default or parallel") rootCmd.PersistentFlags().String("publisher-brokers", "", "Kafka brokers") + rootCmd.PersistentFlags().String("publisher-username", "", "Kafka username for publisher") + rootCmd.PersistentFlags().String("publisher-password", "", "Kafka password for publisher") + rootCmd.PersistentFlags().Bool("publisher-enable-tls", true, "Enable TLS for Kafka connection in publisher") rootCmd.PersistentFlags().Bool("publisher-blocks-enabled", false, "Toggle block publisher") rootCmd.PersistentFlags().String("publisher-blocks-topicName", "", "Kafka topic name for blocks") rootCmd.PersistentFlags().Bool("publisher-transactions-enabled", false, "Toggle transaction publisher") @@ -255,6 +258,7 @@ func init() { viper.BindPFlag("storage.main.kafka.brokers", rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) + viper.BindPFlag("storage.main.kafka.enable_tls", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) viper.BindPFlag("storage.main.kafka.postgres.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-host")) viper.BindPFlag("storage.main.kafka.postgres.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-port")) viper.BindPFlag("storage.main.kafka.postgres.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-username")) @@ -276,6 +280,9 @@ func init() { viper.BindPFlag("publisher.enabled", rootCmd.PersistentFlags().Lookup("publisher-enabled")) viper.BindPFlag("publisher.mode", rootCmd.PersistentFlags().Lookup("publisher-mode")) viper.BindPFlag("publisher.brokers", rootCmd.PersistentFlags().Lookup("publisher-brokers")) + viper.BindPFlag("publisher.username", rootCmd.PersistentFlags().Lookup("publisher-username")) + viper.BindPFlag("publisher.password", rootCmd.PersistentFlags().Lookup("publisher-password")) + viper.BindPFlag("publisher.enable_tls", rootCmd.PersistentFlags().Lookup("publisher-enable-tls")) viper.BindPFlag("publisher.blocks.enabled", rootCmd.PersistentFlags().Lookup("publisher-blocks-enabled")) viper.BindPFlag("publisher.blocks.topicName", rootCmd.PersistentFlags().Lookup("publisher-blocks-topicName")) viper.BindPFlag("publisher.transactions.enabled", rootCmd.PersistentFlags().Lookup("publisher-transactions-enabled")) diff --git a/configs/config.go b/configs/config.go index 1aeb450..b9d036b 100644 --- a/configs/config.go +++ b/configs/config.go @@ -102,10 +102,11 @@ type PostgresConfig struct { } type KafkaConfig struct { - Brokers string 
`mapstructure:"brokers"` - Username string `mapstructure:"username"` - Password string `mapstructure:"password"` - Postgres *PostgresConfig `mapstructure:"postgres"` + Brokers string `mapstructure:"brokers"` + Username string `mapstructure:"username"` + Password string `mapstructure:"password"` + EnableTLS bool `mapstructure:"enable_tls"` + Postgres *PostgresConfig `mapstructure:"postgres"` } type RPCBatchRequestConfig struct { @@ -184,6 +185,7 @@ type PublisherConfig struct { Brokers string `mapstructure:"brokers"` Username string `mapstructure:"username"` Password string `mapstructure:"password"` + EnableTLS bool `mapstructure:"enable_tls"` Blocks BlockPublisherConfig `mapstructure:"blocks"` Transactions TransactionPublisherConfig `mapstructure:"transactions"` Traces TracePublisherConfig `mapstructure:"traces"` diff --git a/internal/publisher/publisher.go b/internal/publisher/publisher.go index 984115a..0f8a761 100644 --- a/internal/publisher/publisher.go +++ b/internal/publisher/publisher.go @@ -76,6 +76,9 @@ func (p *Publisher) initialize() error { User: config.Cfg.Publisher.Username, Pass: config.Cfg.Publisher.Password, }.AsMechanism())) + } + + if config.Cfg.Publisher.EnableTLS { tlsDialer := &tls.Dialer{NetDialer: &net.Dialer{Timeout: 10 * time.Second}} opts = append(opts, kgo.Dialer(tlsDialer.DialContext)) } diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index 84aca54..b0b82ca 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -47,6 +47,9 @@ func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { User: cfg.Username, Pass: cfg.Password, }.AsMechanism())) + } + + if cfg.EnableTLS { tlsDialer := &tls.Dialer{NetDialer: &net.Dialer{Timeout: 10 * time.Second}} opts = append(opts, kgo.Dialer(tlsDialer.DialContext)) } From e61fae708f4e5497ddbe0e7aaea372519d860fe2 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 14 Aug 2025 09:04:34 +0000 Subject: [PATCH 06/43] Add projection mode in blocks --- .../tools/clickhouse/0000_clickhouse_create_blocks_table.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index ada0c9d..4768462 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -29,4 +29,5 @@ CREATE TABLE IF NOT EXISTS blocks ( INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number) -PARTITION BY toYYYYMM(block_timestamp); \ No newline at end of file +PARTITION BY toYYYYMM(block_timestamp) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file From fc2ae64608837e6a4e80bb59da616fb56ace8bf4 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 14 Aug 2025 17:02:46 +0000 Subject: [PATCH 07/43] Fix publish parallel mode --- internal/orchestrator/committer.go | 60 +++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index d85213a..228812a 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -31,6 +31,7 @@ type Committer struct { 
lastPublishedBlock atomic.Uint64 publisher *publisher.Publisher workMode WorkMode + workModeMutex sync.RWMutex workModeChan chan WorkMode validator *Validator } @@ -101,7 +102,28 @@ func (c *Committer) Start(ctx context.Context) { // corrected by the worker loop. log.Error().Err(err).Msg("failed to get last published block number") } else if lastPublished != nil && lastPublished.Sign() > 0 { - c.lastPublishedBlock.Store(lastPublished.Uint64()) + // Always ensure publisher starts from at least the committed value + if latestCommittedBlockNumber != nil && latestCommittedBlockNumber.Sign() > 0 { + if lastPublished.Cmp(latestCommittedBlockNumber) < 0 { + gap := new(big.Int).Sub(latestCommittedBlockNumber, lastPublished) + log.Warn(). + Str("last_published", lastPublished.String()). + Str("latest_committed", latestCommittedBlockNumber.String()). + Str("gap", gap.String()). + Msg("Publisher is behind committed position, seeking forward to committed value") + + c.lastPublishedBlock.Store(latestCommittedBlockNumber.Uint64()) + if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, latestCommittedBlockNumber); err != nil { + log.Error().Err(err).Msg("Failed to update last published block number after seeking forward") + // Fall back to the stored value on error + c.lastPublishedBlock.Store(lastPublished.Uint64()) + } + } else { + c.lastPublishedBlock.Store(lastPublished.Uint64()) + } + } else { + c.lastPublishedBlock.Store(lastPublished.Uint64()) + } } else { c.lastPublishedBlock.Store(c.lastCommittedBlock.Load()) } @@ -143,13 +165,21 @@ func (c *Committer) runCommitLoop(ctx context.Context, interval time.Duration) { case <-ctx.Done(): return case workMode := <-c.workModeChan: - if workMode != c.workMode && workMode != "" { - log.Info().Msgf("Committer work mode changing from %s to %s", c.workMode, workMode) - c.workMode = workMode + if workMode != "" { + c.workModeMutex.Lock() + oldMode := c.workMode + if workMode != oldMode { + log.Info().Msgf("Committer work mode changing from %s to %s", oldMode, workMode) + c.workMode = workMode + } + c.workModeMutex.Unlock() } default: time.Sleep(interval) - if c.workMode == "" { + c.workModeMutex.RLock() + currentMode := c.workMode + c.workModeMutex.RUnlock() + if currentMode == "" { log.Debug().Msg("Committer work mode not set, skipping commit") continue } @@ -176,7 +206,10 @@ func (c *Committer) runPublishLoop(ctx context.Context, interval time.Duration) return default: time.Sleep(interval) - if c.workMode == "" { + c.workModeMutex.RLock() + currentMode := c.workMode + c.workModeMutex.RUnlock() + if currentMode == "" { log.Debug().Msg("Committer work mode not set, skipping publish") continue } @@ -297,7 +330,10 @@ func (c *Committer) getBlockNumbersToPublish(ctx context.Context) ([]*big.Int, e func (c *Committer) getBlockToCommitUntil(ctx context.Context, latestCommittedBlockNumber *big.Int) (*big.Int, error) { untilBlock := new(big.Int).Add(latestCommittedBlockNumber, big.NewInt(int64(c.blocksPerCommit))) - if c.workMode == WorkModeBackfill { + c.workModeMutex.RLock() + currentMode := c.workMode + c.workModeMutex.RUnlock() + if currentMode == WorkModeBackfill { return untilBlock, nil } else { // get latest block from RPC and if that's less than until block, return that @@ -314,7 +350,10 @@ func (c *Committer) getBlockToCommitUntil(ctx context.Context, latestCommittedBl } func (c *Committer) fetchBlockData(ctx context.Context, blockNumbers []*big.Int) ([]common.BlockData, error) { - if c.workMode == WorkModeBackfill { + 
c.workModeMutex.RLock() + currentMode := c.workMode + c.workModeMutex.RUnlock() + if currentMode == WorkModeBackfill { startTime := time.Now() blocksData, err := c.storage.StagingStorage.GetStagingData(storage.QueryFilter{BlockNumbers: blockNumbers, ChainId: c.rpc.GetChainID()}) log.Debug().Str("metric", "get_staging_data_duration").Msgf("StagingStorage.GetStagingData duration: %f", time.Since(startTime).Seconds()) @@ -489,7 +528,10 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big // record the first missed block number in prometheus metrics.MissedBlockNumbers.Set(float64(expectedStartBlockNumber.Int64())) - if c.workMode == WorkModeLive { + c.workModeMutex.RLock() + currentMode := c.workMode + c.workModeMutex.RUnlock() + if currentMode == WorkModeLive { log.Debug().Msgf("Skipping gap handling in live mode. Expected block %s, actual first block %s", expectedStartBlockNumber.String(), actualFirstBlock.Number.String()) return nil } From 191298b116d6ed9fac44b409aab6b81a7c0524c0 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 14 Aug 2025 17:03:53 +0000 Subject: [PATCH 08/43] Gofmt --- internal/orchestrator/committer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index 228812a..1316f0e 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -111,7 +111,7 @@ func (c *Committer) Start(ctx context.Context) { Str("latest_committed", latestCommittedBlockNumber.String()). Str("gap", gap.String()). Msg("Publisher is behind committed position, seeking forward to committed value") - + c.lastPublishedBlock.Store(latestCommittedBlockNumber.Uint64()) if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, latestCommittedBlockNumber); err != nil { log.Error().Err(err).Msg("Failed to update last published block number after seeking forward") From e45907a25f749ada01715ba19b755fe6c7656e27 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 15 Aug 2025 05:36:50 +0000 Subject: [PATCH 09/43] Update schema --- .../0000_clickhouse_create_blocks_table.sql | 2 +- ...1_clickhouse_create_transactions_table.sql | 2 +- .../0002_clickhouse_create_logs_table.sql | 2 +- .../0003_clickhouse_create_traces_table.sql | 2 +- ...006_clickhouse_create_token_transfers.sql} | 6 +- ..._clickhouse_create_token_transfers_mv.sql} | 20 +- .../0008_clickhouse_create_token_balance.sql | 2 +- ...009_clickhouse_create_token_balance_mv.sql | 32 +-- ...clickhouse_create_address_transactions.sql | 43 ++++ ...ckhouse_create_address_transactions_mv.sql | 42 ++++ ...12_clickhouse_create_address_transfers.sql | 24 +++ ...clickhouse_create_address_transfers_mv.sql | 22 ++ ...0000_clickhouse_backfill_logs_transfer.sql | 202 ++++++++++++++++++ 13 files changed, 366 insertions(+), 35 deletions(-) rename internal/tools/clickhouse/{0006_clickhouse_create_logs_transfer.sql => 0006_clickhouse_create_token_transfers.sql} (91%) rename internal/tools/clickhouse/{0007_clickhouse_create_logs_transfer_mv.sql => 0007_clickhouse_create_token_transfers_mv.sql} (91%) create mode 100644 internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql create mode 100644 internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql create mode 100644 internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql create mode 100644 
internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql create mode 100644 internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql diff --git a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index 4768462..b311f24 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -29,5 +29,5 @@ CREATE TABLE IF NOT EXISTS blocks ( INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number) -PARTITION BY toYYYYMM(block_timestamp) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql index c8a31cc..02a0294 100644 --- a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql +++ b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql @@ -78,5 +78,5 @@ CREATE TABLE IF NOT EXISTS transactions ( ) ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number, hash) -PARTITION BY toYYYYMM(block_timestamp) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql index f93cb9d..e327edb 100644 --- a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -51,5 +51,5 @@ CREATE TABLE IF NOT EXISTS logs ( ) ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number, transaction_hash, log_index) -PARTITION BY toYYYYMM(block_timestamp) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql index b07ec88..17a032b 100644 --- a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql +++ b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql @@ -54,5 +54,5 @@ CREATE TABLE IF NOT EXISTS traces ( ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, transaction_hash, trace_address) -PARTITION BY toYYYYMM(block_timestamp) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql similarity index 91% rename from internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql rename to internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql index 2a985b0..0d6ef92 100644 --- a/internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql +++ 
b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql @@ -1,4 +1,4 @@ -CREATE TABLE IF NOT EXISTS logs_transfer +CREATE TABLE IF NOT EXISTS token_transfers ( `chain_id` UInt256, `token_type` LowCardinality(String), @@ -18,7 +18,6 @@ CREATE TABLE IF NOT EXISTS logs_transfer `insert_timestamp` DateTime DEFAULT now(), INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_token_address token_address TYPE bloom_filter GRANULARITY 2, INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 3, INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 4, @@ -42,7 +41,6 @@ CREATE TABLE IF NOT EXISTS logs_transfer block_number, transaction_index, log_index - ), PROJECTION token_id_projection ( SELECT @@ -57,6 +55,6 @@ CREATE TABLE IF NOT EXISTS logs_transfer ) ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) -PARTITION BY toYYYYMM(block_timestamp) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) ORDER BY (chain_id, token_address, block_number, transaction_index, log_index) SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql similarity index 91% rename from internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql rename to internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql index 9a9b6bc..e03b1a0 100644 --- a/internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql +++ b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql @@ -1,6 +1,6 @@ -- ERC20 -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc20_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc20_mv +TO token_transfers AS SELECT chain_id, @@ -26,8 +26,8 @@ WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b AND length(data) = 66; -- ERC721 -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc721_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc721_mv +TO token_transfers AS SELECT chain_id, @@ -53,8 +53,8 @@ WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b AND length(data) = 2; -- ERC1155 (single) -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc1155_single_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc1155_single_mv +TO token_transfers AS SELECT chain_id, @@ -78,8 +78,8 @@ WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0 AND length(data) = (2 + 2*64); -- ERC1155 (batch) -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc1155_batch_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc1155_batch_mv +TO token_transfers AS SELECT chain_id, @@ -119,8 +119,8 @@ ARRAY JOIN arrayEnumerate(ids_array) AS array_index; -- ERC6909 -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc6909_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc6909_mv +TO token_transfers AS SELECT chain_id, diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql index e476123..0cf38c9 100644 --- 
a/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql @@ -1,4 +1,4 @@ -CREATE TABLE IF NOT EXISTS token_balance +CREATE TABLE IF NOT EXISTS token_balances ( `chain_id` UInt256, `token_type` LowCardinality(String), diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql index bb039fb..be000df 100644 --- a/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql +++ b/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql @@ -1,6 +1,6 @@ -- ERC20 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc20_mv -TO token_balance +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_mv +TO token_balances AS SELECT chain_id, @@ -23,7 +23,7 @@ FROM toInt256(amount) * (-1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc20' + FROM token_transfers WHERE token_type = 'erc20' UNION ALL -- TO side (positive) SELECT @@ -35,13 +35,13 @@ FROM toInt256(amount) * (+1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc20' + FROM token_transfers WHERE token_type = 'erc20' ) GROUP BY chain_id, token_type, token_address, owner_address, token_id; -- ERC721 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc721_mv -TO token_balance +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_mv +TO token_balances AS SELECT chain_id, @@ -63,7 +63,7 @@ FROM toInt256(1) * (-1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc721' + FROM token_transfers WHERE token_type = 'erc721' UNION ALL SELECT chain_id, @@ -74,13 +74,13 @@ FROM toInt256(1) * (+1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc721' + FROM token_transfers WHERE token_type = 'erc721' ) GROUP BY chain_id, token_type, token_address, owner_address, token_id; -- ERC1155 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc1155_mv -TO token_balance +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_mv +TO token_balances AS SELECT chain_id, @@ -102,7 +102,7 @@ FROM toInt256(amount) * (-1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc1155' + FROM token_transfers WHERE token_type = 'erc1155' UNION ALL SELECT chain_id, @@ -113,13 +113,13 @@ FROM toInt256(amount) * (+1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc1155' + FROM token_transfers WHERE token_type = 'erc1155' ) GROUP BY chain_id, token_type, token_address, owner_address, token_id; -- ERC6909 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc6909_mv -TO token_balance +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_mv +TO token_balances AS SELECT chain_id, @@ -141,7 +141,7 @@ FROM toInt256(amount) * (-1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc6909' + FROM token_transfers WHERE token_type = 'erc6909' UNION ALL SELECT chain_id, @@ -152,6 +152,6 @@ FROM toInt256(amount) * (+1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc6909' + FROM token_transfers WHERE token_type = 'erc6909' ) GROUP BY chain_id, token_type, token_address, owner_address, token_id; \ No newline at end of file diff --git a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql 
b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql new file mode 100644 index 0000000..c33638e --- /dev/null +++ b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql @@ -0,0 +1,43 @@ +CREATE TABLE IF NOT EXISTS address_transactions ( + `chain_id` UInt256, + `hash` FixedString(66), + `nonce` UInt64, + `block_hash` FixedString(66), + `block_number` UInt256, + `block_timestamp` DateTime CODEC(Delta, ZSTD), + `transaction_index` UInt64, + `address` FixedString(42), + `address_type` Enum8('from' = 1, 'to' = 2), + `value` UInt256, + `gas` UInt64, + `gas_price` UInt256, + `data` String, + `function_selector` FixedString(10), + `max_fee_per_gas` UInt128, + `max_priority_fee_per_gas` UInt128, + `max_fee_per_blob_gas` UInt256, + `blob_versioned_hashes` Array(String), + `transaction_type` UInt8, + `r` UInt256, + `s` UInt256, + `v` UInt256, + `access_list` Nullable(String), + `authorization_list` Nullable(String), + `contract_address` Nullable(FixedString(42)), + `gas_used` Nullable(UInt64), + `cumulative_gas_used` Nullable(UInt64), + `effective_gas_price` Nullable(UInt256), + `blob_gas_used` Nullable(UInt64), + `blob_gas_price` Nullable(UInt256), + `logs_bloom` Nullable(String), + `status` Nullable(UInt64), + + `sign` Int8 DEFAULT 1, + `insert_timestamp` DateTime DEFAULT now(), + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3 +) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +ORDER BY (chain_id, address, block_number, hash, transaction_index) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql new file mode 100644 index 0000000..46d64d3 --- /dev/null +++ b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql @@ -0,0 +1,42 @@ +CREATE MATERIALIZED VIEW address_transactions_mv +TO address_transactions +AS +SELECT + chain_id, + hash, + nonce, + block_hash, + block_number, + block_timestamp, + transaction_index, + address_tuple.1 AS address, + address_tuple.2 AS address_type, + value, + gas, + gas_price, + data, + function_selector, + max_fee_per_gas, + max_priority_fee_per_gas, + max_fee_per_blob_gas, + blob_versioned_hashes, + transaction_type, + r, + s, + v, + access_list, + authorization_list, + contract_address, + gas_used, + cumulative_gas_used, + effective_gas_price, + blob_gas_used, + blob_gas_price, + logs_bloom, + status, + + sign, + insert_timestamp +FROM transactions +ARRAY JOIN + arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql new file mode 100644 index 0000000..2e8d071 --- /dev/null +++ b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql @@ -0,0 +1,24 @@ +CREATE TABLE IF NOT EXISTS address_transfers ( + `chain_id` UInt256, + `token_type` LowCardinality(String), + `token_address` FixedString(42), + `token_id` UInt256, + `address` FixedString(42), + `address_type` Enum8('from' = 1, 'to' = 2), + `block_number` UInt256, + `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), + 
`transaction_hash` FixedString(66), + `transaction_index` UInt64, + `amount` UInt256, + `log_index` UInt64, + `batch_index` Nullable(UInt16) DEFAULT NULL, + + `sign` Int8 DEFAULT 1, + `insert_timestamp` DateTime DEFAULT now(), + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3 +) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +ORDER BY (chain_id, address, block_number, transaction_hash, transaction_index) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql new file mode 100644 index 0000000..72a3ebb --- /dev/null +++ b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql @@ -0,0 +1,22 @@ +CREATE MATERIALIZED VIEW address_transfers_mv +TO address_transfers +AS +SELECT + chain_id, + token_type, + token_address, + token_id, + address_tuple.1 AS address, + address_tuple.2 AS address_type, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + amount, + log_index, + batch_index, + sign, + insert_timestamp +FROM token_transfers +ARRAY JOIN + arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file diff --git a/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql b/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql new file mode 100644 index 0000000..22b3c17 --- /dev/null +++ b/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql @@ -0,0 +1,202 @@ +CREATE TABLE IF NOT EXISTS backfill_logs +( + `chain_id` UInt256, + `block_number` UInt256, + `block_hash` FixedString(66), + `block_timestamp` DateTime CODEC(Delta, ZSTD), + `transaction_hash` FixedString(66), + `transaction_index` UInt64, + `log_index` UInt64, + `address` FixedString(42), + `data` String, + `topic_0` String, + `topic_1` String, + `topic_2` String, + `topic_3` String, + + `sign` Int8 DEFAULT 1, + `insert_timestamp` DateTime DEFAULT now(), +) ENGINE = Null; + + +--- Materialize view running to the correct tables +-- ERC20 +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc20_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc20' AS token_type, + 0 AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND topic_3 = '' + AND length(data) = 66; + +-- ERC721 +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc721_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc721' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + 
block_number, + block_timestamp, + transaction_hash, + transaction_index, + toUInt8(1) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND length(topic_3) = 66 AND startsWith(topic_3, '0x') + AND length(data) = 2; + +-- ERC1155 (single) +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc1155_single_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + toNullable(toUInt16(0)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' -- TransferSingle + AND length(topic_2) = 66 AND length(topic_3) = 66 + AND length(data) = (2 + 2*64); + +-- ERC1155 (batch) +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc1155_batch_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(unhex(id_hex))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(amount_hex))) AS amount, + log_index, + toNullable(toUInt16(array_index - 1)) AS batch_index, + sign, + insert_timestamp +FROM ( + SELECT + chain_id, address, topic_2, topic_3, + block_number, block_timestamp, transaction_hash, transaction_index, log_index, sign, insert_timestamp, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64))))) AS ids_offset, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64))))) AS amounts_offset, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + ids_offset * 2, 64))))) AS ids_length, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + amounts_offset * 2, 64))))) AS amounts_length, + arrayMap(i -> substring(data, 3 + ids_offset * 2 + 64 + (i-1)*64, 64), range(1, least(ids_length, 10000) + 1)) AS ids_array, + arrayMap(i -> substring(data, 3 + amounts_offset * 2 + 64 + (i-1)*64, 64), range(1, least(amounts_length, 10000) + 1)) AS amounts_array + FROM backfill_logs + WHERE topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb' + AND length(topic_2) = 66 + AND length(topic_3) = 66 + AND ids_length = amounts_length +) +ARRAY JOIN + ids_array AS id_hex, + amounts_array AS amount_hex, + arrayEnumerate(ids_array) AS array_index; + + +-- ERC6909 +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc6909_mv +TO logs_transfer +AS +SELECT + chain_id, + lower(address) AS token_address, + 'erc6909' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + lower(concat('0x', substring(topic_1, 27, 40))) AS from_address, + lower(concat('0x', substring(topic_2, 27, 40))) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + 
reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' + AND length(topic_1) = 66 + AND length(topic_2) = 66 + AND length(data) == 2 + 128; + +--- INITIATE BACKFILL BY RUNNING: +-- INSERT INTO backfill_logs +-- SELECT +-- chain_id, +-- block_number, +-- block_hash, +-- block_timestamp, +-- transaction_hash , +-- transaction_index, +-- log_index, +-- address, +-- data, +-- topic_0, +-- topic_1, +-- topic_2, +-- topic_3, +-- sign, +-- insert_timestamp, +-- FROM logs +-- WHERE 1=1 +-- AND chain_id = 1 +-- AND block_number >= 0 AND block_number < 10000000 +-- AND topic_0 IN ( +-- '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', -- 20/721 +-- '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', -- 1155 single +-- '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb', -- 1155 batch +-- '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' -- 6909 +-- ); + +-- DROP TABLE logs_transfer, token_balance; +-- DROP TABLE bf__logs_transfer_erc20_mv, bf__logs_transfer_erc721_mv, bf__logs_transfer_erc1155_mv, bf__logs_transfer_erc6909_mv; +-- DROP TABLE logs_transfer_erc20_mv, logs_transfer_erc721_mv, logs_transfer_erc1155_mv, logs_transfer_erc6909_mv; +-- DROP TABLE token_balance_erc20_mv, token_balance_erc721_mv, token_balance_erc1155_mv, token_balance_erc6909_mv; \ No newline at end of file From ceeac3b5a5e918e5d0855386413fcf73b474c795 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 15 Aug 2025 07:57:15 +0000 Subject: [PATCH 10/43] Fix backfill table --- ...0000_clickhouse_backfill_logs_transfer.sql | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql b/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql index 22b3c17..c2090e6 100644 --- a/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql +++ b/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql @@ -21,8 +21,8 @@ CREATE TABLE IF NOT EXISTS backfill_logs --- Materialize view running to the correct tables -- ERC20 -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc20_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc20_mv +TO token_transfers AS SELECT chain_id, @@ -48,8 +48,8 @@ WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b AND length(data) = 66; -- ERC721 -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc721_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc721_mv +TO token_transfers AS SELECT chain_id, @@ -75,8 +75,8 @@ WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b AND length(data) = 2; -- ERC1155 (single) -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc1155_single_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc1155_single_mv +TO token_transfers AS SELECT chain_id, @@ -100,8 +100,8 @@ WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0 AND length(data) = (2 + 2*64); -- ERC1155 (batch) -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc1155_batch_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS 
bf__token_transfers_erc1155_batch_mv +TO token_transfers AS SELECT chain_id, @@ -142,8 +142,8 @@ ARRAY JOIN -- ERC6909 -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc6909_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc6909_mv +TO token_transfers AS SELECT chain_id, @@ -196,7 +196,7 @@ WHERE topic_0 = '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728 -- '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' -- 6909 -- ); --- DROP TABLE logs_transfer, token_balance; --- DROP TABLE bf__logs_transfer_erc20_mv, bf__logs_transfer_erc721_mv, bf__logs_transfer_erc1155_mv, bf__logs_transfer_erc6909_mv; --- DROP TABLE logs_transfer_erc20_mv, logs_transfer_erc721_mv, logs_transfer_erc1155_mv, logs_transfer_erc6909_mv; +-- DROP TABLE token_transfers, token_balance; +-- DROP TABLE bf__token_transfers_erc20_mv, bf__token_transfers_erc721_mv, bf__token_transfers_erc1155_mv, bf__token_transfers_erc6909_mv; +-- DROP TABLE token_transfers_erc20_mv, token_transfers_erc721_mv, token_transfers_erc1155_mv, token_transfers_erc6909_mv; -- DROP TABLE token_balance_erc20_mv, token_balance_erc721_mv, token_balance_erc1155_mv, token_balance_erc6909_mv; \ No newline at end of file From 64aaec52d457dce743c2e9c031aa764a7f06448f Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 15 Aug 2025 19:52:11 +0000 Subject: [PATCH 11/43] Update kafka storage producer --- internal/common/block.go | 20 +++++ internal/storage/kafka_publisher.go | 129 ++++++++++++++++++---------- 2 files changed, 102 insertions(+), 47 deletions(-) diff --git a/internal/common/block.go b/internal/common/block.go index eacf1f1..83a5bf0 100644 --- a/internal/common/block.go +++ b/internal/common/block.go @@ -100,3 +100,23 @@ func (b *Block) Serialize() BlockModel { BaseFeePerGas: b.BaseFeePerGas, } } + +func (b *BlockData) Serialize() BlockData { + data := BlockData{ + ChainId: b.ChainId, + Block: b.Block, + Transactions: b.Transactions, + Logs: b.Logs, + Traces: b.Traces, + } + if data.Transactions == nil { + data.Transactions = []Transaction{} + } + if data.Logs == nil { + data.Logs = []Log{} + } + if data.Traces == nil { + data.Traces = []Trace{} + } + return data +} diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index b0b82ca..f7f0f72 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -5,6 +5,8 @@ import ( "crypto/tls" "encoding/json" "fmt" + "hash/fnv" + "math" "net" "strings" "sync" @@ -18,28 +20,37 @@ import ( ) type KafkaPublisher struct { - client *kgo.Client - mu sync.RWMutex + client *kgo.Client + mu sync.RWMutex + chainID string } -type PublishableMessage[T common.BlockData] struct { - Data T `json:"data"` - Status string `json:"status"` +type PublishableBlockMessage struct { + common.BlockData + Sign int8 `json:"sign"` + InsertTimestamp time.Time `json:"insert_timestamp"` } // NewKafkaPublisher method for storage connector (public) func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { brokers := strings.Split(cfg.Brokers, ",") + chainID := config.Cfg.RPC.ChainID + opts := []kgo.Opt{ kgo.SeedBrokers(brokers...), kgo.AllowAutoTopicCreation(), - kgo.ProducerBatchCompression(kgo.SnappyCompression()), - kgo.ClientID(fmt.Sprintf("insight-indexer-kafka-storage-%s", config.Cfg.RPC.ChainID)), + kgo.ProducerBatchCompression(kgo.ZstdCompression()), + kgo.ClientID(fmt.Sprintf("insight-indexer-kafka-storage-%s", chainID)), + 
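// Illustrative aside, not from the patch itself: kgo.ManualPartitioner() a few lines below
// tells franz-go to take the destination partition verbatim from each kgo.Record's Partition
// field instead of hashing the key, so every record built by this publisher sets Partition
// explicitly. Under that assumption, producing looks roughly like (values illustrative):
//
//	rec := &kgo.Record{
//		Topic:     "insight.commit.blocks",         // topic name used at this point in the series
//		Partition: 7,                               // honored only because of ManualPartitioner
//		Key:       []byte("blockdata-7-123-0xabc-1"),
//		Value:     payloadJSON,
//	}
//	client.Produce(ctx, rec, nil)
//
// Note too that kgo.TransactionalID should be unique per live producer; two producers sharing
// one ID for the same chain will fence each other's transactions.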
kgo.TransactionalID(fmt.Sprintf("insight-producer-%s", chainID)), + kgo.MaxBufferedBytes(2 * 1024 * 1024 * 1024), // 2GB kgo.MaxBufferedRecords(1_000_000), kgo.ProducerBatchMaxBytes(16_000_000), - kgo.RecordPartitioner(kgo.UniformBytesPartitioner(1_000_000, false, false, nil)), + kgo.RecordPartitioner(kgo.ManualPartitioner()), + kgo.ProduceRequestTimeout(30 * time.Second), kgo.MetadataMaxAge(60 * time.Second), kgo.DialTimeout(10 * time.Second), + kgo.RequiredAcks(kgo.AllISRAcks()), + kgo.RequestRetries(5), } if cfg.Username != "" && cfg.Password != "" { @@ -68,8 +79,10 @@ func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { } publisher := &KafkaPublisher{ - client: client, + client: client, + chainID: chainID, } + return publisher, nil } @@ -78,7 +91,6 @@ func (p *KafkaPublisher) PublishBlockData(blockData []common.BlockData) error { } func (p *KafkaPublisher) PublishReorg(oldData []common.BlockData, newData []common.BlockData) error { - // TODO: need to revisit how reorg blocks get published to downstream if err := p.publishBlockData(oldData, true); err != nil { return fmt.Errorf("failed to publish old block data: %v", err) } @@ -105,30 +117,39 @@ func (p *KafkaPublisher) publishMessages(ctx context.Context, messages []*kgo.Re return nil } - p.mu.RLock() - defer p.mu.RUnlock() + // Lock for the entire transaction lifecycle to ensure thread safety + p.mu.Lock() + defer p.mu.Unlock() if p.client == nil { - return nil // Skip if no client configured + return fmt.Errorf("no kafka client configured") + } + + // Start a new transaction + if err := p.client.BeginTransaction(); err != nil { + return fmt.Errorf("failed to begin transaction: %v", err) } - var wg sync.WaitGroup - wg.Add(len(messages)) - // Publish to all configured producers + // Produce all messages in the transaction for _, msg := range messages { - p.client.Produce(ctx, msg, func(_ *kgo.Record, err error) { - defer wg.Done() - if err != nil { - log.Error().Err(err).Msg("Failed to publish message to Kafka") - } - }) + p.client.Produce(ctx, msg, nil) + } + + // Flush all messages + if err := p.client.Flush(ctx); err != nil { + p.client.EndTransaction(ctx, kgo.TryAbort) + return fmt.Errorf("failed to flush messages: %v", err) + } + + // Commit the transaction + if err := p.client.EndTransaction(ctx, kgo.TryCommit); err != nil { + return fmt.Errorf("failed to commit transaction: %v", err) } - wg.Wait() return nil } -func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg bool) error { +func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isDeleted bool) error { if len(blockData) == 0 { return nil } @@ -138,15 +159,9 @@ func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg // Prepare messages for blocks, events, transactions and traces blockMessages := make([]*kgo.Record, len(blockData)) - // TODO: handle reorg - status := "new" - if isReorg { - status = "reverted" - } - for i, data := range blockData { // Block message - if blockMsg, err := p.createBlockDataMessage(data, status); err == nil { + if blockMsg, err := p.createBlockDataMessage(data, isDeleted); err == nil { blockMessages[i] = blockMsg } else { return fmt.Errorf("failed to create block message: %v", err) @@ -161,27 +176,47 @@ func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg return nil } -func (p *KafkaPublisher) createBlockDataMessage(data common.BlockData, status string) (*kgo.Record, error) { - msg := PublishableMessage[common.BlockData]{ - Data: 
data, - Status: status, +func (p *KafkaPublisher) createBlockDataMessage(data common.BlockData, isDeleted bool) (*kgo.Record, error) { + insertTimestamp := time.Now() + msg := PublishableBlockMessage{ + BlockData: data.Serialize(), + Sign: 1, + InsertTimestamp: insertTimestamp, + } + if isDeleted { + msg.Sign = -1 // Indicate deletion with a negative sign } msgJson, err := json.Marshal(msg) if err != nil { return nil, fmt.Errorf("failed to marshal block data: %v", err) } - return &kgo.Record{ - Topic: p.getTopicName("commit", data.ChainId), - Key: []byte(fmt.Sprintf("block-%s-%d-%s", status, data.ChainId, data.Block.Hash)), - Value: msgJson, - }, nil -} -func (p *KafkaPublisher) getTopicName(entity string, chainId uint64) string { - switch entity { - case "commit": - return fmt.Sprintf("insight.commit.blocks.%d", chainId) - default: - panic(fmt.Errorf("unknown topic entity: %s", entity)) + // Determine partition based on chainID + var partition int32 + if data.ChainId <= math.MaxInt32 { + // Direct assignment for chain IDs that fit in int32 + partition = int32(data.ChainId) + } else { + // Hash for larger chain IDs to avoid overflow + h := fnv.New32a() + fmt.Fprintf(h, "%d", data.ChainId) + partition = int32(h.Sum32() & 0x7FFFFFFF) // Ensure positive } + + // Create headers with metadata + headers := []kgo.RecordHeader{ + {Key: "chain_id", Value: []byte(fmt.Sprintf("%d", data.ChainId))}, + {Key: "block_number", Value: []byte(fmt.Sprintf("%d", data.Block.Number))}, + {Key: "sign", Value: []byte(fmt.Sprintf("%d", msg.Sign))}, + {Key: "insert_timestamp", Value: []byte(insertTimestamp.Format(time.RFC3339Nano))}, + {Key: "schema_version", Value: []byte("1")}, + } + + return &kgo.Record{ + Topic: "insight.commit.blocks", + Key: []byte(fmt.Sprintf("blockdata-%d-%d-%s-%d", data.ChainId, data.Block.Number, data.Block.Hash, msg.Sign)), + Value: msgJson, + Headers: headers, + Partition: partition, + }, nil } From 20dc4712d4522d77a5d183732830d07396f9a81d Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 15 Aug 2025 21:05:31 +0000 Subject: [PATCH 12/43] Kafka + Redis --- configs/config.go | 17 +- go.mod | 2 + go.sum | 4 + internal/storage/connector.go | 2 +- internal/storage/kafka_postgres.go | 614 ----------------------------- internal/storage/kafka_redis.go | 300 ++++++++++++++ 6 files changed, 319 insertions(+), 620 deletions(-) delete mode 100644 internal/storage/kafka_postgres.go create mode 100644 internal/storage/kafka_redis.go diff --git a/configs/config.go b/configs/config.go index b9d036b..b94669f 100644 --- a/configs/config.go +++ b/configs/config.go @@ -101,12 +101,19 @@ type PostgresConfig struct { ConnectTimeout int `mapstructure:"connectTimeout"` } +type RedisConfig struct { + Host string `mapstructure:"host"` + Port int `mapstructure:"port"` + Password string `mapstructure:"password"` + DB int `mapstructure:"db"` +} + type KafkaConfig struct { - Brokers string `mapstructure:"brokers"` - Username string `mapstructure:"username"` - Password string `mapstructure:"password"` - EnableTLS bool `mapstructure:"enable_tls"` - Postgres *PostgresConfig `mapstructure:"postgres"` + Brokers string `mapstructure:"brokers"` + Username string `mapstructure:"username"` + Password string `mapstructure:"password"` + EnableTLS bool `mapstructure:"enable_tls"` + Redis *RedisConfig `mapstructure:"redis"` } type RPCBatchRequestConfig struct { diff --git a/go.mod b/go.mod index f5e6788..66d4ef5 100644 --- a/go.mod +++ b/go.mod @@ -39,6 +39,7 @@ require ( 
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/deckarep/golang-set/v2 v2.6.0 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/ethereum/c-kzg-4844/v2 v2.1.0 // indirect github.com/ethereum/go-verkle v0.2.2 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect @@ -85,6 +86,7 @@ require ( github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect + github.com/redis/go-redis/v9 v9.12.1 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect diff --git a/go.sum b/go.sum index 2d7c778..6302502 100644 --- a/go.sum +++ b/go.sum @@ -63,6 +63,8 @@ github.com/decred/dcrd/crypto/blake256 v1.0.0 h1:/8DMNYp9SGi5f0w7uCm6d6M4OU2rGFK github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/ethereum/c-kzg-4844/v2 v2.1.0 h1:gQropX9YFBhl3g4HYhwE70zq3IHFRgbbNPw0Shwzf5w= github.com/ethereum/c-kzg-4844/v2 v2.1.0/go.mod h1:TC48kOKjJKPbN7C++qIgt0TJzZ70QznYR7Ob+WXl57E= github.com/ethereum/go-ethereum v1.15.11 h1:JK73WKeu0WC0O1eyX+mdQAVHUV+UR1a9VB/domDngBU= @@ -237,6 +239,8 @@ github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/redis/go-redis/v9 v9.12.1 h1:k5iquqv27aBtnTm2tIkROUDp8JBXhXZIVu1InSgvovg= +github.com/redis/go-redis/v9 v9.12.1/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 9a90b16..0b5d743 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -149,7 +149,7 @@ func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { var conn interface{} var err error if cfg.Kafka != nil { - conn, err = NewKafkaPostgresConnector(cfg.Kafka) + conn, err = NewKafkaRedisConnector(cfg.Kafka) } else if cfg.Postgres != nil { conn, err = NewPostgresConnector(cfg.Postgres) } else if cfg.Clickhouse != nil { diff --git a/internal/storage/kafka_postgres.go b/internal/storage/kafka_postgres.go deleted file mode 100644 index 23e7bfd..0000000 --- a/internal/storage/kafka_postgres.go +++ /dev/null @@ -1,614 +0,0 @@ -package storage - -import ( - "database/sql" - "encoding/json" - "fmt" - "math/big" - "strings" - "time" - - _ "github.com/lib/pq" - "github.com/rs/zerolog/log" - config 
"github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" -) - -// KafkaPostgresConnector uses PostgreSQL for metadata storage and Kafka for block data delivery -type KafkaPostgresConnector struct { - db *sql.DB - cfg *config.KafkaConfig - kafkaPublisher *KafkaPublisher -} - -func NewKafkaPostgresConnector(cfg *config.KafkaConfig) (*KafkaPostgresConnector, error) { - // Connect to PostgreSQL - connStr := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s", - cfg.Postgres.Host, cfg.Postgres.Port, cfg.Postgres.Username, cfg.Postgres.Password, cfg.Postgres.Database) - - // Default to "require" for security if SSL mode not specified - sslMode := cfg.Postgres.SSLMode - if sslMode == "" { - sslMode = "require" - log.Info().Msg("No SSL mode specified, defaulting to 'require' for secure connection") - } - connStr += fmt.Sprintf(" sslmode=%s", sslMode) - - if cfg.Postgres.ConnectTimeout > 0 { - connStr += fmt.Sprintf(" connect_timeout=%d", cfg.Postgres.ConnectTimeout) - } - - db, err := sql.Open("postgres", connStr) - if err != nil { - return nil, fmt.Errorf("failed to connect to postgres: %w", err) - } - - db.SetMaxOpenConns(cfg.Postgres.MaxOpenConns) - db.SetMaxIdleConns(cfg.Postgres.MaxIdleConns) - - if cfg.Postgres.MaxConnLifetime > 0 { - db.SetConnMaxLifetime(time.Duration(cfg.Postgres.MaxConnLifetime) * time.Second) - } - - if err := db.Ping(); err != nil { - return nil, fmt.Errorf("failed to ping postgres: %w", err) - } - - // Initialize Kafka publisher if enabled - kafkaPublisher, err := NewKafkaPublisher(cfg) - if err != nil { - return nil, err - } - - return &KafkaPostgresConnector{ - db: db, - cfg: cfg, - kafkaPublisher: kafkaPublisher, - }, nil -} - -// Orchestrator Storage Implementation (PostgreSQL) - -func (kp *KafkaPostgresConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) { - query := `SELECT chain_id, block_number, last_error_timestamp, failure_count, reason - FROM block_failures WHERE 1=1` - - args := []interface{}{} - argCount := 0 - - if qf.ChainId != nil && qf.ChainId.Sign() > 0 { - argCount++ - query += fmt.Sprintf(" AND chain_id = $%d", argCount) - args = append(args, qf.ChainId.String()) - } - - if len(qf.BlockNumbers) > 0 { - placeholders := make([]string, len(qf.BlockNumbers)) - for i, bn := range qf.BlockNumbers { - argCount++ - placeholders[i] = fmt.Sprintf("$%d", argCount) - args = append(args, bn.String()) - } - query += fmt.Sprintf(" AND block_number IN (%s)", strings.Join(placeholders, ",")) - } - - if qf.SortBy != "" { - query += fmt.Sprintf(" ORDER BY %s", qf.SortBy) - if qf.SortOrder != "" { - query += " " + qf.SortOrder - } - } else { - query += " ORDER BY block_number DESC" - } - - if qf.Limit > 0 { - argCount++ - query += fmt.Sprintf(" LIMIT $%d", argCount) - args = append(args, qf.Limit) - } - - if qf.Offset > 0 { - argCount++ - query += fmt.Sprintf(" OFFSET $%d", argCount) - args = append(args, qf.Offset) - } - - rows, err := kp.db.Query(query, args...) 
- if err != nil { - return nil, err - } - defer func() { - if err := rows.Close(); err != nil { - log.Error().Err(err).Msg("Failed to close rows in GetBlockFailures") - } - }() - - var failures []common.BlockFailure - for rows.Next() { - var failure common.BlockFailure - var chainIdStr, blockNumberStr string - var timestamp int64 - var count int - - err := rows.Scan(&chainIdStr, &blockNumberStr, ×tamp, &count, &failure.FailureReason) - if err != nil { - return nil, fmt.Errorf("error scanning block failure: %w", err) - } - - var ok bool - failure.ChainId, ok = new(big.Int).SetString(chainIdStr, 10) - if !ok { - return nil, fmt.Errorf("failed to parse chain_id '%s' as big.Int", chainIdStr) - } - - failure.BlockNumber, ok = new(big.Int).SetString(blockNumberStr, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block_number '%s' as big.Int", blockNumberStr) - } - - failure.FailureTime = time.Unix(timestamp, 0) - failure.FailureCount = count - - failures = append(failures, failure) - } - - return failures, rows.Err() -} - -func (kp *KafkaPostgresConnector) StoreBlockFailures(failures []common.BlockFailure) error { - if len(failures) == 0 { - return nil - } - - valueStrings := make([]string, 0, len(failures)) - valueArgs := make([]interface{}, 0, len(failures)*5) - - for i, failure := range failures { - valueStrings = append(valueStrings, fmt.Sprintf("($%d, $%d, $%d, $%d, $%d)", - i*5+1, i*5+2, i*5+3, i*5+4, i*5+5)) - valueArgs = append(valueArgs, - failure.ChainId.String(), - failure.BlockNumber.String(), - failure.FailureTime.Unix(), - failure.FailureCount, - failure.FailureReason, - ) - } - - query := fmt.Sprintf(`INSERT INTO block_failures (chain_id, block_number, last_error_timestamp, failure_count, reason) - VALUES %s - ON CONFLICT (chain_id, block_number) - DO UPDATE SET - last_error_timestamp = EXCLUDED.last_error_timestamp, - failure_count = EXCLUDED.failure_count, - reason = EXCLUDED.reason, - updated_at = NOW()`, strings.Join(valueStrings, ",")) - - _, err := kp.db.Exec(query, valueArgs...) - return err -} - -func (kp *KafkaPostgresConnector) DeleteBlockFailures(failures []common.BlockFailure) error { - if len(failures) == 0 { - return nil - } - - tuples := make([]string, 0, len(failures)) - args := make([]interface{}, 0, len(failures)*2) - - for i, failure := range failures { - tuples = append(tuples, fmt.Sprintf("($%d, $%d)", i*2+1, i*2+2)) - args = append(args, failure.ChainId.String(), failure.BlockNumber.String()) - } - - query := fmt.Sprintf(`DELETE FROM block_failures - WHERE ctid IN ( - SELECT ctid - FROM block_failures - WHERE (chain_id, block_number) IN (%s) - FOR UPDATE SKIP LOCKED - )`, strings.Join(tuples, ",")) - - _, err := kp.db.Exec(query, args...) 
- return err -} - -func (kp *KafkaPostgresConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { - query := `SELECT cursor_value FROM cursors - WHERE cursor_type = 'reorg' AND chain_id = $1` - - var blockNumberString string - err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - - return blockNumber, nil -} - -func (kp *KafkaPostgresConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) - VALUES ($1, 'reorg', $2) - ON CONFLICT (chain_id, cursor_type) - DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` - - _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) - return err -} - -// Staging Storage Implementation (PostgreSQL) - -func (kp *KafkaPostgresConnector) InsertStagingData(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - valueStrings := make([]string, 0, len(data)) - valueArgs := make([]interface{}, 0, len(data)*3) - - for i, blockData := range data { - blockDataJSON, err := json.Marshal(blockData) - if err != nil { - return err - } - - valueStrings = append(valueStrings, fmt.Sprintf("($%d, $%d, $%d)", - i*3+1, i*3+2, i*3+3)) - valueArgs = append(valueArgs, - blockData.Block.ChainId.String(), - blockData.Block.Number.String(), - string(blockDataJSON), - ) - } - - query := fmt.Sprintf(`INSERT INTO block_data (chain_id, block_number, data) - VALUES %s - ON CONFLICT (chain_id, block_number) - DO UPDATE SET data = EXCLUDED.data, updated_at = NOW()`, strings.Join(valueStrings, ",")) - - _, err := kp.db.Exec(query, valueArgs...) - return err -} - -func (kp *KafkaPostgresConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { - query := `SELECT data FROM block_data WHERE 1=1` - - args := []interface{}{} - argCount := 0 - - if qf.ChainId != nil && qf.ChainId.Sign() > 0 { - argCount++ - query += fmt.Sprintf(" AND chain_id = $%d", argCount) - args = append(args, qf.ChainId.String()) - } - - if len(qf.BlockNumbers) > 0 { - placeholders := make([]string, len(qf.BlockNumbers)) - for i, bn := range qf.BlockNumbers { - argCount++ - placeholders[i] = fmt.Sprintf("$%d", argCount) - args = append(args, bn.String()) - } - query += fmt.Sprintf(" AND block_number IN (%s)", strings.Join(placeholders, ",")) - } else if qf.StartBlock != nil && qf.EndBlock != nil { - argCount++ - query += fmt.Sprintf(" AND block_number BETWEEN $%d AND $%d", argCount, argCount+1) - args = append(args, qf.StartBlock.String(), qf.EndBlock.String()) - argCount++ - } - - query += " ORDER BY block_number ASC" - - if qf.Limit > 0 { - argCount++ - query += fmt.Sprintf(" LIMIT $%d", argCount) - args = append(args, qf.Limit) - } - - rows, err := kp.db.Query(query, args...) 
- if err != nil { - return nil, err - } - defer func() { - if err := rows.Close(); err != nil { - log.Error().Err(err).Msg("Failed to close rows in GetStagingData") - } - }() - - blockDataList := make([]common.BlockData, 0) - for rows.Next() { - var blockDataJson string - if err := rows.Scan(&blockDataJson); err != nil { - return nil, fmt.Errorf("error scanning block data: %w", err) - } - - var blockData common.BlockData - if err := json.Unmarshal([]byte(blockDataJson), &blockData); err != nil { - return nil, err - } - - blockDataList = append(blockDataList, blockData) - } - - return blockDataList, rows.Err() -} - -func (kp *KafkaPostgresConnector) DeleteStagingData(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - tuples := make([]string, 0, len(data)) - args := make([]interface{}, 0, len(data)*2) - - for i, blockData := range data { - tuples = append(tuples, fmt.Sprintf("($%d, $%d)", i*2+1, i*2+2)) - args = append(args, blockData.Block.ChainId.String(), blockData.Block.Number.String()) - } - - query := fmt.Sprintf(`DELETE FROM block_data - WHERE ctid IN ( - SELECT ctid - FROM block_data - WHERE (chain_id, block_number) IN (%s) - FOR UPDATE SKIP LOCKED - )`, strings.Join(tuples, ",")) - - _, err := kp.db.Exec(query, args...) - return err -} - -func (kp *KafkaPostgresConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'publish' AND chain_id = $1` - - var blockNumberString string - err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - return blockNumber, nil -} - -func (kp *KafkaPostgresConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) - VALUES ($1, 'publish', $2) - ON CONFLICT (chain_id, cursor_type) - DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` - - _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) - return err -} - -func (kp *KafkaPostgresConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { - query := `SELECT MAX(block_number) FROM block_data WHERE 1=1` - - args := []interface{}{} - argCount := 0 - - if chainId != nil && chainId.Sign() > 0 { - argCount++ - query += fmt.Sprintf(" AND chain_id = $%d", argCount) - args = append(args, chainId.String()) - } - - if rangeStart != nil && rangeStart.Sign() > 0 { - argCount++ - query += fmt.Sprintf(" AND block_number >= $%d", argCount) - args = append(args, rangeStart.String()) - } - - if rangeEnd != nil && rangeEnd.Sign() > 0 { - argCount++ - query += fmt.Sprintf(" AND block_number <= $%d", argCount) - args = append(args, rangeEnd.String()) - } - - var blockNumberStr sql.NullString - err := kp.db.QueryRow(query, args...).Scan(&blockNumberStr) - if err != nil { - return nil, err - } - - if !blockNumberStr.Valid { - return big.NewInt(0), nil - } - - blockNumber, ok := new(big.Int).SetString(blockNumberStr.String, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberStr.String) - } - - return blockNumber, nil -} - -func (kp *KafkaPostgresConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { - 
query := `DELETE FROM block_data - WHERE ctid IN ( - SELECT ctid - FROM block_data - WHERE chain_id = $1 - AND block_number <= $2 - FOR UPDATE SKIP LOCKED - )` - _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) - return err -} - -// InsertBlockData publishes block data to Kafka instead of storing in database -func (kp *KafkaPostgresConnector) InsertBlockData(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - // Publish to Kafka - if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { - return fmt.Errorf("failed to publish block data to kafka: %w", err) - } - log.Debug(). - Int("blocks", len(data)). - Msg("Published block data to Kafka") - - // Update cursor to track the highest block number published - if len(data) > 0 { - // Find the highest block number in the batch - var maxBlock *big.Int - for _, blockData := range data { - if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { - maxBlock = blockData.Block.Number - } - } - if maxBlock != nil { - chainId := data[0].Block.ChainId - blockNumber := maxBlock - query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) - VALUES ($1, 'commit', $2) - ON CONFLICT (chain_id, cursor_type) - DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` - if _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()); err != nil { - return err - } - } - } - - return nil -} - -// ReplaceBlockData handles reorg by publishing both old and new data to Kafka -func (kp *KafkaPostgresConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { - if len(data) == 0 { - return nil, nil - } - - oldBlocks := []common.BlockData{} - - // Publish reorg event to Kafka - // TODO: Publish new blocks (the reorg handler will mark old ones as reverted) - if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { - return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) - } - - // Update cursor to track the highest block number - if len(data) > 0 { - var maxBlock *big.Int - for _, blockData := range data { - if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { - maxBlock = blockData.Block.Number - } - } - if maxBlock != nil { - if err := kp.SetLastPublishedBlockNumber(data[0].Block.ChainId, maxBlock); err != nil { - return nil, fmt.Errorf("failed to update published block cursor: %w", err) - } - } - } - - return oldBlocks, nil -} - -func (kp *KafkaPostgresConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { - query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'commit' AND chain_id = $1` - - var blockNumberString string - err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - return blockNumber, nil -} - -func (kp *KafkaPostgresConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - // Get the last published block number - lastPublished, err := kp.GetLastPublishedBlockNumber(chainId) - if err != nil { - return nil, err - } - - // Check if it's within the range - if lastPublished.Cmp(startBlock) >= 0 && lastPublished.Cmp(endBlock) <= 0 { - return lastPublished, nil - } - - // If outside range, return appropriate boundary - if lastPublished.Cmp(endBlock) > 0 { - 
return endBlock, nil - } - if lastPublished.Cmp(startBlock) < 0 { - return big.NewInt(0), nil - } - - return lastPublished, nil -} - -func (kp *KafkaPostgresConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { - return []common.BlockHeader{}, nil -} - -func (kp *KafkaPostgresConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { - return QueryResult[common.TokenBalance]{Data: []common.TokenBalance{}}, nil -} - -func (kp *KafkaPostgresConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { - return QueryResult[common.TokenTransfer]{Data: []common.TokenTransfer{}}, nil -} - -func (kp *KafkaPostgresConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { - return []common.BlockData{}, nil -} - -func (kp *KafkaPostgresConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { - return []*big.Int{}, nil -} - -func (kp *KafkaPostgresConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { - return []common.BlockData{}, nil -} - -// Query methods return empty results as this connector uses Kafka for data delivery -func (kp *KafkaPostgresConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { - return QueryResult[common.Block]{Data: []common.Block{}}, nil -} - -func (kp *KafkaPostgresConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { - return QueryResult[common.Transaction]{Data: []common.Transaction{}}, nil -} - -func (kp *KafkaPostgresConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { - return QueryResult[common.Log]{Data: []common.Log{}}, nil -} - -func (kp *KafkaPostgresConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { - return QueryResult[common.Trace]{Data: []common.Trace{}}, nil -} - -func (kp *KafkaPostgresConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { - return QueryResult[interface{}]{Aggregates: []map[string]interface{}{}}, nil -} - -// Close closes the database connection -func (kp *KafkaPostgresConnector) Close() error { - return kp.db.Close() -} diff --git a/internal/storage/kafka_redis.go b/internal/storage/kafka_redis.go new file mode 100644 index 0000000..9c1a0ea --- /dev/null +++ b/internal/storage/kafka_redis.go @@ -0,0 +1,300 @@ +package storage + +import ( + "context" + "fmt" + "math/big" + "time" + + "github.com/redis/go-redis/v9" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" +) + +// Redis key namespace constants for better organization and maintainability +const ( + // Cursor keys for tracking positions + KeyCursorReorg = "cursor:reorg" // String: cursor:reorg:{chainId} + KeyCursorPublish = "cursor:publish" // String: cursor:publish:{chainId} + KeyCursorCommit = "cursor:commit" // String: cursor:commit:{chainId} +) + +// KafkaRedisConnector uses Redis for metadata storage and Kafka for block data delivery +type KafkaRedisConnector struct { + redisClient *redis.Client + cfg *config.KafkaConfig + kafkaPublisher *KafkaPublisher +} + +func NewKafkaRedisConnector(cfg *config.KafkaConfig) (*KafkaRedisConnector, error) { + // Connect to Redis + 
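// Illustrative aside, not from the patch itself: all bookkeeping for this connector lives in
// three Redis string keys per chain, each holding a decimal block number with no TTL:
//
//	cursor:reorg:{chainId}    reorg-check cursor (Get/SetLastReorgCheckedBlockNumber)
//	cursor:publish:{chainId}  publish cursor (Get/SetLastPublishedBlockNumber)
//	cursor:commit:{chainId}   commit high-water mark (InsertBlockData / GetMaxBlockNumber)
//
// which keeps the state trivial to inspect or reset by hand, for example:
//
//	redis-cli GET cursor:commit:1
//	redis-cli SET cursor:commit:1 18500000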
redisClient := redis.NewClient(&redis.Options{ + Addr: fmt.Sprintf("%s:%d", cfg.Redis.Host, cfg.Redis.Port), + Password: cfg.Redis.Password, + DB: cfg.Redis.DB, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := redisClient.Ping(ctx).Err(); err != nil { + return nil, fmt.Errorf("failed to connect to redis: %w", err) + } + + // Initialize Kafka publisher + kafkaPublisher, err := NewKafkaPublisher(cfg) + if err != nil { + return nil, err + } + + return &KafkaRedisConnector{ + redisClient: redisClient, + cfg: cfg, + kafkaPublisher: kafkaPublisher, + }, nil +} + +// Orchestrator Storage Implementation - Block failures not supported + +func (kr *KafkaRedisConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) { + return nil, fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") +} + +func (kr *KafkaRedisConnector) StoreBlockFailures(failures []common.BlockFailure) error { + return fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") +} + +func (kr *KafkaRedisConnector) DeleteBlockFailures(failures []common.BlockFailure) error { + return fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") +} + +func (kr *KafkaRedisConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + + return blockNumber, nil +} + +func (kr *KafkaRedisConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +// Staging Storage Implementation - Not supported for KafkaRedis connector + +func (kr *KafkaRedisConnector) InsertStagingData(data []common.BlockData) error { + return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") +} + +func (kr *KafkaRedisConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { + return nil, fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") +} + +func (kr *KafkaRedisConnector) DeleteStagingData(data []common.BlockData) error { + return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") +} + +func (kr *KafkaRedisConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *KafkaRedisConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := 
context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +func (kr *KafkaRedisConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") +} + +func (kr *KafkaRedisConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { + return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") +} + +// InsertBlockData publishes block data to Kafka instead of storing in database +func (kr *KafkaRedisConnector) InsertBlockData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + // Publish to Kafka + if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { + return fmt.Errorf("failed to publish block data to kafka: %w", err) + } + log.Debug(). + Int("blocks", len(data)). + Msg("Published block data to Kafka") + + // Update cursor to track the highest block number published + if len(data) > 0 { + // Find the highest block number in the batch + var maxBlock *big.Int + for _, blockData := range data { + if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = blockData.Block.Number + } + } + if maxBlock != nil { + ctx := context.Background() + chainId := data[0].Block.ChainId + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + if err := kr.redisClient.Set(ctx, key, maxBlock.String(), 0).Err(); err != nil { + return err + } + } + } + + return nil +} + +// ReplaceBlockData handles reorg by publishing both old and new data to Kafka +func (kr *KafkaRedisConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { + if len(data) == 0 { + return nil, nil + } + + oldBlocks := []common.BlockData{} + + // Publish reorg event to Kafka + // TODO: Publish new blocks (the reorg handler will mark old ones as reverted) + if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { + return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) + } + + // Update cursor to track the highest block number + if len(data) > 0 { + var maxBlock *big.Int + for _, blockData := range data { + if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = blockData.Block.Number + } + } + if maxBlock != nil { + if err := kr.SetLastPublishedBlockNumber(data[0].Block.ChainId, maxBlock); err != nil { + return nil, fmt.Errorf("failed to update published block cursor: %w", err) + } + } + } + + return oldBlocks, nil +} + +func (kr *KafkaRedisConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *KafkaRedisConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + // Get the last published block number + lastPublished, err := kr.GetLastPublishedBlockNumber(chainId) + if err != nil { + return nil, err + } + + // Check if it's within the range + if 
lastPublished.Cmp(startBlock) >= 0 && lastPublished.Cmp(endBlock) <= 0 { + return lastPublished, nil + } + + // If outside range, return appropriate boundary + if lastPublished.Cmp(endBlock) > 0 { + return endBlock, nil + } + if lastPublished.Cmp(startBlock) < 0 { + return big.NewInt(0), nil + } + + return lastPublished, nil +} + +func (kr *KafkaRedisConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { + return []common.BlockHeader{}, nil +} + +func (kr *KafkaRedisConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { + return QueryResult[common.TokenBalance]{Data: []common.TokenBalance{}}, nil +} + +func (kr *KafkaRedisConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { + return QueryResult[common.TokenTransfer]{Data: []common.TokenTransfer{}}, nil +} + +func (kr *KafkaRedisConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { + return []common.BlockData{}, nil +} + +func (kr *KafkaRedisConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { + return []*big.Int{}, nil +} + +func (kr *KafkaRedisConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { + return []common.BlockData{}, nil +} + +// Query methods return empty results as this connector uses Kafka for data delivery +func (kr *KafkaRedisConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { + return QueryResult[common.Block]{Data: []common.Block{}}, nil +} + +func (kr *KafkaRedisConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { + return QueryResult[common.Transaction]{Data: []common.Transaction{}}, nil +} + +func (kr *KafkaRedisConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { + return QueryResult[common.Log]{Data: []common.Log{}}, nil +} + +func (kr *KafkaRedisConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { + return QueryResult[common.Trace]{Data: []common.Trace{}}, nil +} + +func (kr *KafkaRedisConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { + return QueryResult[interface{}]{Aggregates: []map[string]interface{}{}}, nil +} + +// Close closes the Redis connection +func (kr *KafkaRedisConnector) Close() error { + return kr.redisClient.Close() +} From 0bf3097f136cddc5ce80adea57944b395596987c Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Mon, 18 Aug 2025 15:51:51 +0000 Subject: [PATCH 13/43] Update schema payload --- internal/storage/kafka_publisher.go | 128 ++++++++++++++++++++++------ internal/storage/kafka_redis.go | 45 ++++------ 2 files changed, 117 insertions(+), 56 deletions(-) diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index f7f0f72..d3376ce 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -5,8 +5,6 @@ import ( "crypto/tls" "encoding/json" "fmt" - "hash/fnv" - "math" "net" "strings" "sync" @@ -25,12 +23,39 @@ type KafkaPublisher struct { chainID string } -type PublishableBlockMessage struct { +type MessageType string + +type PublishableData interface { + GetType() MessageType +} + +type PublishableMessagePayload struct { + Data 
PublishableData `json:"data"` + Type MessageType `json:"type"` + Timestamp time.Time `json:"timestamp"` +} + +type PublishableMessageBlockData struct { common.BlockData Sign int8 `json:"sign"` InsertTimestamp time.Time `json:"insert_timestamp"` } +type PublishableMessageRevert struct { + ChainId uint64 `json:"chain_id"` + BlockNumber uint64 `json:"block_number"` + Sign int8 `json:"sign"` + InsertTimestamp time.Time `json:"insert_timestamp"` +} + +func (b PublishableMessageBlockData) GetType() MessageType { + return "block_data" +} + +func (b PublishableMessageRevert) GetType() MessageType { + return "revert" +} + // NewKafkaPublisher method for storage connector (public) func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { brokers := strings.Split(cfg.Brokers, ",") @@ -91,6 +116,12 @@ func (p *KafkaPublisher) PublishBlockData(blockData []common.BlockData) error { } func (p *KafkaPublisher) PublishReorg(oldData []common.BlockData, newData []common.BlockData) error { + newHead := uint64(newData[0].Block.Number.Uint64()) + // Publish revert the revert to the new head - 1, so that the new updated block data can be re-processed + if err := p.publishBlockRevert(newData[0].ChainId, newHead-1); err != nil { + return fmt.Errorf("failed to revert: %v", err) + } + if err := p.publishBlockData(oldData, true); err != nil { return fmt.Errorf("failed to publish old block data: %v", err) } @@ -149,6 +180,27 @@ func (p *KafkaPublisher) publishMessages(ctx context.Context, messages []*kgo.Re return nil } +func (p *KafkaPublisher) publishBlockRevert(chainId uint64, blockNumber uint64) error { + publishStart := time.Now() + + // Prepare messages for blocks, events, transactions and traces + blockMessages := make([]*kgo.Record, 1) + + // Block message + if blockMsg, err := p.createBlockRevertMessage(chainId, blockNumber); err == nil { + blockMessages[0] = blockMsg + } else { + return fmt.Errorf("failed to create block revert message: %v", err) + } + + if err := p.publishMessages(context.Background(), blockMessages); err != nil { + return fmt.Errorf("failed to publish block revert messages: %v", err) + } + + log.Debug().Str("metric", "publish_duration").Msgf("Publisher.PublishBlockData duration: %f", time.Since(publishStart).Seconds()) + return nil +} + func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isDeleted bool) error { if len(blockData) == 0 { return nil @@ -176,47 +228,71 @@ func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isDelete return nil } -func (p *KafkaPublisher) createBlockDataMessage(data common.BlockData, isDeleted bool) (*kgo.Record, error) { - insertTimestamp := time.Now() - msg := PublishableBlockMessage{ - BlockData: data.Serialize(), +func (p *KafkaPublisher) createBlockDataMessage(block common.BlockData, isDeleted bool) (*kgo.Record, error) { + timestamp := time.Now() + + data := PublishableMessageBlockData{ + BlockData: block, Sign: 1, - InsertTimestamp: insertTimestamp, + InsertTimestamp: timestamp, } if isDeleted { - msg.Sign = -1 // Indicate deletion with a negative sign + data.Sign = -1 } + + msg := PublishableMessagePayload{ + Data: data, + Type: data.GetType(), + Timestamp: timestamp, + } + msgJson, err := json.Marshal(msg) if err != nil { return nil, fmt.Errorf("failed to marshal block data: %v", err) } - // Determine partition based on chainID - var partition int32 - if data.ChainId <= math.MaxInt32 { - // Direct assignment for chain IDs that fit in int32 - partition = int32(data.ChainId) - } else { - // Hash for 
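// Illustrative aside, not from the patch itself: a downstream consumer of this envelope could
// be sketched as below (group name, broker address and chain id are assumptions; read-committed
// isolation matters because the producer above is transactional):
//
//	cl, err := kgo.NewClient(
//		kgo.SeedBrokers("localhost:9092"),
//		kgo.ConsumerGroup("insight-committer"),        // assumed group name
//		kgo.ConsumeTopics("insight.commit.blocks.1"),  // per-chain topic for chain id 1
//		kgo.FetchIsolationLevel(kgo.ReadCommitted()),  // skip records from aborted transactions
//	)
//	if err != nil {
//		panic(err)
//	}
//	for {
//		fetches := cl.PollFetches(context.Background())
//		fetches.EachRecord(func(r *kgo.Record) {
//			var env struct {
//				Type string          `json:"type"`
//				Data json.RawMessage `json:"data"`
//			}
//			if err := json.Unmarshal(r.Value, &env); err != nil {
//				return
//			}
//			switch env.Type {
//			case "block_data": // decode env.Data as block data and upsert it
//			case "revert":     // roll back rows above env.Data's block_number, then expect republished blocks
//			}
//		})
//	}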
larger chain IDs to avoid overflow - h := fnv.New32a() - fmt.Fprintf(h, "%d", data.ChainId) - partition = int32(h.Sum32() & 0x7FFFFFFF) // Ensure positive + return p.createRecord(data.GetType(), block.ChainId, block.Block.Number.Uint64(), timestamp, msgJson) +} + +func (p *KafkaPublisher) createBlockRevertMessage(chainId uint64, blockNumber uint64) (*kgo.Record, error) { + timestamp := time.Now() + + data := PublishableMessageRevert{ + ChainId: chainId, + BlockNumber: blockNumber, + Sign: 1, + InsertTimestamp: timestamp, + } + + msg := PublishableMessagePayload{ + Data: data, + Type: data.GetType(), + Timestamp: timestamp, } + msgJson, err := json.Marshal(msg) + if err != nil { + return nil, fmt.Errorf("failed to marshal block data: %v", err) + } + + return p.createRecord(data.GetType(), chainId, blockNumber, timestamp, msgJson) +} + +func (p *KafkaPublisher) createRecord(msgType MessageType, chainId uint64, blockNumber uint64, timestamp time.Time, msgJson []byte) (*kgo.Record, error) { // Create headers with metadata headers := []kgo.RecordHeader{ - {Key: "chain_id", Value: []byte(fmt.Sprintf("%d", data.ChainId))}, - {Key: "block_number", Value: []byte(fmt.Sprintf("%d", data.Block.Number))}, - {Key: "sign", Value: []byte(fmt.Sprintf("%d", msg.Sign))}, - {Key: "insert_timestamp", Value: []byte(insertTimestamp.Format(time.RFC3339Nano))}, + {Key: "chain_id", Value: []byte(fmt.Sprintf("%d", chainId))}, + {Key: "block_number", Value: []byte(fmt.Sprintf("%d", blockNumber))}, + {Key: "type", Value: []byte(fmt.Sprintf("%s", msgType))}, + {Key: "timestamp", Value: []byte(timestamp.Format(time.RFC3339Nano))}, {Key: "schema_version", Value: []byte("1")}, } return &kgo.Record{ - Topic: "insight.commit.blocks", - Key: []byte(fmt.Sprintf("blockdata-%d-%d-%s-%d", data.ChainId, data.Block.Number, data.Block.Hash, msg.Sign)), + Topic: fmt.Sprintf("insight.commit.blocks.%d", chainId), + Key: []byte(fmt.Sprintf("%d:%s:%d", chainId, msgType, blockNumber)), Value: msgJson, Headers: headers, - Partition: partition, + Partition: 0, }, nil } diff --git a/internal/storage/kafka_redis.go b/internal/storage/kafka_redis.go index 9c1a0ea..05d294c 100644 --- a/internal/storage/kafka_redis.go +++ b/internal/storage/kafka_redis.go @@ -184,27 +184,12 @@ func (kr *KafkaRedisConnector) ReplaceBlockData(data []common.BlockData) ([]comm oldBlocks := []common.BlockData{} - // Publish reorg event to Kafka - // TODO: Publish new blocks (the reorg handler will mark old ones as reverted) - if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { + // TODO: We need to fetch the old blocks from the primary data store + if err := kr.kafkaPublisher.PublishReorg(data, data); err != nil { return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) } - // Update cursor to track the highest block number - if len(data) > 0 { - var maxBlock *big.Int - for _, blockData := range data { - if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { - maxBlock = blockData.Block.Number - } - } - if maxBlock != nil { - if err := kr.SetLastPublishedBlockNumber(data[0].Block.ChainId, maxBlock); err != nil { - return nil, fmt.Errorf("failed to update published block cursor: %w", err) - } - } - } - + // save cursor return oldBlocks, nil } @@ -250,48 +235,48 @@ func (kr *KafkaRedisConnector) GetMaxBlockNumberInRange(chainId *big.Int, startB } func (kr *KafkaRedisConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { - return []common.BlockHeader{}, nil + return 
nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { - return QueryResult[common.TokenBalance]{Data: []common.TokenBalance{}}, nil + return QueryResult[common.TokenBalance]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { - return QueryResult[common.TokenTransfer]{Data: []common.TokenTransfer{}}, nil + return QueryResult[common.TokenTransfer]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { - return []common.BlockData{}, nil + return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { - return []*big.Int{}, nil + return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { - return []common.BlockData{}, nil + return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } -// Query methods return empty results as this connector uses Kafka for data delivery +// Query methods return errors as this is a write-only connector for streaming func (kr *KafkaRedisConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { - return QueryResult[common.Block]{Data: []common.Block{}}, nil + return QueryResult[common.Block]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { - return QueryResult[common.Transaction]{Data: []common.Transaction{}}, nil + return QueryResult[common.Transaction]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { - return QueryResult[common.Log]{Data: []common.Log{}}, nil + return QueryResult[common.Log]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { - return QueryResult[common.Trace]{Data: []common.Trace{}}, nil + return QueryResult[common.Trace]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { - return QueryResult[interface{}]{Aggregates: []map[string]interface{}{}}, nil + return 
QueryResult[interface{}]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } // Close closes the Redis connection From cd434a2d7054b4baf6e86e70105458b7c1f41fc1 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 19 Aug 2025 05:53:35 +0000 Subject: [PATCH 14/43] Update kafka-postgres -> kafka-redis config --- cmd/root.go | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 7eecdd0..61e10bc 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -131,14 +131,10 @@ func init() { rootCmd.PersistentFlags().String("storage-main-kafka-username", "", "Kafka username for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") rootCmd.PersistentFlags().Bool("storage-main-kafka-enable-tls", true, "Enable TLS for Kafka connection in main storage") - rootCmd.PersistentFlags().String("storage-main-kafka-postgres-host", "", "PostgreSQL host for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-port", 5432, "PostgreSQL port for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().String("storage-main-kafka-postgres-username", "", "PostgreSQL username for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().String("storage-main-kafka-postgres-password", "", "PostgreSQL password for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().String("storage-main-kafka-postgres-database", "", "PostgreSQL database for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().String("storage-main-kafka-postgres-sslMode", "require", "PostgreSQL SSL mode for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-maxOpenConns", 25, "PostgreSQL max open connections for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-maxIdleConns", 10, "PostgreSQL max idle connections for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().String("storage-main-kafka-redis-host", "", "Redis host for Kafka main storage metadata") + rootCmd.PersistentFlags().Int("storage-main-kafka-redis-port", 6379, "Redis port for Kafka main storage metadata") + rootCmd.PersistentFlags().String("storage-main-kafka-redis-password", "", "Redis password for Kafka main storage metadata") + rootCmd.PersistentFlags().Int("storage-main-kafka-redis-db", 0, "Redis database number for Kafka main storage metadata") rootCmd.PersistentFlags().String("api-host", "localhost:3000", "API host") rootCmd.PersistentFlags().String("api-basicAuth-username", "", "API basic auth username") rootCmd.PersistentFlags().String("api-basicAuth-password", "", "API basic auth password") @@ -265,14 +261,10 @@ func init() { viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) viper.BindPFlag("storage.main.kafka.enable_tls", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) - viper.BindPFlag("storage.main.kafka.postgres.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-host")) - viper.BindPFlag("storage.main.kafka.postgres.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-port")) - viper.BindPFlag("storage.main.kafka.postgres.username", 
rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-username")) - viper.BindPFlag("storage.main.kafka.postgres.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-password")) - viper.BindPFlag("storage.main.kafka.postgres.database", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-database")) - viper.BindPFlag("storage.main.kafka.postgres.sslMode", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-sslMode")) - viper.BindPFlag("storage.main.kafka.postgres.maxOpenConns", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-maxOpenConns")) - viper.BindPFlag("storage.main.kafka.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-maxIdleConns")) + viper.BindPFlag("storage.main.kafka.redis.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-host")) + viper.BindPFlag("storage.main.kafka.redis.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-port")) + viper.BindPFlag("storage.main.kafka.redis.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-password")) + viper.BindPFlag("storage.main.kafka.redis.db", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-db")) viper.BindPFlag("api.host", rootCmd.PersistentFlags().Lookup("api-host")) viper.BindPFlag("api.basicAuth.username", rootCmd.PersistentFlags().Lookup("api-basicAuth-username")) viper.BindPFlag("api.basicAuth.password", rootCmd.PersistentFlags().Lookup("api-basicAuth-password")) From 4fd141d2e386795e9c972f2d4f7671dcf44da1c6 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 20 Aug 2025 06:56:12 +0000 Subject: [PATCH 15/43] Update schema to use replacing merge tree --- internal/storage/kafka_publisher.go | 10 +- .../0000_clickhouse_create_blocks_table.sql | 4 +- ...1_clickhouse_create_transactions_table.sql | 50 ++++-- .../0002_clickhouse_create_logs_table.sql | 20 ++- .../0003_clickhouse_create_traces_table.sql | 4 +- ...04_clickhouse_create_insert_null_table.sql | 4 +- .../0005_clickhouse_create_insert_data_mv.sql | 6 +- ...0006_clickhouse_create_token_transfers.sql | 4 +- ...7_clickhouse_create_token_transfers_mv.sql | 33 ++-- .../0008_clickhouse_create_token_balance.sql | 44 ----- .../0008_clickhouse_create_token_balances.sql | 64 +++++++ ...009_clickhouse_create_token_balance_mv.sql | 157 ----------------- ...09_clickhouse_create_token_balances_mv.sql | 161 ++++++++++++++++++ ...clickhouse_create_address_transactions.sql | 21 ++- ...ckhouse_create_address_transactions_mv.sql | 6 +- ...12_clickhouse_create_address_transfers.sql | 4 +- ...clickhouse_create_address_transfers_mv.sql | 4 +- 17 files changed, 337 insertions(+), 259 deletions(-) delete mode 100644 internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql create mode 100644 internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql delete mode 100644 internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql create mode 100644 internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index d3376ce..90f3ca3 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -37,14 +37,14 @@ type PublishableMessagePayload struct { type PublishableMessageBlockData struct { common.BlockData - Sign int8 `json:"sign"` + IsDeleted int8 `json:"is_deleted"` InsertTimestamp time.Time `json:"insert_timestamp"` } type PublishableMessageRevert 
struct { ChainId uint64 `json:"chain_id"` BlockNumber uint64 `json:"block_number"` - Sign int8 `json:"sign"` + IsDeleted int8 `json:"is_deleted"` InsertTimestamp time.Time `json:"insert_timestamp"` } @@ -233,11 +233,11 @@ func (p *KafkaPublisher) createBlockDataMessage(block common.BlockData, isDelete data := PublishableMessageBlockData{ BlockData: block, - Sign: 1, + IsDeleted: 0, InsertTimestamp: timestamp, } if isDeleted { - data.Sign = -1 + data.IsDeleted = 1 } msg := PublishableMessagePayload{ @@ -260,7 +260,7 @@ func (p *KafkaPublisher) createBlockRevertMessage(chainId uint64, blockNumber ui data := PublishableMessageRevert{ ChainId: chainId, BlockNumber: blockNumber, - Sign: 1, + IsDeleted: 0, InsertTimestamp: timestamp, } diff --git a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index b311f24..fa349c6 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -23,11 +23,11 @@ CREATE TABLE IF NOT EXISTS blocks ( `base_fee_per_gas` Nullable(UInt64), `insert_timestamp` DateTime DEFAULT now(), - `sign` Int8 DEFAULT 1, + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql index 02a0294..eb5787c 100644 --- a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql +++ b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql @@ -32,8 +32,8 @@ CREATE TABLE IF NOT EXISTS transactions ( `logs_bloom` Nullable(String), `status` Nullable(UInt64), - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, @@ -45,14 +45,7 @@ CREATE TABLE IF NOT EXISTS transactions ( PROJECTION from_address_projection ( SELECT - chain_id, - block_number, - block_timestamp, - hash, - from_address, - to_address, - value, - data + * ORDER BY chain_id, from_address, @@ -62,21 +55,42 @@ CREATE TABLE IF NOT EXISTS transactions ( PROJECTION to_address_projection ( SELECT - chain_id, - block_number, - block_timestamp, - hash, - from_address, - to_address, - value, - data + * ORDER BY chain_id, to_address, block_number, hash + ), + PROJECTION from_address_state_projection + ( + SELECT + chain_id, + from_address, + countState() AS tx_count_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + from_address + ), + PROJECTION to_address_state_projection + ( + SELECT + chain_id, + to_address, + countState() AS tx_count_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + 
maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + to_address ) -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number, hash) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql index e327edb..139d7dd 100644 --- a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -13,8 +13,8 @@ CREATE TABLE IF NOT EXISTS logs ( `topic_2` String, `topic_3` String, - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, @@ -48,8 +48,24 @@ CREATE TABLE IF NOT EXISTS logs ( transaction_index, log_index, address + ), + PROJECTION address_topic0_state_projection + ( + SELECT + chain_id, + address, + topic_0, + countState() AS log_count_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + address, + topic_0 ) -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number, transaction_hash, log_index) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql index 17a032b..289f690 100644 --- a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql +++ b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql @@ -21,8 +21,8 @@ CREATE TABLE IF NOT EXISTS traces ( `reward_type` LowCardinality(Nullable(String)), `refund_address` Nullable(FixedString(42)), - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 2, @@ -52,7 +52,7 @@ CREATE TABLE IF NOT EXISTS traces ( trace_address ) -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, transaction_hash, trace_address) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql index 46f1541..3cc7b1a 100644 --- a/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql +++ b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql @@ -94,6 +94,6 @@ CREATE TABLE IF NOT EXISTS insert_null_block_data ( refund_address Nullable(FixedString(42)) )), - 
sign Int8 DEFAULT 1, - insert_timestamp DateTime DEFAULT now() + insert_timestamp DateTime DEFAULT now(), + is_deleted Int8 DEFAULT 0 ) ENGINE = Null; diff --git a/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql index f7c7c46..1a067f8 100644 --- a/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql +++ b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql @@ -65,7 +65,7 @@ SELECT t.30 AS logs_bloom, t.31 AS status, insert_timestamp, - sign + is_deleted FROM insert_null_block_data ARRAY JOIN transactions AS t; @@ -87,7 +87,7 @@ SELECT l.11 AS topic_2, l.12 AS topic_3, insert_timestamp, - sign + is_deleted FROM insert_null_block_data ARRAY JOIN logs AS l; @@ -117,6 +117,6 @@ SELECT tr.19 AS reward_type, tr.20 AS refund_address, insert_timestamp, - sign + is_deleted FROM insert_null_block_data ARRAY JOIN traces AS tr; diff --git a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql index 0d6ef92..4afdcda 100644 --- a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql +++ b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql @@ -14,8 +14,8 @@ CREATE TABLE IF NOT EXISTS token_transfers `log_index` UInt64, `batch_index` Nullable(UInt16) DEFAULT NULL, - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, @@ -54,7 +54,7 @@ CREATE TABLE IF NOT EXISTS token_transfers log_index ) ) -ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) ORDER BY (chain_id, token_address, block_number, transaction_index, log_index) SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql index e03b1a0..7c09aea 100644 --- a/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql +++ b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql @@ -16,8 +16,8 @@ SELECT reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS amount, log_index, CAST(NULL AS Nullable(UInt16)) AS batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM logs WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' -- Transfer AND length(topic_1) = 66 AND startsWith(topic_1, '0x') @@ -43,8 +43,8 @@ SELECT toUInt8(1) AS amount, log_index, CAST(NULL AS Nullable(UInt16)) AS batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM logs WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' AND length(topic_1) = 66 AND startsWith(topic_1, '0x') @@ -70,8 +70,8 @@ SELECT reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, log_index, toNullable(toUInt16(0)) AS batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM logs WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' -- TransferSingle AND length(topic_2) = 66 AND length(topic_3) = 66 @@ 
-95,12 +95,21 @@ SELECT reinterpretAsUInt256(reverse(unhex(amount_hex))) AS amount, log_index, toNullable(toUInt16(array_index - 1)) AS batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM ( SELECT - chain_id, address, topic_2, topic_3, - block_number, block_timestamp, transaction_hash, transaction_index, log_index, sign, insert_timestamp, + chain_id, + address, + topic_2, + topic_3, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + is_deleted, + insert_timestamp, toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64))))) AS ids_offset, toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64))))) AS amounts_offset, toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + ids_offset * 2, 64))))) AS ids_length, @@ -136,8 +145,8 @@ SELECT reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, log_index, CAST(NULL AS Nullable(UInt16)) AS batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM logs WHERE topic_0 = '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' AND length(topic_1) = 66 diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql deleted file mode 100644 index 0cf38c9..0000000 --- a/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql +++ /dev/null @@ -1,44 +0,0 @@ -CREATE TABLE IF NOT EXISTS token_balances -( - `chain_id` UInt256, - `token_type` LowCardinality(String), - `token_address` FixedString(42), - `owner_address` FixedString(42), - `token_id` UInt256, - - `balance_state` AggregateFunction(sum, Int256), - `last_block_number_state` AggregateFunction(max, UInt256), - `last_block_timestamp_state` AggregateFunction(max, DateTime), - - INDEX idx_last_block_number (finalizeAggregation(last_block_number_state)) TYPE minmax GRANULARITY 1, - INDEX idx_last_block_timestamp (finalizeAggregation(last_block_timestamp_state)) TYPE minmax GRANULARITY 1, - - PROJECTION owner_balances_projection - ( - SELECT - chain_id, - owner_address, - token_address, - token_id, - sumMerge(balance_state) AS balance, - maxMerge(last_block_number_state) AS last_block_number, - maxMerge(last_block_timestamp_state) AS last_block_timestamp - GROUP BY chain_id, owner_address, token_address, token_id - ), - PROJECTION token_projection - ( - SELECT - chain_id, - token_address, - token_id, - owner_address, - balance_state, - last_block_number_state, - last_block_timestamp_state - ORDER BY chain_id, token_address, token_id, owner_address - ) -) -ENGINE = AggregatingMergeTree -PARTITION BY chain_id -ORDER BY (chain_id, owner_address, token_address, token_id) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql new file mode 100644 index 0000000..a0ed08b --- /dev/null +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql @@ -0,0 +1,64 @@ +CREATE TABLE IF NOT EXISTS token_balances +( + `chain_id` UInt256, + `token_type` LowCardinality(String), + `token_address` FixedString(42), + `owner_address` FixedString(42), + `token_id` UInt256, + + -- Normalized delta: positive for incoming, negative for outgoing + `balance_delta` Int256, + + -- Transaction details for ordering and deduplication + 
`block_number` UInt256, + `block_timestamp` DateTime, + `transaction_hash` FixedString(66), + `transaction_index` UInt64, + `log_index` UInt64, + `direction` Enum8('from' = 1, 'to' = 2), -- To make each transfer create 2 unique rows + + `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_token_address token_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_owner_address owner_address TYPE bloom_filter GRANULARITY 3, + + -- Projection for efficient balance queries by owner + PROJECTION owner_balances_projection + ( + SELECT + chain_id, + owner_address, + token_address, + token_id, + sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY chain_id, owner_address, token_address, token_id + ORDER BY chain_id, owner_address, token_address, token_id + ), + + -- Projection for efficient balance queries by token + PROJECTION token_balances_projection + ( + SELECT + chain_id, + token_address, + token_id, + owner_address, + sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY chain_id, token_address, token_id, owner_address + ORDER BY chain_id, token_address, token_id, owner_address + ) +) +ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) +PARTITION BY chain_id +ORDER BY (chain_id, owner_address, token_address, token_id, block_number, transaction_index, log_index, direction) +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql deleted file mode 100644 index be000df..0000000 --- a/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql +++ /dev/null @@ -1,157 +0,0 @@ --- ERC20 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - owner_address, - token_id, - sumState(delta) AS balance_state, - maxState(block_number) AS last_block_number_state, - maxState(block_timestamp) AS last_block_timestamp_state -FROM -( - -- FROM side (negative) - SELECT - chain_id, - token_type, - token_address, - token_id, - from_address AS owner_address, - toInt256(amount) * (-1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc20' - UNION ALL - -- TO side (positive) - SELECT - chain_id, - token_type, - token_address, - token_id, - to_address AS owner_address, - toInt256(amount) * (+1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc20' -) -GROUP BY chain_id, token_type, token_address, owner_address, token_id; - --- ERC721 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - owner_address, - token_id, - sumState(delta) AS balance_state, - maxState(block_number) AS last_block_number_state, - 
maxState(block_timestamp) AS last_block_timestamp_state -FROM -( - SELECT - chain_id, - token_type, - token_address, - from_address AS owner_address, - token_id, - toInt256(1) * (-1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc721' - UNION ALL - SELECT - chain_id, - token_type, - token_address, - to_address AS owner_address, - token_id, - toInt256(1) * (+1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc721' -) -GROUP BY chain_id, token_type, token_address, owner_address, token_id; - --- ERC1155 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - owner_address, - token_id, - sumState(delta) AS balance_state, - maxState(block_number) AS last_block_number_state, - maxState(block_timestamp) AS last_block_timestamp_state -FROM -( - SELECT - chain_id, - token_type, - token_address, - from_address AS owner_address, - token_id, - toInt256(amount) * (-1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc1155' - UNION ALL - SELECT - chain_id, - token_type, - token_address, - to_address AS owner_address, - token_id, - toInt256(amount) * (+1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc1155' -) -GROUP BY chain_id, token_type, token_address, owner_address, token_id; - --- ERC6909 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - owner_address, - token_id, - sumState(delta) AS balance_state, - maxState(block_number) AS last_block_number_state, - maxState(block_timestamp) AS last_block_timestamp_state -FROM -( - SELECT - chain_id, - token_type, - token_address, - from_address AS owner_address, - token_id, - toInt256(amount) * (-1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc6909' - UNION ALL - SELECT - chain_id, - token_type, - token_address, - to_address AS owner_address, - token_id, - toInt256(amount) * (+1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc6909' -) -GROUP BY chain_id, token_type, token_address, owner_address, token_id; \ No newline at end of file diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql new file mode 100644 index 0000000..63e523e --- /dev/null +++ b/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql @@ -0,0 +1,161 @@ +-- ERC20 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_mv +TO token_balances +AS +-- FROM side (outgoing, negative delta) +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc20' +UNION ALL +-- TO side (incoming, positive delta) +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc20'; + +-- ERC721 +CREATE 
MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_mv +TO token_balances +AS +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -1 AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc721' +UNION ALL +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + 1 AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc721'; + +-- ERC1155 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_mv +TO token_balances +AS +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc1155' +UNION ALL +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc1155'; + +-- ERC6909 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_mv +TO token_balances +AS +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc6909' +UNION ALL +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc6909'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql index c33638e..55ed9f9 100644 --- a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql +++ b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql @@ -32,12 +32,27 @@ CREATE TABLE IF NOT EXISTS address_transactions ( `logs_bloom` Nullable(String), `status` Nullable(UInt64), - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3 -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) + INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, + + PROJECTION address_total_count_projection + ( + SELECT + chain_id, + address, + countState() AS tx_count_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + address + ) +) ENGINE = 
ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, address, block_number, hash, transaction_index) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql index 46d64d3..48c4cb2 100644 --- a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql +++ b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql @@ -34,9 +34,9 @@ SELECT blob_gas_price, logs_bloom, status, - - sign, - insert_timestamp + + insert_timestamp, + is_deleted FROM transactions ARRAY JOIN arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql index 2e8d071..2600e59 100644 --- a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql +++ b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql @@ -13,12 +13,12 @@ CREATE TABLE IF NOT EXISTS address_transfers ( `log_index` UInt64, `batch_index` Nullable(UInt16) DEFAULT NULL, - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3 -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, address, block_number, transaction_hash, transaction_index) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql index 72a3ebb..9256143 100644 --- a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql +++ b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql @@ -15,8 +15,8 @@ SELECT amount, log_index, batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM token_transfers ARRAY JOIN arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file From 3b9f6943e1fc5ef7cb8264d8ec6b77953a5c3bc4 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 20 Aug 2025 20:32:14 +0000 Subject: [PATCH 16/43] Fix schema --- .../0000_clickhouse_create_blocks_table.sql | 2 +- ...1_clickhouse_create_transactions_table.sql | 2 +- .../0002_clickhouse_create_logs_table.sql | 2 +- .../0003_clickhouse_create_traces_table.sql | 2 +- ...04_clickhouse_create_insert_null_table.sql | 2 +- .../0005_clickhouse_create_insert_data_mv.sql | 2 +- ...0006_clickhouse_create_token_transfers.sql | 56 ++++++++++++++++++- .../0008_clickhouse_create_token_balances.sql | 10 +--- ...clickhouse_create_address_transactions.sql | 2 +- ...12_clickhouse_create_address_transfers.sql | 43 +++++++++++++- 10 files changed, 106 insertions(+), 17 deletions(-) diff --git a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql 
b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index fa349c6..a1d1979 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -23,7 +23,7 @@ CREATE TABLE IF NOT EXISTS blocks ( `base_fee_per_gas` Nullable(UInt64), `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, diff --git a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql index eb5787c..11dff13 100644 --- a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql +++ b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql @@ -33,7 +33,7 @@ CREATE TABLE IF NOT EXISTS transactions ( `status` Nullable(UInt64), `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, diff --git a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql index 139d7dd..89f6e1c 100644 --- a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -14,7 +14,7 @@ CREATE TABLE IF NOT EXISTS logs ( `topic_3` String, `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, diff --git a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql index 289f690..8f69a1f 100644 --- a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql +++ b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql @@ -22,7 +22,7 @@ CREATE TABLE IF NOT EXISTS traces ( `refund_address` Nullable(FixedString(42)), `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 2, diff --git a/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql index 3cc7b1a..8597fcd 100644 --- a/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql +++ b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql @@ -95,5 +95,5 @@ CREATE TABLE IF NOT EXISTS insert_null_block_data ( )), insert_timestamp DateTime DEFAULT now(), - is_deleted Int8 DEFAULT 0 + is_deleted UInt8 DEFAULT 0 ) ENGINE = Null; diff --git a/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql index 1a067f8..b10c379 100644 --- a/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql +++ b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql @@ -25,7 +25,7 @@ SELECT block.20 AS withdrawals_root, block.21 AS base_fee_per_gas, insert_timestamp, - sign + is_deleted FROM insert_null_block_data; CREATE MATERIALIZED VIEW IF 
NOT EXISTS insert_transactions_mv diff --git a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql index 4afdcda..9007649 100644 --- a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql +++ b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql @@ -15,7 +15,7 @@ CREATE TABLE IF NOT EXISTS token_transfers `batch_index` Nullable(UInt16) DEFAULT NULL, `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, @@ -52,6 +52,60 @@ CREATE TABLE IF NOT EXISTS token_transfers block_number, transaction_index, log_index + ), + PROJECTION from_address_state_projection ( + SELECT + chain_id, + from_address, + token_address, + token_type, + countState() AS transfer_count_state, + sumState(toInt256(amount)) AS total_amount_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + from_address, + token_address, + token_type + ), + PROJECTION to_address_state_projection ( + SELECT + chain_id, + to_address, + token_address, + token_type, + countState() AS transfer_count_state, + sumState(toInt256(amount)) AS total_amount_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + to_address, + token_address, + token_type + ), + PROJECTION token_state_projection ( + SELECT + chain_id, + token_address, + token_id, + token_type, + countState() AS transfer_count_state, + sumState(toInt256(amount)) AS total_volume_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + token_address, + token_id, + token_type ) ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql index a0ed08b..11e0c6a 100644 --- a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql @@ -18,13 +18,12 @@ CREATE TABLE IF NOT EXISTS token_balances `direction` Enum8('from' = 1, 'to' = 2), -- To make each transfer create 2 unique rows `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_token_address token_address TYPE bloom_filter GRANULARITY 3, INDEX idx_owner_address owner_address TYPE bloom_filter GRANULARITY 3, - -- Projection for efficient balance queries by owner PROJECTION owner_balances_projection ( SELECT @@ -32,16 +31,14 @@ CREATE TABLE IF NOT EXISTS token_balances owner_address, token_address, token_id, - sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state + sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, minState(block_number) AS min_block_number_state, 
minState(block_timestamp) AS min_block_timestamp_state, maxState(block_number) AS max_block_number_state, maxState(block_timestamp) AS max_block_timestamp_state GROUP BY chain_id, owner_address, token_address, token_id - ORDER BY chain_id, owner_address, token_address, token_id ), - -- Projection for efficient balance queries by token PROJECTION token_balances_projection ( SELECT @@ -49,13 +46,12 @@ CREATE TABLE IF NOT EXISTS token_balances token_address, token_id, owner_address, - sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state + sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, minState(block_number) AS min_block_number_state, minState(block_timestamp) AS min_block_timestamp_state, maxState(block_number) AS max_block_number_state, maxState(block_timestamp) AS max_block_timestamp_state GROUP BY chain_id, token_address, token_id, owner_address - ORDER BY chain_id, token_address, token_id, owner_address ) ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) diff --git a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql index 55ed9f9..11179d7 100644 --- a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql +++ b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql @@ -33,7 +33,7 @@ CREATE TABLE IF NOT EXISTS address_transactions ( `status` Nullable(UInt64), `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql index 2600e59..4b9b864 100644 --- a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql +++ b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql @@ -14,10 +14,49 @@ CREATE TABLE IF NOT EXISTS address_transfers ( `batch_index` Nullable(UInt16) DEFAULT NULL, `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3 + INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, + + PROJECTION address_state_projection ( + SELECT + chain_id, + address, + address_type, + token_address, + token_type, + countState() AS transfer_count_state, + sumState(toInt256(amount)) AS total_amount_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + address, + address_type, + token_address, + token_type + ), + PROJECTION address_total_state_projection ( + SELECT + chain_id, + address, + token_address, + token_type, + countState() AS transfer_count_state, + sumState(toInt256(amount)) AS total_amount_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + address, + token_address, + token_type + ) ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY 
(chain_id, address, block_number, transaction_hash, transaction_index) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) From 92a35ab4878372f5890e05537d084e26af63e018 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 22 Aug 2025 21:46:23 +0000 Subject: [PATCH 17/43] Badger & S3 --- cmd/orchestrator.go | 9 +- cmd/root.go | 45 +- configs/config.go | 32 +- go.mod | 51 +- go.sum | 120 ++- internal/orchestrator/orchestrator.go | 42 +- internal/orchestrator/poller.go | 2 +- internal/orchestrator/validator.go | 5 +- internal/storage/badger.go | 479 +++++++++++ internal/storage/clickhouse.go | 8 + internal/storage/connector.go | 94 ++- internal/storage/s3.go | 1071 +++++++++++++++++++++++++ 12 files changed, 1880 insertions(+), 78 deletions(-) create mode 100644 internal/storage/badger.go create mode 100644 internal/storage/s3.go diff --git a/cmd/orchestrator.go b/cmd/orchestrator.go index 84665df..6d8a357 100644 --- a/cmd/orchestrator.go +++ b/cmd/orchestrator.go @@ -32,12 +32,19 @@ func RunOrchestrator(cmd *cobra.Command, args []string) { if err != nil { log.Fatal().Err(err).Msg("Failed to create orchestrator") } + // Start Prometheus metrics server log.Info().Msg("Starting Metrics Server on port 2112") go func() { http.Handle("/metrics", promhttp.Handler()) - http.ListenAndServe(":2112", nil) + if err := http.ListenAndServe(":2112", nil); err != nil { + log.Error().Err(err).Msg("Metrics server error") + } }() + // Start orchestrator (blocks until shutdown) + // The orchestrator handles signals internally and coordinates shutdown orchestrator.Start() + + log.Info().Msg("Shutdown complete") } diff --git a/cmd/root.go b/cmd/root.go index 61e10bc..d9548fb 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -135,6 +135,29 @@ func init() { rootCmd.PersistentFlags().Int("storage-main-kafka-redis-port", 6379, "Redis port for Kafka main storage metadata") rootCmd.PersistentFlags().String("storage-main-kafka-redis-password", "", "Redis password for Kafka main storage metadata") rootCmd.PersistentFlags().Int("storage-main-kafka-redis-db", 0, "Redis database number for Kafka main storage metadata") + // Storage type selection flags + rootCmd.PersistentFlags().String("storage-staging-type", "auto", "Storage type for staging (auto, clickhouse, postgres, kafka, badger, s3)") + rootCmd.PersistentFlags().String("storage-main-type", "auto", "Storage type for main (auto, clickhouse, postgres, kafka, badger, s3)") + rootCmd.PersistentFlags().String("storage-orchestrator-type", "auto", "Storage type for orchestrator (auto, clickhouse, postgres, badger)") + // BadgerDB flags for staging storage + rootCmd.PersistentFlags().String("storage-staging-badger-path", "", "BadgerDB path for staging storage") + // BadgerDB flags for orchestrator storage + rootCmd.PersistentFlags().String("storage-orchestrator-badger-path", "", "BadgerDB path for orchestrator storage") + // S3 flags for main storage + rootCmd.PersistentFlags().String("storage-main-s3-bucket", "", "S3 bucket for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-region", "", "S3 region for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-prefix", "", "S3 key prefix for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-accessKeyId", "", "S3 access key ID for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-secretAccessKey", "", "S3 secret access key for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-endpoint", "", "S3 endpoint 
URL for main storage (for S3-compatible services)") + rootCmd.PersistentFlags().String("storage-main-s3-format", "parquet", "S3 storage format for main storage (parquet or json)") + rootCmd.PersistentFlags().Int64("storage-main-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for main storage") + rootCmd.PersistentFlags().Int("storage-main-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for main storage") + rootCmd.PersistentFlags().Int("storage-main-s3-maxBlocksPerFile", 0, "S3 max blocks per file for main storage (0 = no limit)") + // S3 Parquet configuration + rootCmd.PersistentFlags().String("storage-main-s3-parquet-compression", "snappy", "Parquet compression type for S3 main storage") + rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-rowGroupSize", 256, "Parquet row group size in MB for S3 main storage") + rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-pageSize", 8192, "Parquet page size in KB for S3 main storage") rootCmd.PersistentFlags().String("api-host", "localhost:3000", "API host") rootCmd.PersistentFlags().String("api-basicAuth-username", "", "API basic auth username") rootCmd.PersistentFlags().String("api-basicAuth-password", "", "API basic auth password") @@ -260,11 +283,29 @@ func init() { viper.BindPFlag("storage.main.kafka.brokers", rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) - viper.BindPFlag("storage.main.kafka.enable_tls", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) + viper.BindPFlag("storage.main.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) viper.BindPFlag("storage.main.kafka.redis.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-host")) viper.BindPFlag("storage.main.kafka.redis.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-port")) viper.BindPFlag("storage.main.kafka.redis.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-password")) viper.BindPFlag("storage.main.kafka.redis.db", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-db")) + viper.BindPFlag("storage.staging.type", rootCmd.PersistentFlags().Lookup("storage-staging-type")) + viper.BindPFlag("storage.main.type", rootCmd.PersistentFlags().Lookup("storage-main-type")) + viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) + viper.BindPFlag("storage.staging.badger.path", rootCmd.PersistentFlags().Lookup("storage-staging-badger-path")) + viper.BindPFlag("storage.orchestrator.badger.path", rootCmd.PersistentFlags().Lookup("storage-orchestrator-badger-path")) + viper.BindPFlag("storage.main.s3.bucket", rootCmd.PersistentFlags().Lookup("storage-main-s3-bucket")) + viper.BindPFlag("storage.main.s3.region", rootCmd.PersistentFlags().Lookup("storage-main-s3-region")) + viper.BindPFlag("storage.main.s3.prefix", rootCmd.PersistentFlags().Lookup("storage-main-s3-prefix")) + viper.BindPFlag("storage.main.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("storage-main-s3-accessKeyId")) + viper.BindPFlag("storage.main.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("storage-main-s3-secretAccessKey")) + viper.BindPFlag("storage.main.s3.endpoint", rootCmd.PersistentFlags().Lookup("storage-main-s3-endpoint")) + 
viper.BindPFlag("storage.main.s3.format", rootCmd.PersistentFlags().Lookup("storage-main-s3-format")) + viper.BindPFlag("storage.main.s3.bufferSizeMB", rootCmd.PersistentFlags().Lookup("storage-main-s3-bufferSizeMB")) + viper.BindPFlag("storage.main.s3.bufferTimeoutSeconds", rootCmd.PersistentFlags().Lookup("storage-main-s3-bufferTimeoutSeconds")) + viper.BindPFlag("storage.main.s3.maxBlocksPerFile", rootCmd.PersistentFlags().Lookup("storage-main-s3-maxBlocksPerFile")) + viper.BindPFlag("storage.main.s3.parquet.compression", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-compression")) + viper.BindPFlag("storage.main.s3.parquet.rowGroupSize", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-rowGroupSize")) + viper.BindPFlag("storage.main.s3.parquet.pageSize", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-pageSize")) viper.BindPFlag("api.host", rootCmd.PersistentFlags().Lookup("api-host")) viper.BindPFlag("api.basicAuth.username", rootCmd.PersistentFlags().Lookup("api-basicAuth-username")) viper.BindPFlag("api.basicAuth.password", rootCmd.PersistentFlags().Lookup("api-basicAuth-password")) @@ -280,7 +321,7 @@ func init() { viper.BindPFlag("publisher.brokers", rootCmd.PersistentFlags().Lookup("publisher-brokers")) viper.BindPFlag("publisher.username", rootCmd.PersistentFlags().Lookup("publisher-username")) viper.BindPFlag("publisher.password", rootCmd.PersistentFlags().Lookup("publisher-password")) - viper.BindPFlag("publisher.enable_tls", rootCmd.PersistentFlags().Lookup("publisher-enable-tls")) + viper.BindPFlag("publisher.enableTLS", rootCmd.PersistentFlags().Lookup("publisher-enable-tls")) viper.BindPFlag("publisher.blocks.enabled", rootCmd.PersistentFlags().Lookup("publisher-blocks-enabled")) viper.BindPFlag("publisher.blocks.topicName", rootCmd.PersistentFlags().Lookup("publisher-blocks-topicName")) viper.BindPFlag("publisher.transactions.enabled", rootCmd.PersistentFlags().Lookup("publisher-transactions-enabled")) diff --git a/configs/config.go b/configs/config.go index 3777928..29703bb 100644 --- a/configs/config.go +++ b/configs/config.go @@ -60,9 +60,37 @@ const ( ) type StorageConnectionConfig struct { + Type string `mapstructure:"type"` // "auto", "clickhouse", "postgres", "kafka", "badger", "s3" Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` Postgres *PostgresConfig `mapstructure:"postgres"` Kafka *KafkaConfig `mapstructure:"kafka"` + Badger *BadgerConfig `mapstructure:"badger"` + S3 *S3Config `mapstructure:"s3"` +} + +type BadgerConfig struct { + Path string `mapstructure:"path"` +} + +type S3Config struct { + Bucket string `mapstructure:"bucket"` + Region string `mapstructure:"region"` + Prefix string `mapstructure:"prefix"` + AccessKeyID string `mapstructure:"accessKeyId"` + SecretAccessKey string `mapstructure:"secretAccessKey"` + Endpoint string `mapstructure:"endpoint"` + Format string `mapstructure:"format"` + Parquet *ParquetConfig `mapstructure:"parquet"` + // Buffering configuration + BufferSize int64 `mapstructure:"bufferSizeMB"` // Target buffer size in MB before flush (default 1024 MB = 1GB) + BufferTimeout int `mapstructure:"bufferTimeoutSeconds"` // Max time in seconds before flush (default 300 = 5 min) + MaxBlocksPerFile int `mapstructure:"maxBlocksPerFile"` // Max blocks per parquet file (0 = no limit, only size/timeout triggers) +} + +type ParquetConfig struct { + Compression string `mapstructure:"compression"` + RowGroupSize int64 `mapstructure:"rowGroupSize"` + PageSize int64 `mapstructure:"pageSize"` } type 
TableConfig struct { @@ -113,7 +141,7 @@ type KafkaConfig struct { Brokers string `mapstructure:"brokers"` Username string `mapstructure:"username"` Password string `mapstructure:"password"` - EnableTLS bool `mapstructure:"enable_tls"` + EnableTLS bool `mapstructure:"enableTLS"` Redis *RedisConfig `mapstructure:"redis"` } @@ -193,7 +221,7 @@ type PublisherConfig struct { Brokers string `mapstructure:"brokers"` Username string `mapstructure:"username"` Password string `mapstructure:"password"` - EnableTLS bool `mapstructure:"enable_tls"` + EnableTLS bool `mapstructure:"enableTLS"` Blocks BlockPublisherConfig `mapstructure:"blocks"` Transactions TransactionPublisherConfig `mapstructure:"transactions"` Traces TracePublisherConfig `mapstructure:"traces"` diff --git a/go.mod b/go.mod index 66d4ef5..68052a9 100644 --- a/go.mod +++ b/go.mod @@ -4,14 +4,20 @@ go 1.23.0 require ( github.com/ClickHouse/clickhouse-go/v2 v2.36.0 + github.com/aws/aws-sdk-go-v2 v1.38.0 + github.com/aws/aws-sdk-go-v2/config v1.31.0 + github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0 + github.com/dgraph-io/badger/v4 v4.8.0 github.com/ethereum/go-ethereum v1.15.11 github.com/gin-gonic/gin v1.10.0 github.com/gorilla/schema v1.4.1 github.com/holiman/uint256 v1.3.2 github.com/lib/pq v1.10.9 + github.com/parquet-go/parquet-go v0.25.1 github.com/prometheus/client_golang v1.20.4 + github.com/redis/go-redis/v9 v9.12.1 github.com/rs/zerolog v1.33.0 - github.com/spf13/cobra v1.8.1 + github.com/spf13/cobra v1.9.1 github.com/spf13/viper v1.18.0 github.com/stretchr/testify v1.10.0 github.com/swaggo/files v1.0.1 @@ -25,6 +31,21 @@ require ( github.com/KyleBanks/depth v1.2.1 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/andybalholm/brotli v1.1.1 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.18.4 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.3 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.28.0 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.0 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.37.0 // indirect + github.com/aws/smithy-go v1.22.5 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bits-and-blooms/bitset v1.20.0 // indirect github.com/bytedance/sonic v1.12.6 // indirect @@ -39,7 +60,9 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/deckarep/golang-set/v2 v2.6.0 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 // indirect + github.com/dgraph-io/ristretto/v2 v2.2.0 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/ethereum/c-kzg-4844/v2 v2.1.0 // indirect github.com/ethereum/go-verkle v0.2.2 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect @@ -47,6 +70,8 @@ require ( github.com/gin-contrib/sse 
v0.1.0 // indirect github.com/go-faster/city v1.0.1 // indirect github.com/go-faster/errors v0.7.1 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect @@ -58,6 +83,7 @@ require ( github.com/goccy/go-json v0.10.4 // indirect github.com/gofrs/flock v0.8.1 // indirect github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect + github.com/google/flatbuffers v25.2.10+incompatible // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/websocket v1.4.2 // indirect github.com/hashicorp/hcl v1.0.0 // indirect @@ -86,7 +112,6 @@ require ( github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect - github.com/redis/go-redis/v9 v9.12.1 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect @@ -96,7 +121,7 @@ require ( github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.6.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect + github.com/spf13/pflag v1.0.6 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/supranational/blst v0.3.14 // indirect @@ -106,18 +131,20 @@ require ( github.com/twmb/franz-go/pkg/kmsg v1.9.0 // indirect github.com/ugorji/go/codec v1.2.12 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect - go.opentelemetry.io/otel v1.36.0 // indirect - go.opentelemetry.io/otel/trace v1.36.0 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel v1.37.0 // indirect + go.opentelemetry.io/otel/metric v1.37.0 // indirect + go.opentelemetry.io/otel/trace v1.37.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/arch v0.12.0 // indirect - golang.org/x/crypto v0.38.0 // indirect + golang.org/x/crypto v0.39.0 // indirect golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 // indirect - golang.org/x/net v0.40.0 // indirect - golang.org/x/sync v0.14.0 // indirect - golang.org/x/sys v0.33.0 // indirect - golang.org/x/text v0.25.0 // indirect - golang.org/x/tools v0.30.0 // indirect - google.golang.org/protobuf v1.36.1 // indirect + golang.org/x/net v0.41.0 // indirect + golang.org/x/sync v0.15.0 // indirect + golang.org/x/sys v0.34.0 // indirect + golang.org/x/text v0.26.0 // indirect + golang.org/x/tools v0.33.0 // indirect + google.golang.org/protobuf v1.36.6 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect rsc.io/tmplfunc v0.0.3 // indirect diff --git a/go.sum b/go.sum index 6302502..92d6bd5 100644 --- a/go.sum +++ b/go.sum @@ -12,10 +12,50 @@ github.com/VictoriaMetrics/fastcache v1.12.2 h1:N0y9ASrJ0F6h0QaC3o6uJb3NIZ9VKLjC github.com/VictoriaMetrics/fastcache v1.12.2/go.mod h1:AmC+Nzz1+3G2eCPapF6UcsnkThDcMsQicp4xDukwJYI= github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= +github.com/aws/aws-sdk-go-v2 v1.38.0 h1:UCRQ5mlqcFk9HJDIqENSLR3wiG1VTWlyUfLDEvY7RxU= +github.com/aws/aws-sdk-go-v2 v1.38.0/go.mod h1:9Q0OoGQoboYIAJyslFyF1f5K1Ryddop8gqMhWx/n4Wg= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0 
h1:6GMWV6CNpA/6fbFHnoAjrv4+LGfyTqZz2LtCHnspgDg= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0/go.mod h1:/mXlTIVG9jbxkqDnr5UQNQxW1HRYxeGklkM9vAFeabg= +github.com/aws/aws-sdk-go-v2/config v1.31.0 h1:9yH0xiY5fUnVNLRWO0AtayqwU1ndriZdN78LlhruJR4= +github.com/aws/aws-sdk-go-v2/config v1.31.0/go.mod h1:VeV3K72nXnhbe4EuxxhzsDc/ByrCSlZwUnWH52Nde/I= +github.com/aws/aws-sdk-go-v2/credentials v1.18.4 h1:IPd0Algf1b+Qy9BcDp0sCUcIWdCQPSzDoMK3a8pcbUM= +github.com/aws/aws-sdk-go-v2/credentials v1.18.4/go.mod h1:nwg78FjH2qvsRM1EVZlX9WuGUJOL5od+0qvm0adEzHk= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.3 h1:GicIdnekoJsjq9wqnvyi2elW6CGMSYKhdozE7/Svh78= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.3/go.mod h1:R7BIi6WNC5mc1kfRM7XM/VHC3uRWkjc396sfabq4iOo= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.3 h1:o9RnO+YZ4X+kt5Z7Nvcishlz0nksIt2PIzDglLMP0vA= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.3/go.mod h1:+6aLJzOG1fvMOyzIySYjOFjcguGvVRL68R+uoRencN4= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.3 h1:joyyUFhiTQQmVK6ImzNU9TQSNRNeD9kOklqTzyk5v6s= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.3/go.mod h1:+vNIyZQP3b3B1tSLI0lxvrU9cfM7gpdRXMFfm67ZcPc= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.3 h1:ZV2XK2L3HBq9sCKQiQ/MdhZJppH/rH0vddEAamsHUIs= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.3/go.mod h1:b9F9tk2HdHpbf3xbN7rUZcfmJI26N6NcJu/8OsBFI/0= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 h1:6+lZi2JeGKtCraAj1rpoZfKqnQ9SptseRZioejfUOLM= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0/go.mod h1:eb3gfbVIxIoGgJsi9pGne19dhCBpK6opTYpQqAmdy44= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.3 h1:3ZKmesYBaFX33czDl6mbrcHb6jeheg6LqjJhQdefhsY= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.3/go.mod h1:7ryVb78GLCnjq7cw45N6oUb9REl7/vNUwjvIqC5UgdY= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.3 h1:ieRzyHXypu5ByllM7Sp4hC5f/1Fy5wqxqY0yB85hC7s= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.3/go.mod h1:O5ROz8jHiOAKAwx179v+7sHMhfobFVi6nZt8DEyiYoM= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.3 h1:SE/e52dq9a05RuxzLcjT+S5ZpQobj3ie3UTaSf2NnZc= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.3/go.mod h1:zkpvBTsR020VVr8TOrwK2TrUW9pOir28sH5ECHpnAfo= +github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0 h1:egoDf+Geuuntmw79Mz6mk9gGmELCPzg5PFEABOHB+6Y= +github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0/go.mod h1:t9MDi29H+HDbkolTSQtbI0HP9DemAWQzUjmWC7LGMnE= +github.com/aws/aws-sdk-go-v2/service/sso v1.28.0 h1:Mc/MKBf2m4VynyJkABoVEN+QzkfLqGj0aiJuEe7cMeM= +github.com/aws/aws-sdk-go-v2/service/sso v1.28.0/go.mod h1:iS5OmxEcN4QIPXARGhavH7S8kETNL11kym6jhoS7IUQ= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.0 h1:6csaS/aJmqZQbKhi1EyEMM7yBW653Wy/B9hnBofW+sw= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.0/go.mod h1:59qHWaY5B+Rs7HGTuVGaC32m0rdpQ68N8QCN3khYiqs= +github.com/aws/aws-sdk-go-v2/service/sts v1.37.0 h1:MG9VFW43M4A8BYeAfaJJZWrroinxeTi2r3+SnmLQfSA= +github.com/aws/aws-sdk-go-v2/service/sts v1.37.0/go.mod h1:JdeBDPgpJfuS6rU/hNglmOigKhyEZtBmbraLE4GK1J8= +github.com/aws/smithy-go v1.22.5 h1:P9ATCXPMb2mPjYBgueqJNCA5S9UfktsW0tTxi+a7eqw= +github.com/aws/smithy-go v1.22.5/go.mod 
h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bits-and-blooms/bitset v1.20.0 h1:2F+rfL86jE2d/bmw7OhqUg2Sj/1rURkBn3MdfoPyRVU= github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= github.com/bytedance/sonic v1.12.6 h1:/isNmCUF2x3Sh8RAp/4mh4ZGkcFAX/hLrzrK3AvpRzk= github.com/bytedance/sonic v1.12.6/go.mod h1:B8Gt/XvtZ3Fqj+iSKMypzymZxw/FVwgIGKzMzT9r/rk= github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= @@ -44,9 +84,8 @@ github.com/consensys/bavard v0.1.27/go.mod h1:k/zVjHHC4B+PQy1Pg7fgvG3ALicQw540Cr github.com/consensys/gnark-crypto v0.16.0 h1:8Dl4eYmUWK9WmlP1Bj6je688gBRJCJbT8Mw4KoTAawo= github.com/consensys/gnark-crypto v0.16.0/go.mod h1:Ke3j06ndtPTVvo++PhGNgvm+lgpLvzbcE2MqljY7diU= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= -github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/crate-crypto/go-eth-kzg v1.3.0 h1:05GrhASN9kDAidaFJOda6A4BEvgvuXbazXg/0E3OOdI= github.com/crate-crypto/go-eth-kzg v1.3.0/go.mod h1:J9/u5sWfznSObptgfa92Jq8rTswn6ahQWEuiLHOjCUI= github.com/crate-crypto/go-ipa v0.0.0-20240724233137-53bbb0ceb27a h1:W8mUrRp6NOVl3J+MYp5kPMoUZPp7aOYHtaua31lwRHg= @@ -63,8 +102,16 @@ github.com/decred/dcrd/crypto/blake256 v1.0.0 h1:/8DMNYp9SGi5f0w7uCm6d6M4OU2rGFK github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs= +github.com/dgraph-io/badger/v4 v4.8.0 h1:JYph1ChBijCw8SLeybvPINizbDKWZ5n/GYbz2yhN/bs= +github.com/dgraph-io/badger/v4 v4.8.0/go.mod h1:U6on6e8k/RTbUWxqKR0MvugJuVmkxSNc79ap4917h4w= +github.com/dgraph-io/ristretto/v2 v2.2.0 h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM= +github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod 
h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/ethereum/c-kzg-4844/v2 v2.1.0 h1:gQropX9YFBhl3g4HYhwE70zq3IHFRgbbNPw0Shwzf5w= github.com/ethereum/c-kzg-4844/v2 v2.1.0/go.mod h1:TC48kOKjJKPbN7C++qIgt0TJzZ70QznYR7Ob+WXl57E= github.com/ethereum/go-ethereum v1.15.11 h1:JK73WKeu0WC0O1eyX+mdQAVHUV+UR1a9VB/domDngBU= @@ -89,6 +136,11 @@ github.com/go-faster/city v1.0.1 h1:4WAxSZ3V2Ws4QRDrscLEDcibJY8uf41H6AhXDrNDcGw= github.com/go-faster/city v1.0.1/go.mod h1:jKcUJId49qdW3L1qKHH/3wPeUstCVpVSXTM6vO3VcTw= github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AYg= github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= @@ -121,6 +173,8 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb h1:PBC98N2aIaM3XXiurYmW7fx4GZkL8feAMVq7nEjURHk= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= +github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= @@ -139,6 +193,8 @@ github.com/hashicorp/go-bexpr v0.1.10 h1:9kuI5PFotCboP3dkDYFr/wi0gg0QVbSNz5oFRpx github.com/hashicorp/go-bexpr v0.1.10/go.mod h1:oxlubA2vC/gFVfX1A6JGp7ls7uCDlfJn732ehYYg+g0= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/holiman/billy v0.0.0-20240216141850-2abb0c79d3c4 h1:X4egAf/gcS1zATw6wn4Ej8vjuVGxeHdan+bRb2ebyv4= github.com/holiman/billy v0.0.0-20240216141850-2abb0c79d3c4/go.mod h1:5GuXa7vkL8u9FkFuWdVvfR5ix8hRB7DbOAaYULamFpc= github.com/holiman/bloomfilter/v2 v2.0.3 h1:73e0e/V0tCydx14a0SCYS/EWCxgwLZ18CZcZKVu0fao= @@ -209,6 +265,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= +github.com/parquet-go/parquet-go v0.25.1 
h1:l7jJwNM0xrk0cnIIptWMtnSnuxRkwq53S+Po3KG8Xgo= +github.com/parquet-go/parquet-go v0.25.1/go.mod h1:AXBuotO1XiBtcqJb/FKFyjBG4aqa3aQAAWF3ZPzCanY= github.com/paulmach/orb v0.11.1 h1:3koVegMC4X/WeiXYz9iswopaTwMem53NzTJuTF20JzU= github.com/paulmach/orb v0.11.1/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/EnuLaLU= github.com/paulmach/protoscan v0.2.1/go.mod h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY= @@ -243,8 +301,8 @@ github.com/redis/go-redis/v9 v9.12.1 h1:k5iquqv27aBtnTm2tIkROUDp8JBXhXZIVu1InSgv github.com/redis/go-redis/v9 v9.12.1/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= @@ -268,10 +326,10 @@ github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= -github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= -github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.18.0 h1:pN6W1ub/G4OfnM+NR9p7xP9R6TltLUzp5JG9yZD3Qg0= github.com/spf13/viper v1.18.0/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -328,10 +386,14 @@ github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5t github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g= -go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= -go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= -go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= -go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel 
v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= +go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= +go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= +go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= +go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= golang.org/x/arch v0.12.0 h1:UsYJhbzPYGsT0HbEdmYcqtCv8UNGvnaL561NnIUvaKg= @@ -341,15 +403,15 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= -golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= +golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= +golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 h1:aAcj0Da7eBAtrTp03QXWvm88pSyOt+UgdZw2BFZ+lEw= golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8/go.mod h1:CQ1k9gNrJ50XIzaKCRR2hssIjF07kZFEiieALBM/ARQ= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM= -golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w= +golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -358,15 +420,15 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= -golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= +golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= +golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync 
v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= -golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= +golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -383,8 +445,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -393,8 +455,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= -golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= +golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= +golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -402,16 +464,16 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY= -golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY= +golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= +golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= golang.org/x/xerrors 
v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk= -google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index 154dc89..1d879dc 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -21,6 +21,8 @@ type Orchestrator struct { committerEnabled bool reorgHandlerEnabled bool cancel context.CancelFunc + wg sync.WaitGroup + shutdownOnce sync.Once } func NewOrchestrator(rpc rpc.IRPCClient) (*Orchestrator, error) { @@ -43,8 +45,6 @@ func (o *Orchestrator) Start() { ctx, cancel := context.WithCancel(context.Background()) o.cancel = cancel - var wg sync.WaitGroup - sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT) @@ -58,30 +58,32 @@ func (o *Orchestrator) Start() { workModeMonitor := NewWorkModeMonitor(o.rpc, o.storage) if o.pollerEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() pollerWorkModeChan := make(chan WorkMode, 1) workModeMonitor.RegisterChannel(pollerWorkModeChan) defer workModeMonitor.UnregisterChannel(pollerWorkModeChan) + poller := NewPoller(o.rpc, o.storage, WithPollerWorkModeChan(pollerWorkModeChan)) poller.Start(ctx) + log.Info().Msg("Poller completed") }() } if o.failureRecovererEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() failureRecoverer := NewFailureRecoverer(o.rpc, o.storage) failureRecoverer.Start(ctx) }() } if o.committerEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() committerWorkModeChan := make(chan WorkMode, 1) workModeMonitor.RegisterChannel(committerWorkModeChan) defer workModeMonitor.UnregisterChannel(committerWorkModeChan) @@ -92,33 +94,35 @@ func (o *Orchestrator) Start() { } if o.reorgHandlerEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() reorgHandler := NewReorgHandler(o.rpc, o.storage) reorgHandler.Start(ctx) }() } - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() workModeMonitor.Start(ctx) }() // The chain tracker is always running - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() chainTracker := NewChainTracker(o.rpc) chainTracker.Start(ctx) }() - wg.Wait() -} + o.wg.Wait() -func (o *Orchestrator) Shutdown() { - if o.cancel != nil { - 
o.cancel() + // Waiting for all goroutines to complete + + if err := o.storage.Close(); err != nil { + log.Error().Err(err).Msg("Error closing storage connections") } + + log.Info().Msg("Orchestrator shutdown complete") } diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index 5e3b313..31a64ae 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -158,7 +158,7 @@ func (p *Poller) Start(ctx context.Context) { lastPolledBlock := p.Poll(pollCtx, blockNumbers) if p.reachedPollLimit(lastPolledBlock) { - log.Debug().Msg("Reached poll limit, exiting poller") + log.Info().Msgf("Reached poll limit at block %s, completing poller", lastPolledBlock.String()) cancel() return } diff --git a/internal/orchestrator/validator.go b/internal/orchestrator/validator.go index db03cbe..b37b986 100644 --- a/internal/orchestrator/validator.go +++ b/internal/orchestrator/validator.go @@ -98,8 +98,11 @@ func (v *Validator) ValidateBlock(blockData common.BlockData) (valid bool, err e return true, nil } - // TODO: remove this once we know how to validate all tx types for _, tx := range blockData.Transactions { + if tx.TransactionType == 0x7E { + // TODO: Need to properly validate op-stack deposit transaction + return true, nil + } if tx.TransactionType > 4 { // Currently supported types are 0-4 log.Warn().Msgf("Skipping transaction root validation for block %s due to unsupported transaction type %d", blockData.Block.Number, tx.TransactionType) return true, nil diff --git a/internal/storage/badger.go b/internal/storage/badger.go new file mode 100644 index 0000000..1ffd431 --- /dev/null +++ b/internal/storage/badger.go @@ -0,0 +1,479 @@ +package storage + +import ( + "bytes" + "encoding/gob" + "fmt" + "math/big" + "sort" + "strings" + "sync" + "time" + + "github.com/dgraph-io/badger/v4" + "github.com/dgraph-io/badger/v4/options" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" +) + +type BadgerConnector struct { + db *badger.DB + mu sync.RWMutex + gcTicker *time.Ticker + stopGC chan struct{} +} + +func NewBadgerConnector(cfg *config.BadgerConfig) (*BadgerConnector, error) { + opts := badger.DefaultOptions(cfg.Path) + + opts.ValueLogFileSize = 1024 * 1024 * 1024 // 1GB + opts.BaseTableSize = 128 * 1024 * 1024 // 128MB + opts.BaseLevelSize = 128 * 1024 * 1024 // 128MB + opts.LevelSizeMultiplier = 10 // Aggressive growth + opts.NumMemtables = 10 // ~1.28GB + opts.MemTableSize = opts.BaseTableSize // 128MB per memtable + opts.NumLevelZeroTables = 10 + opts.NumLevelZeroTablesStall = 30 + opts.SyncWrites = false // Faster but less durable + opts.DetectConflicts = false // No need for ACID in staging + opts.NumCompactors = 4 // More compactors for parallel compaction + opts.CompactL0OnClose = true // Compact L0 tables on close + opts.ValueLogMaxEntries = 1000000 // More entries per value log + opts.ValueThreshold = 1024 // Store values > 1024 bytes in value log + opts.IndexCacheSize = 512 * 1024 * 1024 // 512MB index cache + opts.BlockCacheSize = 256 * 1024 * 1024 // 256MB block cache + opts.Compression = options.Snappy + + opts.Logger = nil // Disable badger's internal logging + + db, err := badger.Open(opts) + if err != nil { + return nil, fmt.Errorf("failed to open badger db: %w", err) + } + + bc := &BadgerConnector{ + db: db, + stopGC: make(chan struct{}), + } + + // Start GC routine + bc.gcTicker = time.NewTicker(time.Duration(60) * time.Second) + go bc.runGC() + + return bc, nil 
+} + +func (bc *BadgerConnector) runGC() { + for { + select { + case <-bc.gcTicker.C: + err := bc.db.RunValueLogGC(0.5) + if err != nil && err != badger.ErrNoRewrite { + log.Debug().Err(err).Msg("BadgerDB GC error") + } + case <-bc.stopGC: + return + } + } +} + +func (bc *BadgerConnector) Close() error { + if bc.gcTicker != nil { + bc.gcTicker.Stop() + close(bc.stopGC) + } + return bc.db.Close() +} + +// Key construction helpers +func blockKey(chainId *big.Int, blockNumber *big.Int) []byte { + return []byte(fmt.Sprintf("b:%d:%s", chainId.Uint64(), blockNumber.String())) +} + +func blockFailureKey(chainId *big.Int, blockNumber *big.Int, timestamp int64) []byte { + return []byte(fmt.Sprintf("f:%d:%s:%d", chainId.Uint64(), blockNumber.String(), timestamp)) +} + +func lastReorgKey(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("reorg:%d", chainId.Uint64())) +} + +func lastPublishedKey(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("published:%d", chainId.Uint64())) +} + +// IOrchestratorStorage implementation +func (bc *BadgerConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var failures []common.BlockFailure + prefix := fmt.Sprintf("f:%d:", qf.ChainId.Uint64()) + + err := bc.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var failure common.BlockFailure + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&failure); err != nil { + return err + } + + // Apply filters + if qf.StartBlock != nil && failure.BlockNumber.Cmp(qf.StartBlock) < 0 { + return nil + } + if qf.EndBlock != nil && failure.BlockNumber.Cmp(qf.EndBlock) > 0 { + return nil + } + + failures = append(failures, failure) + return nil + }) + if err != nil { + return err + } + + if qf.Limit > 0 && len(failures) >= qf.Limit { + break + } + } + return nil + }) + + return failures, err +} + +func (bc *BadgerConnector) StoreBlockFailures(failures []common.BlockFailure) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, failure := range failures { + key := blockFailureKey(failure.ChainId, failure.BlockNumber, time.Now().Unix()) + + var buf bytes.Buffer + if err := gob.NewEncoder(&buf).Encode(failure); err != nil { + return err + } + + if err := txn.Set(key, buf.Bytes()); err != nil { + return err + } + } + return nil + }) +} + +func (bc *BadgerConnector) DeleteBlockFailures(failures []common.BlockFailure) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, failure := range failures { + // Delete all failure entries for this block + prefix := fmt.Sprintf("f:%d:%s:", failure.ChainId.Uint64(), failure.BlockNumber.String()) + + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + if err := txn.Delete(it.Item().Key()); err != nil { + return err + } + } + } + return nil + }) +} + +func (bc *BadgerConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var blockNumber *big.Int + err := bc.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(lastReorgKey(chainId)) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return 
err + } + + return item.Value(func(val []byte) error { + blockNumber = new(big.Int).SetBytes(val) + return nil + }) + }) + + if blockNumber == nil { + return big.NewInt(0), nil + } + return blockNumber, err +} + +func (bc *BadgerConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + return txn.Set(lastReorgKey(chainId), blockNumber.Bytes()) + }) +} + +// IStagingStorage implementation +func (bc *BadgerConnector) InsertStagingData(data []common.BlockData) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, blockData := range data { + key := blockKey(big.NewInt(int64(blockData.ChainId)), blockData.Block.Number) + + var buf bytes.Buffer + if err := gob.NewEncoder(&buf).Encode(blockData); err != nil { + return err + } + + if err := txn.Set(key, buf.Bytes()); err != nil { + return err + } + } + return nil + }) +} + +func (bc *BadgerConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var results []common.BlockData + + if len(qf.BlockNumbers) > 0 { + // Fetch specific blocks + err := bc.db.View(func(txn *badger.Txn) error { + for _, blockNum := range qf.BlockNumbers { + key := blockKey(qf.ChainId, blockNum) + item, err := txn.Get(key) + if err == badger.ErrKeyNotFound { + continue + } + if err != nil { + return err + } + + err = item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + results = append(results, blockData) + return nil + }) + if err != nil { + return err + } + } + return nil + }) + return results, err + } + + // Range query + prefix := fmt.Sprintf("b:%d:", qf.ChainId.Uint64()) + + err := bc.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + count := 0 + for it.Rewind(); it.Valid(); it.Next() { + if qf.Offset > 0 && count < qf.Offset { + count++ + continue + } + + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + + // Apply filters + if qf.StartBlock != nil && blockData.Block.Number.Cmp(qf.StartBlock) < 0 { + return nil + } + if qf.EndBlock != nil && blockData.Block.Number.Cmp(qf.EndBlock) > 0 { + return nil + } + + results = append(results, blockData) + return nil + }) + if err != nil { + return err + } + + count++ + if qf.Limit > 0 && len(results) >= qf.Limit { + break + } + } + return nil + }) + + // Sort by block number + sort.Slice(results, func(i, j int) bool { + return results[i].Block.Number.Cmp(results[j].Block.Number) < 0 + }) + + return results, err +} + +func (bc *BadgerConnector) DeleteStagingData(data []common.BlockData) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, blockData := range data { + key := blockKey(big.NewInt(int64(blockData.ChainId)), blockData.Block.Number) + if err := txn.Delete(key); err != nil && err != badger.ErrKeyNotFound { + return err + } + } + return nil + }) +} + +func (bc *BadgerConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var maxBlock *big.Int + prefix := 
fmt.Sprintf("b:%d:", chainId.Uint64()) + + err := bc.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + opts.Reverse = true // Iterate in reverse to find max quickly + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + key := string(it.Item().Key()) + parts := strings.Split(key, ":") + if len(parts) != 3 { + continue + } + + blockNum, ok := new(big.Int).SetString(parts[2], 10) + if !ok { + continue + } + + // Apply range filters if provided + if rangeStart != nil && rangeStart.Sign() > 0 && blockNum.Cmp(rangeStart) < 0 { + continue + } + if rangeEnd != nil && rangeEnd.Sign() > 0 && blockNum.Cmp(rangeEnd) > 0 { + continue + } + + maxBlock = blockNum + break // Found the maximum since we're iterating in reverse + } + return nil + }) + + if maxBlock == nil { + return big.NewInt(0), nil + } + return maxBlock, err +} + +func (bc *BadgerConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var blockNumber *big.Int + err := bc.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(lastPublishedKey(chainId)) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return err + } + + return item.Value(func(val []byte) error { + blockNumber = new(big.Int).SetBytes(val) + return nil + }) + }) + + if blockNumber == nil { + return big.NewInt(0), nil + } + return blockNumber, err +} + +func (bc *BadgerConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + return txn.Set(lastPublishedKey(chainId), blockNumber.Bytes()) + }) +} + +func (bc *BadgerConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + prefix := fmt.Sprintf("b:%d:", chainId.Uint64()) + + return bc.db.Update(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + var keysToDelete [][]byte + for it.Rewind(); it.Valid(); it.Next() { + key := string(it.Item().Key()) + parts := strings.Split(key, ":") + if len(parts) != 3 { + continue + } + + blockNum, ok := new(big.Int).SetString(parts[2], 10) + if !ok { + continue + } + + if blockNum.Cmp(blockNumber) <= 0 { + keysToDelete = append(keysToDelete, it.Item().KeyCopy(nil)) + } + } + + for _, key := range keysToDelete { + if err := txn.Delete(key); err != nil { + return err + } + } + + return nil + }) +} diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go index 1f95536..9ea97ce 100644 --- a/internal/storage/clickhouse.go +++ b/internal/storage/clickhouse.go @@ -78,6 +78,14 @@ func NewClickHouseConnector(cfg *config.ClickhouseConfig) (*ClickHouseConnector, }, nil } +// Close closes the ClickHouse connection +func (c *ClickHouseConnector) Close() error { + if c.conn != nil { + return c.conn.Close() + } + return nil +} + func connectDB(cfg *config.ClickhouseConfig) (clickhouse.Conn, error) { port := cfg.Port if port == 0 { diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 0b5d743..4b962af 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -72,12 +72,37 @@ type IStorage struct { StagingStorage IStagingStorage } +// Close closes all storage connections +func (s *IStorage) Close() error { + var errs []error + + // Close each storage that implements 
Closer interface + if err := s.OrchestratorStorage.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close orchestrator storage: %w", err)) + } + + if err := s.MainStorage.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close main storage: %w", err)) + } + + if err := s.StagingStorage.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close staging storage: %w", err)) + } + + if len(errs) > 0 { + return fmt.Errorf("errors closing storage: %v", errs) + } + + return nil +} + type IOrchestratorStorage interface { GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) StoreBlockFailures(failures []common.BlockFailure) error DeleteBlockFailures(failures []common.BlockFailure) error GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error + Close() error } type IStagingStorage interface { @@ -88,6 +113,7 @@ type IStagingStorage interface { GetLastPublishedBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error + Close() error } type IMainStorage interface { @@ -99,16 +125,15 @@ type IMainStorage interface { GetLogs(qf QueryFilter, fields ...string) (logs QueryResult[common.Log], err error) GetTraces(qf QueryFilter, fields ...string) (traces QueryResult[common.Trace], err error) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) + GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) + GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) + GetMaxBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (maxBlockNumber *big.Int, err error) /** * Get block headers ordered from latest to oldest. */ GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) (blockHeaders []common.BlockHeader, err error) - - GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) - GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) - /** * Gets only the data required for validation. */ @@ -121,6 +146,8 @@ type IMainStorage interface { * Gets full block data with transactions, logs and traces. 
*/ GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) (blocks []common.BlockData, err error) + + Close() error } func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { @@ -148,14 +175,59 @@ func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { var conn interface{} var err error - if cfg.Kafka != nil { - conn, err = NewKafkaRedisConnector(cfg.Kafka) - } else if cfg.Postgres != nil { - conn, err = NewPostgresConnector(cfg.Postgres) - } else if cfg.Clickhouse != nil { - conn, err = NewClickHouseConnector(cfg.Clickhouse) + + // Default to "auto" if Type is not specified + storageType := cfg.Type + if storageType == "" { + storageType = "auto" + } + + // Handle explicit type selection + if storageType != "auto" { + switch storageType { + case "kafka": + if cfg.Kafka == nil { + return *new(T), fmt.Errorf("kafka storage type specified but kafka config is nil") + } + conn, err = NewKafkaRedisConnector(cfg.Kafka) + case "postgres": + if cfg.Postgres == nil { + return *new(T), fmt.Errorf("postgres storage type specified but postgres config is nil") + } + conn, err = NewPostgresConnector(cfg.Postgres) + case "clickhouse": + if cfg.Clickhouse == nil { + return *new(T), fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") + } + conn, err = NewClickHouseConnector(cfg.Clickhouse) + case "badger": + if cfg.Badger == nil { + return *new(T), fmt.Errorf("badger storage type specified but badger config is nil") + } + conn, err = NewBadgerConnector(cfg.Badger) + case "s3": + if cfg.S3 == nil { + return *new(T), fmt.Errorf("s3 storage type specified but s3 config is nil") + } + conn, err = NewS3Connector(cfg.S3) + default: + return *new(T), fmt.Errorf("unknown storage type: %s", storageType) + } } else { - return *new(T), fmt.Errorf("no storage driver configured") + // Auto mode: use the first non-nil config (existing behavior) + if cfg.Kafka != nil { + conn, err = NewKafkaRedisConnector(cfg.Kafka) + } else if cfg.Postgres != nil { + conn, err = NewPostgresConnector(cfg.Postgres) + } else if cfg.Clickhouse != nil { + conn, err = NewClickHouseConnector(cfg.Clickhouse) + } else if cfg.Badger != nil { + conn, err = NewBadgerConnector(cfg.Badger) + } else if cfg.S3 != nil { + conn, err = NewS3Connector(cfg.S3) + } else { + return *new(T), fmt.Errorf("no storage driver configured") + } } if err != nil { diff --git a/internal/storage/s3.go b/internal/storage/s3.go new file mode 100644 index 0000000..8a75c65 --- /dev/null +++ b/internal/storage/s3.go @@ -0,0 +1,1071 @@ +package storage + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "math/big" + "sort" + "strings" + "sync" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + awsconfig "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/parquet-go/parquet-go" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" +) + +type S3Connector struct { + client *s3.Client + config *config.S3Config + formatter DataFormatter + + // Buffering + buffer []common.BlockData + bufferMu sync.Mutex + bufferSize int64 // Current buffer size in bytes + bufferTimer *time.Timer + stopCh chan struct{} + flushCh chan struct{} + flushDoneCh chan struct{} // Signals when flush is complete + wg sync.WaitGroup +} + +// DataFormatter interface for different file formats +type 
DataFormatter interface { + FormatBlockData(data []common.BlockData) ([]byte, error) + GetFileExtension() string + GetContentType() string +} + +// ParquetBlockData represents the complete block data in Parquet format +type ParquetBlockData struct { + ChainID uint64 `parquet:"chain_id"` + BlockNumber uint64 `parquet:"block_number"` // Numeric for efficient min/max queries + BlockHash string `parquet:"block_hash"` + BlockTimestamp int64 `parquet:"block_timestamp"` + Block []byte `parquet:"block_json"` + Transactions []byte `parquet:"transactions_json"` + Logs []byte `parquet:"logs_json"` + Traces []byte `parquet:"traces_json"` +} + +func NewS3Connector(cfg *config.S3Config) (*S3Connector, error) { + awsCfg, err := awsconfig.LoadDefaultConfig(context.Background(), + awsconfig.WithRegion(cfg.Region), + ) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + // Override with explicit credentials if provided + if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { + awsCfg.Credentials = aws.CredentialsProviderFunc(func(ctx context.Context) (aws.Credentials, error) { + return aws.Credentials{ + AccessKeyID: cfg.AccessKeyID, + SecretAccessKey: cfg.SecretAccessKey, + }, nil + }) + } + + s3Client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { + if cfg.Endpoint != "" { + o.BaseEndpoint = aws.String(cfg.Endpoint) + } + }) + + // Set defaults + if cfg.Format == "" { + cfg.Format = "parquet" + } + + // Initialize parquet config with defaults if using parquet + if cfg.Format == "parquet" && cfg.Parquet == nil { + cfg.Parquet = &config.ParquetConfig{ + Compression: "snappy", + RowGroupSize: 256, // MB + PageSize: 8192, // KB + } + } + + // Set buffer defaults + if cfg.BufferSize == 0 { + cfg.BufferSize = 1024 // 1GB default + } + if cfg.BufferTimeout == 0 { + cfg.BufferTimeout = 300 // 5 minutes default + } + + // Create formatter based on format + var formatter DataFormatter + switch cfg.Format { + case "parquet": + formatter = &ParquetFormatter{config: cfg.Parquet} + default: + return nil, fmt.Errorf("unsupported format: %s", cfg.Format) + } + + s3c := &S3Connector{ + client: s3Client, + config: cfg, + formatter: formatter, + buffer: make([]common.BlockData, 0), + stopCh: make(chan struct{}), + flushCh: make(chan struct{}, 1), + flushDoneCh: make(chan struct{}), + } + + // Start background flush worker + s3c.wg.Add(1) + go s3c.flushWorker() + + return s3c, nil +} + +func (s *S3Connector) InsertBlockData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + s.bufferMu.Lock() + defer s.bufferMu.Unlock() + + // Calculate actual serialized size for accurate memory tracking + formattedData, err := s.formatter.FormatBlockData(data) + if err != nil { + return fmt.Errorf("failed to format block data for size calculation: %w", err) + } + + // Use actual serialized size for accurate memory tracking + actualSize := int64(len(formattedData)) + s.bufferSize += actualSize + log.Debug(). + Int("block_count", len(data)). + Int64("size_bytes", actualSize). + Int64("avg_bytes_per_block", actualSize/int64(len(data))). + Msg("Calculated actual block data size") + + // Add to buffer + s.buffer = append(s.buffer, data...) 
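+
+	// Flush triggers for the buffered data above (defaults are set in NewS3Connector):
+	//   - the buffered serialized size reaches bufferSizeMB (default 1024 MB)
+	//   - bufferTimeoutSeconds elapses after the buffer first becomes non-empty (default 300s)
+	//   - maxBlocksPerFile is configured (> 0) and the buffered block count reaches it
+	// The timer below is armed only when the buffer transitions from empty to non-empty.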
+ + // Reset timer if this is the first data in buffer + if len(s.buffer) == len(data) && s.bufferTimer == nil { + s.bufferTimer = time.AfterFunc(time.Duration(s.config.BufferTimeout)*time.Second, func() { + select { + case s.flushCh <- struct{}{}: + default: + } + }) + } + + // Check if we should flush based on size or block count + shouldFlush := s.bufferSize >= s.config.BufferSize*1024*1024 // Convert MB to bytes + + // Only check block count if MaxBlocksPerFile is set (> 0) + if s.config.MaxBlocksPerFile > 0 && len(s.buffer) >= s.config.MaxBlocksPerFile { + shouldFlush = true + } + + if shouldFlush { + // Stop timer if running + if s.bufferTimer != nil { + s.bufferTimer.Stop() + s.bufferTimer = nil + } + + // Trigger flush + select { + case s.flushCh <- struct{}{}: + default: + } + } + + return nil +} + +// flushWorker runs in background and handles buffer flushes +func (s *S3Connector) flushWorker() { + defer s.wg.Done() + + for { + select { + case <-s.stopCh: + // Final flush before stopping + s.flushBuffer() + return + case <-s.flushCh: + s.flushBuffer() + // Signal flush completion + select { + case s.flushDoneCh <- struct{}{}: + default: + } + } + } +} + +// flushBuffer writes buffered data to S3 +func (s *S3Connector) flushBuffer() error { + s.bufferMu.Lock() + if len(s.buffer) == 0 { + s.bufferMu.Unlock() + return nil + } + + // Take ownership of buffer + data := s.buffer + s.buffer = make([]common.BlockData, 0) + s.bufferSize = 0 + + // Stop timer if running + if s.bufferTimer != nil { + s.bufferTimer.Stop() + s.bufferTimer = nil + } + s.bufferMu.Unlock() + + // Group blocks by chain to generate appropriate keys + chainGroups := make(map[uint64][]common.BlockData) + for _, block := range data { + chainGroups[block.ChainId] = append(chainGroups[block.ChainId], block) + } + + for _, blocks := range chainGroups { + // Sort blocks by number + sort.Slice(blocks, func(i, j int) bool { + return blocks[i].Block.Number.Cmp(blocks[j].Block.Number) < 0 + }) + + // Process in chunks if MaxBlocksPerFile is set, otherwise upload all at once + if s.config.MaxBlocksPerFile > 0 { + // Split into chunks based on MaxBlocksPerFile + for i := 0; i < len(blocks); i += s.config.MaxBlocksPerFile { + end := i + s.config.MaxBlocksPerFile + if end > len(blocks) { + end = len(blocks) + } + + chunk := blocks[i:end] + if err := s.uploadBatch(chunk); err != nil { + log.Error().Err(err).Msg("Failed to upload batch to S3") + return err + } + } + } else { + // No block limit, upload entire buffer as one file + if err := s.uploadBatch(blocks); err != nil { + log.Error().Err(err).Msg("Failed to upload batch to S3") + return err + } + } + } + + return nil +} + +// Flush manually triggers a buffer flush and waits for completion +func (s *S3Connector) Flush() error { + // Check if buffer has data + s.bufferMu.Lock() + hasData := len(s.buffer) > 0 + s.bufferMu.Unlock() + + if !hasData { + return nil + } + + // Clear any pending flush completion signals + select { + case <-s.flushDoneCh: + default: + } + + // Trigger flush + select { + case s.flushCh <- struct{}{}: + // Wait for flush to complete + select { + case <-s.flushDoneCh: + return nil + case <-time.After(30 * time.Second): + return fmt.Errorf("flush timeout after 30 seconds") + } + default: + // Flush channel is full, likely a flush is already in progress + // Wait for it to complete + select { + case <-s.flushDoneCh: + return nil + case <-time.After(30 * time.Second): + return fmt.Errorf("flush timeout after 30 seconds") + } + } +} + +// Close closes the S3 
connector and flushes any remaining data +func (s *S3Connector) Close() error { + // First, ensure any pending data is flushed + if err := s.Flush(); err != nil { + log.Error().Err(err).Msg("Error flushing buffer during close") + } + + // Signal stop + close(s.stopCh) + + // Wait for worker to finish + s.wg.Wait() + + return nil +} + +func (s *S3Connector) uploadBatch(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + chainID := data[0].ChainId + startBlock := data[0].Block.Number + endBlock := data[len(data)-1].Block.Number + // Use the first block's timestamp for year partitioning + blockTimestamp := data[0].Block.Timestamp + + // Format data using the configured formatter + formattedData, err := s.formatter.FormatBlockData(data) + if err != nil { + return fmt.Errorf("failed to format block data: %w", err) + } + + // Generate S3 key with chain_id/year partitioning based on block timestamp + key := s.generateS3Key(chainID, startBlock, endBlock, blockTimestamp) + + // Upload to S3 + ctx := context.Background() + _, err = s.client.PutObject(ctx, &s3.PutObjectInput{ + Bucket: aws.String(s.config.Bucket), + Key: aws.String(key), + Body: bytes.NewReader(formattedData), + ContentType: aws.String(s.formatter.GetContentType()), + Metadata: map[string]string{ + "chain_id": fmt.Sprintf("%d", chainID), + "start_block": startBlock.String(), + "end_block": endBlock.String(), + "block_count": fmt.Sprintf("%d", len(data)), + "timestamp": blockTimestamp.Format(time.RFC3339), + "checksum": s.calculateChecksum(formattedData), + "file_size": fmt.Sprintf("%d", len(formattedData)), + }, + }) + + if err != nil { + return fmt.Errorf("failed to upload to S3: %w", err) + } + + log.Info(). + Uint64("chain_id", chainID). + Str("min_block", startBlock.String()). + Str("max_block", endBlock.String()). + Int("block_count", len(data)). + Int("file_size_mb", len(formattedData)/(1024*1024)). + Str("s3_key", key). 
+ Msg("Successfully uploaded buffered blocks to S3") + + return nil +} + +func (s *S3Connector) generateS3Key(chainID uint64, startBlock, endBlock *big.Int, blockTimestamp time.Time) string { + // Use the block's timestamp for year partitioning + year := blockTimestamp.Year() + if len(s.config.Prefix) > 0 { + return fmt.Sprintf("%s/chain_%d/year=%d/blocks_%s_%s%s", + s.config.Prefix, + chainID, + year, + startBlock.String(), + endBlock.String(), + s.formatter.GetFileExtension(), + ) + } + return fmt.Sprintf("chain_%d/year=%d/blocks_%s_%s%s", + chainID, + year, + startBlock.String(), + endBlock.String(), + s.formatter.GetFileExtension(), + ) +} + +// ParquetFormatter implements DataFormatter for Parquet format +type ParquetFormatter struct { + config *config.ParquetConfig +} + +func (f *ParquetFormatter) FormatBlockData(data []common.BlockData) ([]byte, error) { + var parquetData []ParquetBlockData + + for _, d := range data { + // Serialize each component to JSON + blockJSON, err := json.Marshal(d.Block) + if err != nil { + return nil, fmt.Errorf("failed to marshal block: %w", err) + } + + // Default transactions to empty array if nil + var txJSON []byte + if d.Transactions == nil { + txJSON, err = json.Marshal([]common.Transaction{}) + } else { + txJSON, err = json.Marshal(d.Transactions) + } + if err != nil { + return nil, fmt.Errorf("failed to marshal transactions: %w", err) + } + + // Default logs to empty array if nil + var logsJSON []byte + if d.Logs == nil { + logsJSON, err = json.Marshal([]common.Log{}) + } else { + logsJSON, err = json.Marshal(d.Logs) + } + if err != nil { + return nil, fmt.Errorf("failed to marshal logs: %w", err) + } + + // Default traces to empty array if nil + var tracesJSON []byte + if d.Traces == nil { + tracesJSON, err = json.Marshal([]common.Trace{}) + } else { + tracesJSON, err = json.Marshal(d.Traces) + } + if err != nil { + return nil, fmt.Errorf("failed to marshal traces: %w", err) + } + + // Convert block number to uint64 for efficient queries + // If block number is too large for uint64, use MaxUint64 + blockNum := d.Block.Number.Uint64() + if d.Block.Number.BitLen() > 64 { + return nil, fmt.Errorf("block number exceeds uint64 is not supported") + } + + pd := ParquetBlockData{ + ChainID: d.ChainId, + BlockNumber: blockNum, + BlockHash: d.Block.Hash, + BlockTimestamp: d.Block.Timestamp.Unix(), + Block: blockJSON, + Transactions: txJSON, + Logs: logsJSON, + Traces: tracesJSON, + } + + parquetData = append(parquetData, pd) + } + + var buf bytes.Buffer + + // Configure writer with compression and statistics for efficient queries + writerOptions := []parquet.WriterOption{ + f.getCompressionCodec(), + // Enable page statistics for query optimization (min/max per page) + parquet.DataPageStatistics(true), + // Set page buffer size for better statistics granularity + parquet.PageBufferSize(8 * 1024 * 1024), // 8MB pages + // Configure sorting for optimal query performance + // Sort by block_number first, then block_timestamp for efficient range queries + parquet.SortingWriterConfig( + parquet.SortingColumns( + parquet.Ascending("block_number"), + parquet.Ascending("block_timestamp"), + ), + ), + // Set column index size limit (enables column indexes for all columns) + parquet.ColumnIndexSizeLimit(16 * 1024), // 16KB limit for column index + } + + writer := parquet.NewGenericWriter[ParquetBlockData](&buf, writerOptions...) 
+ + // Write all data at once for better compression and statistics + if _, err := writer.Write(parquetData); err != nil { + return nil, fmt.Errorf("failed to write parquet data: %w", err) + } + + if err := writer.Close(); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func (f *ParquetFormatter) GetFileExtension() string { + return ".parquet" +} + +func (f *ParquetFormatter) GetContentType() string { + return "application/octet-stream" +} + +func (f *ParquetFormatter) getCompressionCodec() parquet.WriterOption { + switch f.config.Compression { + case "gzip": + return parquet.Compression(&parquet.Gzip) + case "zstd": + return parquet.Compression(&parquet.Zstd) + default: + return parquet.Compression(&parquet.Snappy) + } +} + +func (s *S3Connector) calculateChecksum(data []byte) string { + hash := sha256.Sum256(data) + return hex.EncodeToString(hash[:]) +} + +// Implement remaining IMainStorage methods with empty implementations +// These will return errors indicating they're not supported + +func (s *S3Connector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { + return nil, fmt.Errorf("ReplaceBlockData not supported by S3 connector") +} + +func (s *S3Connector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { + return QueryResult[common.Block]{}, fmt.Errorf("GetBlocks not supported by S3 connector - use Athena or similar") +} + +func (s *S3Connector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { + return QueryResult[common.Transaction]{}, fmt.Errorf("GetTransactions not supported by S3 connector - use Athena or similar") +} + +func (s *S3Connector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { + return QueryResult[common.Log]{}, fmt.Errorf("GetLogs not supported by S3 connector - use Athena or similar") +} + +func (s *S3Connector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { + return QueryResult[common.Trace]{}, fmt.Errorf("GetTraces not supported by S3 connector") +} + +func (s *S3Connector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { + return QueryResult[interface{}]{}, fmt.Errorf("GetAggregations not supported by S3 connector") +} + +func (s *S3Connector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { + return QueryResult[common.TokenBalance]{}, fmt.Errorf("GetTokenBalances not supported by S3 connector") +} + +func (s *S3Connector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { + return QueryResult[common.TokenTransfer]{}, fmt.Errorf("GetTokenTransfers not supported by S3 connector") +} + +func (s *S3Connector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { + maxBlock := big.NewInt(0) + + // First check the buffer for blocks from this chain + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() && block.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(block.Block.Number) + } + } + s.bufferMu.Unlock() + + // Then check S3 for the maximum block number + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for 
paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list S3 objects: %w", err) + } + + for _, obj := range page.Contents { + // Extract block range from filename: blocks_{start}_{end}.parquet + if obj.Key == nil { + continue + } + _, endBlock := s.extractBlockRangeFromKey(*obj.Key) + if endBlock != nil && endBlock.Cmp(maxBlock) > 0 { + maxBlock = endBlock + } + } + } + + return maxBlock, nil +} + +func (s *S3Connector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + maxBlock := big.NewInt(0) + + // First check the buffer for blocks in this range + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() { + blockNum := block.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 && blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) + } + } + } + s.bufferMu.Unlock() + + // Then check S3 files + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil { + continue + } + fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) + if fileStart == nil || fileEnd == nil { + continue + } + + // Check if this file overlaps with our range + if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { + // File overlaps with our range + effectiveEnd := new(big.Int).Set(fileEnd) + if effectiveEnd.Cmp(endBlock) > 0 { + effectiveEnd = endBlock + } + if effectiveEnd.Cmp(maxBlock) > 0 { + maxBlock = effectiveEnd + } + } + } + } + + return maxBlock, nil +} + +func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { + var headers []common.BlockHeader + + // First get headers from buffer + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() { + // Check if block is in range (if from is specified) + if from != nil && block.Block.Number.Cmp(from) > 0 { + continue + } + // Apply limit if specified + if to != nil && len(headers) >= int(to.Int64()) { + break + } + headers = append(headers, common.BlockHeader{ + Number: block.Block.Number, + Hash: block.Block.Hash, + ParentHash: block.Block.ParentHash, + }) + } + } + s.bufferMu.Unlock() + + // If we need more headers, get from S3 + if to == nil || len(headers) < int(to.Int64()) { + // Download relevant parquet files and extract block headers + files, err := s.findFilesInRange(chainId, big.NewInt(0), from) // from 0 to 'from' block + if err != nil { + return nil, err + } + + for _, file := range files { + fileHeaders, err := s.extractBlockHeadersFromFile(file, chainId, from, to) + if err != nil { + log.Warn().Err(err).Str("file", file).Msg("Failed to extract headers from file") + continue + } + headers = append(headers, fileHeaders...) 
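// Note: the two GetMaxBlockNumber helpers above and findFilesInRange below never read
// object contents; they rely on the key layout produced by generateS3Key, e.g.
// chain_1/year=2024/blocks_1000_2000.parquet (with the optional configured prefix
// prepended), and recover each file's block range from the file name via
// extractBlockRangeFromKey.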
+ } + } + + // Sort in descending order + sort.Slice(headers, func(i, j int) bool { + return headers[i].Number.Cmp(headers[j].Number) > 0 + }) + + // Apply limit if specified + if to != nil && len(headers) > int(to.Int64()) { + headers = headers[:to.Int64()] + } + + return headers, nil +} + +func (s *S3Connector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { + if startBlock == nil || endBlock == nil { + return nil, fmt.Errorf("start block and end block must not be nil") + } + + if startBlock.Cmp(endBlock) > 0 { + return nil, fmt.Errorf("start block must be less than or equal to end block") + } + + var blockData []common.BlockData + + // First check buffer for blocks in range + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() { + blockNum := block.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + blockData = append(blockData, block) + } + } + } + s.bufferMu.Unlock() + + // Then find and download relevant files from S3 + files, err := s.findFilesInRange(chainId, startBlock, endBlock) + if err != nil { + return nil, err + } + + for _, file := range files { + fileData, err := s.downloadAndParseFile(file, chainId, startBlock, endBlock) + if err != nil { + log.Warn().Err(err).Str("file", file).Msg("Failed to parse file") + continue + } + blockData = append(blockData, fileData...) + } + + // Sort by block number + sort.Slice(blockData, func(i, j int) bool { + return blockData[i].Block.Number.Cmp(blockData[j].Block.Number) < 0 + }) + + return blockData, nil +} + +func (s *S3Connector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { + // Build a set of all block numbers we have + blockSet := make(map[string]bool) + + // First add blocks from buffer + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() { + blockNum := block.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + blockSet[blockNum.String()] = true + } + } + } + s.bufferMu.Unlock() + + // Then check S3 files in range + files, err := s.findFilesInRange(chainId, startBlock, endBlock) + if err != nil { + return nil, err + } + + for _, file := range files { + fileStart, fileEnd := s.extractBlockRangeFromKey(file) + if fileStart == nil || fileEnd == nil { + continue + } + + // Add all blocks in this file's range to our set + for i := new(big.Int).Set(fileStart); i.Cmp(fileEnd) <= 0; i.Add(i, big.NewInt(1)) { + if i.Cmp(startBlock) >= 0 && i.Cmp(endBlock) <= 0 { + blockSet[i.String()] = true + } + } + } + + // Find missing blocks + var missing []*big.Int + for i := new(big.Int).Set(startBlock); i.Cmp(endBlock) <= 0; i.Add(i, big.NewInt(1)) { + if !blockSet[i.String()] { + missing = append(missing, new(big.Int).Set(i)) + } + } + + return missing, nil +} + +func (s *S3Connector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { + if len(blockNumbers) == 0 { + return nil, nil + } + + // Create a map for quick lookup + blockNumMap := make(map[string]bool) + for _, bn := range blockNumbers { + blockNumMap[bn.String()] = true + } + + var result []common.BlockData + + // First check buffer for requested blocks + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() { + if blockNumMap[block.Block.Number.String()] { + result = append(result, block) + // Remove from map so we don't fetch it from S3 + 
delete(blockNumMap, block.Block.Number.String()) + } + } + } + s.bufferMu.Unlock() + + // If all blocks were in buffer, return early + if len(blockNumMap) == 0 { + return result, nil + } + + // Sort remaining block numbers to optimize file access + var remainingBlocks []*big.Int + for blockStr := range blockNumMap { + bn, _ := new(big.Int).SetString(blockStr, 10) + remainingBlocks = append(remainingBlocks, bn) + } + sort.Slice(remainingBlocks, func(i, j int) bool { + return remainingBlocks[i].Cmp(remainingBlocks[j]) < 0 + }) + + if len(remainingBlocks) == 0 { + return result, nil + } + + minBlock := remainingBlocks[0] + maxBlock := remainingBlocks[len(remainingBlocks)-1] + + // Find relevant files for remaining blocks + files, err := s.findFilesInRange(chainId, minBlock, maxBlock) + if err != nil { + return nil, err + } + + for _, file := range files { + fileData, err := s.downloadAndParseFile(file, chainId, minBlock, maxBlock) + if err != nil { + log.Warn().Err(err).Str("file", file).Msg("Failed to parse file") + continue + } + + // Filter to only requested blocks + for _, bd := range fileData { + if blockNumMap[bd.Block.Number.String()] { + result = append(result, bd) + } + } + } + + return result, nil +} + +// Helper functions + +func (s *S3Connector) extractBlockRangeFromKey(key string) (*big.Int, *big.Int) { + // Extract block range from key like: chain_1/year=2024/blocks_1000_2000.parquet + parts := strings.Split(key, "/") + if len(parts) == 0 { + return nil, nil + } + + filename := parts[len(parts)-1] + if !strings.HasPrefix(filename, "blocks_") || !strings.HasSuffix(filename, s.formatter.GetFileExtension()) { + return nil, nil + } + + // Remove prefix and extension + rangeStr := strings.TrimPrefix(filename, "blocks_") + rangeStr = strings.TrimSuffix(rangeStr, s.formatter.GetFileExtension()) + + // Split by underscore to get start and end + rangeParts := strings.Split(rangeStr, "_") + if len(rangeParts) != 2 { + return nil, nil + } + + startBlock, ok1 := new(big.Int).SetString(rangeParts[0], 10) + endBlock, ok2 := new(big.Int).SetString(rangeParts[1], 10) + if !ok1 || !ok2 { + return nil, nil + } + + return startBlock, endBlock +} + +func (s *S3Connector) findFilesInRange(chainId *big.Int, startBlock, endBlock *big.Int) ([]string, error) { + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + var relevantFiles []string + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil { + continue + } + + fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) + if fileStart == nil || fileEnd == nil { + continue + } + + // Check if this file's range overlaps with our query range + if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { + relevantFiles = append(relevantFiles, *obj.Key) + } + } + } + + return relevantFiles, nil +} + +func (s *S3Connector) downloadAndParseFile(key string, chainId *big.Int, startBlock, endBlock *big.Int) ([]common.BlockData, error) { + ctx := context.Background() + + // Download the file + result, err := s.client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(s.config.Bucket), + Key: aws.String(key), + }) + if err 
!= nil { + return nil, fmt.Errorf("failed to download file: %w", err) + } + defer result.Body.Close() + + // Read entire file into memory (required for parquet reader) + data, err := io.ReadAll(result.Body) + if err != nil { + return nil, fmt.Errorf("failed to read file data: %w", err) + } + + // Read the parquet file + reader := parquet.NewGenericReader[ParquetBlockData](bytes.NewReader(data)) + defer reader.Close() + + var blockData []common.BlockData + parquetRows := make([]ParquetBlockData, 100) // Read in batches + + for { + n, err := reader.Read(parquetRows) + if err != nil && err.Error() != "EOF" { + return nil, fmt.Errorf("failed to read parquet: %w", err) + } + if n == 0 { + break + } + + for i := 0; i < n; i++ { + pd := parquetRows[i] + + // Convert uint64 block number to big.Int + blockNum := new(big.Int).SetUint64(pd.BlockNumber) + + // Filter by range if specified + if startBlock != nil && blockNum.Cmp(startBlock) < 0 { + continue + } + if endBlock != nil && blockNum.Cmp(endBlock) > 0 { + continue + } + + // Unmarshal JSON data + var block common.Block + if err := json.Unmarshal(pd.Block, &block); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal block") + continue + } + + var transactions []common.Transaction + if len(pd.Transactions) > 0 { + if err := json.Unmarshal(pd.Transactions, &transactions); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal transactions") + } + } + + var logs []common.Log + if len(pd.Logs) > 0 { + if err := json.Unmarshal(pd.Logs, &logs); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal logs") + } + } + + var traces []common.Trace + if len(pd.Traces) > 0 { + if err := json.Unmarshal(pd.Traces, &traces); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal traces") + } + } + + blockData = append(blockData, common.BlockData{ + ChainId: pd.ChainID, + Block: block, + Transactions: transactions, + Logs: logs, + Traces: traces, + }) + } + } + + return blockData, nil +} + +func (s *S3Connector) extractBlockHeadersFromFile(key string, chainId *big.Int, from, to *big.Int) ([]common.BlockHeader, error) { + // Download and parse only the block headers + blockData, err := s.downloadAndParseFile(key, chainId, from, to) + if err != nil { + return nil, err + } + + headers := make([]common.BlockHeader, 0, len(blockData)) + for _, bd := range blockData { + headers = append(headers, common.BlockHeader{ + Number: bd.Block.Number, + Hash: bd.Block.Hash, + ParentHash: bd.Block.ParentHash, + }) + } + + return headers, nil +} From eea71f4de66fb5e1463a303773e5ff78fab3ae9f Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 22 Aug 2025 23:02:25 +0000 Subject: [PATCH 18/43] Until block for committer --- configs/config.go | 1 + internal/orchestrator/committer.go | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/configs/config.go b/configs/config.go index 29703bb..e92a4b6 100644 --- a/configs/config.go +++ b/configs/config.go @@ -30,6 +30,7 @@ type CommitterConfig struct { Interval int `mapstructure:"interval"` BlocksPerCommit int `mapstructure:"blocksPerCommit"` FromBlock int `mapstructure:"fromBlock"` + UntilBlock int `mapstructure:"untilBlock"` } type ReorgHandlerConfig struct { diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index 47d93ba..53ff864 100644 --- a/internal/orchestrator/committer.go +++ 
b/internal/orchestrator/committer.go @@ -26,6 +26,7 @@ type Committer struct { blocksPerCommit int storage storage.IStorage commitFromBlock *big.Int + commitUntilBlock *big.Int rpc rpc.IRPCClient lastCommittedBlock atomic.Uint64 lastPublishedBlock atomic.Uint64 @@ -60,12 +61,23 @@ func NewCommitter(rpc rpc.IRPCClient, storage storage.IStorage, opts ...Committe blocksPerCommit = DEFAULT_BLOCKS_PER_COMMIT } + commitUntilBlock := config.Cfg.Committer.UntilBlock + if commitUntilBlock == 0 { + // default to match the poller.untilBlock + if config.Cfg.Poller.UntilBlock != 0 { + commitUntilBlock = config.Cfg.Poller.UntilBlock + } else { + commitUntilBlock = -1 + } + } + commitFromBlock := big.NewInt(int64(config.Cfg.Committer.FromBlock)) committer := &Committer{ triggerIntervalMs: triggerInterval, blocksPerCommit: blocksPerCommit, storage: storage, commitFromBlock: commitFromBlock, + commitUntilBlock: big.NewInt(int64(commitUntilBlock)), rpc: rpc, publisher: publisher.GetInstance(), workMode: "", @@ -204,6 +216,7 @@ func (c *Committer) Start(ctx context.Context) { } c.runCommitLoop(ctx, interval) + log.Info().Msg("Committer shutting down") c.publisher.Close() } @@ -232,6 +245,11 @@ func (c *Committer) runCommitLoop(ctx context.Context, interval time.Duration) { log.Debug().Msg("Committer work mode not set, skipping commit") continue } + if c.commitUntilBlock.Sign() > 0 && c.lastCommittedBlock.Load() > c.commitUntilBlock.Uint64() { + // Completing the commit loop if we've committed more than commit until block + log.Info().Msgf("Committer reached configured untilBlock %s, the last commit block is %d, stopping commits", c.commitUntilBlock.String(), c.lastCommittedBlock.Load()) + return + } blockDataToCommit, err := c.getSequentialBlockDataToCommit(ctx) if err != nil { log.Error().Err(err).Msg("Error getting block data to commit") From 68087a0ec65df602aa51637e91a1dde68da16eec Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Mon, 25 Aug 2025 22:52:39 +0000 Subject: [PATCH 19/43] terminate when poller or committer exit --- internal/orchestrator/orchestrator.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index 1d879dc..f412dff 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -67,7 +67,10 @@ func (o *Orchestrator) Start() { poller := NewPoller(o.rpc, o.storage, WithPollerWorkModeChan(pollerWorkModeChan)) poller.Start(ctx) + log.Info().Msg("Poller completed") + // If the poller is terminated, cancel the orchestrator + o.cancel() }() } @@ -77,6 +80,8 @@ func (o *Orchestrator) Start() { defer o.wg.Done() failureRecoverer := NewFailureRecoverer(o.rpc, o.storage) failureRecoverer.Start(ctx) + + log.Info().Msg("Failure recoverer completed") }() } @@ -90,6 +95,10 @@ func (o *Orchestrator) Start() { validator := NewValidator(o.rpc, o.storage) committer := NewCommitter(o.rpc, o.storage, WithCommitterWorkModeChan(committerWorkModeChan), WithValidator(validator)) committer.Start(ctx) + + // If the committer is terminated, cancel the orchestrator + log.Info().Msg("Committer completed") + o.cancel() }() } @@ -99,6 +108,8 @@ func (o *Orchestrator) Start() { defer o.wg.Done() reorgHandler := NewReorgHandler(o.rpc, o.storage) reorgHandler.Start(ctx) + + log.Info().Msg("Reorg handler completed") }() } @@ -106,6 +117,8 @@ func (o *Orchestrator) Start() { go func() { defer o.wg.Done() workModeMonitor.Start(ctx) + + 
log.Info().Msg("Work mode monitor completed") }() // The chain tracker is always running @@ -114,11 +127,12 @@ func (o *Orchestrator) Start() { defer o.wg.Done() chainTracker := NewChainTracker(o.rpc) chainTracker.Start(ctx) - }() - o.wg.Wait() + log.Info().Msg("Chain tracker completed") + }() // Waiting for all goroutines to complete + o.wg.Wait() if err := o.storage.Close(); err != nil { log.Error().Err(err).Msg("Error closing storage connections") From c0ba962f39fdc16b2c2981525e95d4cb0eaf9502 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 01:56:33 +0000 Subject: [PATCH 20/43] Fix commit until block --- internal/orchestrator/committer.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index 53ff864..bde4854 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -245,7 +245,7 @@ func (c *Committer) runCommitLoop(ctx context.Context, interval time.Duration) { log.Debug().Msg("Committer work mode not set, skipping commit") continue } - if c.commitUntilBlock.Sign() > 0 && c.lastCommittedBlock.Load() > c.commitUntilBlock.Uint64() { + if c.commitUntilBlock.Sign() > 0 && c.lastCommittedBlock.Load() >= c.commitUntilBlock.Uint64() { // Completing the commit loop if we've committed more than commit until block log.Info().Msgf("Committer reached configured untilBlock %s, the last commit block is %d, stopping commits", c.commitUntilBlock.String(), c.lastCommittedBlock.Load()) return @@ -399,9 +399,16 @@ func (c *Committer) getBlockNumbersToPublish(ctx context.Context) ([]*big.Int, e func (c *Committer) getBlockToCommitUntil(ctx context.Context, latestCommittedBlockNumber *big.Int) (*big.Int, error) { untilBlock := new(big.Int).Add(latestCommittedBlockNumber, big.NewInt(int64(c.blocksPerCommit))) + + // If a commit until block is set, then set a limit on the commit until block + if c.commitUntilBlock.Sign() > 0 && untilBlock.Cmp(c.commitUntilBlock) > 0 { + return new(big.Int).Set(c.commitUntilBlock), nil + } + c.workModeMutex.RLock() currentMode := c.workMode c.workModeMutex.RUnlock() + if currentMode == WorkModeBackfill { return untilBlock, nil } else { From 51f1398e92546714e7f1ef14ba29af4f8c3324eb Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 02:36:10 +0000 Subject: [PATCH 21/43] Don't cancel active tasks in poller --- internal/orchestrator/poller.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index 31a64ae..b46fc34 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -159,7 +159,6 @@ func (p *Poller) Start(ctx context.Context) { lastPolledBlock := p.Poll(pollCtx, blockNumbers) if p.reachedPollLimit(lastPolledBlock) { log.Info().Msgf("Reached poll limit at block %s, completing poller", lastPolledBlock.String()) - cancel() return } } From debc231eb7f150d4e8da748bd0f9c6dcd424e44f Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 08:05:30 +0000 Subject: [PATCH 22/43] migrate with destination storage --- cmd/migrate_valid.go | 304 +++++++++++++-------- cmd/root.go | 93 ++++++- configs/config.go | 17 +- internal/common/block.go | 2 - internal/orchestrator/failure_recoverer.go | 1 - internal/orchestrator/poller.go | 1 - internal/orchestrator/reorg_handler.go | 1 - internal/storage/badger.go | 4 +- 
internal/storage/clickhouse.go | 23 +- internal/storage/connector.go | 6 +- internal/storage/kafka_publisher.go | 15 +- internal/storage/kafka_redis.go | 23 +- internal/storage/s3.go | 154 +++++++++-- test/mocks/MockIMainStorage.go | 60 ---- 14 files changed, 462 insertions(+), 242 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index cc3e912..d8d34db 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -4,7 +4,9 @@ import ( "context" "math/big" "os" - "strconv" + "os/signal" + "syscall" + "time" "github.com/rs/zerolog/log" "github.com/spf13/cobra" @@ -13,6 +15,7 @@ import ( "github.com/thirdweb-dev/indexer/internal/orchestrator" "github.com/thirdweb-dev/indexer/internal/rpc" "github.com/thirdweb-dev/indexer/internal/storage" + "github.com/thirdweb-dev/indexer/internal/worker" ) var ( @@ -27,12 +30,18 @@ var ( ) const ( - TARGET_STORAGE_DATABASE = "temp" - DEFAULT_RPC_BATCH_SIZE = 200 - DEFAULT_BATCH_SIZE = 1000 + DEFAULT_RPC_BATCH_SIZE = 100 + DEFAULT_BATCH_SIZE = 2000 ) func RunValidationMigration(cmd *cobra.Command, args []string) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Set up signal handling for graceful shutdown + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + migrator := NewMigrator() defer migrator.Close() @@ -40,88 +49,159 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { log.Info().Msgf("Migrating blocks from %s to %s (both ends inclusive)", rangeStartBlock.String(), rangeEndBlock.String()) - // 2. Start going in loops - for currentBlock := rangeStartBlock; currentBlock.Cmp(rangeEndBlock) <= 0; { - endBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.migrationBatchSize-1))) - if endBlock.Cmp(rangeEndBlock) > 0 { - endBlock = rangeEndBlock - } + // Run migration in a goroutine + done := make(chan struct{}) + var migrationErr error - blockNumbers := generateBlockNumbersForRange(currentBlock, endBlock) - log.Info().Msgf("Processing blocks %s to %s", blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) + go func() { + defer close(done) - validBlocksForRange := migrator.GetValidBlocksForRange(blockNumbers) + // 2. 
Start going in loops + for currentBlock := rangeStartBlock; currentBlock.Cmp(rangeEndBlock) <= 0; { + batchStartTime := time.Now() - blocksToInsertMap := make(map[string]common.BlockData) - for _, blockData := range validBlocksForRange { - blocksToInsertMap[blockData.Block.Number.String()] = blockData - } + // Check for cancellation + select { + case <-ctx.Done(): + log.Info().Msgf("Migration interrupted at block %s", currentBlock.String()) + return + default: + } - // Loop over block numbers to find missing blocks - missingBlocks := make([]*big.Int, 0) - for _, blockNum := range blockNumbers { - if _, exists := blocksToInsertMap[blockNum.String()]; !exists { - missingBlocks = append(missingBlocks, blockNum) + endBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.migrationBatchSize-1))) + if endBlock.Cmp(rangeEndBlock) > 0 { + endBlock = rangeEndBlock } - } - validMissingBlocks := migrator.GetValidBlocksFromRPC(missingBlocks) - for _, blockData := range validMissingBlocks { - blocksToInsertMap[blockData.Block.Number.String()] = blockData - } + blockNumbers := generateBlockNumbersForRange(currentBlock, endBlock) + log.Info().Msgf("Processing blocks %s to %s", blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) + + // Fetch valid blocks from source + fetchStartTime := time.Now() + validBlocksForRange, err := migrator.GetValidBlocksForRange(blockNumbers) + fetchDuration := time.Since(fetchStartTime) + if err != nil { + // If we got an error fetching valid blocks, we'll continue + log.Error().Err(err).Msg("Failed to get valid blocks for range") + time.Sleep(3 * time.Second) + continue + } + log.Debug().Dur("duration", fetchDuration).Int("blocks_fetched", len(validBlocksForRange)).Msg("Fetched valid blocks from source") - blocksToInsert := make([]common.BlockData, 0) - for _, blockData := range blocksToInsertMap { - blocksToInsert = append(blocksToInsert, blockData) - } + // Build map of fetched blocks + mapBuildStartTime := time.Now() + blocksToInsertMap := make(map[string]common.BlockData) + for _, blockData := range validBlocksForRange { + blocksToInsertMap[blockData.Block.Number.String()] = blockData + } - err := migrator.targetConn.InsertBlockData(blocksToInsert) - if err != nil { - log.Fatal().Err(err).Msg("Failed to insert blocks to target storage") + // Loop over block numbers to find missing blocks + missingBlocks := make([]*big.Int, 0) + for _, blockNum := range blockNumbers { + if _, exists := blocksToInsertMap[blockNum.String()]; !exists { + missingBlocks = append(missingBlocks, blockNum) + } + } + mapBuildDuration := time.Since(mapBuildStartTime) + log.Debug().Dur("duration", mapBuildDuration).Int("missing_blocks", len(missingBlocks)).Msg("Identified missing blocks") + + // Fetch missing blocks from RPC + if len(missingBlocks) > 0 { + rpcFetchStartTime := time.Now() + validMissingBlocks := migrator.GetValidBlocksFromRPC(missingBlocks) + rpcFetchDuration := time.Since(rpcFetchStartTime) + log.Debug().Dur("duration", rpcFetchDuration).Int("blocks_fetched", len(validMissingBlocks)).Msg("Fetched missing blocks from RPC") + + for _, blockData := range validMissingBlocks { + if blockData.Block.ChainId.Sign() == 0 { + log.Fatal().Msgf("Block %s has chain ID 0, %+v", blockData.Block.Number.String(), blockData.Block) + } + blocksToInsertMap[blockData.Block.Number.String()] = blockData + } + } + + // Prepare blocks for insertion + prepStartTime := time.Now() + blocksToInsert := make([]common.BlockData, 0, len(blocksToInsertMap)) + for _, blockData := range 
blocksToInsertMap { + blocksToInsert = append(blocksToInsert, blockData) + } + prepDuration := time.Since(prepStartTime) + log.Debug().Dur("duration", prepDuration).Int("blocks_to_insert", len(blocksToInsert)).Msg("Prepared blocks for insertion") + + // Insert blocks to destination + insertStartTime := time.Now() + err = migrator.destination.InsertBlockData(blocksToInsert) + insertDuration := time.Since(insertStartTime) + if err != nil { + migrationErr = err + log.Error().Err(err).Dur("duration", insertDuration).Msg("Failed to insert blocks to target storage") + time.Sleep(3 * time.Second) + continue + } + + batchDuration := time.Since(batchStartTime) + log.Info(). + Dur("total_duration", batchDuration). + Dur("fetch_duration", fetchDuration). + Dur("insert_duration", insertDuration). + Int("blocks_processed", len(blocksToInsert)). + Msg("Batch processed successfully") + + currentBlock = new(big.Int).Add(endBlock, big.NewInt(1)) } - currentBlock = new(big.Int).Add(endBlock, big.NewInt(1)) - } + // 3. then finally copy partitions from target table to main tables + log.Info().Msg("Migration completed successfully") + }() - // 3. then finally copy partitions from target table to main tables - log.Info().Msg("Done") + // Wait for either completion or interrupt signal + select { + case <-done: + if migrationErr != nil { + log.Fatal().Err(migrationErr).Msg("Migration failed") + } + log.Info().Msg("Done") + case sig := <-sigChan: + log.Info().Msgf("Received signal: %s, initiating graceful shutdown...", sig) + cancel() + <-done + log.Info().Msg("Migration stopped gracefully") + } } type Migrator struct { rpcClient rpc.IRPCClient - storage storage.IStorage + worker *worker.Worker + source storage.IStorage + destination storage.IMainStorage validator *orchestrator.Validator - targetConn *storage.ClickHouseConnector migrationBatchSize int rpcBatchSize int } func NewMigrator() *Migrator { - targetDBName := os.Getenv("TARGET_STORAGE_DATABASE") - if targetDBName == "" { - targetDBName = TARGET_STORAGE_DATABASE - } batchSize := DEFAULT_BATCH_SIZE - batchSizeEnvInt, err := strconv.Atoi(os.Getenv("MIGRATION_BATCH_SIZE")) - if err == nil && batchSizeEnvInt > 0 { - batchSize = batchSizeEnvInt + if config.Cfg.Migrator.StorageBatchSize > 0 { + batchSize = int(config.Cfg.Migrator.StorageBatchSize) } rpcBatchSize := DEFAULT_RPC_BATCH_SIZE - rpcBatchSizeEnvInt, err := strconv.Atoi(os.Getenv("MIGRATION_RPC_BATCH_SIZE")) - if err == nil && rpcBatchSizeEnvInt > 0 { - rpcBatchSize = rpcBatchSizeEnvInt + if config.Cfg.Migrator.RpcBatchSize > 0 { + rpcBatchSize = int(config.Cfg.Migrator.RpcBatchSize) } + rpcClient, err := rpc.Initialize() if err != nil { log.Fatal().Err(err).Msg("Failed to initialize RPC") } - s, err := storage.NewStorageConnector(&config.Cfg.Storage) + + sourceConnector, err := storage.NewStorageConnector(&config.Cfg.Storage) if err != nil { log.Fatal().Err(err).Msg("Failed to initialize storage") } // check if chain was indexed with block receipts. 
If it was, then the current RPC must support block receipts - validRpc, err := validateRPC(rpcClient, s) + validRpc, err := validateRPC(rpcClient, sourceConnector) if err != nil { log.Fatal().Err(err).Msg("Failed to validate RPC") } @@ -129,114 +209,112 @@ func NewMigrator() *Migrator { log.Fatal().Msg("RPC does not support block receipts, but transactions were indexed with receipts") } - validator := orchestrator.NewValidator(rpcClient, s) + validator := orchestrator.NewValidator(rpcClient, sourceConnector) - targetStorageConfig := *config.Cfg.Storage.Main.Clickhouse - targetStorageConfig.Database = targetDBName - targetConn, err := storage.NewClickHouseConnector(&targetStorageConfig) + destinationConnector, err := storage.NewConnector[storage.IMainStorage](&config.Cfg.Migrator.Destination) if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize target storage") + log.Fatal().Err(err).Msg("Failed to initialize storage") } return &Migrator{ migrationBatchSize: batchSize, rpcBatchSize: rpcBatchSize, rpcClient: rpcClient, - storage: s, + source: sourceConnector, + destination: destinationConnector, validator: validator, - targetConn: targetConn, + worker: worker.NewWorker(rpcClient), } } func (m *Migrator) Close() { m.rpcClient.Close() + + if err := m.source.Close(); err != nil { + log.Fatal().Err(err).Msg("Failed to close source storage") + } + + if err := m.destination.Close(); err != nil { + log.Fatal().Err(err).Msg("Failed to close destination storage") + } } func (m *Migrator) DetermineMigrationBoundaries() (*big.Int, *big.Int) { // get latest block from main storage - latestBlockStored, err := m.storage.MainStorage.GetMaxBlockNumber(m.rpcClient.GetChainID()) + latestBlockStored, err := m.source.MainStorage.GetMaxBlockNumber(m.rpcClient.GetChainID()) if err != nil { log.Fatal().Err(err).Msg("Failed to get latest block from main storage") } log.Info().Msgf("Latest block in main storage: %d", latestBlockStored) endBlock := latestBlockStored - // set range end from env instead if configured - endBlockEnv := os.Getenv("END_BLOCK") - if endBlockEnv != "" { - configuredEndBlock, ok := new(big.Int).SetString(endBlockEnv, 10) - if !ok { - log.Fatal().Msgf("Failed to parse end block %s", endBlockEnv) - } - log.Info().Msgf("Configured end block: %s", configuredEndBlock.String()) - // set configured end block only if it's greater than 0 and less than latest block in main storage - if configuredEndBlock.Sign() > 0 && configuredEndBlock.Cmp(latestBlockStored) < 0 { - endBlock = configuredEndBlock - } + endBlockEnv := big.NewInt(int64(config.Cfg.Migrator.EndBlock)) + if endBlockEnv.Sign() > 0 && endBlockEnv.Cmp(latestBlockStored) < 0 { + endBlock = endBlockEnv } - startBlock := big.NewInt(0) // default start block is 0 - // if start block is configured, use it - startBlockEnv := os.Getenv("START_BLOCK") - if startBlockEnv != "" { - configuredStartBlock, ok := new(big.Int).SetString(startBlockEnv, 10) - if !ok { - log.Fatal().Msgf("Failed to parse start block %s", startBlockEnv) - } - log.Info().Msgf("Configured start block: %s", configuredStartBlock.String()) - startBlock = configuredStartBlock - } + startBlock := big.NewInt(int64(config.Cfg.Migrator.StartBlock)) // default start block is 0 - latestMigratedBlock, err := m.targetConn.GetMaxBlockNumberInRange(m.rpcClient.GetChainID(), startBlock, endBlock) + blockCount, err := m.destination.GetBlockCount(m.rpcClient.GetChainID(), startBlock, endBlock) if err != nil { log.Fatal().Err(err).Msg("Failed to get latest block from target storage") } 
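The migrator now takes its tuning and destination from config.Cfg.Migrator instead of raw environment variables. That config block is added in configs/config.go in this patch but the struct itself is not shown in this hunk; judging from the fields referenced here (StorageBatchSize, RpcBatchSize, StartBlock, EndBlock, Destination) and the migrator.* viper keys bound in cmd/root.go below, it plausibly looks something like this sketch. Field types, mapstructure tags, and the Destination type are assumptions.

// Sketch only: assumed shape of the new migrator configuration.
type MigratorConfig struct {
	StorageBatchSize uint                    `mapstructure:"storageBatchSize"` // blocks per migration batch
	RpcBatchSize     uint                    `mapstructure:"rpcBatchSize"`     // blocks per RPC request
	StartBlock       uint                    `mapstructure:"startBlock"`
	EndBlock         uint                    `mapstructure:"endBlock"`
	Destination      StorageConnectionConfig `mapstructure:"destination"` // assumed: same shape as the main/staging storage config
}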
- log.Info().Msgf("Latest block in target storage: %d", latestMigratedBlock) + log.Info().Msgf("Block count in the target storage for range %s to %s: count=%s", startBlock.String(), endBlock.String(), blockCount.String()) - if latestMigratedBlock.Cmp(endBlock) >= 0 { + expectedCount := new(big.Int).Sub(endBlock, startBlock) + expectedCount = expectedCount.Add(expectedCount, big.NewInt(1)) + if expectedCount.Cmp(blockCount) == 0 { log.Fatal().Msgf("Full range is already migrated") + return nil, nil } - // if configured start block is less than or equal to already migrated and migrated block is not 0, start from last migrated + 1 - if startBlock.Cmp(latestMigratedBlock) <= 0 && latestMigratedBlock.Sign() > 0 { - startBlock = new(big.Int).Add(latestMigratedBlock, big.NewInt(1)) + maxStoredBlock, err := m.destination.GetMaxBlockNumberInRange(m.rpcClient.GetChainID(), startBlock, endBlock) + if err != nil { + log.Fatal().Err(err).Msg("Failed to get max block from destination storage") + return nil, nil + } + + log.Info().Msgf("Block in the target storage for range %s to %s: count=%s, max=%s", startBlock.String(), endBlock.String(), blockCount.String(), maxStoredBlock.String()) + if maxStoredBlock != nil && maxStoredBlock.Cmp(startBlock) >= 0 { + startBlock = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) } return startBlock, endBlock } func (m *Migrator) FetchBlocksFromRPC(blockNumbers []*big.Int) ([]common.BlockData, error) { - allBlockData := make([]common.BlockData, 0) - for i := 0; i < len(blockNumbers); i += m.rpcBatchSize { - end := i + m.rpcBatchSize - if end > len(blockNumbers) { - end = len(blockNumbers) - } - batch := blockNumbers[i:end] - blockData := m.rpcClient.GetFullBlocks(context.Background(), batch) + allBlockData := make([]common.BlockData, 0, len(blockNumbers)) - for _, block := range blockData { - if block.Error != nil { - log.Warn().Err(block.Error).Msgf("Failed to fetch block %s from RPC", block.BlockNumber.String()) - continue - } - allBlockData = append(allBlockData, block.Data) + blockData := m.worker.Run(context.Background(), blockNumbers) + for _, block := range blockData { + if block.Error != nil { + log.Warn().Err(block.Error).Msgf("Failed to fetch block %s from RPC", block.BlockNumber.String()) + continue } + allBlockData = append(allBlockData, block.Data) } return allBlockData, nil } -func (m *Migrator) GetValidBlocksForRange(blockNumbers []*big.Int) []common.BlockData { - blockData, err := m.storage.MainStorage.GetFullBlockData(m.rpcClient.GetChainID(), blockNumbers) +func (m *Migrator) GetValidBlocksForRange(blockNumbers []*big.Int) ([]common.BlockData, error) { + getFullBlockTime := time.Now() + blockData, err := m.source.MainStorage.GetFullBlockData(m.rpcClient.GetChainID(), blockNumbers) + getFullBlockDuration := time.Since(getFullBlockTime) if err != nil { - log.Fatal().Err(err).Msg("Failed to get full block data") + log.Error().Err(err).Msg("Failed to get full block data") + return nil, err } + validateBlockTime := time.Now() validBlocks, _, err := m.validator.ValidateBlocks(blockData) + validateBlockDuration := time.Since(validateBlockTime) if err != nil { - log.Fatal().Err(err).Msg("Failed to validate blocks") + log.Error().Err(err).Msg("Failed to validate blocks") + return nil, err } - return validBlocks + + log.Debug().Dur("get_full_block", getFullBlockDuration).Dur("validate_block", validateBlockDuration).Int("count", len(blockNumbers)).Msg("Get valid blocks for range") + return validBlocks, nil } func (m *Migrator) GetValidBlocksFromRPC(blockNumbers 
[]*big.Int) []common.BlockData { @@ -282,7 +360,15 @@ func validateRPC(rpcClient rpc.IRPCClient, s storage.IStorage) (bool, error) { } func generateBlockNumbersForRange(startBlock, endBlock *big.Int) []*big.Int { - blockNumbers := make([]*big.Int, 0) + if startBlock.Cmp(endBlock) > 0 { + return []*big.Int{} + } + + // Pre-calculate capacity to avoid slice growth + length := new(big.Int).Sub(endBlock, startBlock) + length.Add(length, big.NewInt(1)) + + blockNumbers := make([]*big.Int, 0, length.Int64()) for i := new(big.Int).Set(startBlock); i.Cmp(endBlock) <= 0; i.Add(i, big.NewInt(1)) { blockNumbers = append(blockNumbers, new(big.Int).Set(i)) } diff --git a/cmd/root.go b/cmd/root.go index d9548fb..479d4f4 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -83,6 +83,7 @@ func init() { rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for orchestrator storage") rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-disableTLS", false, "Clickhouse disableTLS for orchestrator storage") rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for orchestrator storage") + rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-enableCompression", false, "Clickhouse enableCompression for orchestrator storage") rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxQueryTime", 60, "Clickhouse max query time for orchestrator storage") rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxMemoryUsage", 1000000000, "Clickhouse max memory usage in bytes for orchestrator storage") rootCmd.PersistentFlags().String("storage-orchestrator-postgres-host", "", "PostgreSQL host for orchestrator storage") @@ -105,6 +106,7 @@ func init() { rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for main storage") rootCmd.PersistentFlags().Bool("storage-main-clickhouse-disableTLS", false, "Clickhouse disableTLS for main storage") rootCmd.PersistentFlags().Bool("storage-main-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for main storage") + rootCmd.PersistentFlags().Bool("storage-main-clickhouse-enableCompression", false, "Clickhouse enableCompression for main storage") rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxQueryTime", 60, "Clickhouse max query time for main storage") rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxMemoryUsage", 1000000000, "Clickhouse max memory usage in bytes for main storage") rootCmd.PersistentFlags().String("storage-staging-clickhouse-username", "", "Clickhouse username for staging storage") @@ -115,6 +117,7 @@ func init() { rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for staging storage") rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-disableTLS", false, "Clickhouse disableTLS for staging storage") rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for staging storage") + rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-enableCompression", false, "Clickhouse enableCompression for staging storage") rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxQueryTime", 60, "Clickhouse max query time for staging storage") rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxMemoryUsage", 1000000000, 
"Clickhouse max memory usage in bytes for staging storage") rootCmd.PersistentFlags().String("storage-staging-postgres-host", "", "PostgreSQL host for staging storage") @@ -135,15 +138,11 @@ func init() { rootCmd.PersistentFlags().Int("storage-main-kafka-redis-port", 6379, "Redis port for Kafka main storage metadata") rootCmd.PersistentFlags().String("storage-main-kafka-redis-password", "", "Redis password for Kafka main storage metadata") rootCmd.PersistentFlags().Int("storage-main-kafka-redis-db", 0, "Redis database number for Kafka main storage metadata") - // Storage type selection flags rootCmd.PersistentFlags().String("storage-staging-type", "auto", "Storage type for staging (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-main-type", "auto", "Storage type for main (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-orchestrator-type", "auto", "Storage type for orchestrator (auto, clickhouse, postgres, badger)") - // BadgerDB flags for staging storage rootCmd.PersistentFlags().String("storage-staging-badger-path", "", "BadgerDB path for staging storage") - // BadgerDB flags for orchestrator storage rootCmd.PersistentFlags().String("storage-orchestrator-badger-path", "", "BadgerDB path for orchestrator storage") - // S3 flags for main storage rootCmd.PersistentFlags().String("storage-main-s3-bucket", "", "S3 bucket for main storage") rootCmd.PersistentFlags().String("storage-main-s3-region", "", "S3 region for main storage") rootCmd.PersistentFlags().String("storage-main-s3-prefix", "", "S3 key prefix for main storage") @@ -154,7 +153,6 @@ func init() { rootCmd.PersistentFlags().Int64("storage-main-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for main storage") rootCmd.PersistentFlags().Int("storage-main-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for main storage") rootCmd.PersistentFlags().Int("storage-main-s3-maxBlocksPerFile", 0, "S3 max blocks per file for main storage (0 = no limit)") - // S3 Parquet configuration rootCmd.PersistentFlags().String("storage-main-s3-parquet-compression", "snappy", "Parquet compression type for S3 main storage") rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-rowGroupSize", 256, "Parquet row group size in MB for S3 main storage") rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-pageSize", 8192, "Parquet page size in KB for S3 main storage") @@ -189,6 +187,47 @@ func init() { rootCmd.PersistentFlags().Int("workMode-checkIntervalMinutes", 10, "How often to check work mode in minutes") rootCmd.PersistentFlags().Int64("workMode-liveModeThreshold", 500, "How many blocks the indexer can be behind before switching to live mode") rootCmd.PersistentFlags().String("validation-mode", "strict", "Validation mode. Strict will validate logsBloom and transactionsRoot. 
Minimal will validate transaction count and logs existence.") + rootCmd.PersistentFlags().String("migrator-destination-type", "auto", "Storage type for migrator destination (auto, clickhouse, postgres, kafka, badger, s3)") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-host", "", "Clickhouse host for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-port", 0, "Clickhouse port for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-username", "", "Clickhouse username for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-password", "", "Clickhouse password for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-database", "", "Clickhouse database for migrator destination") + rootCmd.PersistentFlags().Bool("migrator-destination-clickhouse-disableTLS", false, "Clickhouse disableTLS for migrator destination") + rootCmd.PersistentFlags().Bool("migrator-destination-clickhouse-asyncInsert", false, "Clickhouse async insert for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxRowsPerInsert", 100000, "Clickhouse max rows per insert for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxOpenConns", 30, "Clickhouse max open connections for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-host", "", "PostgreSQL host for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-port", 5432, "PostgreSQL port for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-username", "", "PostgreSQL username for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-password", "", "PostgreSQL password for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-database", "", "PostgreSQL database for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-sslMode", "require", "PostgreSQL SSL mode for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxOpenConns", 50, "PostgreSQL max open connections for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxIdleConns", 25, "PostgreSQL max idle connections for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-kafka-brokers", "", "Kafka brokers for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-kafka-username", "", "Kafka username for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-kafka-password", "", "Kafka password for migrator destination") + rootCmd.PersistentFlags().Bool("migrator-destination-kafka-enableTLS", true, "Enable TLS for Kafka connection in migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-badger-path", "", "BadgerDB path for migrator destination") + 
rootCmd.PersistentFlags().String("migrator-destination-s3-bucket", "", "S3 bucket for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-region", "", "S3 region for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-prefix", "", "S3 key prefix for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-accessKeyId", "", "S3 access key ID for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-secretAccessKey", "", "S3 secret access key for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-endpoint", "", "S3 endpoint URL for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-format", "parquet", "S3 storage format for migrator destination") + rootCmd.PersistentFlags().Int64("migrator-destination-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-s3-maxBlocksPerFile", 0, "S3 max blocks per file for migrator destination") + rootCmd.PersistentFlags().Uint("migrator-storageBatchSize", 2000, "Batch size for storage operations in migrator") + rootCmd.PersistentFlags().Uint("migrator-rpcBatchSize", 100, "Batch size for RPC operations in migrator") + rootCmd.PersistentFlags().Uint("migrator-startBlock", 0, "Start block for migration") + rootCmd.PersistentFlags().Uint("migrator-endBlock", 0, "End block for migration") + viper.BindPFlag("rpc.url", rootCmd.PersistentFlags().Lookup("rpc-url")) viper.BindPFlag("rpc.blocks.blocksPerRequest", rootCmd.PersistentFlags().Lookup("rpc-blocks-blocksPerRequest")) viper.BindPFlag("rpc.blocks.batchDelay", rootCmd.PersistentFlags().Lookup("rpc-blocks-batchDelay")) @@ -232,6 +271,7 @@ func init() { viper.BindPFlag("storage.staging.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxIdleConns")) viper.BindPFlag("storage.staging.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-disableTLS")) viper.BindPFlag("storage.staging.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-enableParallelViewProcessing")) + viper.BindPFlag("storage.staging.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-enableCompression")) viper.BindPFlag("storage.staging.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxQueryTime")) viper.BindPFlag("storage.staging.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxMemoryUsage")) viper.BindPFlag("storage.main.clickhouse.database", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-database")) @@ -245,6 +285,7 @@ func init() { viper.BindPFlag("storage.main.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxIdleConns")) viper.BindPFlag("storage.main.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-disableTLS")) viper.BindPFlag("storage.main.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-enableParallelViewProcessing")) + viper.BindPFlag("storage.main.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-enableCompression")) 
viper.BindPFlag("storage.main.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxQueryTime")) viper.BindPFlag("storage.main.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxMemoryUsage")) viper.BindPFlag("storage.orchestrator.clickhouse.database", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-database")) @@ -258,6 +299,7 @@ func init() { viper.BindPFlag("storage.orchestrator.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxIdleConns")) viper.BindPFlag("storage.orchestrator.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-disableTLS")) viper.BindPFlag("storage.orchestrator.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-enableParallelViewProcessing")) + viper.BindPFlag("storage.orchestrator.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-enableCompression")) viper.BindPFlag("storage.orchestrator.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxQueryTime")) viper.BindPFlag("storage.orchestrator.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxMemoryUsage")) viper.BindPFlag("storage.orchestrator.postgres.host", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-host")) @@ -337,6 +379,47 @@ func init() { viper.BindPFlag("workMode.checkIntervalMinutes", rootCmd.PersistentFlags().Lookup("workMode-checkIntervalMinutes")) viper.BindPFlag("workMode.liveModeThreshold", rootCmd.PersistentFlags().Lookup("workMode-liveModeThreshold")) viper.BindPFlag("validation.mode", rootCmd.PersistentFlags().Lookup("validation-mode")) + // Migrator viper bindings + viper.BindPFlag("migrator.destination.type", rootCmd.PersistentFlags().Lookup("migrator-destination-type")) + viper.BindPFlag("migrator.destination.clickhouse.host", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-host")) + viper.BindPFlag("migrator.destination.clickhouse.port", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-port")) + viper.BindPFlag("migrator.destination.clickhouse.username", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-username")) + viper.BindPFlag("migrator.destination.clickhouse.password", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-password")) + viper.BindPFlag("migrator.destination.clickhouse.database", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-database")) + viper.BindPFlag("migrator.destination.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-disableTLS")) + viper.BindPFlag("migrator.destination.clickhouse.asyncInsert", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-asyncInsert")) + viper.BindPFlag("migrator.destination.clickhouse.maxRowsPerInsert", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxRowsPerInsert")) + viper.BindPFlag("migrator.destination.clickhouse.maxOpenConns", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxOpenConns")) + viper.BindPFlag("migrator.destination.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxIdleConns")) + viper.BindPFlag("migrator.destination.postgres.host", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-host")) + 
viper.BindPFlag("migrator.destination.postgres.port", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-port")) + viper.BindPFlag("migrator.destination.postgres.username", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-username")) + viper.BindPFlag("migrator.destination.postgres.password", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-password")) + viper.BindPFlag("migrator.destination.postgres.database", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-database")) + viper.BindPFlag("migrator.destination.postgres.sslMode", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-sslMode")) + viper.BindPFlag("migrator.destination.postgres.maxOpenConns", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxOpenConns")) + viper.BindPFlag("migrator.destination.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxIdleConns")) + viper.BindPFlag("migrator.destination.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxConnLifetime")) + viper.BindPFlag("migrator.destination.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-connectTimeout")) + viper.BindPFlag("migrator.destination.kafka.brokers", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-brokers")) + viper.BindPFlag("migrator.destination.kafka.username", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-username")) + viper.BindPFlag("migrator.destination.kafka.password", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-password")) + viper.BindPFlag("migrator.destination.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-enableTLS")) + viper.BindPFlag("migrator.destination.badger.path", rootCmd.PersistentFlags().Lookup("migrator-destination-badger-path")) + viper.BindPFlag("migrator.destination.s3.bucket", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bucket")) + viper.BindPFlag("migrator.destination.s3.region", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-region")) + viper.BindPFlag("migrator.destination.s3.prefix", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-prefix")) + viper.BindPFlag("migrator.destination.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-accessKeyId")) + viper.BindPFlag("migrator.destination.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-secretAccessKey")) + viper.BindPFlag("migrator.destination.s3.endpoint", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-endpoint")) + viper.BindPFlag("migrator.destination.s3.format", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-format")) + viper.BindPFlag("migrator.destination.s3.bufferSizeMB", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bufferSizeMB")) + viper.BindPFlag("migrator.destination.s3.bufferTimeoutSeconds", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bufferTimeoutSeconds")) + viper.BindPFlag("migrator.destination.s3.maxBlocksPerFile", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-maxBlocksPerFile")) + viper.BindPFlag("migrator.startBlock", rootCmd.PersistentFlags().Lookup("migrator-startBlock")) + viper.BindPFlag("migrator.endBlock", rootCmd.PersistentFlags().Lookup("migrator-endBlock")) + viper.BindPFlag("migrator.storageBatchSize", rootCmd.PersistentFlags().Lookup("migrator-storageBatchSize")) + viper.BindPFlag("migrator.rpcBatchSize", 
rootCmd.PersistentFlags().Lookup("migrator-rpcBatchSize")) rootCmd.AddCommand(orchestratorCmd) rootCmd.AddCommand(apiCmd) rootCmd.AddCommand(validateAndFixCmd) diff --git a/configs/config.go b/configs/config.go index e92a4b6..226fa9d 100644 --- a/configs/config.go +++ b/configs/config.go @@ -52,13 +52,6 @@ type StorageConfig struct { Main StorageConnectionConfig `mapstructure:"main"` Orchestrator StorageConnectionConfig `mapstructure:"orchestrator"` } -type StorageType string - -const ( - StorageTypeMain StorageType = "main" - StorageTypeStaging StorageType = "staging" - StorageTypeOrchestrator StorageType = "orchestrator" -) type StorageConnectionConfig struct { Type string `mapstructure:"type"` // "auto", "clickhouse", "postgres", "kafka", "badger", "s3" @@ -116,6 +109,7 @@ type ClickhouseConfig struct { EnableParallelViewProcessing bool `mapstructure:"enableParallelViewProcessing"` MaxQueryTime int `mapstructure:"maxQueryTime"` MaxMemoryUsage int `mapstructure:"maxMemoryUsage"` + EnableCompression bool `mapstructure:"enableCompression"` } type PostgresConfig struct { @@ -238,6 +232,14 @@ type ValidationConfig struct { Mode string `mapstructure:"mode"` // "disabled", "minimal", "strict" } +type MigratorConfig struct { + Destination StorageConnectionConfig `mapstructure:"destination"` + StartBlock uint `mapstructure:"startBlock"` + EndBlock uint `mapstructure:"endBlock"` + StorageBatchSize uint `mapstructure:"storageBatchSize"` + RpcBatchSize uint `mapstructure:"rpcBatchSize"` +} + type Config struct { RPC RPCConfig `mapstructure:"rpc"` Log LogConfig `mapstructure:"log"` @@ -250,6 +252,7 @@ type Config struct { Publisher PublisherConfig `mapstructure:"publisher"` WorkMode WorkModeConfig `mapstructure:"workMode"` Validation ValidationConfig `mapstructure:"validation"` + Migrator MigratorConfig `mapstructure:"migrator"` } var Cfg Config diff --git a/internal/common/block.go b/internal/common/block.go index 83a5bf0..f0cd019 100644 --- a/internal/common/block.go +++ b/internal/common/block.go @@ -59,7 +59,6 @@ type BlockModel struct { } type BlockData struct { - ChainId uint64 `json:"chain_id"` Block Block `json:"block"` Transactions []Transaction `json:"transactions"` Logs []Log `json:"logs"` @@ -103,7 +102,6 @@ func (b *Block) Serialize() BlockModel { func (b *BlockData) Serialize() BlockData { data := BlockData{ - ChainId: b.ChainId, Block: b.Block, Transactions: b.Transactions, Logs: b.Logs, diff --git a/internal/orchestrator/failure_recoverer.go b/internal/orchestrator/failure_recoverer.go index a097034..da1ae91 100644 --- a/internal/orchestrator/failure_recoverer.go +++ b/internal/orchestrator/failure_recoverer.go @@ -110,7 +110,6 @@ func (fr *FailureRecoverer) handleWorkerResults(blockFailures []common.BlockFail }) } else { successfulResults = append(successfulResults, common.BlockData{ - ChainId: fr.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index b46fc34..5045dc1 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -261,7 +261,6 @@ func (p *Poller) convertPollResultsToBlockData(results []rpc.GetFullBlockResult) blockData := make([]common.BlockData, 0, len(successfulResults)) for _, result := range successfulResults { blockData = append(blockData, common.BlockData{ - ChainId: p.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff 
--git a/internal/orchestrator/reorg_handler.go b/internal/orchestrator/reorg_handler.go index 889801c..2de8b95 100644 --- a/internal/orchestrator/reorg_handler.go +++ b/internal/orchestrator/reorg_handler.go @@ -274,7 +274,6 @@ func (rh *ReorgHandler) handleReorg(ctx context.Context, reorgedBlockNumbers []* return fmt.Errorf("cannot fix reorg: failed block %s: %w", result.BlockNumber.String(), result.Error) } data = append(data, common.BlockData{ - ChainId: rh.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff --git a/internal/storage/badger.go b/internal/storage/badger.go index 1ffd431..3c1305d 100644 --- a/internal/storage/badger.go +++ b/internal/storage/badger.go @@ -237,7 +237,7 @@ func (bc *BadgerConnector) InsertStagingData(data []common.BlockData) error { return bc.db.Update(func(txn *badger.Txn) error { for _, blockData := range data { - key := blockKey(big.NewInt(int64(blockData.ChainId)), blockData.Block.Number) + key := blockKey(blockData.Block.ChainId, blockData.Block.Number) var buf bytes.Buffer if err := gob.NewEncoder(&buf).Encode(blockData); err != nil { @@ -348,7 +348,7 @@ func (bc *BadgerConnector) DeleteStagingData(data []common.BlockData) error { return bc.db.Update(func(txn *badger.Txn) error { for _, blockData := range data { - key := blockKey(big.NewInt(int64(blockData.ChainId)), blockData.Block.Number) + key := blockKey(blockData.Block.ChainId, blockData.Block.Number) if err := txn.Delete(key); err != nil && err != badger.ErrKeyNotFound { return err } diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go index 9ea97ce..d3319c6 100644 --- a/internal/storage/clickhouse.go +++ b/internal/storage/clickhouse.go @@ -107,6 +107,14 @@ func connectDB(cfg *config.ClickhouseConfig) (clickhouse.Conn, error) { }, MaxOpenConns: cfg.MaxOpenConns, MaxIdleConns: cfg.MaxIdleConns, + Compression: func() *clickhouse.Compression { + c := &clickhouse.Compression{} + if cfg.EnableCompression { + zLog.Debug().Msg("ClickHouse LZ4 compression is enabled") + c.Method = clickhouse.CompressionLZ4 + } + return c + }(), Settings: func() clickhouse.Settings { settings := clickhouse.Settings{ "do_not_merge_across_partitions_select_final": "1", @@ -901,6 +909,19 @@ func (c *ClickHouseConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBl return maxBlockNumber, nil } +func (c *ClickHouseConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blockCount *big.Int, err error) { + tableName := c.getTableName(chainId, "blocks") + query := fmt.Sprintf("SELECT COUNT(DISTINCT block_number) FROM %s.%s WHERE chain_id = ? AND block_number >= ? AND block_number <= ?", c.cfg.Database, tableName) + err = c.conn.QueryRow(context.Background(), query, chainId, startBlock, endBlock).Scan(&blockCount) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + return blockCount, nil +} + func (c *ClickHouseConnector) getMaxBlockNumberConsistent(chainId *big.Int) (maxBlockNumber *big.Int, err error) { tableName := c.getTableName(chainId, "blocks") query := fmt.Sprintf("SELECT block_number FROM %s.%s WHERE chain_id = ? 
ORDER BY block_number DESC LIMIT 1 SETTINGS select_sequential_consistency = 1", c.cfg.Database, tableName) @@ -1976,7 +1997,6 @@ func (c *ClickHouseConnector) GetValidationBlockData(chainId *big.Int, startBloc for i, block := range blocksResult.blocks { blockNum := block.Number.String() blockData[i] = common.BlockData{ - ChainId: chainId.Uint64(), Block: block, Logs: logsResult.logMap[blockNum], Transactions: txsResult.txMap[blockNum], @@ -2156,7 +2176,6 @@ func (c *ClickHouseConnector) GetFullBlockData(chainId *big.Int, blockNumbers [] for i, block := range blocksResult.blocks { blockNum := block.Number.String() blockData[i] = common.BlockData{ - ChainId: chainId.Uint64(), Block: block, Logs: logsResult.logMap[blockNum], Transactions: txsResult.txMap[blockNum], diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 4b962af..dc23d9b 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -130,6 +130,8 @@ type IMainStorage interface { GetMaxBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (maxBlockNumber *big.Int, err error) + GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blockCount *big.Int, err error) + /** * Get block headers ordered from latest to oldest. */ @@ -175,13 +177,13 @@ func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { var conn interface{} var err error - + // Default to "auto" if Type is not specified storageType := cfg.Type if storageType == "" { storageType = "auto" } - + // Handle explicit type selection if storageType != "auto" { switch storageType { diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index 90f3ca3..72dc96f 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -18,9 +18,8 @@ import ( ) type KafkaPublisher struct { - client *kgo.Client - mu sync.RWMutex - chainID string + client *kgo.Client + mu sync.RWMutex } type MessageType string @@ -37,6 +36,7 @@ type PublishableMessagePayload struct { type PublishableMessageBlockData struct { common.BlockData + ChainId uint64 `json:"chain_id"` IsDeleted int8 `json:"is_deleted"` InsertTimestamp time.Time `json:"insert_timestamp"` } @@ -104,8 +104,7 @@ func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { } publisher := &KafkaPublisher{ - client: client, - chainID: chainID, + client: client, } return publisher, nil @@ -116,9 +115,10 @@ func (p *KafkaPublisher) PublishBlockData(blockData []common.BlockData) error { } func (p *KafkaPublisher) PublishReorg(oldData []common.BlockData, newData []common.BlockData) error { + chainId := newData[0].Block.ChainId.Uint64() newHead := uint64(newData[0].Block.Number.Uint64()) // Publish revert the revert to the new head - 1, so that the new updated block data can be re-processed - if err := p.publishBlockRevert(newData[0].ChainId, newHead-1); err != nil { + if err := p.publishBlockRevert(chainId, newHead-1); err != nil { return fmt.Errorf("failed to revert: %v", err) } @@ -233,6 +233,7 @@ func (p *KafkaPublisher) createBlockDataMessage(block common.BlockData, isDelete data := PublishableMessageBlockData{ BlockData: block, + ChainId: block.Block.ChainId.Uint64(), IsDeleted: 0, InsertTimestamp: timestamp, } @@ -251,7 +252,7 @@ func (p *KafkaPublisher) createBlockDataMessage(block common.BlockData, isDelete return nil, 
fmt.Errorf("failed to marshal block data: %v", err) } - return p.createRecord(data.GetType(), block.ChainId, block.Block.Number.Uint64(), timestamp, msgJson) + return p.createRecord(data.GetType(), data.ChainId, block.Block.Number.Uint64(), timestamp, msgJson) } func (p *KafkaPublisher) createBlockRevertMessage(chainId uint64, blockNumber uint64) (*kgo.Record, error) { diff --git a/internal/storage/kafka_redis.go b/internal/storage/kafka_redis.go index 05d294c..030fa21 100644 --- a/internal/storage/kafka_redis.go +++ b/internal/storage/kafka_redis.go @@ -212,26 +212,11 @@ func (kr *KafkaRedisConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, er } func (kr *KafkaRedisConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - // Get the last published block number - lastPublished, err := kr.GetLastPublishedBlockNumber(chainId) - if err != nil { - return nil, err - } - - // Check if it's within the range - if lastPublished.Cmp(startBlock) >= 0 && lastPublished.Cmp(endBlock) <= 0 { - return lastPublished, nil - } - - // If outside range, return appropriate boundary - if lastPublished.Cmp(endBlock) > 0 { - return endBlock, nil - } - if lastPublished.Cmp(startBlock) < 0 { - return big.NewInt(0), nil - } + return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") +} - return lastPublished, nil +func (kr *KafkaRedisConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { diff --git a/internal/storage/s3.go b/internal/storage/s3.go index 8a75c65..e3e4038 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -48,7 +48,7 @@ type DataFormatter interface { // ParquetBlockData represents the complete block data in Parquet format type ParquetBlockData struct { - ChainID uint64 `parquet:"chain_id"` + ChainId uint64 `parquet:"chain_id"` BlockNumber uint64 `parquet:"block_number"` // Numeric for efficient min/max queries BlockHash string `parquet:"block_hash"` BlockTimestamp int64 `parquet:"block_timestamp"` @@ -235,7 +235,8 @@ func (s *S3Connector) flushBuffer() error { // Group blocks by chain to generate appropriate keys chainGroups := make(map[uint64][]common.BlockData) for _, block := range data { - chainGroups[block.ChainId] = append(chainGroups[block.ChainId], block) + chainId := block.Block.ChainId.Uint64() + chainGroups[chainId] = append(chainGroups[chainId], block) } for _, blocks := range chainGroups { @@ -295,8 +296,8 @@ func (s *S3Connector) Flush() error { select { case <-s.flushDoneCh: return nil - case <-time.After(30 * time.Second): - return fmt.Errorf("flush timeout after 30 seconds") + case <-time.After(60 * time.Second): + return fmt.Errorf("flush timeout after 60 seconds") } default: // Flush channel is full, likely a flush is already in progress @@ -304,8 +305,8 @@ func (s *S3Connector) Flush() error { select { case <-s.flushDoneCh: return nil - case <-time.After(30 * time.Second): - return fmt.Errorf("flush timeout after 30 seconds") + case <-time.After(60 * time.Second): + return fmt.Errorf("flush timeout after 60 seconds") } } } @@ -331,7 +332,7 @@ func (s *S3Connector) uploadBatch(data []common.BlockData) error { 
return nil } - chainID := data[0].ChainId + chainId := data[0].Block.ChainId.Uint64() startBlock := data[0].Block.Number endBlock := data[len(data)-1].Block.Number // Use the first block's timestamp for year partitioning @@ -344,7 +345,7 @@ func (s *S3Connector) uploadBatch(data []common.BlockData) error { } // Generate S3 key with chain_id/year partitioning based on block timestamp - key := s.generateS3Key(chainID, startBlock, endBlock, blockTimestamp) + key := s.generateS3Key(chainId, startBlock, endBlock, blockTimestamp) // Upload to S3 ctx := context.Background() @@ -354,7 +355,7 @@ func (s *S3Connector) uploadBatch(data []common.BlockData) error { Body: bytes.NewReader(formattedData), ContentType: aws.String(s.formatter.GetContentType()), Metadata: map[string]string{ - "chain_id": fmt.Sprintf("%d", chainID), + "chain_id": fmt.Sprintf("%d", chainId), "start_block": startBlock.String(), "end_block": endBlock.String(), "block_count": fmt.Sprintf("%d", len(data)), @@ -369,7 +370,7 @@ func (s *S3Connector) uploadBatch(data []common.BlockData) error { } log.Info(). - Uint64("chain_id", chainID). + Uint64("chain_id", chainId). Str("min_block", startBlock.String()). Str("max_block", endBlock.String()). Int("block_count", len(data)). @@ -458,7 +459,7 @@ func (f *ParquetFormatter) FormatBlockData(data []common.BlockData) ([]byte, err } pd := ParquetBlockData{ - ChainID: d.ChainId, + ChainId: d.Block.ChainId.Uint64(), BlockNumber: blockNum, BlockHash: d.Block.Hash, BlockTimestamp: d.Block.Timestamp.Unix(), @@ -571,7 +572,7 @@ func (s *S3Connector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { // First check the buffer for blocks from this chain s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() && block.Block.Number.Cmp(maxBlock) > 0 { + if block.Block.ChainId.Cmp(chainId) == 0 && block.Block.Number.Cmp(maxBlock) > 0 { maxBlock = new(big.Int).Set(block.Block.Number) } } @@ -612,14 +613,99 @@ func (s *S3Connector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { func (s *S3Connector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { maxBlock := big.NewInt(0) + foundAny := false // First check the buffer for blocks in this range s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() { + if block.Block.ChainId.Cmp(chainId) == 0 { blockNum := block.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 && blockNum.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(blockNum) + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + if !foundAny || blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) + foundAny = true + } + } + } + } + s.bufferMu.Unlock() + + // Then check S3 files + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil { + continue + } + fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) + if fileStart == nil || fileEnd == nil { + continue + } + + // Check if this file overlaps with our range + if 
fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { + // The maximum block in this file that's within our range + maxInFile := new(big.Int).Set(fileEnd) + if maxInFile.Cmp(endBlock) > 0 { + maxInFile = endBlock + } + + if !foundAny || maxInFile.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(maxInFile) + foundAny = true + } + } + } + } + + if !foundAny { + return big.NewInt(0), nil + } + + return maxBlock, nil +} + +func (s *S3Connector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + minBlock := big.NewInt(0) + maxBlock := big.NewInt(0) + count := big.NewInt(0) + foundAny := false + + // First check the buffer for blocks in this range + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.Block.ChainId.Cmp(chainId) == 0 { + blockNum := block.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + count.Add(count, big.NewInt(1)) + + if !foundAny { + minBlock = new(big.Int).Set(blockNum) + maxBlock = new(big.Int).Set(blockNum) + foundAny = true + } else { + if blockNum.Cmp(minBlock) < 0 { + minBlock = new(big.Int).Set(blockNum) + } + if blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) + } + } } } } @@ -654,19 +740,40 @@ func (s *S3Connector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big // Check if this file overlaps with our range if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { - // File overlaps with our range + // Calculate the effective range within our query bounds + effectiveStart := new(big.Int).Set(fileStart) + if effectiveStart.Cmp(startBlock) < 0 { + effectiveStart = startBlock + } effectiveEnd := new(big.Int).Set(fileEnd) if effectiveEnd.Cmp(endBlock) > 0 { effectiveEnd = endBlock } - if effectiveEnd.Cmp(maxBlock) > 0 { - maxBlock = effectiveEnd + + // Update min/max blocks + if !foundAny { + minBlock = new(big.Int).Set(effectiveStart) + maxBlock = new(big.Int).Set(effectiveEnd) + foundAny = true + } else { + if effectiveStart.Cmp(minBlock) < 0 { + minBlock = new(big.Int).Set(effectiveStart) + } + if effectiveEnd.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(effectiveEnd) + } } + + // Add the count of blocks in this file's overlapping range + // Note: This assumes contiguous blocks in the file + blocksInRange := new(big.Int).Sub(effectiveEnd, effectiveStart) + blocksInRange.Add(blocksInRange, big.NewInt(1)) // Add 1 because range is inclusive + count.Add(count, blocksInRange) } } } - return maxBlock, nil + return count, nil } func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { @@ -675,7 +782,7 @@ func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, // First get headers from buffer s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() { + if block.Block.ChainId.Cmp(chainId) == 0 { // Check if block is in range (if from is specified) if from != nil && block.Block.Number.Cmp(from) > 0 { continue @@ -738,7 +845,7 @@ func (s *S3Connector) GetValidationBlockData(chainId *big.Int, startBlock *big.I // First check buffer for blocks in range s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() { + if block.Block.ChainId.Cmp(chainId) == 0 { blockNum := block.Block.Number if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { blockData = append(blockData, block) @@ -777,7 +884,7 @@ func (s *S3Connector) FindMissingBlockNumbers(chainId *big.Int, startBlock 
*big. // First add blocks from buffer s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() { + if block.Block.ChainId.Cmp(chainId) == 0 { blockNum := block.Block.Number if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { blockSet[blockNum.String()] = true @@ -833,7 +940,7 @@ func (s *S3Connector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int // First check buffer for requested blocks s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() { + if block.Block.ChainId.Cmp(chainId) == 0 { if blockNumMap[block.Block.Number.String()] { result = append(result, block) // Remove from map so we don't fetch it from S3 @@ -1039,7 +1146,6 @@ func (s *S3Connector) downloadAndParseFile(key string, chainId *big.Int, startBl } blockData = append(blockData, common.BlockData{ - ChainId: pd.ChainID, Block: block, Transactions: transactions, Logs: logs, diff --git a/test/mocks/MockIMainStorage.go b/test/mocks/MockIMainStorage.go index 679345c..a77c398 100644 --- a/test/mocks/MockIMainStorage.go +++ b/test/mocks/MockIMainStorage.go @@ -462,66 +462,6 @@ func (_c *MockIMainStorage_GetMaxBlockNumber_Call) RunAndReturn(run func(*big.In return _c } -// GetMaxBlockNumberInRange provides a mock function with given fields: chainId, startBlock, endBlock -func (_m *MockIMainStorage) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - ret := _m.Called(chainId, startBlock, endBlock) - - if len(ret) == 0 { - panic("no return value specified for GetMaxBlockNumberInRange") - } - - var r0 *big.Int - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) (*big.Int, error)); ok { - return rf(chainId, startBlock, endBlock) - } - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) *big.Int); ok { - r0 = rf(chainId, startBlock, endBlock) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*big.Int) - } - } - - if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { - r1 = rf(chainId, startBlock, endBlock) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetMaxBlockNumberInRange_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetMaxBlockNumberInRange' -type MockIMainStorage_GetMaxBlockNumberInRange_Call struct { - *mock.Call -} - -// GetMaxBlockNumberInRange is a helper method to define mock.On call -// - chainId *big.Int -// - startBlock *big.Int -// - endBlock *big.Int -func (_e *MockIMainStorage_Expecter) GetMaxBlockNumberInRange(chainId interface{}, startBlock interface{}, endBlock interface{}) *MockIMainStorage_GetMaxBlockNumberInRange_Call { - return &MockIMainStorage_GetMaxBlockNumberInRange_Call{Call: _e.mock.On("GetMaxBlockNumberInRange", chainId, startBlock, endBlock)} -} - -func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) Run(run func(chainId *big.Int, startBlock *big.Int, endBlock *big.Int)) *MockIMainStorage_GetMaxBlockNumberInRange_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) - }) - return _c -} - -func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) Return(maxBlockNumber *big.Int, err error) *MockIMainStorage_GetMaxBlockNumberInRange_Call { - _c.Call.Return(maxBlockNumber, err) - return _c -} - -func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) (*big.Int, error)) 
*MockIMainStorage_GetMaxBlockNumberInRange_Call { - _c.Call.Return(run) - return _c -} - // GetTokenBalances provides a mock function with given fields: qf, fields func (_m *MockIMainStorage) GetTokenBalances(qf storage.BalancesQueryFilter, fields ...string) (storage.QueryResult[common.TokenBalance], error) { _va := make([]interface{}, len(fields)) From bddbf54ebaa2af0ae69d084229cb4b1aeacbea73 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 08:08:58 +0000 Subject: [PATCH 23/43] Remove RPC batch config in migrate --- cmd/migrate_valid.go | 12 +++--------- cmd/root.go | 6 ++---- configs/config.go | 9 ++++----- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index d8d34db..28088ed 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -30,8 +30,7 @@ var ( ) const ( - DEFAULT_RPC_BATCH_SIZE = 100 - DEFAULT_BATCH_SIZE = 2000 + DEFAULT_BATCH_SIZE = 2000 ) func RunValidationMigration(cmd *cobra.Command, args []string) { @@ -182,12 +181,8 @@ type Migrator struct { func NewMigrator() *Migrator { batchSize := DEFAULT_BATCH_SIZE - if config.Cfg.Migrator.StorageBatchSize > 0 { - batchSize = int(config.Cfg.Migrator.StorageBatchSize) - } - rpcBatchSize := DEFAULT_RPC_BATCH_SIZE - if config.Cfg.Migrator.RpcBatchSize > 0 { - rpcBatchSize = int(config.Cfg.Migrator.RpcBatchSize) + if config.Cfg.Migrator.BatchSize > 0 { + batchSize = int(config.Cfg.Migrator.BatchSize) } rpcClient, err := rpc.Initialize() @@ -218,7 +213,6 @@ func NewMigrator() *Migrator { return &Migrator{ migrationBatchSize: batchSize, - rpcBatchSize: rpcBatchSize, rpcClient: rpcClient, source: sourceConnector, destination: destinationConnector, diff --git a/cmd/root.go b/cmd/root.go index 479d4f4..a5ac7c9 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -223,8 +223,7 @@ func init() { rootCmd.PersistentFlags().Int64("migrator-destination-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for migrator destination") rootCmd.PersistentFlags().Int("migrator-destination-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for migrator destination") rootCmd.PersistentFlags().Int("migrator-destination-s3-maxBlocksPerFile", 0, "S3 max blocks per file for migrator destination") - rootCmd.PersistentFlags().Uint("migrator-storageBatchSize", 2000, "Batch size for storage operations in migrator") - rootCmd.PersistentFlags().Uint("migrator-rpcBatchSize", 100, "Batch size for RPC operations in migrator") + rootCmd.PersistentFlags().Uint("migrator-batchSize", 2000, "Batch size for storage operations in migrator") rootCmd.PersistentFlags().Uint("migrator-startBlock", 0, "Start block for migration") rootCmd.PersistentFlags().Uint("migrator-endBlock", 0, "End block for migration") @@ -418,8 +417,7 @@ func init() { viper.BindPFlag("migrator.destination.s3.maxBlocksPerFile", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-maxBlocksPerFile")) viper.BindPFlag("migrator.startBlock", rootCmd.PersistentFlags().Lookup("migrator-startBlock")) viper.BindPFlag("migrator.endBlock", rootCmd.PersistentFlags().Lookup("migrator-endBlock")) - viper.BindPFlag("migrator.storageBatchSize", rootCmd.PersistentFlags().Lookup("migrator-storageBatchSize")) - viper.BindPFlag("migrator.rpcBatchSize", rootCmd.PersistentFlags().Lookup("migrator-rpcBatchSize")) + viper.BindPFlag("migrator.batchSize", rootCmd.PersistentFlags().Lookup("migrator-batchSize")) rootCmd.AddCommand(orchestratorCmd) rootCmd.AddCommand(apiCmd) 
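Patch 23 above collapses storageBatchSize and rpcBatchSize into a single migrator.batchSize, which the migration loop later uses to walk the block range in fixed-size chunks. A minimal sketch of that chunked walk over an inclusive [start, end] range with math/big (the function and callback names are illustrative, not the migrator's API):

package main

import (
    "fmt"
    "math/big"
)

// walkRange invokes fn for each [from, to] batch of at most batchSize blocks,
// covering the inclusive range [start, end].
func walkRange(start, end *big.Int, batchSize int64, fn func(from, to *big.Int) error) error {
    for cur := new(big.Int).Set(start); cur.Cmp(end) <= 0; {
        batchEnd := new(big.Int).Add(cur, big.NewInt(batchSize-1))
        if batchEnd.Cmp(end) > 0 {
            batchEnd = new(big.Int).Set(end)
        }
        if err := fn(cur, batchEnd); err != nil {
            return err
        }
        cur = new(big.Int).Add(batchEnd, big.NewInt(1))
    }
    return nil
}

func main() {
    // With batchSize 2000 this visits 0-1999, 2000-3999 and 4000-4999.
    _ = walkRange(big.NewInt(0), big.NewInt(4999), 2000, func(from, to *big.Int) error {
        fmt.Printf("migrating blocks %s to %s\n", from, to)
        return nil
    })
}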
rootCmd.AddCommand(validateAndFixCmd) diff --git a/configs/config.go b/configs/config.go index 226fa9d..f92a3e2 100644 --- a/configs/config.go +++ b/configs/config.go @@ -233,11 +233,10 @@ type ValidationConfig struct { } type MigratorConfig struct { - Destination StorageConnectionConfig `mapstructure:"destination"` - StartBlock uint `mapstructure:"startBlock"` - EndBlock uint `mapstructure:"endBlock"` - StorageBatchSize uint `mapstructure:"storageBatchSize"` - RpcBatchSize uint `mapstructure:"rpcBatchSize"` + Destination StorageConnectionConfig `mapstructure:"destination"` + StartBlock uint `mapstructure:"startBlock"` + EndBlock uint `mapstructure:"endBlock"` + BatchSize uint `mapstructure:"batchSize"` } type Config struct { From 43dad9cb4a1ba5f7084dfececfbb5d91ab6de8b6 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 08:12:11 +0000 Subject: [PATCH 24/43] Cleanup --- cmd/migrate_valid.go | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index 28088ed..f06a43c 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -67,7 +67,7 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { default: } - endBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.migrationBatchSize-1))) + endBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.batchSize-1))) if endBlock.Cmp(rangeEndBlock) > 0 { endBlock = rangeEndBlock } @@ -170,13 +170,12 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { } type Migrator struct { - rpcClient rpc.IRPCClient - worker *worker.Worker - source storage.IStorage - destination storage.IMainStorage - validator *orchestrator.Validator - migrationBatchSize int - rpcBatchSize int + rpcClient rpc.IRPCClient + worker *worker.Worker + source storage.IStorage + destination storage.IMainStorage + validator *orchestrator.Validator + batchSize int } func NewMigrator() *Migrator { @@ -212,12 +211,12 @@ func NewMigrator() *Migrator { } return &Migrator{ - migrationBatchSize: batchSize, - rpcClient: rpcClient, - source: sourceConnector, - destination: destinationConnector, - validator: validator, - worker: worker.NewWorker(rpcClient), + batchSize: batchSize, + rpcClient: rpcClient, + source: sourceConnector, + destination: destinationConnector, + validator: validator, + worker: worker.NewWorker(rpcClient), } } From da31422e11e08c4802355bef7fb9ad1cd11abc06 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 09:06:54 +0000 Subject: [PATCH 25/43] Add from_address, to_address to schema --- .../0010_clickhouse_create_address_transactions.sql | 4 ++++ .../0011_clickhouse_create_address_transactions_mv.sql | 2 ++ .../clickhouse/0012_clickhouse_create_address_transfers.sql | 6 +++++- .../0013_clickhouse_create_address_transfers_mv.sql | 2 ++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql index 11179d7..fa9f55a 100644 --- a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql +++ b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql @@ -8,6 +8,8 @@ CREATE TABLE IF NOT EXISTS address_transactions ( `transaction_index` UInt64, `address` FixedString(42), `address_type` Enum8('from' = 1, 'to' = 2), + `from_address` FixedString(42), + 
`to_address` FixedString(42), `value` UInt256, `gas` UInt64, `gas_price` UInt256, @@ -37,6 +39,8 @@ CREATE TABLE IF NOT EXISTS address_transactions ( INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, PROJECTION address_total_count_projection ( diff --git a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql index 48c4cb2..c5b5ac7 100644 --- a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql +++ b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql @@ -11,6 +11,8 @@ SELECT transaction_index, address_tuple.1 AS address, address_tuple.2 AS address_type, + from_address, + to_address, value, gas, gas_price, diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql index 4b9b864..3803323 100644 --- a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql +++ b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql @@ -5,6 +5,8 @@ CREATE TABLE IF NOT EXISTS address_transfers ( `token_id` UInt256, `address` FixedString(42), `address_type` Enum8('from' = 1, 'to' = 2), + `from_address` FixedString(42), + `to_address` FixedString(42), `block_number` UInt256, `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), `transaction_hash` FixedString(66), @@ -18,7 +20,9 @@ CREATE TABLE IF NOT EXISTS address_transfers ( INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, - + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, + PROJECTION address_state_projection ( SELECT chain_id, diff --git a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql index 9256143..0a7d2cc 100644 --- a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql +++ b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql @@ -8,6 +8,8 @@ SELECT token_id, address_tuple.1 AS address, address_tuple.2 AS address_type, + from_address, + to_address, block_number, block_timestamp, transaction_hash, From b698c18076abdacd9134b95d40c742c473d9ba01 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 17:27:01 +0000 Subject: [PATCH 26/43] Retry with RPC batch size reduction --- cmd/migrate_valid.go | 371 ++++++++++++++++++++++++++++------------ cmd/root.go | 4 +- configs/config.go | 2 +- internal/rpc/batcher.go | 116 +++++++++++++ internal/rpc/rpc.go | 8 +- internal/storage/s3.go | 24 ++- 6 files changed, 400 insertions(+), 125 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index f06a43c..43f2988 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -2,9 +2,11 @@ package cmd import ( "context" + "fmt" "math/big" "os" "os/signal" + "sync" "syscall" "time" @@ -31,6 +33,7 @@ var ( const ( DEFAULT_BATCH_SIZE = 2000 + DEFAULT_WORKERS = 1 ) func RunValidationMigration(cmd *cobra.Command, args []string) { @@ -44,129 +47,235 @@ func RunValidationMigration(cmd 
*cobra.Command, args []string) { migrator := NewMigrator() defer migrator.Close() - rangeStartBlock, rangeEndBlock := migrator.DetermineMigrationBoundaries() + targetEndBlock := big.NewInt(int64(config.Cfg.Migrator.EndBlock)) + targetStartBlock := big.NewInt(int64(config.Cfg.Migrator.StartBlock)) + rangeStartBlock, rangeEndBlock := migrator.DetermineMigrationBoundaries(targetStartBlock, targetEndBlock) log.Info().Msgf("Migrating blocks from %s to %s (both ends inclusive)", rangeStartBlock.String(), rangeEndBlock.String()) - // Run migration in a goroutine - done := make(chan struct{}) - var migrationErr error + // Calculate work distribution for workers + numWorkers := DEFAULT_WORKERS + workRanges := divideBlockRange(rangeStartBlock, rangeEndBlock, numWorkers) + log.Info().Msgf("Starting %d workers to process migration", len(workRanges)) + + // Create error channel and wait group + errChan := make(chan error, numWorkers) + var wg sync.WaitGroup + + // Start workers + for workerID, workRange := range workRanges { + wg.Add(1) + go func(id int, startBlock, endBlock *big.Int) { + defer wg.Done() + + // Only check boundaries per-worker if we have multiple workers + // For single worker, we already determined boundaries globally + var actualStart, actualEnd *big.Int + if numWorkers > 1 { + // Multiple workers: each needs to check their specific range + actualStart, actualEnd = migrator.DetermineMigrationBoundariesForRange(startBlock, endBlock) + if actualStart == nil || actualEnd == nil { + log.Info().Msgf("Worker %d: Range %s to %s already fully migrated", id, startBlock.String(), endBlock.String()) + return + } + log.Info().Msgf("Worker %d starting: blocks %s to %s (adjusted from %s to %s)", + id, actualStart.String(), actualEnd.String(), startBlock.String(), endBlock.String()) + } else { + // Single worker: use the already-determined boundaries + actualStart, actualEnd = startBlock, endBlock + log.Info().Msgf("Worker %d starting: blocks %s to %s", id, actualStart.String(), actualEnd.String()) + } + + if err := processBlockRange(ctx, migrator, id, actualStart, actualEnd); err != nil { + errChan <- err + log.Error().Err(err).Msgf("Worker %d failed", id) + return + } + log.Info().Msgf("Worker %d completed successfully", id) + }(workerID, workRange.start, workRange.end) + } + + // Monitor for completion or interruption + done := make(chan struct{}) go func() { - defer close(done) + wg.Wait() + close(done) + }() - // 2. Start going in loops - for currentBlock := rangeStartBlock; currentBlock.Cmp(rangeEndBlock) <= 0; { - batchStartTime := time.Now() + // Wait for either completion, error, or interrupt signal + select { + case <-done: + log.Info().Msg("All workers completed successfully") + // 3. 
then finally copy partitions from target table to main tables + log.Info().Msg("Migration completed successfully") + case err := <-errChan: + log.Error().Err(err).Msg("Migration failed due to worker error") + cancel() + wg.Wait() + log.Fatal().Msg("Migration stopped due to error") + case sig := <-sigChan: + log.Info().Msgf("Received signal: %s, initiating graceful shutdown...", sig) + cancel() + wg.Wait() + log.Info().Msg("Migration stopped gracefully") + } +} - // Check for cancellation - select { - case <-ctx.Done(): - log.Info().Msgf("Migration interrupted at block %s", currentBlock.String()) - return - default: - } +type blockRange struct { + start *big.Int + end *big.Int +} - endBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.batchSize-1))) - if endBlock.Cmp(rangeEndBlock) > 0 { - endBlock = rangeEndBlock - } +func divideBlockRange(startBlock, endBlock *big.Int, numWorkers int) []blockRange { + ranges := make([]blockRange, 0, numWorkers) - blockNumbers := generateBlockNumbersForRange(currentBlock, endBlock) - log.Info().Msgf("Processing blocks %s to %s", blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) - - // Fetch valid blocks from source - fetchStartTime := time.Now() - validBlocksForRange, err := migrator.GetValidBlocksForRange(blockNumbers) - fetchDuration := time.Since(fetchStartTime) - if err != nil { - // If we got an error fetching valid blocks, we'll continue - log.Error().Err(err).Msg("Failed to get valid blocks for range") - time.Sleep(3 * time.Second) - continue - } - log.Debug().Dur("duration", fetchDuration).Int("blocks_fetched", len(validBlocksForRange)).Msg("Fetched valid blocks from source") + // Calculate total blocks + totalBlocks := new(big.Int).Sub(endBlock, startBlock) + totalBlocks.Add(totalBlocks, big.NewInt(1)) // inclusive range - // Build map of fetched blocks - mapBuildStartTime := time.Now() - blocksToInsertMap := make(map[string]common.BlockData) - for _, blockData := range validBlocksForRange { - blocksToInsertMap[blockData.Block.Number.String()] = blockData - } + // Calculate blocks per worker + blocksPerWorker := new(big.Int).Div(totalBlocks, big.NewInt(int64(numWorkers))) + remainder := new(big.Int).Mod(totalBlocks, big.NewInt(int64(numWorkers))) - // Loop over block numbers to find missing blocks - missingBlocks := make([]*big.Int, 0) - for _, blockNum := range blockNumbers { - if _, exists := blocksToInsertMap[blockNum.String()]; !exists { - missingBlocks = append(missingBlocks, blockNum) - } - } - mapBuildDuration := time.Since(mapBuildStartTime) - log.Debug().Dur("duration", mapBuildDuration).Int("missing_blocks", len(missingBlocks)).Msg("Identified missing blocks") - - // Fetch missing blocks from RPC - if len(missingBlocks) > 0 { - rpcFetchStartTime := time.Now() - validMissingBlocks := migrator.GetValidBlocksFromRPC(missingBlocks) - rpcFetchDuration := time.Since(rpcFetchStartTime) - log.Debug().Dur("duration", rpcFetchDuration).Int("blocks_fetched", len(validMissingBlocks)).Msg("Fetched missing blocks from RPC") - - for _, blockData := range validMissingBlocks { - if blockData.Block.ChainId.Sign() == 0 { - log.Fatal().Msgf("Block %s has chain ID 0, %+v", blockData.Block.Number.String(), blockData.Block) - } - blocksToInsertMap[blockData.Block.Number.String()] = blockData - } - } + currentStart := new(big.Int).Set(startBlock) - // Prepare blocks for insertion - prepStartTime := time.Now() - blocksToInsert := make([]common.BlockData, 0, len(blocksToInsertMap)) - for _, blockData := range blocksToInsertMap { 
- blocksToInsert = append(blocksToInsert, blockData) - } - prepDuration := time.Since(prepStartTime) - log.Debug().Dur("duration", prepDuration).Int("blocks_to_insert", len(blocksToInsert)).Msg("Prepared blocks for insertion") - - // Insert blocks to destination - insertStartTime := time.Now() - err = migrator.destination.InsertBlockData(blocksToInsert) - insertDuration := time.Since(insertStartTime) - if err != nil { - migrationErr = err - log.Error().Err(err).Dur("duration", insertDuration).Msg("Failed to insert blocks to target storage") - time.Sleep(3 * time.Second) - continue - } + for i := 0; i < numWorkers; i++ { + // Calculate end block for this worker + workerBlockCount := new(big.Int).Set(blocksPerWorker) - batchDuration := time.Since(batchStartTime) - log.Info(). - Dur("total_duration", batchDuration). - Dur("fetch_duration", fetchDuration). - Dur("insert_duration", insertDuration). - Int("blocks_processed", len(blocksToInsert)). - Msg("Batch processed successfully") + // Distribute remainder blocks to first workers + if big.NewInt(int64(i)).Cmp(remainder) < 0 { + workerBlockCount.Add(workerBlockCount, big.NewInt(1)) + } - currentBlock = new(big.Int).Add(endBlock, big.NewInt(1)) + // Skip if no blocks for this worker + if workerBlockCount.Sign() == 0 { + continue } - // 3. then finally copy partitions from target table to main tables - log.Info().Msg("Migration completed successfully") - }() + currentEnd := new(big.Int).Add(currentStart, workerBlockCount) + currentEnd.Sub(currentEnd, big.NewInt(1)) // inclusive range - // Wait for either completion or interrupt signal - select { - case <-done: - if migrationErr != nil { - log.Fatal().Err(migrationErr).Msg("Migration failed") + // Ensure we don't exceed the end block + if currentEnd.Cmp(endBlock) > 0 { + currentEnd = new(big.Int).Set(endBlock) } - log.Info().Msg("Done") - case sig := <-sigChan: - log.Info().Msgf("Received signal: %s, initiating graceful shutdown...", sig) - cancel() - <-done - log.Info().Msg("Migration stopped gracefully") + + ranges = append(ranges, blockRange{ + start: new(big.Int).Set(currentStart), + end: new(big.Int).Set(currentEnd), + }) + + // Move to next range + currentStart = new(big.Int).Add(currentEnd, big.NewInt(1)) + + // Stop if we've covered all blocks + if currentStart.Cmp(endBlock) > 0 { + break + } + } + + return ranges +} + +func processBlockRange(ctx context.Context, migrator *Migrator, workerID int, startBlock, endBlock *big.Int) error { + currentBlock := new(big.Int).Set(startBlock) + + for currentBlock.Cmp(endBlock) <= 0 { + batchStartTime := time.Now() + + // Check for cancellation + select { + case <-ctx.Done(): + log.Info().Msgf("Worker %d: Migration interrupted at block %s", workerID, currentBlock.String()) + return nil + default: + } + + batchEndBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.batchSize-1))) + if batchEndBlock.Cmp(endBlock) > 0 { + batchEndBlock = endBlock + } + + blockNumbers := generateBlockNumbersForRange(currentBlock, batchEndBlock) + log.Info().Msgf("Worker %d: Processing blocks %s to %s", workerID, blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) + + // Fetch valid blocks from source + fetchStartTime := time.Now() + validBlocksForRange, err := migrator.GetValidBlocksForRange(blockNumbers) + fetchDuration := time.Since(fetchStartTime) + if err != nil { + // If we got an error fetching valid blocks, we'll continue + log.Error().Err(err).Msgf("Worker %d: Failed to get valid blocks for range", workerID) + time.Sleep(3 * 
time.Second) + continue + } + log.Debug().Dur("duration", fetchDuration).Int("blocks_fetched", len(validBlocksForRange)).Msgf("Worker %d: Fetched valid blocks from source", workerID) + + // Build map of fetched blocks + mapBuildStartTime := time.Now() + blocksToInsertMap := make(map[string]common.BlockData) + for _, blockData := range validBlocksForRange { + blocksToInsertMap[blockData.Block.Number.String()] = blockData + } + + // Loop over block numbers to find missing blocks + missingBlocks := make([]*big.Int, 0) + for _, blockNum := range blockNumbers { + if _, exists := blocksToInsertMap[blockNum.String()]; !exists { + missingBlocks = append(missingBlocks, blockNum) + } + } + mapBuildDuration := time.Since(mapBuildStartTime) + log.Debug().Dur("duration", mapBuildDuration).Int("missing_blocks", len(missingBlocks)).Msgf("Worker %d: Identified missing blocks", workerID) + + // Fetch missing blocks from RPC + if len(missingBlocks) > 0 { + rpcFetchStartTime := time.Now() + validMissingBlocks := migrator.GetValidBlocksFromRPC(missingBlocks) + rpcFetchDuration := time.Since(rpcFetchStartTime) + log.Debug().Dur("duration", rpcFetchDuration).Int("blocks_fetched", len(validMissingBlocks)).Msgf("Worker %d: Fetched missing blocks from RPC", workerID) + + for _, blockData := range validMissingBlocks { + if blockData.Block.ChainId.Sign() == 0 { + return fmt.Errorf("worker %d: block %s has chain ID 0", workerID, blockData.Block.Number.String()) + } + blocksToInsertMap[blockData.Block.Number.String()] = blockData + } + } + + // Prepare blocks for insertion + prepStartTime := time.Now() + blocksToInsert := make([]common.BlockData, 0, len(blocksToInsertMap)) + for _, blockData := range blocksToInsertMap { + blocksToInsert = append(blocksToInsert, blockData) + } + prepDuration := time.Since(prepStartTime) + log.Debug().Dur("duration", prepDuration).Int("blocks_to_insert", len(blocksToInsert)).Msgf("Worker %d: Prepared blocks for insertion", workerID) + + // Insert blocks to destination + insertStartTime := time.Now() + err = migrator.destination.InsertBlockData(blocksToInsert) + insertDuration := time.Since(insertStartTime) + if err != nil { + log.Error().Err(err).Dur("duration", insertDuration).Msgf("Worker %d: Failed to insert blocks to target storage", workerID) + time.Sleep(3 * time.Second) + continue + } + + batchDuration := time.Since(batchStartTime) + log.Info(). + Dur("total_duration", batchDuration). + Dur("fetch_duration", fetchDuration). + Dur("insert_duration", insertDuration). + Int("blocks_processed", len(blocksToInsert)). 
+ Msgf("Worker %d: Batch processed successfully", workerID) + + currentBlock = new(big.Int).Add(batchEndBlock, big.NewInt(1)) } + + return nil } type Migrator struct { @@ -232,7 +341,7 @@ func (m *Migrator) Close() { } } -func (m *Migrator) DetermineMigrationBoundaries() (*big.Int, *big.Int) { +func (m *Migrator) DetermineMigrationBoundaries(targetStartBlock, targetEndBlock *big.Int) (*big.Int, *big.Int) { // get latest block from main storage latestBlockStored, err := m.source.MainStorage.GetMaxBlockNumber(m.rpcClient.GetChainID()) if err != nil { @@ -241,12 +350,11 @@ func (m *Migrator) DetermineMigrationBoundaries() (*big.Int, *big.Int) { log.Info().Msgf("Latest block in main storage: %d", latestBlockStored) endBlock := latestBlockStored - endBlockEnv := big.NewInt(int64(config.Cfg.Migrator.EndBlock)) - if endBlockEnv.Sign() > 0 && endBlockEnv.Cmp(latestBlockStored) < 0 { - endBlock = endBlockEnv + if targetEndBlock.Sign() > 0 && targetEndBlock.Cmp(latestBlockStored) < 0 { + endBlock = targetEndBlock } - startBlock := big.NewInt(int64(config.Cfg.Migrator.StartBlock)) // default start block is 0 + startBlock := targetStartBlock blockCount, err := m.destination.GetBlockCount(m.rpcClient.GetChainID(), startBlock, endBlock) if err != nil { @@ -275,6 +383,51 @@ func (m *Migrator) DetermineMigrationBoundaries() (*big.Int, *big.Int) { return startBlock, endBlock } +// DetermineMigrationBoundariesForRange determines the actual migration boundaries for a worker's specific range +// Returns nil, nil if the range is already fully migrated +// Fails fatally if it cannot determine boundaries (to ensure data correctness) +func (m *Migrator) DetermineMigrationBoundariesForRange(rangeStart, rangeEnd *big.Int) (*big.Int, *big.Int) { + // Check how many blocks we have in this specific range + blockCount, err := m.destination.GetBlockCount(m.rpcClient.GetChainID(), rangeStart, rangeEnd) + if err != nil { + log.Fatal().Err(err).Msgf("Worker failed to get block count for range %s to %s", rangeStart.String(), rangeEnd.String()) + return nil, nil + } + + expectedCount := new(big.Int).Sub(rangeEnd, rangeStart) + expectedCount = expectedCount.Add(expectedCount, big.NewInt(1)) + + // If all blocks are already migrated, return nil + if expectedCount.Cmp(blockCount) == 0 { + log.Debug().Msgf("Range %s to %s already fully migrated (%s blocks)", rangeStart.String(), rangeEnd.String(), blockCount.String()) + return nil, nil + } + + // Find the actual starting point by checking what blocks we already have + maxStoredBlock, err := m.destination.GetMaxBlockNumberInRange(m.rpcClient.GetChainID(), rangeStart, rangeEnd) + if err != nil { + log.Fatal().Err(err).Msgf("Worker failed to get max block in range %s to %s", rangeStart.String(), rangeEnd.String()) + return nil, nil + } + + actualStart := rangeStart + if maxStoredBlock != nil && maxStoredBlock.Cmp(rangeStart) >= 0 { + // We have some blocks already, start from the next one + actualStart = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) + + // If the new start is beyond our range end, the range is fully migrated + if actualStart.Cmp(rangeEnd) > 0 { + log.Debug().Msgf("Range %s to %s already fully migrated (max block: %s)", rangeStart.String(), rangeEnd.String(), maxStoredBlock.String()) + return nil, nil + } + } + + log.Debug().Msgf("Range %s-%s: found %s blocks, max stored: %v, will migrate from %s", + rangeStart.String(), rangeEnd.String(), blockCount.String(), maxStoredBlock, actualStart.String()) + + return actualStart, rangeEnd +} + func (m *Migrator) 
FetchBlocksFromRPC(blockNumbers []*big.Int) ([]common.BlockData, error) { allBlockData := make([]common.BlockData, 0, len(blockNumbers)) diff --git a/cmd/root.go b/cmd/root.go index a5ac7c9..1afa037 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -150,7 +150,7 @@ func init() { rootCmd.PersistentFlags().String("storage-main-s3-secretAccessKey", "", "S3 secret access key for main storage") rootCmd.PersistentFlags().String("storage-main-s3-endpoint", "", "S3 endpoint URL for main storage (for S3-compatible services)") rootCmd.PersistentFlags().String("storage-main-s3-format", "parquet", "S3 storage format for main storage (parquet or json)") - rootCmd.PersistentFlags().Int64("storage-main-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for main storage") + rootCmd.PersistentFlags().Int64("storage-main-s3-bufferSizeMB", 512, "S3 buffer size in MB before flush for main storage") rootCmd.PersistentFlags().Int("storage-main-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for main storage") rootCmd.PersistentFlags().Int("storage-main-s3-maxBlocksPerFile", 0, "S3 max blocks per file for main storage (0 = no limit)") rootCmd.PersistentFlags().String("storage-main-s3-parquet-compression", "snappy", "Parquet compression type for S3 main storage") @@ -220,7 +220,7 @@ func init() { rootCmd.PersistentFlags().String("migrator-destination-s3-secretAccessKey", "", "S3 secret access key for migrator destination") rootCmd.PersistentFlags().String("migrator-destination-s3-endpoint", "", "S3 endpoint URL for migrator destination") rootCmd.PersistentFlags().String("migrator-destination-s3-format", "parquet", "S3 storage format for migrator destination") - rootCmd.PersistentFlags().Int64("migrator-destination-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for migrator destination") + rootCmd.PersistentFlags().Int64("migrator-destination-s3-bufferSizeMB", 512, "S3 buffer size in MB before flush for migrator destination") rootCmd.PersistentFlags().Int("migrator-destination-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for migrator destination") rootCmd.PersistentFlags().Int("migrator-destination-s3-maxBlocksPerFile", 0, "S3 max blocks per file for migrator destination") rootCmd.PersistentFlags().Uint("migrator-batchSize", 2000, "Batch size for storage operations in migrator") diff --git a/configs/config.go b/configs/config.go index f92a3e2..7c2bfce 100644 --- a/configs/config.go +++ b/configs/config.go @@ -76,7 +76,7 @@ type S3Config struct { Format string `mapstructure:"format"` Parquet *ParquetConfig `mapstructure:"parquet"` // Buffering configuration - BufferSize int64 `mapstructure:"bufferSizeMB"` // Target buffer size in MB before flush (default 1024 MB = 1GB) + BufferSize int64 `mapstructure:"bufferSizeMB"` // Target buffer size in MB before flush (default 512 MB) BufferTimeout int `mapstructure:"bufferTimeoutSeconds"` // Max time in seconds before flush (default 300 = 5 min) MaxBlocksPerFile int `mapstructure:"maxBlocksPerFile"` // Max blocks per parquet file (0 = no limit, only size/timeout triggers) } diff --git a/internal/rpc/batcher.go b/internal/rpc/batcher.go index c34fd13..2589e0d 100644 --- a/internal/rpc/batcher.go +++ b/internal/rpc/batcher.go @@ -2,6 +2,7 @@ package rpc import ( "context" + "strings" "sync" "time" @@ -50,6 +51,121 @@ func RPCFetchInBatches[K any, T any](rpc *Client, ctx context.Context, keys []K, return results } +func RPCFetchInBatchesWithRetry[K any, T any](rpc *Client, ctx context.Context, keys []K, 
batchSize int, batchDelay int, method string, argsFunc func(K) []interface{}) []RPCFetchBatchResult[K, T] { + if len(keys) <= batchSize { + return RPCFetchSingleBatchWithRetry[K, T](rpc, ctx, keys, method, argsFunc) + } + chunks := common.SliceToChunks[K](keys, batchSize) + + log.Debug().Msgf("Fetching %s for %d blocks in %d chunks of max %d requests", method, len(keys), len(chunks), batchSize) + + var wg sync.WaitGroup + resultsCh := make(chan []RPCFetchBatchResult[K, T], len(chunks)) + + for _, chunk := range chunks { + wg.Add(1) + go func(chunk []K) { + defer wg.Done() + resultsCh <- RPCFetchSingleBatchWithRetry[K, T](rpc, ctx, chunk, method, argsFunc) + if batchDelay > 0 { + time.Sleep(time.Duration(batchDelay) * time.Millisecond) + } + }(chunk) + } + go func() { + wg.Wait() + close(resultsCh) + }() + + results := make([]RPCFetchBatchResult[K, T], 0, len(keys)) + for batchResults := range resultsCh { + results = append(results, batchResults...) + } + + return results +} + +func RPCFetchSingleBatchWithRetry[K any, T any](rpc *Client, ctx context.Context, keys []K, method string, argsFunc func(K) []interface{}) []RPCFetchBatchResult[K, T] { + currentBatchSize := len(keys) + minBatchSize := 1 + + // First try with the full batch + results := RPCFetchSingleBatch[K, T](rpc, ctx, keys, method, argsFunc) + if !hasBatchError(results) { + return results + } + + // If we got 413, start retrying with smaller batches + newBatchSize := len(keys) / 2 + if newBatchSize < minBatchSize { + newBatchSize = minBatchSize + } + log.Debug().Msgf("Got error for batch size %d, retrying with batch size %d", currentBatchSize, newBatchSize) + + // Start with half the size + currentBatchSize = newBatchSize + + // Keep retrying with smaller batch sizes + for currentBatchSize >= minBatchSize { + chunks := common.SliceToChunks[K](keys, currentBatchSize) + allResults := make([]RPCFetchBatchResult[K, T], 0, len(keys)) + hasError := false + + // Process chunks sequentially to maintain order + for _, chunk := range chunks { + chunkResults := RPCFetchSingleBatch[K, T](rpc, ctx, chunk, method, argsFunc) + + if hasBatchError(chunkResults) { + hasError = true + break + } + allResults = append(allResults, chunkResults...) + } + + if !hasError { + // Successfully processed all chunks, return results in original order + return allResults + } + + // Still getting error, reduce batch size further + newBatchSize := currentBatchSize / 2 + if newBatchSize < minBatchSize { + newBatchSize = minBatchSize + } + log.Debug().Msgf("Got error for batch size %d, retrying with batch size %d", currentBatchSize, newBatchSize) + currentBatchSize = newBatchSize + + // If we're already at minimum batch size and still failing, try one more time + if currentBatchSize == minBatchSize && hasError { + // Process items one by one as last resort + finalResults := make([]RPCFetchBatchResult[K, T], 0, len(keys)) + for _, key := range keys { + singleResult := RPCFetchSingleBatch[K, T](rpc, ctx, []K{key}, method, argsFunc) + finalResults = append(finalResults, singleResult...) 
+ } + return finalResults + } + } + + // Should not reach here, but return error results as fallback + log.Fatal().Msgf("Unable to process batch even with size 1, returning errors") + return nil +} + +func hasBatchError[K any, T any](results []RPCFetchBatchResult[K, T]) bool { + for _, result := range results { + if result.Error != nil { + if httpErr, ok := result.Error.(gethRpc.HTTPError); ok && httpErr.StatusCode == 413 { + return true + } + if strings.Contains(result.Error.Error(), "413") { + return true + } + } + } + return false +} + func RPCFetchSingleBatch[K any, T any](rpc *Client, ctx context.Context, keys []K, method string, argsFunc func(K) []interface{}) []RPCFetchBatchResult[K, T] { batch := make([]gethRpc.BatchElem, len(keys)) results := make([]RPCFetchBatchResult[K, T], len(keys)) diff --git a/internal/rpc/rpc.go b/internal/rpc/rpc.go index d148418..67295df 100644 --- a/internal/rpc/rpc.go +++ b/internal/rpc/rpc.go @@ -238,20 +238,20 @@ func (rpc *Client) GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) [ go func() { defer wg.Done() - result := RPCFetchSingleBatch[*big.Int, common.RawBlock](rpc, ctx, blockNumbers, "eth_getBlockByNumber", GetBlockWithTransactionsParams) + result := RPCFetchSingleBatchWithRetry[*big.Int, common.RawBlock](rpc, ctx, blockNumbers, "eth_getBlockByNumber", GetBlockWithTransactionsParams) blocks = result }() if rpc.supportsBlockReceipts { go func() { defer wg.Done() - result := RPCFetchInBatches[*big.Int, common.RawReceipts](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Receipts, config.Cfg.RPC.BlockReceipts.BatchDelay, "eth_getBlockReceipts", GetBlockReceiptsParams) + result := RPCFetchInBatchesWithRetry[*big.Int, common.RawReceipts](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Receipts, config.Cfg.RPC.BlockReceipts.BatchDelay, "eth_getBlockReceipts", GetBlockReceiptsParams) receipts = result }() } else { go func() { defer wg.Done() - result := RPCFetchInBatches[*big.Int, common.RawLogs](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Logs, config.Cfg.RPC.Logs.BatchDelay, "eth_getLogs", GetLogsParams) + result := RPCFetchInBatchesWithRetry[*big.Int, common.RawLogs](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Logs, config.Cfg.RPC.Logs.BatchDelay, "eth_getLogs", GetLogsParams) logs = result }() } @@ -260,7 +260,7 @@ func (rpc *Client) GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) [ wg.Add(1) go func() { defer wg.Done() - result := RPCFetchInBatches[*big.Int, common.RawTraces](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Traces, config.Cfg.RPC.Traces.BatchDelay, "trace_block", TraceBlockParams) + result := RPCFetchInBatchesWithRetry[*big.Int, common.RawTraces](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Traces, config.Cfg.RPC.Traces.BatchDelay, "trace_block", TraceBlockParams) traces = result }() } diff --git a/internal/storage/s3.go b/internal/storage/s3.go index e3e4038..2ed0a74 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -37,6 +37,7 @@ type S3Connector struct { flushCh chan struct{} flushDoneCh chan struct{} // Signals when flush is complete wg sync.WaitGroup + closeOnce sync.Once } // DataFormatter interface for different file formats @@ -313,18 +314,23 @@ func (s *S3Connector) Flush() error { // Close closes the S3 connector and flushes any remaining data func (s *S3Connector) Close() error { - // First, ensure any pending data is flushed - if err := s.Flush(); err != nil { - log.Error().Err(err).Msg("Error flushing buffer during close") - } + var closeErr error + + s.closeOnce.Do(func() { + // 
First, ensure any pending data is flushed + if err := s.Flush(); err != nil { + log.Error().Err(err).Msg("Error flushing buffer during close") + closeErr = err + } - // Signal stop - close(s.stopCh) + // Signal stop + close(s.stopCh) - // Wait for worker to finish - s.wg.Wait() + // Wait for worker to finish + s.wg.Wait() + }) - return nil + return closeErr } func (s *S3Connector) uploadBatch(data []common.BlockData) error { From 96dc60ba4d7a6fd55cbdf04f5ea28ea9e375b8f8 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 19:10:41 +0000 Subject: [PATCH 27/43] Shuffle Orchestrator and Staging interface --- internal/orchestrator/committer.go | 14 +++---- internal/orchestrator/committer_test.go | 10 ++--- internal/orchestrator/failure_recoverer.go | 6 +-- internal/orchestrator/poller.go | 2 +- internal/storage/badger.go | 49 +++++++++++++++++++--- internal/storage/clickhouse.go | 27 +++++++++++- internal/storage/connector.go | 23 ++++++---- internal/storage/kafka_redis.go | 26 +++++++++++- internal/storage/postgres.go | 31 +++++++++++++- test/mocks/MockIStagingStorage.go | 22 +++++----- 10 files changed, 168 insertions(+), 42 deletions(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index bde4854..6b4df18 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -109,7 +109,7 @@ func (c *Committer) Start(ctx context.Context) { } // Initialize publisher position - always use max(lastPublished, lastCommitted) to prevent double publishing - lastPublished, err := c.storage.StagingStorage.GetLastPublishedBlockNumber(chainID) + lastPublished, err := c.storage.OrchestratorStorage.GetLastPublishedBlockNumber(chainID) if err != nil { // It's okay to fail silently here; it's only used for staging cleanup and will be // corrected by the worker loop. 
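A minimal sketch, not part of the patch, of the publisher-position rule the committer hunk above describes: on startup the cursor is taken as max(lastPublished, lastCommitted) so already-committed blocks are never re-published, and the stored cursor is seeked forward when the publisher is behind. The file name, package placement, and the helper initialPublishPosition are hypothetical; only the IOrchestratorStorage methods come from this series.

// publish_position_sketch.go - illustrative only; helper name and placement are assumptions.
package orchestrator

import (
	"math/big"

	"github.com/thirdweb-dev/indexer/internal/storage"
)

// initialPublishPosition returns max(lastPublished, lastCommitted). When the publisher
// cursor is behind the committed cursor, it seeks the stored cursor forward so
// already-committed blocks are not published a second time.
func initialPublishPosition(chainID *big.Int, store storage.IOrchestratorStorage, lastCommitted *big.Int) (*big.Int, error) {
	lastPublished, err := store.GetLastPublishedBlockNumber(chainID)
	if err != nil {
		return nil, err
	}
	if lastPublished == nil || lastPublished.Cmp(lastCommitted) < 0 {
		if err := store.SetLastPublishedBlockNumber(chainID, lastCommitted); err != nil {
			// Fall back to the stored value if the cursor cannot be advanced.
			return lastPublished, err
		}
		return lastCommitted, nil
	}
	return lastPublished, nil
}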
@@ -126,7 +126,7 @@ func (c *Committer) Start(ctx context.Context) { Msg("Publisher is behind committed position, seeking forward to committed value") c.lastPublishedBlock.Store(latestCommittedBlockNumber.Uint64()) - if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, latestCommittedBlockNumber); err != nil { + if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, latestCommittedBlockNumber); err != nil { log.Error().Err(err).Msg("Failed to update last published block number after seeking forward") // Fall back to the stored value on error c.lastPublishedBlock.Store(lastPublished.Uint64()) @@ -167,7 +167,7 @@ func (c *Committer) Start(ctx context.Context) { // Only update storage if we're changing the position if lastPublished == nil || targetPublishBlock.Cmp(lastPublished) != 0 { - if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, targetPublishBlock); err != nil { + if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, targetPublishBlock); err != nil { log.Error().Err(err).Msg("Failed to update published block number in storage") // If we can't update storage, use what was there originally to avoid issues if lastPublished != nil { @@ -303,11 +303,11 @@ func (c *Committer) cleanupProcessedStagingBlocks() { chainID := c.rpc.GetChainID() blockNumber := new(big.Int).SetUint64(limit) stagingDeleteStart := time.Now() - if err := c.storage.StagingStorage.DeleteOlderThan(chainID, blockNumber); err != nil { + if err := c.storage.StagingStorage.DeleteStagingDataOlderThan(chainID, blockNumber); err != nil { log.Error().Err(err).Msg("Failed to delete staging data") return } - log.Debug().Str("metric", "staging_delete_duration").Msgf("StagingStorage.DeleteOlderThan duration: %f", time.Since(stagingDeleteStart).Seconds()) + log.Debug().Str("metric", "staging_delete_duration").Msgf("StagingStorage.DeleteStagingDataOlderThan duration: %f", time.Since(stagingDeleteStart).Seconds()) metrics.StagingDeleteDuration.Observe(time.Since(stagingDeleteStart).Seconds()) } @@ -358,7 +358,7 @@ func (c *Committer) getBlockNumbersToCommit(ctx context.Context) ([]*big.Int, er func (c *Committer) getBlockNumbersToPublish(ctx context.Context) ([]*big.Int, error) { // Get the last published block from storage (which was already corrected in Start) - latestPublishedBlockNumber, err := c.storage.StagingStorage.GetLastPublishedBlockNumber(c.rpc.GetChainID()) + latestPublishedBlockNumber, err := c.storage.OrchestratorStorage.GetLastPublishedBlockNumber(c.rpc.GetChainID()) if err != nil { return nil, fmt.Errorf("failed to get last published block number: %v", err) } @@ -550,7 +550,7 @@ func (c *Committer) publish(ctx context.Context) error { chainID := c.rpc.GetChainID() highest := blockData[len(blockData)-1].Block.Number - if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, highest); err != nil { + if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, highest); err != nil { return err } c.lastPublishedBlock.Store(highest.Uint64()) diff --git a/internal/orchestrator/committer_test.go b/internal/orchestrator/committer_test.go index c6d5906..8e2cb90 100644 --- a/internal/orchestrator/committer_test.go +++ b/internal/orchestrator/committer_test.go @@ -336,7 +336,7 @@ func TestCommitDeletesAfterPublish(t *testing.T) { mockRPC.EXPECT().GetChainID().Return(chainID) mockMainStorage.EXPECT().InsertBlockData(blockData).Return(nil) - mockStagingStorage.EXPECT().DeleteOlderThan(chainID, 
big.NewInt(102)).RunAndReturn(func(*big.Int, *big.Int) error { + mockStagingStorage.EXPECT().DeleteStagingDataOlderThan(chainID, big.NewInt(102)).RunAndReturn(func(*big.Int, *big.Int) error { close(deleteDone) return nil }) @@ -347,7 +347,7 @@ func TestCommitDeletesAfterPublish(t *testing.T) { select { case <-deleteDone: case <-time.After(2 * time.Second): - t.Fatal("DeleteOlderThan was not called within timeout period") + t.Fatal("DeleteStagingDataOlderThan was not called within timeout period") } } @@ -380,7 +380,7 @@ func TestCommitParallelPublisherMode(t *testing.T) { mockStagingStorage.AssertNotCalled(t, "GetLastPublishedBlockNumber", mock.Anything) mockStagingStorage.AssertNotCalled(t, "SetLastPublishedBlockNumber", mock.Anything, mock.Anything) - mockStagingStorage.AssertNotCalled(t, "DeleteOlderThan", mock.Anything, mock.Anything) + mockStagingStorage.AssertNotCalled(t, "DeleteStagingDataOlderThan", mock.Anything, mock.Anything) } func TestCleanupProcessedStagingBlocks(t *testing.T) { @@ -400,11 +400,11 @@ func TestCleanupProcessedStagingBlocks(t *testing.T) { committer.lastPublishedBlock.Store(0) committer.cleanupProcessedStagingBlocks() - mockStagingStorage.AssertNotCalled(t, "DeleteOlderThan", mock.Anything, mock.Anything) + mockStagingStorage.AssertNotCalled(t, "DeleteStagingDataOlderThan", mock.Anything, mock.Anything) committer.lastPublishedBlock.Store(90) mockRPC.EXPECT().GetChainID().Return(chainID) - mockStagingStorage.EXPECT().DeleteOlderThan(chainID, big.NewInt(90)).Return(nil) + mockStagingStorage.EXPECT().DeleteStagingDataOlderThan(chainID, big.NewInt(90)).Return(nil) committer.cleanupProcessedStagingBlocks() } func TestHandleGap(t *testing.T) { diff --git a/internal/orchestrator/failure_recoverer.go b/internal/orchestrator/failure_recoverer.go index da1ae91..8ca110f 100644 --- a/internal/orchestrator/failure_recoverer.go +++ b/internal/orchestrator/failure_recoverer.go @@ -55,7 +55,7 @@ func (fr *FailureRecoverer) Start(ctx context.Context) { log.Info().Msg("Failure recoverer shutting down") return case <-ticker.C: - blockFailures, err := fr.storage.OrchestratorStorage.GetBlockFailures(storage.QueryFilter{ + blockFailures, err := fr.storage.StagingStorage.GetBlockFailures(storage.QueryFilter{ ChainId: fr.rpc.GetChainID(), Limit: fr.failuresPerPoll, }) @@ -122,11 +122,11 @@ func (fr *FailureRecoverer) handleWorkerResults(blockFailures []common.BlockFail log.Error().Err(fmt.Errorf("error inserting block data in failure recoverer: %v", err)) return } - if err := fr.storage.OrchestratorStorage.StoreBlockFailures(newBlockFailures); err != nil { + if err := fr.storage.StagingStorage.StoreBlockFailures(newBlockFailures); err != nil { log.Error().Err(err).Msg("Error storing block failures") return } - if err := fr.storage.OrchestratorStorage.DeleteBlockFailures(failuresToDelete); err != nil { + if err := fr.storage.StagingStorage.DeleteBlockFailures(failuresToDelete); err != nil { log.Error().Err(err).Msg("Error deleting block failures") return } diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index 5045dc1..331f00c 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -352,7 +352,7 @@ func (p *Poller) handleBlockFailures(results []rpc.GetFullBlockResult) { }) } } - err := p.storage.OrchestratorStorage.StoreBlockFailures(blockFailures) + err := p.storage.StagingStorage.StoreBlockFailures(blockFailures) if err != nil { // TODO: exiting if this fails, but should handle this better log.Error().Err(err).Msg("Error 
saving block failures") diff --git a/internal/storage/badger.go b/internal/storage/badger.go index 3c1305d..e649040 100644 --- a/internal/storage/badger.go +++ b/internal/storage/badger.go @@ -88,19 +88,23 @@ func (bc *BadgerConnector) Close() error { // Key construction helpers func blockKey(chainId *big.Int, blockNumber *big.Int) []byte { - return []byte(fmt.Sprintf("b:%d:%s", chainId.Uint64(), blockNumber.String())) + return []byte(fmt.Sprintf("blockdata:%s:%s", chainId.String(), blockNumber.String())) } func blockFailureKey(chainId *big.Int, blockNumber *big.Int, timestamp int64) []byte { - return []byte(fmt.Sprintf("f:%d:%s:%d", chainId.Uint64(), blockNumber.String(), timestamp)) + return []byte(fmt.Sprintf("blockfailure:%s:%s:%d", chainId.String(), blockNumber.String(), timestamp)) } func lastReorgKey(chainId *big.Int) []byte { - return []byte(fmt.Sprintf("reorg:%d", chainId.Uint64())) + return []byte(fmt.Sprintf("reorg:%s", chainId.String())) } func lastPublishedKey(chainId *big.Int) []byte { - return []byte(fmt.Sprintf("published:%d", chainId.Uint64())) + return []byte(fmt.Sprintf("publish:%s", chainId.String())) +} + +func lastCommittedKey(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("commit:%s", chainId.String())) } // IOrchestratorStorage implementation @@ -438,7 +442,42 @@ func (bc *BadgerConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNu }) } -func (bc *BadgerConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +func (bc *BadgerConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var blockNumber *big.Int + err := bc.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(lastCommittedKey(chainId)) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return err + } + + return item.Value(func(val []byte) error { + blockNumber = new(big.Int).SetBytes(val) + return nil + }) + }) + + if blockNumber == nil { + return big.NewInt(0), nil + } + return blockNumber, err +} + +func (bc *BadgerConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + return txn.Set(lastCommittedKey(chainId), blockNumber.Bytes()) + }) +} + +func (bc *BadgerConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { bc.mu.Lock() defer bc.mu.Unlock() diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go index d3319c6..013e917 100644 --- a/internal/storage/clickhouse.go +++ b/internal/storage/clickhouse.go @@ -1147,6 +1147,31 @@ func (c *ClickHouseConnector) SetLastPublishedBlockNumber(chainId *big.Int, bloc return c.conn.Exec(context.Background(), query) } +func (c *ClickHouseConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + query := fmt.Sprintf("SELECT cursor_value FROM %s.cursors FINAL WHERE cursor_type = 'commit'", c.cfg.Database) + if chainId.Sign() > 0 { + query += fmt.Sprintf(" AND chain_id = %s", chainId.String()) + } + var blockNumberString string + err := c.conn.QueryRow(context.Background(), query).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + return blockNumber, nil +} + +func (c *ClickHouseConnector) 
SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + query := fmt.Sprintf("INSERT INTO %s.cursors (chain_id, cursor_type, cursor_value) VALUES (%s, 'commit', '%s')", c.cfg.Database, chainId, blockNumber.String()) + return c.conn.Exec(context.Background(), query) +} + func (c *ClickHouseConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { query := fmt.Sprintf("SELECT cursor_value FROM %s.cursors FINAL WHERE cursor_type = 'reorg'", c.cfg.Database) if chainId.Sign() > 0 { @@ -2186,7 +2211,7 @@ func (c *ClickHouseConnector) GetFullBlockData(chainId *big.Int, blockNumbers [] return blockData, nil } -func (c *ClickHouseConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +func (c *ClickHouseConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { query := fmt.Sprintf(` INSERT INTO %s.block_data (chain_id, block_number, is_deleted) SELECT chain_id, block_number, 1 diff --git a/internal/storage/connector.go b/internal/storage/connector.go index dc23d9b..60e5cfb 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -96,23 +96,32 @@ func (s *IStorage) Close() error { return nil } +// The orchestrator storage is a persisted key/value store type IOrchestratorStorage interface { - GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) - StoreBlockFailures(failures []common.BlockFailure) error - DeleteBlockFailures(failures []common.BlockFailure) error GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error + GetLastPublishedBlockNumber(chainId *big.Int) (blockNumber *big.Int, err error) + SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error + GetLastCommittedBlockNumber(chainId *big.Int) (blockNumber *big.Int, err error) + SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error + Close() error } +// The staging storage is an ephemeral block data store type IStagingStorage interface { + // Staging block data InsertStagingData(data []common.BlockData) error GetStagingData(qf QueryFilter) (data []common.BlockData, err error) - DeleteStagingData(data []common.BlockData) error GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (maxBlockNumber *big.Int, err error) - GetLastPublishedBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) - SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error - DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error + DeleteStagingData(data []common.BlockData) error + DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error + + // Block failures + GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) + StoreBlockFailures(failures []common.BlockFailure) error + DeleteBlockFailures(failures []common.BlockFailure) error + Close() error } diff --git a/internal/storage/kafka_redis.go b/internal/storage/kafka_redis.go index 030fa21..e76a8e9 100644 --- a/internal/storage/kafka_redis.go +++ b/internal/storage/kafka_redis.go @@ -132,11 +132,35 @@ func (kr *KafkaRedisConnector) SetLastPublishedBlockNumber(chainId *big.Int, blo return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() } +func (kr *KafkaRedisConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + + val, err := kr.redisClient.Get(ctx, 
key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *KafkaRedisConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + func (kr *KafkaRedisConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { return nil, fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") } -func (kr *KafkaRedisConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +func (kr *KafkaRedisConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") } diff --git a/internal/storage/postgres.go b/internal/storage/postgres.go index 1476c44..fb0748d 100644 --- a/internal/storage/postgres.go +++ b/internal/storage/postgres.go @@ -388,6 +388,35 @@ func (p *PostgresConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockN return err } +func (p *PostgresConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'commit' AND chain_id = $1` + + var blockNumberString string + err := p.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + return blockNumber, nil +} + +func (p *PostgresConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) + VALUES ($1, 'commit', $2) + ON CONFLICT (chain_id, cursor_type) + DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` + + _, err := p.db.Exec(query, chainId.String(), blockNumber.String()) + return err +} + func (p *PostgresConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { query := `SELECT MAX(block_number) FROM block_data WHERE 1=1` @@ -431,7 +460,7 @@ func (p *PostgresConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStar return blockNumber, nil } -func (p *PostgresConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +func (p *PostgresConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { query := `DELETE FROM block_data WHERE ctid IN ( SELECT ctid diff --git a/test/mocks/MockIStagingStorage.go b/test/mocks/MockIStagingStorage.go index 14f8e68..bd73136 100644 --- a/test/mocks/MockIStagingStorage.go +++ b/test/mocks/MockIStagingStorage.go @@ -341,12 +341,12 @@ func (_c *MockIStagingStorage_InsertStagingData_Call) RunAndReturn(run func([]co return _c } -// DeleteOlderThan provides a mock function with given fields: chainId, blockNumber -func (_m *MockIStagingStorage) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +// DeleteStagingDataOlderThan provides a 
mock function with given fields: chainId, blockNumber +func (_m *MockIStagingStorage) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { ret := _m.Called(chainId, blockNumber) if len(ret) == 0 { - panic("no return value specified for DeleteOlderThan") + panic("no return value specified for DeleteStagingDataOlderThan") } var r0 error @@ -359,31 +359,31 @@ func (_m *MockIStagingStorage) DeleteOlderThan(chainId *big.Int, blockNumber *bi return r0 } -// MockIStagingStorage_DeleteOlderThan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteOlderThan' -type MockIStagingStorage_DeleteOlderThan_Call struct { +// MockIStagingStorage_DeleteStagingDataOlderThan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteStagingDataOlderThan' +type MockIStagingStorage_DeleteStagingDataOlderThan_Call struct { *mock.Call } -// DeleteOlderThan is a helper method to define mock.On call +// DeleteStagingDataOlderThan is a helper method to define mock.On call // - chainId *big.Int // - blockNumber *big.Int -func (_e *MockIStagingStorage_Expecter) DeleteOlderThan(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_DeleteOlderThan_Call { - return &MockIStagingStorage_DeleteOlderThan_Call{Call: _e.mock.On("DeleteOlderThan", chainId, blockNumber)} +func (_e *MockIStagingStorage_Expecter) DeleteStagingDataOlderThan(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { + return &MockIStagingStorage_DeleteStagingDataOlderThan_Call{Call: _e.mock.On("DeleteStagingDataOlderThan", chainId, blockNumber)} } -func (_c *MockIStagingStorage_DeleteOlderThan_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_DeleteOlderThan_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Run(func(args mock.Arguments) { run(args[0].(*big.Int), args[1].(*big.Int)) }) return _c } -func (_c *MockIStagingStorage_DeleteOlderThan_Call) Return(_a0 error) *MockIStagingStorage_DeleteOlderThan_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Return(_a0 error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIStagingStorage_DeleteOlderThan_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_DeleteOlderThan_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Return(run) return _c } From 41cc98d610865a68c53bc7b3eeaf682d23287661 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 19:55:04 +0000 Subject: [PATCH 28/43] store poller in committer --- internal/orchestrator/committer.go | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index 6b4df18..1e00602 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -31,6 +31,7 @@ type Committer struct { lastCommittedBlock atomic.Uint64 lastPublishedBlock atomic.Uint64 publisher *publisher.Publisher + poller *Poller workMode WorkMode workModeMutex sync.RWMutex workModeChan chan WorkMode @@ -80,6 +81,7 @@ func NewCommitter(rpc rpc.IRPCClient, storage 
storage.IStorage, opts ...Committe commitUntilBlock: big.NewInt(int64(commitUntilBlock)), rpc: rpc, publisher: publisher.GetInstance(), + poller: NewBoundlessPoller(rpc, storage), workMode: "", } cfb := commitFromBlock.Uint64() @@ -445,8 +447,7 @@ func (c *Committer) fetchBlockData(ctx context.Context, blockNumbers []*big.Int) } return blocksData, nil } else { - poller := NewBoundlessPoller(c.rpc, c.storage) - blocksData, err := poller.PollWithoutSaving(ctx, blockNumbers) + blocksData, err := c.poller.PollWithoutSaving(ctx, blockNumbers) if err != nil { return nil, fmt.Errorf("poller error: %v", err) } @@ -612,13 +613,11 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big return nil } - poller := NewBoundlessPoller(c.rpc, c.storage) - missingBlockCount := new(big.Int).Sub(actualFirstBlock.Number, expectedStartBlockNumber).Int64() log.Debug().Msgf("Detected %d missing blocks between blocks %s and %s", missingBlockCount, expectedStartBlockNumber.String(), actualFirstBlock.Number.String()) - if missingBlockCount > poller.blocksPerPoll { - log.Debug().Msgf("Limiting polling missing blocks to %d blocks due to config", poller.blocksPerPoll) - missingBlockCount = poller.blocksPerPoll + if missingBlockCount > c.poller.blocksPerPoll { + log.Debug().Msgf("Limiting polling missing blocks to %d blocks due to config", c.poller.blocksPerPoll) + missingBlockCount = c.poller.blocksPerPoll } missingBlockNumbers := make([]*big.Int, missingBlockCount) for i := int64(0); i < missingBlockCount; i++ { @@ -627,7 +626,7 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big } log.Debug().Msgf("Polling %d blocks while handling gap: %v", len(missingBlockNumbers), missingBlockNumbers) - poller.Poll(ctx, missingBlockNumbers) + c.poller.Poll(ctx, missingBlockNumbers) return fmt.Errorf("first block number (%s) in commit batch does not match expected (%s)", actualFirstBlock.Number.String(), expectedStartBlockNumber.String()) } @@ -644,11 +643,10 @@ func (c *Committer) handleMissingStagingData(ctx context.Context, blocksToCommit } log.Debug().Msgf("Detected missing blocks in staging data starting from %s.", blocksToCommit[0].String()) - poller := NewBoundlessPoller(c.rpc, c.storage) blocksToPoll := blocksToCommit - if len(blocksToCommit) > int(poller.blocksPerPoll) { - blocksToPoll = blocksToCommit[:int(poller.blocksPerPoll)] + if len(blocksToCommit) > int(c.poller.blocksPerPoll) { + blocksToPoll = blocksToCommit[:int(c.poller.blocksPerPoll)] } - poller.Poll(ctx, blocksToPoll) + c.poller.Poll(ctx, blocksToPoll) log.Debug().Msgf("Polled %d blocks due to committer detecting them as missing. Range: %s - %s", len(blocksToPoll), blocksToPoll[0].String(), blocksToPoll[len(blocksToPoll)-1].String()) } From 70ea8714f028a1ca4f38ee85aa963fd6a2d41754 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 20:15:23 +0000 Subject: [PATCH 29/43] Simplified storage. 
Split kafka and redis --- cmd/migrate_valid.go | 12 +- cmd/root.go | 16 +- configs/config.go | 42 +++-- internal/handlers/logs_handlers.go | 2 +- internal/storage/connector.go | 163 +++++++++++++--- internal/storage/kafka.go | 125 ++++++++++++ internal/storage/kafka_redis.go | 294 ----------------------------- internal/storage/redis.go | 125 ++++++++++++ 8 files changed, 434 insertions(+), 345 deletions(-) create mode 100644 internal/storage/kafka.go delete mode 100644 internal/storage/kafka_redis.go create mode 100644 internal/storage/redis.go diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index 43f2988..04dcc60 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -67,7 +67,7 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { wg.Add(1) go func(id int, startBlock, endBlock *big.Int) { defer wg.Done() - + // Only check boundaries per-worker if we have multiple workers // For single worker, we already determined boundaries globally var actualStart, actualEnd *big.Int @@ -78,7 +78,7 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { log.Info().Msgf("Worker %d: Range %s to %s already fully migrated", id, startBlock.String(), endBlock.String()) return } - log.Info().Msgf("Worker %d starting: blocks %s to %s (adjusted from %s to %s)", + log.Info().Msgf("Worker %d starting: blocks %s to %s (adjusted from %s to %s)", id, actualStart.String(), actualEnd.String(), startBlock.String(), endBlock.String()) } else { // Single worker: use the already-determined boundaries @@ -314,7 +314,7 @@ func NewMigrator() *Migrator { validator := orchestrator.NewValidator(rpcClient, sourceConnector) - destinationConnector, err := storage.NewConnector[storage.IMainStorage](&config.Cfg.Migrator.Destination) + destinationConnector, err := storage.NewMainConnector(&config.Cfg.Migrator.Destination) if err != nil { log.Fatal().Err(err).Msg("Failed to initialize storage") } @@ -396,7 +396,7 @@ func (m *Migrator) DetermineMigrationBoundariesForRange(rangeStart, rangeEnd *bi expectedCount := new(big.Int).Sub(rangeEnd, rangeStart) expectedCount = expectedCount.Add(expectedCount, big.NewInt(1)) - + // If all blocks are already migrated, return nil if expectedCount.Cmp(blockCount) == 0 { log.Debug().Msgf("Range %s to %s already fully migrated (%s blocks)", rangeStart.String(), rangeEnd.String(), blockCount.String()) @@ -414,7 +414,7 @@ func (m *Migrator) DetermineMigrationBoundariesForRange(rangeStart, rangeEnd *bi if maxStoredBlock != nil && maxStoredBlock.Cmp(rangeStart) >= 0 { // We have some blocks already, start from the next one actualStart = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) - + // If the new start is beyond our range end, the range is fully migrated if actualStart.Cmp(rangeEnd) > 0 { log.Debug().Msgf("Range %s to %s already fully migrated (max block: %s)", rangeStart.String(), rangeEnd.String(), maxStoredBlock.String()) @@ -422,7 +422,7 @@ func (m *Migrator) DetermineMigrationBoundariesForRange(rangeStart, rangeEnd *bi } } - log.Debug().Msgf("Range %s-%s: found %s blocks, max stored: %v, will migrate from %s", + log.Debug().Msgf("Range %s-%s: found %s blocks, max stored: %v, will migrate from %s", rangeStart.String(), rangeEnd.String(), blockCount.String(), maxStoredBlock, actualStart.String()) return actualStart, rangeEnd diff --git a/cmd/root.go b/cmd/root.go index 1afa037..6aa04c7 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -134,10 +134,10 @@ func init() { rootCmd.PersistentFlags().String("storage-main-kafka-username", "", "Kafka username for main 
storage") rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") rootCmd.PersistentFlags().Bool("storage-main-kafka-enable-tls", true, "Enable TLS for Kafka connection in main storage") - rootCmd.PersistentFlags().String("storage-main-kafka-redis-host", "", "Redis host for Kafka main storage metadata") - rootCmd.PersistentFlags().Int("storage-main-kafka-redis-port", 6379, "Redis port for Kafka main storage metadata") - rootCmd.PersistentFlags().String("storage-main-kafka-redis-password", "", "Redis password for Kafka main storage metadata") - rootCmd.PersistentFlags().Int("storage-main-kafka-redis-db", 0, "Redis database number for Kafka main storage metadata") + rootCmd.PersistentFlags().String("storage-orchestrator-redis-host", "", "Redis host for orchestrator storage metadata") + rootCmd.PersistentFlags().Int("storage-orchestrator-redis-port", 6379, "Redis port for orchestrator storage metadata") + rootCmd.PersistentFlags().String("storage-orchestrator-redis-password", "", "Redis password for orchestator storage metadata") + rootCmd.PersistentFlags().Int("storage-orchestrator-redis-db", 0, "Redis database number for orchestrator storage metadata") rootCmd.PersistentFlags().String("storage-staging-type", "auto", "Storage type for staging (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-main-type", "auto", "Storage type for main (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-orchestrator-type", "auto", "Storage type for orchestrator (auto, clickhouse, postgres, badger)") @@ -325,10 +325,10 @@ func init() { viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) viper.BindPFlag("storage.main.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) - viper.BindPFlag("storage.main.kafka.redis.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-host")) - viper.BindPFlag("storage.main.kafka.redis.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-port")) - viper.BindPFlag("storage.main.kafka.redis.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-password")) - viper.BindPFlag("storage.main.kafka.redis.db", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-db")) + viper.BindPFlag("storage.orchestrator.redis.host", rootCmd.PersistentFlags().Lookup("storage-main-redis-host")) + viper.BindPFlag("storage.orchestrator.redis.port", rootCmd.PersistentFlags().Lookup("storage-main-redis-port")) + viper.BindPFlag("storage.orchestrator.redis.password", rootCmd.PersistentFlags().Lookup("storage-main-redis-password")) + viper.BindPFlag("storage.orchestrator.redis.db", rootCmd.PersistentFlags().Lookup("storage-main-redis-db")) viper.BindPFlag("storage.staging.type", rootCmd.PersistentFlags().Lookup("storage-staging-type")) viper.BindPFlag("storage.main.type", rootCmd.PersistentFlags().Lookup("storage-main-type")) viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) diff --git a/configs/config.go b/configs/config.go index 7c2bfce..9c3e7c0 100644 --- a/configs/config.go +++ b/configs/config.go @@ -48,13 +48,28 @@ type FailureRecovererConfig struct { } type StorageConfig struct { - Staging StorageConnectionConfig `mapstructure:"staging"` - Main StorageConnectionConfig 
`mapstructure:"main"` - Orchestrator StorageConnectionConfig `mapstructure:"orchestrator"` + Orchestrator StorageOrchestratorConfig `mapstructure:"orchestrator"` + Staging StorageStagingConfig `mapstructure:"staging"` + Main StorageMainConfig `mapstructure:"main"` } -type StorageConnectionConfig struct { - Type string `mapstructure:"type"` // "auto", "clickhouse", "postgres", "kafka", "badger", "s3" +type StorageOrchestratorConfig struct { + Type string `mapstructure:"type"` + Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` + Postgres *PostgresConfig `mapstructure:"postgres"` + Redis *RedisConfig `mapstructure:"redis"` + Badger *BadgerConfig `mapstructure:"badger"` +} + +type StorageStagingConfig struct { + Type string `mapstructure:"type"` + Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` + Postgres *PostgresConfig `mapstructure:"postgres"` + Badger *BadgerConfig `mapstructure:"badger"` +} + +type StorageMainConfig struct { + Type string `mapstructure:"type"` Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` Postgres *PostgresConfig `mapstructure:"postgres"` Kafka *KafkaConfig `mapstructure:"kafka"` @@ -133,11 +148,10 @@ type RedisConfig struct { } type KafkaConfig struct { - Brokers string `mapstructure:"brokers"` - Username string `mapstructure:"username"` - Password string `mapstructure:"password"` - EnableTLS bool `mapstructure:"enableTLS"` - Redis *RedisConfig `mapstructure:"redis"` + Brokers string `mapstructure:"brokers"` + Username string `mapstructure:"username"` + Password string `mapstructure:"password"` + EnableTLS bool `mapstructure:"enableTLS"` } type RPCBatchRequestConfig struct { @@ -233,10 +247,10 @@ type ValidationConfig struct { } type MigratorConfig struct { - Destination StorageConnectionConfig `mapstructure:"destination"` - StartBlock uint `mapstructure:"startBlock"` - EndBlock uint `mapstructure:"endBlock"` - BatchSize uint `mapstructure:"batchSize"` + Destination StorageMainConfig `mapstructure:"destination"` + StartBlock uint `mapstructure:"startBlock"` + EndBlock uint `mapstructure:"endBlock"` + BatchSize uint `mapstructure:"batchSize"` } type Config struct { diff --git a/internal/handlers/logs_handlers.go b/internal/handlers/logs_handlers.go index 965aeae..89f9d2e 100644 --- a/internal/handlers/logs_handlers.go +++ b/internal/handlers/logs_handlers.go @@ -224,7 +224,7 @@ func decodeLogsIfNeeded(chainId string, logs []common.Log, eventABI *abi.Event, func getMainStorage() (storage.IMainStorage, error) { storageOnce.Do(func() { var err error - mainStorage, err = storage.NewConnector[storage.IMainStorage](&config.Cfg.Storage.Main) + mainStorage, err = storage.NewMainConnector(&config.Cfg.Storage.Main) if err != nil { storageErr = err log.Error().Err(err).Msg("Error creating storage connector") diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 60e5cfb..4767578 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -165,25 +165,25 @@ func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { var storage IStorage var err error - storage.OrchestratorStorage, err = NewConnector[IOrchestratorStorage](&cfg.Orchestrator) + storage.OrchestratorStorage, err = NewOrchestratorConnector(&cfg.Orchestrator) if err != nil { return IStorage{}, fmt.Errorf("failed to create orchestrator storage: %w", err) } - storage.MainStorage, err = NewConnector[IMainStorage](&cfg.Main) + storage.StagingStorage, err = NewStagingConnector(&cfg.Staging) if err != nil { - return IStorage{}, 
fmt.Errorf("failed to create main storage: %w", err) + return IStorage{}, fmt.Errorf("failed to create staging storage: %w", err) } - storage.StagingStorage, err = NewConnector[IStagingStorage](&cfg.Staging) + storage.MainStorage, err = NewMainConnector(&cfg.Main) if err != nil { - return IStorage{}, fmt.Errorf("failed to create staging storage: %w", err) + return IStorage{}, fmt.Errorf("failed to create main storage: %w", err) } return storage, nil } -func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { +func NewOrchestratorConnector(cfg *config.StorageOrchestratorConfig) (IOrchestratorStorage, error) { var conn interface{} var err error @@ -196,58 +196,177 @@ func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { // Handle explicit type selection if storageType != "auto" { switch storageType { - case "kafka": - if cfg.Kafka == nil { - return *new(T), fmt.Errorf("kafka storage type specified but kafka config is nil") + case "redis": + if cfg.Redis == nil { + return nil, fmt.Errorf("redis storage type specified but redis config is nil") + } + conn, err = NewRedisConnector(cfg.Redis) + case "postgres": + if cfg.Postgres == nil { + return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") } - conn, err = NewKafkaRedisConnector(cfg.Kafka) + conn, err = NewPostgresConnector(cfg.Postgres) + case "clickhouse": + if cfg.Clickhouse == nil { + return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") + } + conn, err = NewClickHouseConnector(cfg.Clickhouse) + case "badger": + if cfg.Badger == nil { + return nil, fmt.Errorf("badger storage type specified but badger config is nil") + } + conn, err = NewBadgerConnector(cfg.Badger) + default: + return nil, fmt.Errorf("unknown storage type: %s", storageType) + } + } else { + // Auto mode: use the first non-nil config (existing behavior) + if cfg.Redis != nil { + conn, err = NewRedisConnector(cfg.Redis) + } else if cfg.Postgres != nil { + conn, err = NewPostgresConnector(cfg.Postgres) + } else if cfg.Clickhouse != nil { + conn, err = NewClickHouseConnector(cfg.Clickhouse) + } else if cfg.Badger != nil { + conn, err = NewBadgerConnector(cfg.Badger) + } else { + return nil, fmt.Errorf("no storage driver configured") + } + } + + if err != nil { + return nil, err + } + + typedConn, ok := conn.(IOrchestratorStorage) + if !ok { + return nil, fmt.Errorf("connector does not implement the required interface") + } + + return typedConn, nil +} + +func NewStagingConnector(cfg *config.StorageStagingConfig) (IStagingStorage, error) { + var conn interface{} + var err error + + // Default to "auto" if Type is not specified + storageType := cfg.Type + if storageType == "" { + storageType = "auto" + } + + // Handle explicit type selection + if storageType != "auto" { + switch storageType { case "postgres": if cfg.Postgres == nil { - return *new(T), fmt.Errorf("postgres storage type specified but postgres config is nil") + return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") } conn, err = NewPostgresConnector(cfg.Postgres) case "clickhouse": if cfg.Clickhouse == nil { - return *new(T), fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") + return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") } conn, err = NewClickHouseConnector(cfg.Clickhouse) case "badger": if cfg.Badger == nil { - return *new(T), fmt.Errorf("badger storage type specified but badger config is nil") + return nil, 
fmt.Errorf("badger storage type specified but badger config is nil") } conn, err = NewBadgerConnector(cfg.Badger) + default: + return nil, fmt.Errorf("unknown storage type: %s", storageType) + } + } else { + // Auto mode: use the first non-nil config (existing behavior) + if cfg.Postgres != nil { + conn, err = NewPostgresConnector(cfg.Postgres) + } else if cfg.Clickhouse != nil { + conn, err = NewClickHouseConnector(cfg.Clickhouse) + } else if cfg.Badger != nil { + conn, err = NewBadgerConnector(cfg.Badger) + } else { + return nil, fmt.Errorf("no storage driver configured") + } + } + + if err != nil { + return nil, err + } + + typedConn, ok := conn.(IStagingStorage) + if !ok { + return nil, fmt.Errorf("connector does not implement the required interface") + } + + return typedConn, nil +} + +func NewMainConnector(cfg *config.StorageMainConfig) (IMainStorage, error) { + var conn interface{} + var err error + + // Default to "auto" if Type is not specified + storageType := cfg.Type + if storageType == "" { + storageType = "auto" + } + + // Handle explicit type selection + if storageType != "auto" { + switch storageType { + case "kafka": + if cfg.Kafka == nil { + return nil, fmt.Errorf("kafka storage type specified but kafka config is nil") + } + conn, err = NewKafkaConnector(cfg.Kafka) case "s3": if cfg.S3 == nil { - return *new(T), fmt.Errorf("s3 storage type specified but s3 config is nil") + return nil, fmt.Errorf("s3 storage type specified but s3 config is nil") } conn, err = NewS3Connector(cfg.S3) + case "postgres": + if cfg.Postgres == nil { + return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") + } + conn, err = NewPostgresConnector(cfg.Postgres) + case "clickhouse": + if cfg.Clickhouse == nil { + return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") + } + conn, err = NewClickHouseConnector(cfg.Clickhouse) + case "badger": + if cfg.Badger == nil { + return nil, fmt.Errorf("badger storage type specified but badger config is nil") + } + conn, err = NewBadgerConnector(cfg.Badger) default: - return *new(T), fmt.Errorf("unknown storage type: %s", storageType) + return nil, fmt.Errorf("unknown storage type: %s", storageType) } } else { // Auto mode: use the first non-nil config (existing behavior) if cfg.Kafka != nil { - conn, err = NewKafkaRedisConnector(cfg.Kafka) + conn, err = NewKafkaConnector(cfg.Kafka) + } else if cfg.S3 != nil { + conn, err = NewS3Connector(cfg.S3) } else if cfg.Postgres != nil { conn, err = NewPostgresConnector(cfg.Postgres) } else if cfg.Clickhouse != nil { conn, err = NewClickHouseConnector(cfg.Clickhouse) } else if cfg.Badger != nil { conn, err = NewBadgerConnector(cfg.Badger) - } else if cfg.S3 != nil { - conn, err = NewS3Connector(cfg.S3) } else { - return *new(T), fmt.Errorf("no storage driver configured") + return nil, fmt.Errorf("no storage driver configured") } } if err != nil { - return *new(T), err + return nil, err } - typedConn, ok := conn.(T) + typedConn, ok := conn.(IMainStorage) if !ok { - return *new(T), fmt.Errorf("connector does not implement the required interface") + return nil, fmt.Errorf("connector does not implement the required interface") } return typedConn, nil diff --git a/internal/storage/kafka.go b/internal/storage/kafka.go new file mode 100644 index 0000000..1de9014 --- /dev/null +++ b/internal/storage/kafka.go @@ -0,0 +1,125 @@ +package storage + +import ( + "fmt" + "math/big" + + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + 
"github.com/thirdweb-dev/indexer/internal/common" +) + +// KafkaConnector uses Redis for metadata storage and Kafka for block data delivery +type KafkaConnector struct { + cfg *config.KafkaConfig + kafkaPublisher *KafkaPublisher +} + +func NewKafkaConnector(cfg *config.KafkaConfig) (*KafkaConnector, error) { + // Initialize Kafka publisher + kafkaPublisher, err := NewKafkaPublisher(cfg) + if err != nil { + return nil, err + } + + return &KafkaConnector{ + cfg: cfg, + kafkaPublisher: kafkaPublisher, + }, nil +} + +// InsertBlockData publishes block data to Kafka instead of storing in database +func (kr *KafkaConnector) InsertBlockData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + // Publish to Kafka + if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { + return fmt.Errorf("failed to publish block data to kafka: %w", err) + } + log.Debug(). + Int("blocks", len(data)). + Msg("Published block data to Kafka") + + return nil +} + +// ReplaceBlockData handles reorg by publishing both old and new data to Kafka +func (kr *KafkaConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { + if len(data) == 0 { + return nil, nil + } + + oldBlocks := []common.BlockData{} + + // TODO: We need to fetch the old blocks from the primary data store + if err := kr.kafkaPublisher.PublishReorg(data, data); err != nil { + return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) + } + + // save cursor + return oldBlocks, nil +} + +func (kr *KafkaConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { + return QueryResult[common.TokenBalance]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { + return QueryResult[common.TokenTransfer]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { + return nil, fmt.Errorf("query 
operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +// Query methods return errors as this is a write-only connector for streaming +func (kr *KafkaConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { + return QueryResult[common.Block]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { + return QueryResult[common.Transaction]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { + return QueryResult[common.Log]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { + return QueryResult[common.Trace]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { + return QueryResult[interface{}]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +// Close closes the Redis connection +func (kr *KafkaConnector) Close() error { + return kr.kafkaPublisher.Close() +} diff --git a/internal/storage/kafka_redis.go b/internal/storage/kafka_redis.go deleted file mode 100644 index e76a8e9..0000000 --- a/internal/storage/kafka_redis.go +++ /dev/null @@ -1,294 +0,0 @@ -package storage - -import ( - "context" - "fmt" - "math/big" - "time" - - "github.com/redis/go-redis/v9" - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" -) - -// Redis key namespace constants for better organization and maintainability -const ( - // Cursor keys for tracking positions - KeyCursorReorg = "cursor:reorg" // String: cursor:reorg:{chainId} - KeyCursorPublish = "cursor:publish" // String: cursor:publish:{chainId} - KeyCursorCommit = "cursor:commit" // String: cursor:commit:{chainId} -) - -// KafkaRedisConnector uses Redis for metadata storage and Kafka for block data delivery -type KafkaRedisConnector struct { - redisClient *redis.Client - cfg *config.KafkaConfig - kafkaPublisher *KafkaPublisher -} - -func NewKafkaRedisConnector(cfg *config.KafkaConfig) (*KafkaRedisConnector, error) { - // Connect to Redis - redisClient := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%d", cfg.Redis.Host, cfg.Redis.Port), - Password: cfg.Redis.Password, - DB: cfg.Redis.DB, - }) - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - if err := redisClient.Ping(ctx).Err(); err != nil { - return nil, fmt.Errorf("failed to connect to redis: %w", err) - } - - // Initialize Kafka publisher - kafkaPublisher, err := NewKafkaPublisher(cfg) - if err != nil { - return nil, err - } - - return 
&KafkaRedisConnector{ - redisClient: redisClient, - cfg: cfg, - kafkaPublisher: kafkaPublisher, - }, nil -} - -// Orchestrator Storage Implementation - Block failures not supported - -func (kr *KafkaRedisConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) { - return nil, fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") -} - -func (kr *KafkaRedisConnector) StoreBlockFailures(failures []common.BlockFailure) error { - return fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") -} - -func (kr *KafkaRedisConnector) DeleteBlockFailures(failures []common.BlockFailure) error { - return fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") -} - -func (kr *KafkaRedisConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) - - val, err := kr.redisClient.Get(ctx, key).Result() - if err == redis.Nil { - return big.NewInt(0), nil - } else if err != nil { - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(val, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", val) - } - - return blockNumber, nil -} - -func (kr *KafkaRedisConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) - return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() -} - -// Staging Storage Implementation - Not supported for KafkaRedis connector - -func (kr *KafkaRedisConnector) InsertStagingData(data []common.BlockData) error { - return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") -} - -func (kr *KafkaRedisConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { - return nil, fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") -} - -func (kr *KafkaRedisConnector) DeleteStagingData(data []common.BlockData) error { - return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") -} - -func (kr *KafkaRedisConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) - - val, err := kr.redisClient.Get(ctx, key).Result() - if err == redis.Nil { - return big.NewInt(0), nil - } else if err != nil { - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(val, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", val) - } - return blockNumber, nil -} - -func (kr *KafkaRedisConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) - return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() -} - -func (kr *KafkaRedisConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) - - val, err := kr.redisClient.Get(ctx, key).Result() - if err == redis.Nil { - return big.NewInt(0), nil - } else if err != nil { - return nil, err - } - - 
blockNumber, ok := new(big.Int).SetString(val, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", val) - } - return blockNumber, nil -} - -func (kr *KafkaRedisConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) - return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() -} - -func (kr *KafkaRedisConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { - return nil, fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") -} - -func (kr *KafkaRedisConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { - return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") -} - -// InsertBlockData publishes block data to Kafka instead of storing in database -func (kr *KafkaRedisConnector) InsertBlockData(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - // Publish to Kafka - if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { - return fmt.Errorf("failed to publish block data to kafka: %w", err) - } - log.Debug(). - Int("blocks", len(data)). - Msg("Published block data to Kafka") - - // Update cursor to track the highest block number published - if len(data) > 0 { - // Find the highest block number in the batch - var maxBlock *big.Int - for _, blockData := range data { - if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { - maxBlock = blockData.Block.Number - } - } - if maxBlock != nil { - ctx := context.Background() - chainId := data[0].Block.ChainId - key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) - if err := kr.redisClient.Set(ctx, key, maxBlock.String(), 0).Err(); err != nil { - return err - } - } - } - - return nil -} - -// ReplaceBlockData handles reorg by publishing both old and new data to Kafka -func (kr *KafkaRedisConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { - if len(data) == 0 { - return nil, nil - } - - oldBlocks := []common.BlockData{} - - // TODO: We need to fetch the old blocks from the primary data store - if err := kr.kafkaPublisher.PublishReorg(data, data); err != nil { - return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) - } - - // save cursor - return oldBlocks, nil -} - -func (kr *KafkaRedisConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) - - val, err := kr.redisClient.Get(ctx, key).Result() - if err == redis.Nil { - return big.NewInt(0), nil - } else if err != nil { - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(val, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", val) - } - return blockNumber, nil -} - -func (kr *KafkaRedisConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector 
for streaming") -} - -func (kr *KafkaRedisConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { - return QueryResult[common.TokenBalance]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { - return QueryResult[common.TokenTransfer]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -// Query methods return errors as this is a write-only connector for streaming -func (kr *KafkaRedisConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { - return QueryResult[common.Block]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { - return QueryResult[common.Transaction]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { - return QueryResult[common.Log]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { - return QueryResult[common.Trace]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { - return QueryResult[interface{}]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -// Close closes the Redis connection -func (kr *KafkaRedisConnector) Close() error { - return kr.redisClient.Close() -} diff --git a/internal/storage/redis.go b/internal/storage/redis.go new file mode 100644 index 0000000..d48b17f --- /dev/null +++ b/internal/storage/redis.go @@ -0,0 +1,125 @@ +package storage + +import ( + 
"context" + "fmt" + "math/big" + "time" + + "github.com/redis/go-redis/v9" + config "github.com/thirdweb-dev/indexer/configs" +) + +// Redis key namespace constants for better organization and maintainability +const ( + // Cursor keys for tracking positions + KeyCursorReorg = "cursor:reorg" // String: cursor:reorg:{chainId} + KeyCursorPublish = "cursor:publish" // String: cursor:publish:{chainId} + KeyCursorCommit = "cursor:commit" // String: cursor:commit:{chainId} +) + +// RedisConnector uses Redis for metadata storage +type RedisConnector struct { + redisClient *redis.Client + cfg *config.RedisConfig +} + +func NewRedisConnector(cfg *config.RedisConfig) (*RedisConnector, error) { + // Connect to Redis + redisClient := redis.NewClient(&redis.Options{ + Addr: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port), + Password: cfg.Password, + DB: cfg.DB, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := redisClient.Ping(ctx).Err(); err != nil { + return nil, fmt.Errorf("failed to connect to redis: %w", err) + } + + return &RedisConnector{ + redisClient: redisClient, + cfg: cfg, + }, nil +} + +// Orchestrator Storage Implementation +func (kr *RedisConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + + return blockNumber, nil +} + +func (kr *RedisConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +func (kr *RedisConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *RedisConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +func (kr *RedisConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *RedisConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +// Close 
closes the Redis connection +func (kr *RedisConnector) Close() error { + return kr.redisClient.Close() +} From f920a71b653aa12f100be381910f8f76438879da Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 20:45:47 +0000 Subject: [PATCH 30/43] kafka requires orchestrator --- cmd/migrate_valid.go | 2 +- internal/handlers/logs_handlers.go | 3 ++- internal/orchestrator/validator.go | 1 + internal/storage/connector.go | 14 ++++++++++---- internal/storage/kafka.go | 24 ++++++++++++++++++------ 5 files changed, 32 insertions(+), 12 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index 04dcc60..cb384de 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -314,7 +314,7 @@ func NewMigrator() *Migrator { validator := orchestrator.NewValidator(rpcClient, sourceConnector) - destinationConnector, err := storage.NewMainConnector(&config.Cfg.Migrator.Destination) + destinationConnector, err := storage.NewMainConnector(&config.Cfg.Migrator.Destination, &sourceConnector.OrchestratorStorage) if err != nil { log.Fatal().Err(err).Msg("Failed to initialize storage") } diff --git a/internal/handlers/logs_handlers.go b/internal/handlers/logs_handlers.go index 89f9d2e..63ac197 100644 --- a/internal/handlers/logs_handlers.go +++ b/internal/handlers/logs_handlers.go @@ -224,7 +224,8 @@ func decodeLogsIfNeeded(chainId string, logs []common.Log, eventABI *abi.Event, func getMainStorage() (storage.IMainStorage, error) { storageOnce.Do(func() { var err error - mainStorage, err = storage.NewMainConnector(&config.Cfg.Storage.Main) + // TODO: move this to a QueryConnector later to decouple read/write connector + mainStorage, err = storage.NewMainConnector(&config.Cfg.Storage.Main, nil) if err != nil { storageErr = err log.Error().Err(err).Msg("Error creating storage connector") diff --git a/internal/orchestrator/validator.go b/internal/orchestrator/validator.go index b37b986..63a174f 100644 --- a/internal/orchestrator/validator.go +++ b/internal/orchestrator/validator.go @@ -186,5 +186,6 @@ func (v *Validator) FindAndFixGaps(startBlock *big.Int, endBlock *big.Int) error log.Error().Err(err).Msgf("Failed to insert missing blocks: %v", polledBlocks) return err } + return nil } diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 4767578..23fdb52 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -175,7 +175,7 @@ func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { return IStorage{}, fmt.Errorf("failed to create staging storage: %w", err) } - storage.MainStorage, err = NewMainConnector(&cfg.Main) + storage.MainStorage, err = NewMainConnector(&cfg.Main, &storage.OrchestratorStorage) if err != nil { return IStorage{}, fmt.Errorf("failed to create main storage: %w", err) } @@ -302,7 +302,7 @@ func NewStagingConnector(cfg *config.StorageStagingConfig) (IStagingStorage, err return typedConn, nil } -func NewMainConnector(cfg *config.StorageMainConfig) (IMainStorage, error) { +func NewMainConnector(cfg *config.StorageMainConfig, orchestratorStorage *IOrchestratorStorage) (IMainStorage, error) { var conn interface{} var err error @@ -319,7 +319,10 @@ func NewMainConnector(cfg *config.StorageMainConfig) (IMainStorage, error) { if cfg.Kafka == nil { return nil, fmt.Errorf("kafka storage type specified but kafka config is nil") } - conn, err = NewKafkaConnector(cfg.Kafka) + if orchestratorStorage == nil { + return nil, fmt.Errorf("orchestrator storage must be provided for 
kafka main storage") + } + conn, err = NewKafkaConnector(cfg.Kafka, orchestratorStorage) case "s3": if cfg.S3 == nil { return nil, fmt.Errorf("s3 storage type specified but s3 config is nil") @@ -346,7 +349,10 @@ func NewMainConnector(cfg *config.StorageMainConfig) (IMainStorage, error) { } else { // Auto mode: use the first non-nil config (existing behavior) if cfg.Kafka != nil { - conn, err = NewKafkaConnector(cfg.Kafka) + if orchestratorStorage == nil { + return nil, fmt.Errorf("orchestrator storage must be provided for kafka main storage") + } + conn, err = NewKafkaConnector(cfg.Kafka, orchestratorStorage) } else if cfg.S3 != nil { conn, err = NewS3Connector(cfg.S3) } else if cfg.Postgres != nil { diff --git a/internal/storage/kafka.go b/internal/storage/kafka.go index 1de9014..747d853 100644 --- a/internal/storage/kafka.go +++ b/internal/storage/kafka.go @@ -11,20 +11,26 @@ import ( // KafkaConnector uses Redis for metadata storage and Kafka for block data delivery type KafkaConnector struct { - cfg *config.KafkaConfig - kafkaPublisher *KafkaPublisher + cfg *config.KafkaConfig + kafkaPublisher *KafkaPublisher + orchestratorStorage IOrchestratorStorage } -func NewKafkaConnector(cfg *config.KafkaConfig) (*KafkaConnector, error) { +func NewKafkaConnector(cfg *config.KafkaConfig, orchestratorStorage *IOrchestratorStorage) (*KafkaConnector, error) { // Initialize Kafka publisher kafkaPublisher, err := NewKafkaPublisher(cfg) if err != nil { return nil, err } + if orchestratorStorage == nil { + return nil, fmt.Errorf("orchestrator storage must be provided for kafka connector") + } + return &KafkaConnector{ - cfg: cfg, - kafkaPublisher: kafkaPublisher, + cfg: cfg, + kafkaPublisher: kafkaPublisher, + orchestratorStorage: *orchestratorStorage, }, nil } @@ -42,6 +48,12 @@ func (kr *KafkaConnector) InsertBlockData(data []common.BlockData) error { Int("blocks", len(data)). 
Msg("Published block data to Kafka") + chainId := data[0].Block.ChainId + maxBlockNumber := data[len(data)-1].Block.Number + if err := kr.orchestratorStorage.SetLastCommittedBlockNumber(chainId, maxBlockNumber); err != nil { + return fmt.Errorf("failed to update last committed block number in orchestrator storage: %w", err) + } + return nil } @@ -63,7 +75,7 @@ func (kr *KafkaConnector) ReplaceBlockData(data []common.BlockData) ([]common.Bl } func (kr *KafkaConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { - return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") + return kr.orchestratorStorage.GetLastCommittedBlockNumber(chainId) } func (kr *KafkaConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { From 32eece50d4bc366f9e0eeea674f04e37a311a961 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 21:17:59 +0000 Subject: [PATCH 31/43] Fix orchestrator flag --- cmd/root.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 6aa04c7..1661638 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -311,6 +311,12 @@ func init() { viper.BindPFlag("storage.orchestrator.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-maxIdleConns")) viper.BindPFlag("storage.orchestrator.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-maxConnLifetime")) viper.BindPFlag("storage.orchestrator.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-connectTimeout")) + viper.BindPFlag("storage.orchestrator.redis.host", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-host")) + viper.BindPFlag("storage.orchestrator.redis.port", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-port")) + viper.BindPFlag("storage.orchestrator.redis.password", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-password")) + viper.BindPFlag("storage.orchestrator.redis.db", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-db")) + viper.BindPFlag("storage.orchestrator.badger.path", rootCmd.PersistentFlags().Lookup("storage-orchestrator-badger-path")) + viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) viper.BindPFlag("storage.staging.postgres.host", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-host")) viper.BindPFlag("storage.staging.postgres.port", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-port")) viper.BindPFlag("storage.staging.postgres.username", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-username")) @@ -321,19 +327,13 @@ func init() { viper.BindPFlag("storage.staging.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxIdleConns")) viper.BindPFlag("storage.staging.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxConnLifetime")) viper.BindPFlag("storage.staging.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-connectTimeout")) + viper.BindPFlag("storage.staging.badger.path", rootCmd.PersistentFlags().Lookup("storage-staging-badger-path")) + viper.BindPFlag("storage.staging.type", rootCmd.PersistentFlags().Lookup("storage-staging-type")) viper.BindPFlag("storage.main.kafka.brokers", 
rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) viper.BindPFlag("storage.main.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) - viper.BindPFlag("storage.orchestrator.redis.host", rootCmd.PersistentFlags().Lookup("storage-main-redis-host")) - viper.BindPFlag("storage.orchestrator.redis.port", rootCmd.PersistentFlags().Lookup("storage-main-redis-port")) - viper.BindPFlag("storage.orchestrator.redis.password", rootCmd.PersistentFlags().Lookup("storage-main-redis-password")) - viper.BindPFlag("storage.orchestrator.redis.db", rootCmd.PersistentFlags().Lookup("storage-main-redis-db")) - viper.BindPFlag("storage.staging.type", rootCmd.PersistentFlags().Lookup("storage-staging-type")) viper.BindPFlag("storage.main.type", rootCmd.PersistentFlags().Lookup("storage-main-type")) - viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) - viper.BindPFlag("storage.staging.badger.path", rootCmd.PersistentFlags().Lookup("storage-staging-badger-path")) - viper.BindPFlag("storage.orchestrator.badger.path", rootCmd.PersistentFlags().Lookup("storage-orchestrator-badger-path")) viper.BindPFlag("storage.main.s3.bucket", rootCmd.PersistentFlags().Lookup("storage-main-s3-bucket")) viper.BindPFlag("storage.main.s3.region", rootCmd.PersistentFlags().Lookup("storage-main-s3-region")) viper.BindPFlag("storage.main.s3.prefix", rootCmd.PersistentFlags().Lookup("storage-main-s3-prefix")) From e35ff76abb61dd9b87b57c4929c2ab749216b620 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 21:57:28 +0000 Subject: [PATCH 32/43] Fix badger keys --- internal/storage/badger.go | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/internal/storage/badger.go b/internal/storage/badger.go index e649040..991e479 100644 --- a/internal/storage/badger.go +++ b/internal/storage/badger.go @@ -91,8 +91,16 @@ func blockKey(chainId *big.Int, blockNumber *big.Int) []byte { return []byte(fmt.Sprintf("blockdata:%s:%s", chainId.String(), blockNumber.String())) } -func blockFailureKey(chainId *big.Int, blockNumber *big.Int, timestamp int64) []byte { - return []byte(fmt.Sprintf("blockfailure:%s:%s:%d", chainId.String(), blockNumber.String(), timestamp)) +func blockKeyRange(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("blockdata:%s:", chainId.String())) +} + +func blockFailureKey(chainId *big.Int, blockNumber *big.Int) []byte { + return []byte(fmt.Sprintf("blockfailure:%s:%s", chainId.String(), blockNumber.String())) +} + +func blockFailureKeyRange(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("blockfailure:%s:", chainId.String())) } func lastReorgKey(chainId *big.Int) []byte { @@ -113,7 +121,7 @@ func (bc *BadgerConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailu defer bc.mu.RUnlock() var failures []common.BlockFailure - prefix := fmt.Sprintf("f:%d:", qf.ChainId.Uint64()) + prefix := blockFailureKeyRange(qf.ChainId) err := bc.db.View(func(txn *badger.Txn) error { opts := badger.DefaultIteratorOptions @@ -160,7 +168,7 @@ func (bc *BadgerConnector) StoreBlockFailures(failures []common.BlockFailure) er return bc.db.Update(func(txn *badger.Txn) error { for _, failure := range failures { - key := 
blockFailureKey(failure.ChainId, failure.BlockNumber, time.Now().Unix()) + key := blockFailureKey(failure.ChainId, failure.BlockNumber) var buf bytes.Buffer if err := gob.NewEncoder(&buf).Encode(failure); err != nil { @@ -182,7 +190,7 @@ func (bc *BadgerConnector) DeleteBlockFailures(failures []common.BlockFailure) e return bc.db.Update(func(txn *badger.Txn) error { for _, failure := range failures { // Delete all failure entries for this block - prefix := fmt.Sprintf("f:%d:%s:", failure.ChainId.Uint64(), failure.BlockNumber.String()) + prefix := blockFailureKey(failure.ChainId, failure.BlockNumber) opts := badger.DefaultIteratorOptions opts.Prefix = []byte(prefix) @@ -293,7 +301,7 @@ func (bc *BadgerConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, e } // Range query - prefix := fmt.Sprintf("b:%d:", qf.ChainId.Uint64()) + prefix := blockKeyRange(qf.ChainId) err := bc.db.View(func(txn *badger.Txn) error { opts := badger.DefaultIteratorOptions @@ -366,7 +374,7 @@ func (bc *BadgerConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart defer bc.mu.RUnlock() var maxBlock *big.Int - prefix := fmt.Sprintf("b:%d:", chainId.Uint64()) + prefix := blockKeyRange(chainId) err := bc.db.View(func(txn *badger.Txn) error { opts := badger.DefaultIteratorOptions @@ -481,7 +489,7 @@ func (bc *BadgerConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNum bc.mu.Lock() defer bc.mu.Unlock() - prefix := fmt.Sprintf("b:%d:", chainId.Uint64()) + prefix := blockKeyRange(chainId) return bc.db.Update(func(txn *badger.Txn) error { opts := badger.DefaultIteratorOptions From 6233232efcb814bf803a00c40a3b75792443fce6 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 22:10:58 +0000 Subject: [PATCH 33/43] Fix backfill missing blocks in staging --- internal/orchestrator/committer.go | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index 1e00602..c557933 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -631,18 +631,7 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big } func (c *Committer) handleMissingStagingData(ctx context.Context, blocksToCommit []*big.Int) { - // Checks if there are any blocks in staging after the current range end - lastStagedBlockNumber, err := c.storage.StagingStorage.GetLastStagedBlockNumber(c.rpc.GetChainID(), blocksToCommit[len(blocksToCommit)-1], big.NewInt(0)) - if err != nil { - log.Error().Err(err).Msg("Error checking staged data for missing range") - return - } - if lastStagedBlockNumber == nil || lastStagedBlockNumber.Sign() <= 0 { - log.Debug().Msgf("Committer is caught up with staging. No need to poll for missing blocks.") - return - } log.Debug().Msgf("Detected missing blocks in staging data starting from %s.", blocksToCommit[0].String()) - blocksToPoll := blocksToCommit if len(blocksToCommit) > int(c.poller.blocksPerPoll) { blocksToPoll = blocksToCommit[:int(c.poller.blocksPerPoll)] From 31d923f1211ff1a0aeb14f7aa542e9e7739b6350 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 22:19:48 +0000 Subject: [PATCH 34/43] Revert "Fix backfill missing blocks in staging" This reverts commit 6233232efcb814bf803a00c40a3b75792443fce6. 
--- internal/orchestrator/committer.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index c557933..1e00602 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -631,7 +631,18 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big } func (c *Committer) handleMissingStagingData(ctx context.Context, blocksToCommit []*big.Int) { + // Checks if there are any blocks in staging after the current range end + lastStagedBlockNumber, err := c.storage.StagingStorage.GetLastStagedBlockNumber(c.rpc.GetChainID(), blocksToCommit[len(blocksToCommit)-1], big.NewInt(0)) + if err != nil { + log.Error().Err(err).Msg("Error checking staged data for missing range") + return + } + if lastStagedBlockNumber == nil || lastStagedBlockNumber.Sign() <= 0 { + log.Debug().Msgf("Committer is caught up with staging. No need to poll for missing blocks.") + return + } log.Debug().Msgf("Detected missing blocks in staging data starting from %s.", blocksToCommit[0].String()) + blocksToPoll := blocksToCommit if len(blocksToCommit) > int(c.poller.blocksPerPoll) { blocksToPoll = blocksToCommit[:int(c.poller.blocksPerPoll)] From 7cb6ff1ee64dd8b6f02ea5621b7bd21614141e58 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 27 Aug 2025 03:36:13 +0000 Subject: [PATCH 35/43] block buffer --- internal/storage/block_buffer.go | 229 +++++++++++++++++++++++++++++++ internal/storage/s3.go | 221 ++++++++++++++--------------- 2 files changed, 334 insertions(+), 116 deletions(-) create mode 100644 internal/storage/block_buffer.go diff --git a/internal/storage/block_buffer.go b/internal/storage/block_buffer.go new file mode 100644 index 0000000..ddec1aa --- /dev/null +++ b/internal/storage/block_buffer.go @@ -0,0 +1,229 @@ +package storage + +import ( + "fmt" + "math/big" + "sync" + + "github.com/rs/zerolog/log" + "github.com/thirdweb-dev/indexer/internal/common" +) + +// BlockBuffer manages buffering of block data with size and count limits +type BlockBuffer struct { + mu sync.RWMutex + data []common.BlockData + sizeBytes int64 + maxSizeBytes int64 + maxBlocks int +} + +// NewBlockBuffer creates a new block buffer +func NewBlockBuffer(maxSizeMB int64, maxBlocks int) *BlockBuffer { + return &BlockBuffer{ + data: make([]common.BlockData, 0), + maxSizeBytes: maxSizeMB * 1024 * 1024, + maxBlocks: maxBlocks, + } +} + +// Add adds blocks to the buffer and returns true if flush is needed +func (b *BlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool { + if len(blocks) == 0 { + return false + } + + b.mu.Lock() + defer b.mu.Unlock() + + // Add to buffer + b.data = append(b.data, blocks...) + b.sizeBytes += actualSizeBytes + + log.Debug(). + Int("block_count", len(blocks)). + Int64("size_bytes", actualSizeBytes). + Int64("total_size_bytes", b.sizeBytes). + Int("total_blocks", len(b.data)). + Msg("Added blocks to buffer") + + // Check if flush is needed + return b.shouldFlushLocked() +} + +// Flush removes all data from the buffer and returns it +func (b *BlockBuffer) Flush() []common.BlockData { + b.mu.Lock() + defer b.mu.Unlock() + + if len(b.data) == 0 { + return nil + } + + // Take ownership of data + data := b.data + b.data = make([]common.BlockData, 0) + b.sizeBytes = 0 + + log.Info(). + Int("block_count", len(data)). 
+ Msg("Flushing buffer") + + return data +} + +// ShouldFlush checks if the buffer should be flushed based on configured thresholds +func (b *BlockBuffer) ShouldFlush() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return b.shouldFlushLocked() +} + +// Size returns the current buffer size in bytes and block count +func (b *BlockBuffer) Size() (int64, int) { + b.mu.RLock() + defer b.mu.RUnlock() + return b.sizeBytes, len(b.data) +} + +// IsEmpty returns true if the buffer is empty +func (b *BlockBuffer) IsEmpty() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return len(b.data) == 0 +} + +// GetData returns a copy of the current buffer data +func (b *BlockBuffer) GetData() []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + result := make([]common.BlockData, len(b.data)) + copy(result, b.data) + return result +} + +// GetBlocksInRange returns blocks from the buffer that fall within the given range +func (b *BlockBuffer) GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result []common.BlockData + for _, block := range b.data { + if block.Block.ChainId.Cmp(chainId) == 0 { + blockNum := block.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + result = append(result, block) + } + } + } + return result +} + +// GetBlockByNumber returns a specific block from the buffer if it exists +func (b *BlockBuffer) GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + for _, block := range b.data { + if block.Block.ChainId.Cmp(chainId) == 0 && block.Block.Number.Cmp(blockNumber) == 0 { + blockCopy := block + return &blockCopy + } + } + return nil +} + +// GetMaxBlockNumber returns the maximum block number for a chain in the buffer +func (b *BlockBuffer) GetMaxBlockNumber(chainId *big.Int) *big.Int { + b.mu.RLock() + defer b.mu.RUnlock() + + var maxBlock *big.Int + for _, block := range b.data { + if block.Block.ChainId.Cmp(chainId) == 0 { + if maxBlock == nil || block.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(block.Block.Number) + } + } + } + return maxBlock +} + +// Clear empties the buffer without returning data +func (b *BlockBuffer) Clear() { + b.mu.Lock() + defer b.mu.Unlock() + + b.data = make([]common.BlockData, 0) + b.sizeBytes = 0 +} + +// Stats returns statistics about the buffer +func (b *BlockBuffer) Stats() BufferStats { + b.mu.RLock() + defer b.mu.RUnlock() + + stats := BufferStats{ + BlockCount: len(b.data), + SizeBytes: b.sizeBytes, + ChainCount: 0, + ChainStats: make(map[uint64]ChainStats), + } + + // Calculate per-chain statistics + for _, block := range b.data { + chainId := block.Block.ChainId.Uint64() + chainStat := stats.ChainStats[chainId] + + if chainStat.MinBlock == nil || block.Block.Number.Cmp(chainStat.MinBlock) < 0 { + chainStat.MinBlock = new(big.Int).Set(block.Block.Number) + } + if chainStat.MaxBlock == nil || block.Block.Number.Cmp(chainStat.MaxBlock) > 0 { + chainStat.MaxBlock = new(big.Int).Set(block.Block.Number) + } + chainStat.BlockCount++ + + stats.ChainStats[chainId] = chainStat + } + + stats.ChainCount = len(stats.ChainStats) + return stats +} + +// Private methods + +func (b *BlockBuffer) shouldFlushLocked() bool { + // Check size limit + if b.maxSizeBytes > 0 && b.sizeBytes >= b.maxSizeBytes { + return true + } + + // Check block count limit + if b.maxBlocks > 0 && len(b.data) >= b.maxBlocks { + return true + } + + return false +} + +// BufferStats 
contains statistics about the buffer +type BufferStats struct { + BlockCount int + SizeBytes int64 + ChainCount int + ChainStats map[uint64]ChainStats +} + +// ChainStats contains per-chain statistics +type ChainStats struct { + BlockCount int + MinBlock *big.Int + MaxBlock *big.Int +} + +// String returns a string representation of buffer stats +func (s BufferStats) String() string { + return fmt.Sprintf("BufferStats{blocks=%d, size=%dMB, chains=%d}", + s.BlockCount, s.SizeBytes/(1024*1024), s.ChainCount) +} diff --git a/internal/storage/s3.go b/internal/storage/s3.go index 2ed0a74..4e1de31 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -27,15 +27,15 @@ type S3Connector struct { client *s3.Client config *config.S3Config formatter DataFormatter + buffer *BlockBuffer - // Buffering - buffer []common.BlockData - bufferMu sync.Mutex - bufferSize int64 // Current buffer size in bytes - bufferTimer *time.Timer + // Flush control stopCh chan struct{} flushCh chan struct{} flushDoneCh chan struct{} // Signals when flush is complete + flushTimer *time.Timer + timerMu sync.Mutex + lastAddTime time.Time wg sync.WaitGroup closeOnce sync.Once } @@ -114,11 +114,14 @@ func NewS3Connector(cfg *config.S3Config) (*S3Connector, error) { return nil, fmt.Errorf("unsupported format: %s", cfg.Format) } + // Create buffer with configured settings + buffer := NewBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) + s3c := &S3Connector{ client: s3Client, config: cfg, formatter: formatter, - buffer: make([]common.BlockData, 0), + buffer: buffer, stopCh: make(chan struct{}), flushCh: make(chan struct{}, 1), flushDoneCh: make(chan struct{}), @@ -136,9 +139,6 @@ func (s *S3Connector) InsertBlockData(data []common.BlockData) error { return nil } - s.bufferMu.Lock() - defer s.bufferMu.Unlock() - // Calculate actual serialized size for accurate memory tracking formattedData, err := s.formatter.FormatBlockData(data) if err != nil { @@ -147,42 +147,36 @@ func (s *S3Connector) InsertBlockData(data []common.BlockData) error { // Use actual serialized size for accurate memory tracking actualSize := int64(len(formattedData)) - s.bufferSize += actualSize log.Debug(). Int("block_count", len(data)). Int64("size_bytes", actualSize). Int64("avg_bytes_per_block", actualSize/int64(len(data))). Msg("Calculated actual block data size") - // Add to buffer - s.buffer = append(s.buffer, data...) 
- - // Reset timer if this is the first data in buffer - if len(s.buffer) == len(data) && s.bufferTimer == nil { - s.bufferTimer = time.AfterFunc(time.Duration(s.config.BufferTimeout)*time.Second, func() { + // Add to buffer and check if flush is needed + shouldFlush := s.buffer.Add(data, actualSize) + + // Start or reset timer when first data is added + s.timerMu.Lock() + sizeBytes, blockCount := s.buffer.Size() + if sizeBytes == actualSize && blockCount == len(data) && s.config.BufferTimeout > 0 { + // First data added to buffer, track time and start timer + s.lastAddTime = time.Now() + if s.flushTimer != nil { + s.flushTimer.Stop() + } + s.flushTimer = time.AfterFunc(time.Duration(s.config.BufferTimeout)*time.Second, func() { select { case s.flushCh <- struct{}{}: default: } }) } - - // Check if we should flush based on size or block count - shouldFlush := s.bufferSize >= s.config.BufferSize*1024*1024 // Convert MB to bytes - - // Only check block count if MaxBlocksPerFile is set (> 0) - if s.config.MaxBlocksPerFile > 0 && len(s.buffer) >= s.config.MaxBlocksPerFile { - shouldFlush = true - } + s.timerMu.Unlock() if shouldFlush { - // Stop timer if running - if s.bufferTimer != nil { - s.bufferTimer.Stop() - s.bufferTimer = nil - } - - // Trigger flush + // Stop timer and trigger flush + s.stopFlushTimer() select { case s.flushCh <- struct{}{}: default: @@ -196,6 +190,10 @@ func (s *S3Connector) InsertBlockData(data []common.BlockData) error { func (s *S3Connector) flushWorker() { defer s.wg.Done() + // Check periodically for expired buffers + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + for { select { case <-s.stopCh: @@ -209,30 +207,56 @@ func (s *S3Connector) flushWorker() { case s.flushDoneCh <- struct{}{}: default: } + case <-ticker.C: + // Check if buffer has expired based on our own tracking + if s.isBufferExpired() { + s.flushBuffer() + } } } } +// stopFlushTimer stops the flush timer if it's running +func (s *S3Connector) stopFlushTimer() { + s.timerMu.Lock() + defer s.timerMu.Unlock() + + if s.flushTimer != nil { + s.flushTimer.Stop() + s.flushTimer = nil + } +} + +// isBufferExpired checks if the buffer has exceeded the timeout duration +func (s *S3Connector) isBufferExpired() bool { + s.timerMu.Lock() + defer s.timerMu.Unlock() + + if s.config.BufferTimeout <= 0 || s.lastAddTime.IsZero() || s.buffer.IsEmpty() { + return false + } + + return time.Since(s.lastAddTime) > time.Duration(s.config.BufferTimeout)*time.Second +} + // flushBuffer writes buffered data to S3 func (s *S3Connector) flushBuffer() error { - s.bufferMu.Lock() - if len(s.buffer) == 0 { - s.bufferMu.Unlock() + data := s.buffer.Flush() + if len(data) == 0 { return nil } - // Take ownership of buffer - data := s.buffer - s.buffer = make([]common.BlockData, 0) - s.bufferSize = 0 + // Stop timer and reset last add time since we're flushing + s.stopFlushTimer() + s.timerMu.Lock() + s.lastAddTime = time.Time{} + s.timerMu.Unlock() - // Stop timer if running - if s.bufferTimer != nil { - s.bufferTimer.Stop() - s.bufferTimer = nil - } - s.bufferMu.Unlock() + return s.uploadBatchData(data) +} +// uploadBatchData handles uploading batched data to S3, grouped by chain +func (s *S3Connector) uploadBatchData(data []common.BlockData) error { // Group blocks by chain to generate appropriate keys chainGroups := make(map[uint64][]common.BlockData) for _, block := range data { @@ -276,11 +300,7 @@ func (s *S3Connector) flushBuffer() error { // Flush manually triggers a buffer flush and waits for completion 
func (s *S3Connector) Flush() error { // Check if buffer has data - s.bufferMu.Lock() - hasData := len(s.buffer) > 0 - s.bufferMu.Unlock() - - if !hasData { + if s.buffer.IsEmpty() { return nil } @@ -315,8 +335,11 @@ func (s *S3Connector) Flush() error { // Close closes the S3 connector and flushes any remaining data func (s *S3Connector) Close() error { var closeErr error - + s.closeOnce.Do(func() { + // Stop the flush timer + s.stopFlushTimer() + // First, ensure any pending data is flushed if err := s.Flush(); err != nil { log.Error().Err(err).Msg("Error flushing buffer during close") @@ -573,16 +596,11 @@ func (s *S3Connector) GetTokenTransfers(qf TransfersQueryFilter, fields ...strin } func (s *S3Connector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { - maxBlock := big.NewInt(0) - // First check the buffer for blocks from this chain - s.bufferMu.Lock() - for _, block := range s.buffer { - if block.Block.ChainId.Cmp(chainId) == 0 && block.Block.Number.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(block.Block.Number) - } + maxBlock := s.buffer.GetMaxBlockNumber(chainId) + if maxBlock == nil { + maxBlock = big.NewInt(0) } - s.bufferMu.Unlock() // Then check S3 for the maximum block number prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) @@ -622,19 +640,14 @@ func (s *S3Connector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big foundAny := false // First check the buffer for blocks in this range - s.bufferMu.Lock() - for _, block := range s.buffer { - if block.Block.ChainId.Cmp(chainId) == 0 { - blockNum := block.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { - if !foundAny || blockNum.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(blockNum) - foundAny = true - } - } + bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) + for _, block := range bufferBlocks { + blockNum := block.Block.Number + if !foundAny || blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) + foundAny = true } } - s.bufferMu.Unlock() // Then check S3 files prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) @@ -693,29 +706,24 @@ func (s *S3Connector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBl foundAny := false // First check the buffer for blocks in this range - s.bufferMu.Lock() - for _, block := range s.buffer { - if block.Block.ChainId.Cmp(chainId) == 0 { - blockNum := block.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { - count.Add(count, big.NewInt(1)) - - if !foundAny { - minBlock = new(big.Int).Set(blockNum) - maxBlock = new(big.Int).Set(blockNum) - foundAny = true - } else { - if blockNum.Cmp(minBlock) < 0 { - minBlock = new(big.Int).Set(blockNum) - } - if blockNum.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(blockNum) - } - } + bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) + for _, block := range bufferBlocks { + blockNum := block.Block.Number + count.Add(count, big.NewInt(1)) + + if !foundAny { + minBlock = new(big.Int).Set(blockNum) + maxBlock = new(big.Int).Set(blockNum) + foundAny = true + } else { + if blockNum.Cmp(minBlock) < 0 { + minBlock = new(big.Int).Set(blockNum) + } + if blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) } } } - s.bufferMu.Unlock() // Then check S3 files prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) @@ -786,8 +794,8 @@ func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, var headers []common.BlockHeader // First get headers from buffer - 
s.bufferMu.Lock() - for _, block := range s.buffer { + bufferData := s.buffer.GetData() + for _, block := range bufferData { if block.Block.ChainId.Cmp(chainId) == 0 { // Check if block is in range (if from is specified) if from != nil && block.Block.Number.Cmp(from) > 0 { @@ -804,7 +812,6 @@ func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, }) } } - s.bufferMu.Unlock() // If we need more headers, get from S3 if to == nil || len(headers) < int(to.Int64()) { @@ -846,19 +853,8 @@ func (s *S3Connector) GetValidationBlockData(chainId *big.Int, startBlock *big.I return nil, fmt.Errorf("start block must be less than or equal to end block") } - var blockData []common.BlockData - // First check buffer for blocks in range - s.bufferMu.Lock() - for _, block := range s.buffer { - if block.Block.ChainId.Cmp(chainId) == 0 { - blockNum := block.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { - blockData = append(blockData, block) - } - } - } - s.bufferMu.Unlock() + blockData := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) // Then find and download relevant files from S3 files, err := s.findFilesInRange(chainId, startBlock, endBlock) @@ -888,16 +884,10 @@ func (s *S3Connector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big. blockSet := make(map[string]bool) // First add blocks from buffer - s.bufferMu.Lock() - for _, block := range s.buffer { - if block.Block.ChainId.Cmp(chainId) == 0 { - blockNum := block.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { - blockSet[blockNum.String()] = true - } - } + bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) + for _, block := range bufferBlocks { + blockSet[block.Block.Number.String()] = true } - s.bufferMu.Unlock() // Then check S3 files in range files, err := s.findFilesInRange(chainId, startBlock, endBlock) @@ -944,8 +934,8 @@ func (s *S3Connector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int var result []common.BlockData // First check buffer for requested blocks - s.bufferMu.Lock() - for _, block := range s.buffer { + bufferData := s.buffer.GetData() + for _, block := range bufferData { if block.Block.ChainId.Cmp(chainId) == 0 { if blockNumMap[block.Block.Number.String()] { result = append(result, block) @@ -954,7 +944,6 @@ func (s *S3Connector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int } } } - s.bufferMu.Unlock() // If all blocks were in buffer, return early if len(blockNumMap) == 0 { From 59aad94a2e72c0482684e4a63f2bd231c2bfbd15 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 27 Aug 2025 08:35:19 +0000 Subject: [PATCH 36/43] Poller S3 support --- cmd/root.go | 25 + configs/config.go | 48 +- internal/orchestrator/orchestrator.go | 6 +- internal/orchestrator/poller.go | 27 +- internal/source/s3.go | 1119 +++++++++++++++++++++++++ internal/source/source.go | 14 + internal/storage/s3.go | 4 +- internal/worker/worker.go | 353 ++++++-- 8 files changed, 1496 insertions(+), 100 deletions(-) create mode 100644 internal/source/s3.go create mode 100644 internal/source/source.go diff --git a/cmd/root.go b/cmd/root.go index 1661638..fb3999f 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -56,6 +56,19 @@ func init() { rootCmd.PersistentFlags().Bool("poller-force-from-block", false, "Force the poller to start from the block specified in `poller-from-block`") rootCmd.PersistentFlags().Int("poller-until-block", 0, "Until which block to poll") 
rootCmd.PersistentFlags().Int("poller-parallel-pollers", 5, "Maximum number of parallel pollers") + rootCmd.PersistentFlags().String("poller-s3-bucket", "", "S3 bucket for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-region", "", "S3 region for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-prefix", "", "S3 prefix for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-accessKeyId", "", "S3 access key ID for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-secretAccessKey", "", "S3 secret access key for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-endpoint", "", "S3 endpoint for poller archive source (for S3-compatible services)") + rootCmd.PersistentFlags().String("poller-s3-format", "parquet", "S3 storage format for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-cacheDir", "/tmp/insight-archive", "Local cache directory for poller archive source") + rootCmd.PersistentFlags().Int("poller-s3-metadataTTL", 0, "Metadata cache TTL in seconds for poller archive source") + rootCmd.PersistentFlags().Int("poller-s3-fileCacheTTL", 0, "File cache TTL in seconds for poller archive source") + rootCmd.PersistentFlags().Int64("poller-s3-maxCacheSize", 0, "Max cache size in bytes for poller archive source (default 5GB)") + rootCmd.PersistentFlags().Int("poller-s3-cleanupInterval", 0, "Cache cleanup interval in seconds for poller archive source") + rootCmd.PersistentFlags().Int("poller-s3-maxConcurrentDownloads", 3, "Max concurrent downloads for poller archive source") rootCmd.PersistentFlags().Bool("committer-enabled", true, "Toggle committer") rootCmd.PersistentFlags().Int("committer-blocks-per-commit", 10, "How many blocks to commit each interval") rootCmd.PersistentFlags().Int("committer-interval", 1000, "How often to commit blocks in milliseconds") @@ -247,6 +260,18 @@ func init() { viper.BindPFlag("poller.forceFromBlock", rootCmd.PersistentFlags().Lookup("poller-force-from-block")) viper.BindPFlag("poller.untilBlock", rootCmd.PersistentFlags().Lookup("poller-until-block")) viper.BindPFlag("poller.parallelPollers", rootCmd.PersistentFlags().Lookup("poller-parallel-pollers")) + viper.BindPFlag("poller.s3.endpoint", rootCmd.PersistentFlags().Lookup("poller-s3-endpoint")) + viper.BindPFlag("poller.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("poller-s3-accessKeyId")) + viper.BindPFlag("poller.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("poller-s3-secretAccessKey")) + viper.BindPFlag("poller.s3.bucket", rootCmd.PersistentFlags().Lookup("poller-s3-bucket")) + viper.BindPFlag("poller.s3.region", rootCmd.PersistentFlags().Lookup("poller-s3-region")) + viper.BindPFlag("poller.s3.prefix", rootCmd.PersistentFlags().Lookup("poller-s3-prefix")) + viper.BindPFlag("poller.s3.cacheDir", rootCmd.PersistentFlags().Lookup("poller-s3-cacheDir")) + viper.BindPFlag("poller.s3.metadataTTL", rootCmd.PersistentFlags().Lookup("poller-s3-metadataTTL")) + viper.BindPFlag("poller.s3.fileCacheTTL", rootCmd.PersistentFlags().Lookup("poller-s3-fileCacheTTL")) + viper.BindPFlag("poller.s3.maxCacheSize", rootCmd.PersistentFlags().Lookup("poller-s3-maxCacheSize")) + viper.BindPFlag("poller.s3.cleanupInterval", rootCmd.PersistentFlags().Lookup("poller-s3-cleanupInterval")) + viper.BindPFlag("poller.s3.maxConcurrentDownloads", rootCmd.PersistentFlags().Lookup("poller-s3-maxConcurrentDownloads")) viper.BindPFlag("committer.enabled", rootCmd.PersistentFlags().Lookup("committer-enabled")) 
viper.BindPFlag("committer.blocksPerCommit", rootCmd.PersistentFlags().Lookup("committer-blocks-per-commit")) viper.BindPFlag("committer.interval", rootCmd.PersistentFlags().Lookup("committer-interval")) diff --git a/configs/config.go b/configs/config.go index 9c3e7c0..78daec6 100644 --- a/configs/config.go +++ b/configs/config.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "strings" + "time" "github.com/rs/zerolog/log" "github.com/spf13/viper" @@ -16,13 +17,14 @@ type LogConfig struct { } type PollerConfig struct { - Enabled bool `mapstructure:"enabled"` - Interval int `mapstructure:"interval"` - BlocksPerPoll int `mapstructure:"blocksPerPoll"` - FromBlock int `mapstructure:"fromBlock"` - ForceFromBlock bool `mapstructure:"forceFromBlock"` - UntilBlock int `mapstructure:"untilBlock"` - ParallelPollers int `mapstructure:"parallelPollers"` + Enabled bool `mapstructure:"enabled"` + Interval int `mapstructure:"interval"` + BlocksPerPoll int `mapstructure:"blocksPerPoll"` + FromBlock int `mapstructure:"fromBlock"` + ForceFromBlock bool `mapstructure:"forceFromBlock"` + UntilBlock int `mapstructure:"untilBlock"` + ParallelPollers int `mapstructure:"parallelPollers"` + S3 *S3SourceConfig `mapstructure:"s3"` } type CommitterConfig struct { @@ -74,7 +76,7 @@ type StorageMainConfig struct { Postgres *PostgresConfig `mapstructure:"postgres"` Kafka *KafkaConfig `mapstructure:"kafka"` Badger *BadgerConfig `mapstructure:"badger"` - S3 *S3Config `mapstructure:"s3"` + S3 *S3StorageConfig `mapstructure:"s3"` } type BadgerConfig struct { @@ -82,14 +84,18 @@ type BadgerConfig struct { } type S3Config struct { - Bucket string `mapstructure:"bucket"` - Region string `mapstructure:"region"` - Prefix string `mapstructure:"prefix"` - AccessKeyID string `mapstructure:"accessKeyId"` - SecretAccessKey string `mapstructure:"secretAccessKey"` - Endpoint string `mapstructure:"endpoint"` - Format string `mapstructure:"format"` - Parquet *ParquetConfig `mapstructure:"parquet"` + Bucket string `mapstructure:"bucket"` + Region string `mapstructure:"region"` + Prefix string `mapstructure:"prefix"` + AccessKeyID string `mapstructure:"accessKeyId"` + SecretAccessKey string `mapstructure:"secretAccessKey"` + Endpoint string `mapstructure:"endpoint"` +} + +type S3StorageConfig struct { + S3Config `mapstructure:",squash"` + Format string `mapstructure:"format"` + Parquet *ParquetConfig `mapstructure:"parquet"` // Buffering configuration BufferSize int64 `mapstructure:"bufferSizeMB"` // Target buffer size in MB before flush (default 512 MB) BufferTimeout int `mapstructure:"bufferTimeoutSeconds"` // Max time in seconds before flush (default 300 = 5 min) @@ -237,6 +243,16 @@ type PublisherConfig struct { Events EventPublisherConfig `mapstructure:"events"` } +type S3SourceConfig struct { + S3Config `mapstructure:",squash"` + CacheDir string `mapstructure:"cacheDir"` + MetadataTTL time.Duration `mapstructure:"metadataTTL"` + FileCacheTTL time.Duration `mapstructure:"fileCacheTTL"` + MaxCacheSize int64 `mapstructure:"maxCacheSize"` + CleanupInterval time.Duration `mapstructure:"cleanupInterval"` + MaxConcurrentDownloads int `mapstructure:"maxConcurrentDownloads"` +} + type WorkModeConfig struct { CheckIntervalMinutes int `mapstructure:"checkIntervalMinutes"` LiveModeThreshold int64 `mapstructure:"liveModeThreshold"` diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index f412dff..ab54eb5 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -22,7 +22,6 @@ 
type Orchestrator struct { reorgHandlerEnabled bool cancel context.CancelFunc wg sync.WaitGroup - shutdownOnce sync.Once } func NewOrchestrator(rpc rpc.IRPCClient) (*Orchestrator, error) { @@ -65,7 +64,10 @@ func (o *Orchestrator) Start() { workModeMonitor.RegisterChannel(pollerWorkModeChan) defer workModeMonitor.UnregisterChannel(pollerWorkModeChan) - poller := NewPoller(o.rpc, o.storage, WithPollerWorkModeChan(pollerWorkModeChan)) + poller := NewPoller(o.rpc, o.storage, + WithPollerWorkModeChan(pollerWorkModeChan), + WithPollerS3Source(config.Cfg.Poller.S3), + ) poller.Start(ctx) log.Info().Msg("Poller completed") diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index 331f00c..527bf8d 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -12,6 +12,7 @@ import ( "github.com/thirdweb-dev/indexer/internal/common" "github.com/thirdweb-dev/indexer/internal/metrics" "github.com/thirdweb-dev/indexer/internal/rpc" + "github.com/thirdweb-dev/indexer/internal/source" "github.com/thirdweb-dev/indexer/internal/storage" "github.com/thirdweb-dev/indexer/internal/worker" ) @@ -21,6 +22,7 @@ const DEFAULT_TRIGGER_INTERVAL = 1000 type Poller struct { rpc rpc.IRPCClient + worker *worker.Worker blocksPerPoll int64 triggerIntervalMs int64 storage storage.IStorage @@ -47,15 +49,33 @@ func WithPollerWorkModeChan(ch chan WorkMode) PollerOption { } } +func WithPollerS3Source(cfg *config.S3SourceConfig) PollerOption { + return func(p *Poller) { + if cfg == nil || cfg.Region == "" || cfg.Bucket == "" { + return + } + + source, err := source.NewS3Source(cfg, p.rpc.GetChainID()) + if err != nil { + log.Fatal().Err(err).Msg("Failed to create S3 source") + } + + log.Info().Msg("Poller S3 source configuration detected, setting up S3 source for poller") + p.worker = worker.NewWorkerWithArchive(p.rpc, source) + } +} + func NewBoundlessPoller(rpc rpc.IRPCClient, storage storage.IStorage, opts ...PollerOption) *Poller { blocksPerPoll := config.Cfg.Poller.BlocksPerPoll if blocksPerPoll == 0 { blocksPerPoll = DEFAULT_BLOCKS_PER_POLL } + triggerInterval := config.Cfg.Poller.Interval if triggerInterval == 0 { triggerInterval = DEFAULT_TRIGGER_INTERVAL } + poller := &Poller{ rpc: rpc, triggerIntervalMs: int64(triggerInterval), @@ -68,6 +88,10 @@ func NewBoundlessPoller(rpc rpc.IRPCClient, storage storage.IStorage, opts ...Po opt(poller) } + if poller.worker == nil { + poller.worker = worker.NewWorker(poller.rpc) + } + return poller } @@ -235,8 +259,7 @@ func (p *Poller) PollWithoutSaving(ctx context.Context, blockNumbers []*big.Int) endBlockNumberFloat, _ := endBlock.Float64() metrics.PollerLastTriggeredBlock.Set(endBlockNumberFloat) - worker := worker.NewWorker(p.rpc) - results := worker.Run(ctx, blockNumbers) + results := p.worker.Run(ctx, blockNumbers) blockData, failedResults := p.convertPollResultsToBlockData(results) return blockData, failedResults } diff --git a/internal/source/s3.go b/internal/source/s3.go new file mode 100644 index 0000000..676a9ad --- /dev/null +++ b/internal/source/s3.go @@ -0,0 +1,1119 @@ +package source + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "math/big" + "os" + "path/filepath" + "sort" + "strings" + "sync" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + awsconfig "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/parquet-go/parquet-go" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + 
"github.com/thirdweb-dev/indexer/internal/common" + "github.com/thirdweb-dev/indexer/internal/rpc" +) + +// FileMetadata represents cached information about S3 files +type FileMetadata struct { + Key string + MinBlock *big.Int + MaxBlock *big.Int + Size int64 + LastAccess time.Time +} + +// BlockIndex represents the index of blocks within a file +type BlockIndex struct { + BlockNumber uint64 + RowOffset int64 + RowSize int +} + +type S3Source struct { + client *s3.Client + config *config.S3SourceConfig + chainId *big.Int + cacheDir string + + // Configurable settings + metadataTTL time.Duration // How long to cache metadata + fileCacheTTL time.Duration // How long to keep files in cache + maxCacheSize int64 // Max cache size in bytes + cleanupInterval time.Duration // How often to run cleanup + maxConcurrentDownloads int // Max concurrent S3 downloads + + // Metadata cache + metaMu sync.RWMutex + fileMetadata map[string]*FileMetadata // S3 key -> metadata + minBlock *big.Int + maxBlock *big.Int + metaLoaded bool + metaLoadTime time.Time // When metadata was last loaded + + // Local file cache + cacheMu sync.RWMutex + cacheMap map[string]time.Time // Track cache file access times + blockIndex map[string][]BlockIndex // File -> block indices + downloadMu sync.Mutex // Prevent duplicate downloads + + // Download tracking + downloading map[string]*sync.WaitGroup // Files currently downloading + + // Active use tracking + activeUseMu sync.RWMutex + activeUse map[string]int // Files currently being read (reference count) +} + +// ParquetBlockData represents the block data structure in parquet files +type ParquetBlockData struct { + ChainId uint64 `parquet:"chain_id"` + BlockNumber uint64 `parquet:"block_number"` + BlockHash string `parquet:"block_hash"` + BlockTimestamp int64 `parquet:"block_timestamp"` + Block []byte `parquet:"block_json"` + Transactions []byte `parquet:"transactions_json"` + Logs []byte `parquet:"logs_json"` + Traces []byte `parquet:"traces_json"` +} + +func NewS3Source(cfg *config.S3SourceConfig, chainId *big.Int) (*S3Source, error) { + // Apply defaults + if cfg.MetadataTTL == 0 { + cfg.MetadataTTL = 10 * time.Minute + } + if cfg.FileCacheTTL == 0 { + cfg.FileCacheTTL = 15 * time.Minute // 15 minutes + } + if cfg.MaxCacheSize == 0 { + cfg.MaxCacheSize = 5 * 1024 * 1024 * 1024 // Increased from 5GB to 10GB + } + if cfg.CleanupInterval == 0 { + cfg.CleanupInterval = 5 * time.Minute // 5 minutes + } + if cfg.MaxConcurrentDownloads == 0 { + cfg.MaxConcurrentDownloads = 3 + } + + awsCfg, err := awsconfig.LoadDefaultConfig(context.Background(), + awsconfig.WithRegion(cfg.Region), + ) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + // Override with explicit credentials if provided + if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { + awsCfg.Credentials = aws.CredentialsProviderFunc(func(ctx context.Context) (aws.Credentials, error) { + return aws.Credentials{ + AccessKeyID: cfg.AccessKeyID, + SecretAccessKey: cfg.SecretAccessKey, + }, nil + }) + } + + s3Client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { + if cfg.Endpoint != "" { + o.BaseEndpoint = aws.String(cfg.Endpoint) + } + }) + + // Create cache directory + cacheDir := cfg.CacheDir + if cacheDir == "" { + cacheDir = filepath.Join(os.TempDir(), "s3-archive-cache", fmt.Sprintf("chain_%d", chainId.Uint64())) + } + if err := os.MkdirAll(cacheDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create cache directory: %w", err) + } + + archive := &S3Source{ + client: 
s3Client, + config: cfg, + chainId: chainId, + cacheDir: cacheDir, + metadataTTL: cfg.MetadataTTL, + fileCacheTTL: cfg.FileCacheTTL, + maxCacheSize: cfg.MaxCacheSize, + cleanupInterval: cfg.CleanupInterval, + maxConcurrentDownloads: cfg.MaxConcurrentDownloads, + fileMetadata: make(map[string]*FileMetadata), + cacheMap: make(map[string]time.Time), + blockIndex: make(map[string][]BlockIndex), + downloading: make(map[string]*sync.WaitGroup), + activeUse: make(map[string]int), + } + + // Start cache cleanup goroutine + go archive.cleanupCache() + + // Load metadata in background (optional) + if cfg.Bucket != "" { + go func() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + if err := archive.loadMetadata(ctx); err != nil { + log.Warn().Err(err).Msg("Failed to preload S3 metadata") + } + }() + } + + return archive, nil +} + +func (s *S3Source) GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { + if len(blockNumbers) == 0 { + return nil + } + + // Ensure metadata is loaded + if err := s.ensureMetadataLoaded(ctx); err != nil { + log.Error().Err(err).Msg("Failed to load metadata") + return s.makeErrorResults(blockNumbers, err) + } + + // Sort block numbers for efficient file access + sortedBlocks := make([]*big.Int, len(blockNumbers)) + copy(sortedBlocks, blockNumbers) + sort.Slice(sortedBlocks, func(i, j int) bool { + return sortedBlocks[i].Cmp(sortedBlocks[j]) < 0 + }) + + // Group blocks by files that contain them + fileGroups := s.groupBlocksByFiles(sortedBlocks) + + // Mark files as being actively used + s.activeUseMu.Lock() + for fileKey := range fileGroups { + s.activeUse[fileKey]++ + log.Trace(). + Str("file", fileKey). + Int("new_count", s.activeUse[fileKey]). + Msg("Incrementing file reference count") + } + s.activeUseMu.Unlock() + + // Ensure we release the hold on files when done + defer func() { + s.activeUseMu.Lock() + for fileKey := range fileGroups { + s.activeUse[fileKey]-- + log.Trace(). + Str("file", fileKey). + Int("new_count", s.activeUse[fileKey]). 
+ Msg("Decrementing file reference count") + if s.activeUse[fileKey] <= 0 { + delete(s.activeUse, fileKey) + } + } + s.activeUseMu.Unlock() + + // Update access times to keep files in cache + s.cacheMu.Lock() + now := time.Now() + for fileKey := range fileGroups { + s.cacheMap[fileKey] = now + } + s.cacheMu.Unlock() + }() + + // Download required files and wait for ALL to be ready + if err := s.ensureFilesAvailable(ctx, fileGroups); err != nil { + log.Error().Err(err).Msg("Failed to ensure files are available") + return s.makeErrorResults(blockNumbers, err) + } + + // Read blocks from local files - at this point all files should be available + results := make([]rpc.GetFullBlockResult, 0, len(blockNumbers)) + resultMap := make(map[uint64]rpc.GetFullBlockResult) + + for fileKey, blocks := range fileGroups { + localPath := s.getCacheFilePath(fileKey) + + // Double-check file still exists (defensive programming) + if !s.isFileCached(localPath) { + log.Error().Str("file", fileKey).Str("path", localPath).Msg("File disappeared after ensureFilesAvailable") + // Try to re-download the file synchronously as a last resort + if err := s.downloadFile(ctx, fileKey); err != nil { + log.Error().Err(err).Str("file", fileKey).Msg("Failed to re-download disappeared file") + for _, bn := range blocks { + resultMap[bn.Uint64()] = rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: fmt.Errorf("file disappeared and re-download failed: %w", err), + } + } + continue + } + } + + // Read blocks from local file efficiently + fileResults, err := s.readBlocksFromLocalFile(localPath, blocks) + if err != nil { + log.Error().Err(err).Str("file", fileKey).Msg("Failed to read blocks from local file") + // Even if one file fails, continue with others + for _, bn := range blocks { + resultMap[bn.Uint64()] = rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: fmt.Errorf("failed to read from file: %w", err), + } + } + continue + } + + for blockNum, result := range fileResults { + resultMap[blockNum] = result + } + } + + // Build ordered results + for _, bn := range blockNumbers { + if result, ok := resultMap[bn.Uint64()]; ok { + results = append(results, result) + } else { + results = append(results, rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: fmt.Errorf("block %s not found", bn.String()), + }) + } + } + + return results +} + +func (s *S3Source) GetSupportedBlockRange(ctx context.Context) (minBlockNumber *big.Int, maxBlockNumber *big.Int, err error) { + if err := s.ensureMetadataLoaded(ctx); err != nil { + return nil, nil, err + } + + s.metaMu.RLock() + defer s.metaMu.RUnlock() + + if s.minBlock == nil || s.maxBlock == nil { + return big.NewInt(0), big.NewInt(0), fmt.Errorf("no blocks found for chain %d", s.chainId.Uint64()) + } + + return new(big.Int).Set(s.minBlock), new(big.Int).Set(s.maxBlock), nil +} + +func (s *S3Source) Close() { + // Clean up cache directory + if s.cacheDir != "" { + os.RemoveAll(s.cacheDir) + } +} + +// Metadata management + +func (s *S3Source) loadMetadata(ctx context.Context) error { + s.metaMu.Lock() + defer s.metaMu.Unlock() + + // Check if metadata is still fresh + if s.metaLoaded && time.Since(s.metaLoadTime) < s.metadataTTL { + return nil + } + + prefix := fmt.Sprintf("chain_%d/", s.chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for paginator.HasMorePages() { + page, err 
:= paginator.NextPage(ctx) + if err != nil { + return fmt.Errorf("failed to list S3 objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil || obj.Size == nil { + continue + } + + startBlock, endBlock := s.extractBlockRangeFromKey(*obj.Key) + if startBlock == nil || endBlock == nil { + continue + } + + // Store metadata + s.fileMetadata[*obj.Key] = &FileMetadata{ + Key: *obj.Key, + MinBlock: startBlock, + MaxBlock: endBlock, + Size: *obj.Size, + } + + // Update global min/max + if s.minBlock == nil || startBlock.Cmp(s.minBlock) < 0 { + s.minBlock = new(big.Int).Set(startBlock) + } + if s.maxBlock == nil || endBlock.Cmp(s.maxBlock) > 0 { + s.maxBlock = new(big.Int).Set(endBlock) + } + } + } + + s.metaLoaded = true + s.metaLoadTime = time.Now() + log.Info(). + Int("files", len(s.fileMetadata)). + Str("min_block", s.minBlock.String()). + Str("max_block", s.maxBlock.String()). + Dur("ttl", s.metadataTTL). + Msg("Loaded S3 metadata cache") + + return nil +} + +func (s *S3Source) ensureMetadataLoaded(ctx context.Context) error { + s.metaMu.RLock() + // Check if metadata is loaded and still fresh + if s.metaLoaded && time.Since(s.metaLoadTime) < s.metadataTTL { + s.metaMu.RUnlock() + return nil + } + s.metaMu.RUnlock() + + return s.loadMetadata(ctx) +} + +// File grouping and downloading + +func (s *S3Source) ensureFilesAvailable(ctx context.Context, fileGroups map[string][]*big.Int) error { + var wg sync.WaitGroup + errChan := make(chan error, len(fileGroups)) + + // Limit concurrent downloads + sem := make(chan struct{}, s.maxConcurrentDownloads) + + for fileKey := range fileGroups { + wg.Add(1) + go func(key string) { + defer wg.Done() + + // First check if file is already being downloaded by another goroutine + s.downloadMu.Lock() + if downloadWg, downloading := s.downloading[key]; downloading { + s.downloadMu.Unlock() + // Wait for the existing download to complete + downloadWg.Wait() + + // Verify file exists after waiting + localPath := s.getCacheFilePath(key) + if !s.isFileCached(localPath) { + errChan <- fmt.Errorf("file %s not available after waiting for download", key) + } else { + // Ensure file is tracked in cache map + s.ensureFileInCacheMap(key) + // Update access time for this file since we'll be using it + s.cacheMu.Lock() + s.cacheMap[key] = time.Now() + s.cacheMu.Unlock() + } + return + } + s.downloadMu.Unlock() + + // Check if file is already cached + localPath := s.getCacheFilePath(key) + if s.isFileCached(localPath) { + // Ensure file is in cache map (in case it was on disk but not tracked) + s.ensureFileInCacheMap(key) + // Update access time + s.cacheMu.Lock() + s.cacheMap[key] = time.Now() + s.cacheMu.Unlock() + return + } + + // Need to download the file + sem <- struct{}{} + defer func() { <-sem }() + + if err := s.downloadFile(ctx, key); err != nil { + errChan <- fmt.Errorf("failed to download %s: %w", key, err) + return + } + + // Verify file exists after download + if !s.isFileCached(localPath) { + errChan <- fmt.Errorf("file %s not cached after download", key) + } + }(fileKey) + } + + // Wait for all files to be available + wg.Wait() + close(errChan) + + // Collect any errors + var errors []string + for err := range errChan { + if err != nil { + errors = append(errors, err.Error()) + } + } + + if len(errors) > 0 { + return fmt.Errorf("failed to ensure files available: %s", strings.Join(errors, "; ")) + } + + return nil +} + +func (s *S3Source) groupBlocksByFiles(blockNumbers []*big.Int) map[string][]*big.Int { + s.metaMu.RLock() + defer 
s.metaMu.RUnlock() + + fileGroups := make(map[string][]*big.Int) + + for _, blockNum := range blockNumbers { + // Find files that contain this block + for _, meta := range s.fileMetadata { + if blockNum.Cmp(meta.MinBlock) >= 0 && blockNum.Cmp(meta.MaxBlock) <= 0 { + fileGroups[meta.Key] = append(fileGroups[meta.Key], blockNum) + break // Each block should only be in one file + } + } + } + + return fileGroups +} + +func (s *S3Source) downloadFile(ctx context.Context, fileKey string) error { + // Prevent duplicate downloads + s.downloadMu.Lock() + if wg, downloading := s.downloading[fileKey]; downloading { + s.downloadMu.Unlock() + wg.Wait() + return nil + } + + wg := &sync.WaitGroup{} + wg.Add(1) + s.downloading[fileKey] = wg + s.downloadMu.Unlock() + + defer func() { + wg.Done() + s.downloadMu.Lock() + delete(s.downloading, fileKey) + s.downloadMu.Unlock() + }() + + localPath := s.getCacheFilePath(fileKey) + + // Create temp file for atomic write + tempPath := localPath + ".tmp" + + // Download from S3 + result, err := s.client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(s.config.Bucket), + Key: aws.String(fileKey), + }) + if err != nil { + return fmt.Errorf("failed to download file: %w", err) + } + defer result.Body.Close() + + // Create directory if needed + dir := filepath.Dir(tempPath) + if err := os.MkdirAll(dir, 0755); err != nil { + return err + } + + // Write to temp file + file, err := os.Create(tempPath) + if err != nil { + return err + } + + _, err = io.Copy(file, result.Body) + file.Close() + + if err != nil { + os.Remove(tempPath) + return err + } + + // Atomic rename + if err := os.Rename(tempPath, localPath); err != nil { + os.Remove(tempPath) + return err + } + + // Build block index for the file + go s.buildBlockIndex(localPath, fileKey) + + // Update cache map + s.cacheMu.Lock() + s.cacheMap[fileKey] = time.Now() + s.cacheMu.Unlock() + + log.Info().Str("file", fileKey).Str("path", localPath).Msg("Downloaded file from S3") + + return nil +} + +// Optimized parquet reading + +func (s *S3Source) buildBlockIndex(filePath, fileKey string) error { + file, err := os.Open(filePath) + if err != nil { + return err + } + defer file.Close() + + stat, err := file.Stat() + if err != nil { + return err + } + + pFile, err := parquet.OpenFile(file, stat.Size()) + if err != nil { + return err + } + + // Read only the block_number column to build index + blockNumCol := -1 + for i, field := range pFile.Schema().Fields() { + if field.Name() == "block_number" { + blockNumCol = i + break + } + } + + if blockNumCol < 0 { + return fmt.Errorf("block_number column not found") + } + + var index []BlockIndex + for _, rg := range pFile.RowGroups() { + chunk := rg.ColumnChunks()[blockNumCol] + pages := chunk.Pages() + offset := int64(0) + + for { + page, err := pages.ReadPage() + if err != nil { + break + } + + values := page.Values() + // Type assert to the specific reader type + switch reader := values.(type) { + case parquet.Int64Reader: + // Handle int64 block numbers + blockNums := make([]int64, page.NumValues()) + n, _ := reader.ReadInt64s(blockNums) + + for i := 0; i < n; i++ { + if blockNums[i] >= 0 { + index = append(index, BlockIndex{ + BlockNumber: uint64(blockNums[i]), + RowOffset: offset + int64(i), + RowSize: 1, + }) + } + } + default: + // Try to read as generic values + values := make([]parquet.Value, page.NumValues()) + n, _ := reader.ReadValues(values) + + for i := 0; i < n; i++ { + if !values[i].IsNull() { + blockNum := values[i].Uint64() + index = append(index, 
BlockIndex{ + BlockNumber: blockNum, + RowOffset: offset + int64(i), + RowSize: 1, + }) + } + } + } + offset += int64(page.NumValues()) + } + } + + // Store index + s.cacheMu.Lock() + s.blockIndex[fileKey] = index + s.cacheMu.Unlock() + + return nil +} + +func (s *S3Source) readBlocksFromLocalFile(filePath string, blockNumbers []*big.Int) (map[uint64]rpc.GetFullBlockResult, error) { + // Update access time for this file + fileKey := s.getFileKeyFromPath(filePath) + if fileKey != "" { + s.cacheMu.Lock() + s.cacheMap[fileKey] = time.Now() + s.cacheMu.Unlock() + } + + file, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer file.Close() + + stat, err := file.Stat() + if err != nil { + return nil, err + } + + // Create block map for quick lookup + blockMap := make(map[uint64]bool) + for _, bn := range blockNumbers { + blockMap[bn.Uint64()] = true + } + + // Use optimized parquet reading + pFile, err := parquet.OpenFile(file, stat.Size()) + if err != nil { + return nil, err + } + + results := make(map[uint64]rpc.GetFullBlockResult) + + // Read row groups + for _, rg := range pFile.RowGroups() { + // Check row group statistics to see if it contains our blocks + if !s.rowGroupContainsBlocks(rg, blockMap) { + continue + } + + // Read rows from this row group using generic reader + rows := make([]parquet.Row, rg.NumRows()) + reader := parquet.NewRowGroupReader(rg) + + n, err := reader.ReadRows(rows) + if err != nil && err != io.EOF { + log.Warn().Err(err).Msg("Error reading row group") + continue + } + + // Convert rows to our struct + for i := 0; i < n; i++ { + row := rows[i] + if len(row) < 8 { + continue // Not enough columns + } + + // Extract block number first to check if we need this row + blockNum := row[1].Uint64() // block_number is second column + + // Skip if not in requested blocks + if !blockMap[blockNum] { + continue + } + + // Build ParquetBlockData from row + pd := ParquetBlockData{ + ChainId: row[0].Uint64(), + BlockNumber: blockNum, + BlockHash: row[2].String(), + BlockTimestamp: row[3].Int64(), + Block: row[4].ByteArray(), + Transactions: row[5].ByteArray(), + Logs: row[6].ByteArray(), + Traces: row[7].ByteArray(), + } + + // Parse block data + result, err := s.parseBlockData(pd) + if err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to parse block data") + continue + } + + results[pd.BlockNumber] = result + } + } + + return results, nil +} + +func (s *S3Source) rowGroupContainsBlocks(rg parquet.RowGroup, blockMap map[uint64]bool) bool { + // Get the block_number column chunk + for i, col := range rg.Schema().Fields() { + if col.Name() == "block_number" { + chunk := rg.ColumnChunks()[i] + ci, _ := chunk.ColumnIndex() + if ci != nil { + // Check min/max values + for j := 0; j < ci.NumPages(); j++ { + minVal := ci.MinValue(j) + maxVal := ci.MaxValue(j) + + if minVal.IsNull() || maxVal.IsNull() { + continue + } + + minBlock := minVal.Uint64() + maxBlock := maxVal.Uint64() + + // Check if any requested blocks fall in this range + for blockNum := range blockMap { + if blockNum >= minBlock && blockNum <= maxBlock { + return true + } + } + } + } + break + } + } + + // If no statistics, assume it might contain blocks + return true +} + +func (s *S3Source) parseBlockData(pd ParquetBlockData) (rpc.GetFullBlockResult, error) { + var block common.Block + if err := json.Unmarshal(pd.Block, &block); err != nil { + return rpc.GetFullBlockResult{}, err + } + + var transactions []common.Transaction + if len(pd.Transactions) > 0 { + if err := 
json.Unmarshal(pd.Transactions, &transactions); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal transactions") + } + } + + var logs []common.Log + if len(pd.Logs) > 0 { + if err := json.Unmarshal(pd.Logs, &logs); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal logs") + } + } + + var traces []common.Trace + if len(pd.Traces) > 0 { + if err := json.Unmarshal(pd.Traces, &traces); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal traces") + } + } + + return rpc.GetFullBlockResult{ + BlockNumber: new(big.Int).SetUint64(pd.BlockNumber), + Data: common.BlockData{ + Block: block, + Transactions: transactions, + Logs: logs, + Traces: traces, + }, + Error: nil, + }, nil +} + +// RefreshMetadata forces a refresh of the metadata cache +func (s *S3Source) RefreshMetadata(ctx context.Context) error { + s.metaMu.Lock() + s.metaLoaded = false + s.metaLoadTime = time.Time{} + s.metaMu.Unlock() + + return s.loadMetadata(ctx) +} + +// GetCacheStats returns statistics about the cache +func (s *S3Source) GetCacheStats() (fileCount int, totalSize int64, oldestAccess time.Time) { + s.cacheMu.RLock() + defer s.cacheMu.RUnlock() + + fileCount = len(s.cacheMap) + now := time.Now() + + for key, accessTime := range s.cacheMap { + path := s.getCacheFilePath(key) + if info, err := os.Stat(path); err == nil { + totalSize += info.Size() + } + if oldestAccess.IsZero() || accessTime.Before(oldestAccess) { + oldestAccess = accessTime + } + } + + // Also check metadata freshness + s.metaMu.RLock() + metaAge := now.Sub(s.metaLoadTime) + s.metaMu.RUnlock() + + log.Debug(). + Int("file_count", fileCount). + Int64("total_size_mb", totalSize/(1024*1024)). + Dur("oldest_file_age", now.Sub(oldestAccess)). + Dur("metadata_age", metaAge). 
+ Msg("Cache statistics") + + return fileCount, totalSize, oldestAccess +} + +// Helper functions + +func (s *S3Source) extractBlockRangeFromKey(key string) (*big.Int, *big.Int) { + parts := strings.Split(key, "/") + if len(parts) == 0 { + return nil, nil + } + + filename := parts[len(parts)-1] + if !strings.HasPrefix(filename, "blocks_") || !strings.HasSuffix(filename, ".parquet") { + return nil, nil + } + + rangeStr := strings.TrimPrefix(filename, "blocks_") + rangeStr = strings.TrimSuffix(rangeStr, ".parquet") + + rangeParts := strings.Split(rangeStr, "_") + if len(rangeParts) != 2 { + return nil, nil + } + + startBlock, ok1 := new(big.Int).SetString(rangeParts[0], 10) + endBlock, ok2 := new(big.Int).SetString(rangeParts[1], 10) + if !ok1 || !ok2 { + return nil, nil + } + + return startBlock, endBlock +} + +func (s *S3Source) getCacheFilePath(fileKey string) string { + // Create a safe filename from the S3 key + hash := sha256.Sum256([]byte(fileKey)) + filename := hex.EncodeToString(hash[:])[:16] + ".parquet" + return filepath.Join(s.cacheDir, filename) +} + +func (s *S3Source) getFileKeyFromPath(filePath string) string { + // Reverse lookup - find the key for a given cache path + s.cacheMu.RLock() + defer s.cacheMu.RUnlock() + + for key := range s.cacheMap { + if s.getCacheFilePath(key) == filePath { + return key + } + } + return "" +} + +func (s *S3Source) isFileCached(filePath string) bool { + // First check if file exists at all + info, err := os.Stat(filePath) + if err != nil { + return false + } + + // Check if file has content + if info.Size() == 0 { + return false + } + + // Check if a temp file exists (indicating incomplete download) + tempPath := filePath + ".tmp" + if _, err := os.Stat(tempPath); err == nil { + // Temp file exists, download is incomplete + return false + } + + // File exists, has content, and no temp file - it's cached + return true +} + +// ensureFileInCacheMap ensures a file that exists on disk is tracked in the cache map +func (s *S3Source) ensureFileInCacheMap(fileKey string) { + s.cacheMu.Lock() + defer s.cacheMu.Unlock() + + // If not in cache map, add it with current time + if _, exists := s.cacheMap[fileKey]; !exists { + localPath := s.getCacheFilePath(fileKey) + if info, err := os.Stat(localPath); err == nil { + // Use file modification time if it's recent, otherwise use current time + modTime := info.ModTime() + if time.Since(modTime) < s.fileCacheTTL { + s.cacheMap[fileKey] = modTime + } else { + s.cacheMap[fileKey] = time.Now() + } + log.Trace(). + Str("file", fileKey). + Time("access_time", s.cacheMap[fileKey]). 
+ Msg("Added existing file to cache map") + } + } +} + +func (s *S3Source) makeErrorResults(blockNumbers []*big.Int, err error) []rpc.GetFullBlockResult { + results := make([]rpc.GetFullBlockResult, len(blockNumbers)) + for i, bn := range blockNumbers { + results[i] = rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: err, + } + } + return results +} + +func (s *S3Source) cleanupCache() { + ticker := time.NewTicker(s.cleanupInterval) + defer ticker.Stop() + + for range ticker.C { + s.cacheMu.Lock() + s.downloadMu.Lock() + s.activeUseMu.RLock() + + // Remove files not accessed within the TTL + cutoff := time.Now().Add(-s.fileCacheTTL) + protectedCount := 0 + expiredCount := 0 + + for fileKey, accessTime := range s.cacheMap { + // Skip files that are currently being downloaded + if _, downloading := s.downloading[fileKey]; downloading { + protectedCount++ + continue + } + + // Skip files that are actively being used + if count, active := s.activeUse[fileKey]; active && count > 0 { + protectedCount++ + // Only log at trace level to reduce noise + log.Trace(). + Str("file", fileKey). + Int("ref_count", count). + Msg("Skipping actively used file in cleanup") + continue + } + + if accessTime.Before(cutoff) { + expiredCount++ + cacheFile := s.getCacheFilePath(fileKey) + log.Debug(). + Str("file", fileKey). + Str("path", cacheFile). + Time("last_access", accessTime). + Time("cutoff", cutoff). + Msg("Removing expired file from cache") + os.Remove(cacheFile) + delete(s.cacheMap, fileKey) + delete(s.blockIndex, fileKey) + } + } + + s.activeUseMu.RUnlock() + s.downloadMu.Unlock() + s.cacheMu.Unlock() + + // Only log if something interesting happened (files were deleted) + if expiredCount > 0 { + log.Debug(). + Int("protected", protectedCount). + Int("expired", expiredCount). + Int("total_cached", len(s.cacheMap)). + Msg("Cache cleanup cycle completed - removed expired files") + } else if protectedCount > 0 { + // Use trace level for routine cleanup cycles with no deletions + log.Trace(). + Int("protected", protectedCount). + Int("total_cached", len(s.cacheMap)). + Msg("Cache cleanup cycle completed - no files expired") + } + + // Also check disk usage and remove oldest files if needed + s.enforceMaxCacheSize() + } +} + +func (s *S3Source) enforceMaxCacheSize() { + maxSize := s.maxCacheSize + + var totalSize int64 + var files []struct { + path string + key string + size int64 + access time.Time + } + + s.cacheMu.RLock() + for key, accessTime := range s.cacheMap { + path := s.getCacheFilePath(key) + if info, err := os.Stat(path); err == nil { + totalSize += info.Size() + files = append(files, struct { + path string + key string + size int64 + access time.Time + }{path, key, info.Size(), accessTime}) + } + } + s.cacheMu.RUnlock() + + if totalSize <= maxSize { + return + } + + log.Debug(). + Int64("total_size_mb", totalSize/(1024*1024)). + Int64("max_size_mb", maxSize/(1024*1024)). + Int("file_count", len(files)). 
+ Msg("Cache size exceeded, removing old files") + + // Sort by access time (oldest first) + sort.Slice(files, func(i, j int) bool { + return files[i].access.Before(files[j].access) + }) + + // Remove oldest files until under limit + s.cacheMu.Lock() + s.downloadMu.Lock() + s.activeUseMu.RLock() + defer s.activeUseMu.RUnlock() + defer s.downloadMu.Unlock() + defer s.cacheMu.Unlock() + + for _, f := range files { + if totalSize <= maxSize { + break + } + + // Skip files that are currently being downloaded + if _, downloading := s.downloading[f.key]; downloading { + continue + } + + // Skip files that are actively being used + if count, active := s.activeUse[f.key]; active && count > 0 { + continue + } + + os.Remove(f.path) + delete(s.cacheMap, f.key) + delete(s.blockIndex, f.key) + totalSize -= f.size + } +} diff --git a/internal/source/source.go b/internal/source/source.go new file mode 100644 index 0000000..2b9ef85 --- /dev/null +++ b/internal/source/source.go @@ -0,0 +1,14 @@ +package source + +import ( + "context" + "math/big" + + "github.com/thirdweb-dev/indexer/internal/rpc" +) + +type ISource interface { + GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult + GetSupportedBlockRange(ctx context.Context) (minBlockNumber *big.Int, maxBlockNumber *big.Int, err error) + Close() +} diff --git a/internal/storage/s3.go b/internal/storage/s3.go index 4e1de31..9cd5f24 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -25,7 +25,7 @@ import ( type S3Connector struct { client *s3.Client - config *config.S3Config + config *config.S3StorageConfig formatter DataFormatter buffer *BlockBuffer @@ -59,7 +59,7 @@ type ParquetBlockData struct { Traces []byte `parquet:"traces_json"` } -func NewS3Connector(cfg *config.S3Config) (*S3Connector, error) { +func NewS3Connector(cfg *config.S3StorageConfig) (*S3Connector, error) { awsCfg, err := awsconfig.LoadDefaultConfig(context.Background(), awsconfig.WithRegion(cfg.Region), ) diff --git a/internal/worker/worker.go b/internal/worker/worker.go index d25294f..fd94bab 100644 --- a/internal/worker/worker.go +++ b/internal/worker/worker.go @@ -2,6 +2,7 @@ package worker import ( "context" + "fmt" "math/big" "sort" "sync" @@ -12,138 +13,334 @@ import ( "github.com/thirdweb-dev/indexer/internal/common" "github.com/thirdweb-dev/indexer/internal/metrics" "github.com/thirdweb-dev/indexer/internal/rpc" + "github.com/thirdweb-dev/indexer/internal/source" ) +// SourceType represents the type of data source +type SourceType string + +const ( + // SourceTypeRPC represents RPC data source + SourceTypeRPC SourceType = "rpc" + // SourceTypeArchive represents archive data source (e.g., S3) + SourceTypeArchive SourceType = "archive" +) + +// String returns the string representation of the source type +func (s SourceType) String() string { + return string(s) +} + +// Worker handles block data fetching from RPC and optional archive type Worker struct { - rpc rpc.IRPCClient + rpc rpc.IRPCClient + archive source.ISource // Optional alternative source + rpcSemaphore chan struct{} // Limit concurrent RPC requests } func NewWorker(rpc rpc.IRPCClient) *Worker { return &Worker{ - rpc: rpc, + rpc: rpc, + rpcSemaphore: make(chan struct{}, 20), + } +} + +// NewWorkerWithArchive creates a new Worker with optional archive support +func NewWorkerWithArchive(rpc rpc.IRPCClient, source source.ISource) *Worker { + return &Worker{ + rpc: rpc, + archive: source, + rpcSemaphore: make(chan struct{}, 20), + } +} + +// fetchFromRPC fetches blocks 
directly from RPC +func (w *Worker) fetchFromRPC(ctx context.Context, blocks []*big.Int) []rpc.GetFullBlockResult { + // Acquire semaphore for rate limiting + select { + case w.rpcSemaphore <- struct{}{}: + defer func() { <-w.rpcSemaphore }() + case <-ctx.Done(): + return nil + } + + return w.rpc.GetFullBlocks(ctx, blocks) +} + +// fetchFromArchive fetches blocks from archive if available +func (w *Worker) fetchFromArchive(ctx context.Context, blocks []*big.Int) []rpc.GetFullBlockResult { + if w.archive == nil { + return nil } + return w.archive.GetFullBlocks(ctx, blocks) } -func (w *Worker) processChunkWithRetry(ctx context.Context, chunk []*big.Int, resultsCh chan<- []rpc.GetFullBlockResult, sem chan struct{}) { +// processChunkWithRetry processes a chunk with automatic retry on failure +func (w *Worker) processChunkWithRetry(ctx context.Context, chunk []*big.Int, fetchFunc func(context.Context, []*big.Int) []rpc.GetFullBlockResult) []rpc.GetFullBlockResult { select { case <-ctx.Done(): - return + // Return error results for all blocks if context cancelled + var results []rpc.GetFullBlockResult + for _, block := range chunk { + results = append(results, rpc.GetFullBlockResult{ + BlockNumber: block, + Error: fmt.Errorf("context cancelled"), + }) + } + return results default: } - // Acquire semaphore only for the RPC request - sem <- struct{}{} - results := w.rpc.GetFullBlocks(ctx, chunk) - <-sem // Release semaphore immediately after RPC request + // Fetch the chunk + results := fetchFunc(ctx, chunk) - if len(chunk) == 1 { - // chunk size 1 is the minimum, so we return whatever we get - resultsCh <- results - return + // If we got all results, return them + if len(results) == len(chunk) { + allSuccess := true + for _, r := range results { + if r.Error != nil { + allSuccess = false + break + } + } + if allSuccess { + return results + } } - // Check for failed blocks + // Separate successful and failed + successMap := make(map[string]rpc.GetFullBlockResult) var failedBlocks []*big.Int - var successfulResults []rpc.GetFullBlockResult for i, result := range results { - if result.Error != nil { - failedBlocks = append(failedBlocks, chunk[i]) - } else { - successfulResults = append(successfulResults, result) + if i < len(chunk) { + if result.Error == nil { + successMap[chunk[i].String()] = result + } else { + failedBlocks = append(failedBlocks, chunk[i]) + } } } - log.Debug().Msgf("Out of %d blocks, %d successful, %d failed", len(results), len(successfulResults), len(failedBlocks)) - // If we have successful results, send them - if len(successfulResults) > 0 { - resultsCh <- successfulResults - } + // If only one block failed, retry once more + if len(failedBlocks) == 1 { + retryResults := fetchFunc(ctx, failedBlocks) + if len(retryResults) > 0 { + if retryResults[0].Error == nil { + successMap[failedBlocks[0].String()] = retryResults[0] + } else { + // Keep the error result + successMap[failedBlocks[0].String()] = rpc.GetFullBlockResult{ + BlockNumber: failedBlocks[0], + Error: retryResults[0].Error, + } + } + } + } else if len(failedBlocks) > 1 { + // Split failed blocks and retry recursively + mid := len(failedBlocks) / 2 + leftChunk := failedBlocks[:mid] + rightChunk := failedBlocks[mid:] - // If no blocks failed, we're done - if len(failedBlocks) == 0 { - return - } + log.Debug(). + Int("failed_count", len(failedBlocks)). + Int("left_chunk", len(leftChunk)). + Int("right_chunk", len(rightChunk)). 
+ Msg("Splitting failed blocks for retry") - // can't split any further, so try one last time - if len(failedBlocks) == 1 { - w.processChunkWithRetry(ctx, failedBlocks, resultsCh, sem) - return - } + // Process both halves + leftResults := w.processChunkWithRetry(ctx, leftChunk, fetchFunc) + rightResults := w.processChunkWithRetry(ctx, rightChunk, fetchFunc) - // Split failed blocks in half and retry - mid := len(failedBlocks) / 2 - leftChunk := failedBlocks[:mid] - rightChunk := failedBlocks[mid:] + // Add results to map + for _, r := range leftResults { + if r.BlockNumber != nil { + successMap[r.BlockNumber.String()] = r + } + } + for _, r := range rightResults { + if r.BlockNumber != nil { + successMap[r.BlockNumber.String()] = r + } + } + } - log.Debug().Msgf("Splitting %d failed blocks into chunks of %d and %d", len(failedBlocks), len(leftChunk), len(rightChunk)) + // Build final results in original order + var finalResults []rpc.GetFullBlockResult + for _, block := range chunk { + if result, ok := successMap[block.String()]; ok { + finalResults = append(finalResults, result) + } else { + // Add error result for missing blocks + finalResults = append(finalResults, rpc.GetFullBlockResult{ + BlockNumber: block, + Error: fmt.Errorf("failed to fetch block"), + }) + } + } - var wg sync.WaitGroup - wg.Add(2) + return finalResults +} - go func() { - defer wg.Done() - w.processChunkWithRetry(ctx, leftChunk, resultsCh, sem) - }() +// processBatch processes a batch of blocks from a specific source +func (w *Worker) processBatch(ctx context.Context, blocks []*big.Int, sourceType SourceType, fetchFunc func(context.Context, []*big.Int) []rpc.GetFullBlockResult) []rpc.GetFullBlockResult { + if len(blocks) == 0 { + return nil + } - go func() { - defer wg.Done() - w.processChunkWithRetry(ctx, rightChunk, resultsCh, sem) - }() + // Determine chunk size based on source + chunkSize := w.rpc.GetBlocksPerRequest().Blocks + if sourceType == SourceTypeArchive && w.archive != nil { + chunkSize = len(blocks) // Fetch all at once from archive + } - wg.Wait() -} + chunks := common.SliceToChunks(blocks, chunkSize) -func (w *Worker) Run(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { - blockCount := len(blockNumbers) - chunks := common.SliceToChunks(blockNumbers, w.rpc.GetBlocksPerRequest().Blocks) + log.Debug(). + Str("source", sourceType.String()). + Int("total_blocks", len(blocks)). + Int("chunks", len(chunks)). + Int("chunk_size", chunkSize). 
+ Msg("Processing blocks") + var allResults []rpc.GetFullBlockResult + var mu sync.Mutex var wg sync.WaitGroup - resultsCh := make(chan []rpc.GetFullBlockResult, blockCount) - // Create a semaphore channel to limit concurrent goroutines - sem := make(chan struct{}, 20) - - log.Debug().Msgf("Worker Processing %d blocks in %d chunks of max %d blocks", blockCount, len(chunks), w.rpc.GetBlocksPerRequest().Blocks) + batchDelay := time.Duration(config.Cfg.RPC.Blocks.BatchDelay) * time.Millisecond for i, chunk := range chunks { - if i > 0 { - time.Sleep(time.Duration(config.Cfg.RPC.Blocks.BatchDelay) * time.Millisecond) + // Check context before starting new work + if ctx.Err() != nil { + log.Debug().Msg("Context canceled, skipping remaining chunks") + break // Don't start new chunks, but let existing ones finish } - select { - case <-ctx.Done(): - log.Debug().Msg("Context canceled, stopping Worker") - return nil - default: - // continue processing + + // Add delay between batches for RPC (except first batch) + if i > 0 && sourceType == SourceTypeRPC && batchDelay > 0 { + select { + case <-ctx.Done(): + log.Debug().Msg("Context canceled during batch delay") + break + case <-time.After(batchDelay): + // Continue after delay + } } wg.Add(1) go func(chunk []*big.Int) { defer wg.Done() - w.processChunkWithRetry(ctx, chunk, resultsCh, sem) + results := w.processChunkWithRetry(ctx, chunk, fetchFunc) + + mu.Lock() + allResults = append(allResults, results...) + mu.Unlock() }(chunk) } - go func() { - wg.Wait() - close(resultsCh) - }() + // Wait for all started goroutines to complete + wg.Wait() + + // Sort results by block number (only if we have results) + if len(allResults) > 0 { + sort.Slice(allResults, func(i, j int) bool { + return allResults[i].BlockNumber.Cmp(allResults[j].BlockNumber) < 0 + }) + } + + return allResults +} - results := make([]rpc.GetFullBlockResult, 0, blockCount) - for batchResults := range resultsCh { - results = append(results, batchResults...) +// shouldUseArchive determines if ALL requested blocks are within archive range +func (w *Worker) shouldUseArchive(ctx context.Context, blockNumbers []*big.Int) bool { + // Check if archive is configured and we have blocks to process + if w.archive == nil || len(blockNumbers) == 0 { + return false } - // Sort results by block number - sort.Slice(results, func(i, j int) bool { - return results[i].BlockNumber.Cmp(results[j].BlockNumber) < 0 - }) + // Get archive block range + minArchive, maxArchive, err := w.archive.GetSupportedBlockRange(ctx) + if err != nil { + log.Warn().Err(err).Msg("Failed to get archive block range") + return false + } - // track the last fetched block number + // Check if ALL blocks are within archive range + for _, block := range blockNumbers { + if block.Cmp(minArchive) < 0 || block.Cmp(maxArchive) > 0 { + // At least one block is outside archive range + return false + } + } + + // All blocks are within archive range + return true +} + +// Run processes blocks using either archive OR rpc +func (w *Worker) Run(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { + if len(blockNumbers) == 0 { + return nil + } + + var results []rpc.GetFullBlockResult + + // Determine which source to use + sourceType := SourceTypeRPC + fetchFunc := w.fetchFromRPC + + if w.shouldUseArchive(ctx, blockNumbers) { + sourceType = SourceTypeArchive + fetchFunc = w.fetchFromArchive + log.Debug(). + Int("count", len(blockNumbers)). + Str("source", sourceType.String()). 
+ Msg("Using archive for all blocks") + } else { + log.Debug(). + Int("count", len(blockNumbers)). + Str("source", sourceType.String()). + Msg("Using RPC for all blocks") + } + + // Process all blocks with the selected source + results = w.processBatch(ctx, blockNumbers, sourceType, fetchFunc) + + // Update metrics and log summary if len(results) > 0 { lastBlockNumberFloat, _ := results[len(results)-1].BlockNumber.Float64() metrics.LastFetchedBlock.Set(lastBlockNumberFloat) + + // Count successes and failures + successful := 0 + failed := 0 + for _, r := range results { + if r.Error == nil { + successful++ + } else { + failed++ + } + } + + log.Debug(). + Int("total", len(results)). + Int("successful", successful). + Int("failed", failed). + Str("source", sourceType.String()). + Msg("Block fetching complete") } + return results } + +// Close gracefully shuts down the worker and cleans up resources +func (w *Worker) Close() error { + // Close archive if it exists + if w.archive != nil { + log.Debug().Msg("Closing archive connection") + w.archive.Close() + } + + log.Debug().Msg("Worker closed successfully") + return nil +} From 884a3aa46b5aa145a04b96189068ea038687cb09 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 27 Aug 2025 17:50:56 +0000 Subject: [PATCH 37/43] Fix boundaries for migration --- cmd/migrate_valid.go | 11 +++++++++-- cmd/root.go | 3 +++ configs/config.go | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index cb384de..33b93ad 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -55,6 +55,9 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { // Calculate work distribution for workers numWorkers := DEFAULT_WORKERS + if config.Cfg.Migrator.WorkerCount > 0 { + numWorkers = int(config.Cfg.Migrator.WorkerCount) + } workRanges := divideBlockRange(rangeStartBlock, rangeEndBlock, numWorkers) log.Info().Msgf("Starting %d workers to process migration", len(workRanges)) @@ -376,7 +379,9 @@ func (m *Migrator) DetermineMigrationBoundaries(targetStartBlock, targetEndBlock } log.Info().Msgf("Block in the target storage for range %s to %s: count=%s, max=%s", startBlock.String(), endBlock.String(), blockCount.String(), maxStoredBlock.String()) - if maxStoredBlock != nil && maxStoredBlock.Cmp(startBlock) >= 0 { + // Only adjust start block if we actually have blocks stored (count > 0) + // When count is 0, maxStoredBlock might be 0 but that doesn't mean block 0 exists + if blockCount.Sign() > 0 && maxStoredBlock != nil && maxStoredBlock.Cmp(startBlock) >= 0 { startBlock = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) } @@ -411,7 +416,9 @@ func (m *Migrator) DetermineMigrationBoundariesForRange(rangeStart, rangeEnd *bi } actualStart := rangeStart - if maxStoredBlock != nil && maxStoredBlock.Cmp(rangeStart) >= 0 { + // Only adjust start block if we actually have blocks stored (blockCount > 0) + // When blockCount is 0, maxStoredBlock might be 0 but that doesn't mean block 0 exists + if blockCount.Sign() > 0 && maxStoredBlock != nil && maxStoredBlock.Cmp(rangeStart) >= 0 { // We have some blocks already, start from the next one actualStart = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) diff --git a/cmd/root.go b/cmd/root.go index fb3999f..efcd200 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -239,6 +239,7 @@ func init() { rootCmd.PersistentFlags().Uint("migrator-batchSize", 2000, "Batch size for storage operations in migrator") 
rootCmd.PersistentFlags().Uint("migrator-startBlock", 0, "Start block for migration") rootCmd.PersistentFlags().Uint("migrator-endBlock", 0, "End block for migration") + rootCmd.PersistentFlags().Uint("migrator-workerCount", 0, "Worker count for migration") viper.BindPFlag("rpc.url", rootCmd.PersistentFlags().Lookup("rpc-url")) viper.BindPFlag("rpc.blocks.blocksPerRequest", rootCmd.PersistentFlags().Lookup("rpc-blocks-blocksPerRequest")) @@ -443,6 +444,8 @@ func init() { viper.BindPFlag("migrator.startBlock", rootCmd.PersistentFlags().Lookup("migrator-startBlock")) viper.BindPFlag("migrator.endBlock", rootCmd.PersistentFlags().Lookup("migrator-endBlock")) viper.BindPFlag("migrator.batchSize", rootCmd.PersistentFlags().Lookup("migrator-batchSize")) + viper.BindPFlag("migrator.workerCount", rootCmd.PersistentFlags().Lookup("migrator-workerCount")) + rootCmd.AddCommand(orchestratorCmd) rootCmd.AddCommand(apiCmd) rootCmd.AddCommand(validateAndFixCmd) diff --git a/configs/config.go b/configs/config.go index 78daec6..c8c52e0 100644 --- a/configs/config.go +++ b/configs/config.go @@ -267,6 +267,7 @@ type MigratorConfig struct { StartBlock uint `mapstructure:"startBlock"` EndBlock uint `mapstructure:"endBlock"` BatchSize uint `mapstructure:"batchSize"` + WorkerCount uint `mapstructure:"workerCount"` } type Config struct { From 86f3d68bba3f64d25b70f30d2f277e2c90594293 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 27 Aug 2025 22:27:08 +0000 Subject: [PATCH 38/43] Badger for caching in s3 connector --- internal/storage/block_buffer.go | 23 +- internal/storage/block_buffer_badger.go | 476 +++++++++++++++++++ internal/storage/block_buffer_badger_test.go | 144 ++++++ internal/storage/s3.go | 20 +- 4 files changed, 659 insertions(+), 4 deletions(-) create mode 100644 internal/storage/block_buffer_badger.go create mode 100644 internal/storage/block_buffer_badger_test.go diff --git a/internal/storage/block_buffer.go b/internal/storage/block_buffer.go index ddec1aa..a2d9158 100644 --- a/internal/storage/block_buffer.go +++ b/internal/storage/block_buffer.go @@ -18,7 +18,22 @@ type BlockBuffer struct { maxBlocks int } -// NewBlockBuffer creates a new block buffer +// IBlockBuffer defines the interface for block buffer implementations +type IBlockBuffer interface { + Add(blocks []common.BlockData, actualSizeBytes int64) bool + Flush() []common.BlockData + ShouldFlush() bool + Size() (int64, int) + IsEmpty() bool + GetData() []common.BlockData + GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData + GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData + GetMaxBlockNumber(chainId *big.Int) *big.Int + Clear() + Stats() BufferStats +} + +// NewBlockBuffer creates a new in-memory block buffer func NewBlockBuffer(maxSizeMB int64, maxBlocks int) *BlockBuffer { return &BlockBuffer{ data: make([]common.BlockData, 0), @@ -27,6 +42,12 @@ func NewBlockBuffer(maxSizeMB int64, maxBlocks int) *BlockBuffer { } } +// NewBlockBufferWithBadger creates a new Badger-backed block buffer for better memory management +// This uses ephemeral storage with optimized settings for caching +func NewBlockBufferWithBadger(maxSizeMB int64, maxBlocks int) (IBlockBuffer, error) { + return NewBadgerBlockBuffer(maxSizeMB, maxBlocks) +} + // Add adds blocks to the buffer and returns true if flush is needed func (b *BlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool { if len(blocks) == 0 { diff --git 
a/internal/storage/block_buffer_badger.go b/internal/storage/block_buffer_badger.go new file mode 100644 index 0000000..39775a8 --- /dev/null +++ b/internal/storage/block_buffer_badger.go @@ -0,0 +1,476 @@ +package storage + +import ( + "bytes" + "encoding/gob" + "fmt" + "math/big" + "os" + "sync" + "time" + + "github.com/dgraph-io/badger/v4" + "github.com/dgraph-io/badger/v4/options" + "github.com/rs/zerolog/log" + "github.com/thirdweb-dev/indexer/internal/common" +) + +// BadgerBlockBuffer manages buffering of block data using Badger as an ephemeral cache +type BadgerBlockBuffer struct { + mu sync.RWMutex + db *badger.DB + tempDir string + sizeBytes int64 + maxSizeBytes int64 + maxBlocks int + blockCount int + gcTicker *time.Ticker + stopGC chan struct{} + + // Chain metadata cache for O(1) lookups + chainMetadata map[uint64]*ChainMetadata +} + +// ChainMetadata tracks per-chain statistics for fast lookups +type ChainMetadata struct { + MinBlock *big.Int + MaxBlock *big.Int + BlockCount int +} + +// NewBadgerBlockBuffer creates a new Badger-backed block buffer with ephemeral storage +func NewBadgerBlockBuffer(maxSizeMB int64, maxBlocks int) (*BadgerBlockBuffer, error) { + // Create temporary directory for ephemeral storage + tempDir, err := os.MkdirTemp("", "blockbuffer-*") + if err != nil { + return nil, fmt.Errorf("failed to create temp dir: %w", err) + } + + // Configure Badger with optimized settings for ephemeral cache + opts := badger.DefaultOptions(tempDir) + + // Memory optimization settings (similar to badger.go but tuned for ephemeral use) + opts.ValueLogFileSize = 256 * 1024 * 1024 // 256MB (smaller for cache) + opts.BaseTableSize = 64 * 1024 * 1024 // 64MB + opts.BaseLevelSize = 64 * 1024 * 1024 // 64MB + opts.LevelSizeMultiplier = 10 // Aggressive growth + opts.NumMemtables = 5 // ~320MB + opts.MemTableSize = opts.BaseTableSize // 64MB per memtable + opts.NumLevelZeroTables = 5 + opts.NumLevelZeroTablesStall = 10 + opts.SyncWrites = false // No durability needed for cache + opts.DetectConflicts = false // No ACID needed + opts.NumCompactors = 2 // Less compactors for cache + opts.CompactL0OnClose = false // Don't compact on close (ephemeral) + opts.ValueLogMaxEntries = 100000 // Smaller for cache + opts.ValueThreshold = 1024 // Store values > 512 bytes in value log + opts.IndexCacheSize = 128 * 1024 * 1024 // 128MB index cache + opts.BlockCacheSize = 64 * 1024 * 1024 // 64MB block cache + opts.Compression = options.Snappy + opts.Logger = nil // Disable badger's internal logging + + // Ephemeral-specific settings + opts.InMemory = false // Use disk but in temp directory + opts.ReadOnly = false + opts.MetricsEnabled = false + + db, err := badger.Open(opts) + if err != nil { + os.RemoveAll(tempDir) + return nil, fmt.Errorf("failed to open badger db: %w", err) + } + + b := &BadgerBlockBuffer{ + db: db, + tempDir: tempDir, + maxSizeBytes: maxSizeMB * 1024 * 1024, + maxBlocks: maxBlocks, + stopGC: make(chan struct{}), + chainMetadata: make(map[uint64]*ChainMetadata), + } + + // Start GC routine with faster interval for cache + b.gcTicker = time.NewTicker(30 * time.Second) + go b.runGC() + + return b, nil +} + +// Add adds blocks to the buffer and returns true if flush is needed +func (b *BadgerBlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool { + if len(blocks) == 0 { + return false + } + + b.mu.Lock() + defer b.mu.Unlock() + + err := b.db.Update(func(txn *badger.Txn) error { + for _, block := range blocks { + key := b.makeKey(block.Block.ChainId, 
block.Block.Number) + + var buf bytes.Buffer + if err := gob.NewEncoder(&buf).Encode(block); err != nil { + return err + } + + if err := txn.Set(key, buf.Bytes()); err != nil { + return err + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to add blocks to badger buffer") + return false + } + + // Update counters + b.blockCount += len(blocks) + b.sizeBytes += actualSizeBytes + + // Update chain metadata for O(1) lookups + for _, block := range blocks { + chainId := block.Block.ChainId.Uint64() + meta, exists := b.chainMetadata[chainId] + if !exists { + meta = &ChainMetadata{ + MinBlock: new(big.Int).Set(block.Block.Number), + MaxBlock: new(big.Int).Set(block.Block.Number), + BlockCount: 1, + } + b.chainMetadata[chainId] = meta + } else { + if block.Block.Number.Cmp(meta.MinBlock) < 0 { + meta.MinBlock = new(big.Int).Set(block.Block.Number) + } + if block.Block.Number.Cmp(meta.MaxBlock) > 0 { + meta.MaxBlock = new(big.Int).Set(block.Block.Number) + } + meta.BlockCount++ + } + } + + log.Debug(). + Int("block_count", len(blocks)). + Int64("size_bytes", actualSizeBytes). + Int64("total_size_bytes", b.sizeBytes). + Int("total_blocks", b.blockCount). + Msg("Added blocks to badger buffer") + + // Check if flush is needed + return b.shouldFlushLocked() +} + +// Flush removes all data from the buffer and returns it +func (b *BadgerBlockBuffer) Flush() []common.BlockData { + b.mu.Lock() + defer b.mu.Unlock() + + if b.blockCount == 0 { + return nil + } + + var result []common.BlockData + + // Read all data + err := b.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.PrefetchValues = true + opts.PrefetchSize = 100 + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + result = append(result, blockData) + return nil + }) + if err != nil { + log.Error().Err(err).Msg("Failed to decode block data during flush") + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to read blocks during flush") + } + + // Clear the database + err = b.db.DropAll() + if err != nil { + log.Error().Err(err).Msg("Failed to clear badger buffer") + } + + // Reset counters and metadata + oldCount := b.blockCount + b.blockCount = 0 + b.sizeBytes = 0 + b.chainMetadata = make(map[uint64]*ChainMetadata) + + log.Info(). + Int("block_count", oldCount). 
+ Msg("Flushing badger buffer") + + return result +} + +// ShouldFlush checks if the buffer should be flushed based on configured thresholds +func (b *BadgerBlockBuffer) ShouldFlush() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return b.shouldFlushLocked() +} + +// Size returns the current buffer size in bytes and block count +func (b *BadgerBlockBuffer) Size() (int64, int) { + b.mu.RLock() + defer b.mu.RUnlock() + return b.sizeBytes, b.blockCount +} + +// IsEmpty returns true if the buffer is empty +func (b *BadgerBlockBuffer) IsEmpty() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return b.blockCount == 0 +} + +// GetData returns a copy of the current buffer data +func (b *BadgerBlockBuffer) GetData() []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result []common.BlockData + + err := b.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.PrefetchValues = true + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + result = append(result, blockData) + return nil + }) + if err != nil { + log.Error().Err(err).Msg("Failed to decode block data") + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to get data from badger buffer") + } + + return result +} + +// GetBlocksInRange returns blocks from the buffer that fall within the given range +func (b *BadgerBlockBuffer) GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result []common.BlockData + prefix := b.makePrefix(chainId) + + err := b.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = prefix + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + + blockNum := blockData.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + result = append(result, blockData) + } + return nil + }) + if err != nil { + log.Error().Err(err).Msg("Failed to decode block data in range") + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to get blocks in range from badger buffer") + } + + return result +} + +// GetBlockByNumber returns a specific block from the buffer if it exists +func (b *BadgerBlockBuffer) GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result *common.BlockData + key := b.makeKey(chainId, blockNumber) + + err := b.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(key) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return err + } + + return item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + result = &blockData + return nil + }) + }) + + if err != nil && err != badger.ErrKeyNotFound { + log.Error().Err(err).Msg("Failed to get block by number from badger buffer") + } + + return result +} + +// GetMaxBlockNumber returns the maximum block number for a chain in the buffer +func (b 
*BadgerBlockBuffer) GetMaxBlockNumber(chainId *big.Int) *big.Int { + b.mu.RLock() + defer b.mu.RUnlock() + + // O(1) lookup using cached metadata + meta, exists := b.chainMetadata[chainId.Uint64()] + if !exists || meta.MaxBlock == nil { + return nil + } + + // Return a copy to prevent external modification + return new(big.Int).Set(meta.MaxBlock) +} + +// Clear empties the buffer without returning data +func (b *BadgerBlockBuffer) Clear() { + b.mu.Lock() + defer b.mu.Unlock() + + err := b.db.DropAll() + if err != nil { + log.Error().Err(err).Msg("Failed to clear badger buffer") + } + + b.blockCount = 0 + b.sizeBytes = 0 + b.chainMetadata = make(map[uint64]*ChainMetadata) +} + +// Stats returns statistics about the buffer +func (b *BadgerBlockBuffer) Stats() BufferStats { + b.mu.RLock() + defer b.mu.RUnlock() + + stats := BufferStats{ + BlockCount: b.blockCount, + SizeBytes: b.sizeBytes, + ChainCount: len(b.chainMetadata), + ChainStats: make(map[uint64]ChainStats), + } + + // Use cached metadata for O(1) stats generation + for chainId, meta := range b.chainMetadata { + if meta.MinBlock != nil && meta.MaxBlock != nil { + stats.ChainStats[chainId] = ChainStats{ + BlockCount: meta.BlockCount, + MinBlock: new(big.Int).Set(meta.MinBlock), + MaxBlock: new(big.Int).Set(meta.MaxBlock), + } + } + } + + return stats +} + +// Close closes the buffer and cleans up resources +func (b *BadgerBlockBuffer) Close() error { + b.mu.Lock() + defer b.mu.Unlock() + + // Stop GC routine + if b.gcTicker != nil { + b.gcTicker.Stop() + close(b.stopGC) + } + + // Close database + if err := b.db.Close(); err != nil { + log.Error().Err(err).Msg("Failed to close badger buffer database") + } + + // Clean up temporary directory + if err := os.RemoveAll(b.tempDir); err != nil { + log.Error().Err(err).Msg("Failed to remove temp directory") + } + + return nil +} + +// Private methods + +func (b *BadgerBlockBuffer) shouldFlushLocked() bool { + // Check size limit + if b.maxSizeBytes > 0 && b.sizeBytes >= b.maxSizeBytes { + return true + } + + // Check block count limit + if b.maxBlocks > 0 && b.blockCount >= b.maxBlocks { + return true + } + + return false +} + +func (b *BadgerBlockBuffer) makeKey(chainId *big.Int, blockNumber *big.Int) []byte { + // Key format: block:<chainId>:<blockNumber>; block numbers are not zero-padded, so lexicographic key order does not match numeric block order + return fmt.Appendf(nil, "block:%s:%s", chainId.String(), blockNumber.String()) +} + +func (b *BadgerBlockBuffer) makePrefix(chainId *big.Int) []byte { + return fmt.Appendf(nil, "block:%s:", chainId.String()) +} + +func (b *BadgerBlockBuffer) runGC() { + for { + select { + case <-b.gcTicker.C: + err := b.db.RunValueLogGC(0.7) // More aggressive GC for cache + if err != nil && err != badger.ErrNoRewrite { + log.Debug().Err(err).Msg("BadgerBlockBuffer GC error") + } + case <-b.stopGC: + return + } + } +} diff --git a/internal/storage/block_buffer_badger_test.go b/internal/storage/block_buffer_badger_test.go new file mode 100644 index 0000000..7901a67 --- /dev/null +++ b/internal/storage/block_buffer_badger_test.go @@ -0,0 +1,144 @@ +package storage + +import ( + "math/big" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/thirdweb-dev/indexer/internal/common" +) + +func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { + // Create a new Badger buffer + buffer, err := NewBadgerBlockBuffer(10, 1000) // 10MB, 1000 blocks max + require.NoError(t, err) + defer buffer.Close() + + chainId := big.NewInt(1) + + // Add blocks + blocks :=
[]common.BlockData{ + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(100), + Hash: "0x1234", + }, + }, + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(101), + Hash: "0x5678", + }, + }, + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(99), + Hash: "0xabcd", + }, + }, + } + + buffer.Add(blocks, 1024) + + // Test O(1) GetMaxBlockNumber + start := time.Now() + maxBlock := buffer.GetMaxBlockNumber(chainId) + elapsed := time.Since(start) + + assert.NotNil(t, maxBlock) + assert.Equal(t, big.NewInt(101), maxBlock) + assert.Less(t, elapsed, time.Millisecond, "GetMaxBlockNumber should be O(1) and very fast") + + // Test O(1) Stats + start = time.Now() + stats := buffer.Stats() + elapsed = time.Since(start) + + assert.Equal(t, 3, stats.BlockCount) + assert.Equal(t, 1, stats.ChainCount) + chainStats := stats.ChainStats[1] + assert.Equal(t, 3, chainStats.BlockCount) + assert.Equal(t, big.NewInt(99), chainStats.MinBlock) + assert.Equal(t, big.NewInt(101), chainStats.MaxBlock) + assert.Less(t, elapsed, time.Millisecond, "Stats should be O(1) and very fast") + + // Test metadata is updated after flush + buffer.Flush() + maxBlock = buffer.GetMaxBlockNumber(chainId) + assert.Nil(t, maxBlock) + + // Add new blocks and verify metadata is rebuilt + newBlocks := []common.BlockData{ + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(200), + Hash: "0xffff", + }, + }, + } + buffer.Add(newBlocks, 512) + + maxBlock = buffer.GetMaxBlockNumber(chainId) + assert.NotNil(t, maxBlock) + assert.Equal(t, big.NewInt(200), maxBlock) +} + +func BenchmarkBadgerBlockBufferGetMaxBlockNumber(b *testing.B) { + buffer, err := NewBadgerBlockBuffer(100, 10000) + require.NoError(b, err) + defer buffer.Close() + + chainId := big.NewInt(1) + + // Add many blocks + for i := 0; i < 1000; i++ { + blocks := []common.BlockData{ + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(int64(i)), + Hash: "0x1234", + }, + }, + } + buffer.Add(blocks, 1024) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = buffer.GetMaxBlockNumber(chainId) + } +} + +func BenchmarkBadgerBlockBufferStats(b *testing.B) { + buffer, err := NewBadgerBlockBuffer(100, 10000) + require.NoError(b, err) + defer buffer.Close() + + // Add blocks for multiple chains + for chainId := 1; chainId <= 5; chainId++ { + for i := 0; i < 100; i++ { + blocks := []common.BlockData{ + { + Block: common.Block{ + ChainId: big.NewInt(int64(chainId)), + Number: big.NewInt(int64(i)), + Hash: "0x1234", + }, + }, + } + buffer.Add(blocks, 1024) + } + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = buffer.Stats() + } +} \ No newline at end of file diff --git a/internal/storage/s3.go b/internal/storage/s3.go index 9cd5f24..d50328b 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -27,7 +27,7 @@ type S3Connector struct { client *s3.Client config *config.S3StorageConfig formatter DataFormatter - buffer *BlockBuffer + buffer IBlockBuffer // Flush control stopCh chan struct{} @@ -115,7 +115,12 @@ func NewS3Connector(cfg *config.S3StorageConfig) (*S3Connector, error) { } // Create buffer with configured settings - buffer := NewBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) + buffer, err := NewBlockBufferWithBadger(cfg.BufferSize, cfg.MaxBlocksPerFile) + if err != nil { + // Fall back to in-memory buffer if Badger fails + log.Warn().Err(err).Msg("Failed to create Badger buffer, falling back to in-memory buffer") + buffer = NewBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) + } s3c 
:= &S3Connector{ client: s3Client, @@ -351,6 +356,16 @@ func (s *S3Connector) Close() error { // Wait for worker to finish s.wg.Wait() + + // Clean up buffer resources (especially important for BadgerBlockBuffer) + if badgerBuffer, ok := s.buffer.(*BadgerBlockBuffer); ok { + if err := badgerBuffer.Close(); err != nil { + log.Error().Err(err).Msg("Error closing badger buffer") + if closeErr == nil { + closeErr = err + } + } + } }) return closeErr @@ -481,7 +496,6 @@ func (f *ParquetFormatter) FormatBlockData(data []common.BlockData) ([]byte, err } // Convert block number to uint64 for efficient queries - // If block number is too large for uint64, use MaxUint64 blockNum := d.Block.Number.Uint64() if d.Block.Number.BitLen() > 64 { return nil, fmt.Errorf("block number exceeds uint64 is not supported") From 4595fa61dd08304155ca8ea406c70e74ac2f36d3 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 27 Aug 2025 23:54:57 +0000 Subject: [PATCH 39/43] optimize s3 insertion --- internal/storage/block_buffer.go | 40 ++++++++++++++++++-- internal/storage/block_buffer_badger.go | 30 +++++++++------ internal/storage/block_buffer_badger_test.go | 8 ++-- internal/storage/s3.go | 40 +++++++------------- 4 files changed, 71 insertions(+), 47 deletions(-) diff --git a/internal/storage/block_buffer.go b/internal/storage/block_buffer.go index a2d9158..90c6ed8 100644 --- a/internal/storage/block_buffer.go +++ b/internal/storage/block_buffer.go @@ -1,6 +1,8 @@ package storage import ( + "bytes" + "encoding/gob" "fmt" "math/big" "sync" @@ -20,7 +22,7 @@ type BlockBuffer struct { // IBlockBuffer defines the interface for block buffer implementations type IBlockBuffer interface { - Add(blocks []common.BlockData, actualSizeBytes int64) bool + Add(blocks []common.BlockData) bool Flush() []common.BlockData ShouldFlush() bool Size() (int64, int) @@ -31,6 +33,7 @@ type IBlockBuffer interface { GetMaxBlockNumber(chainId *big.Int) *big.Int Clear() Stats() BufferStats + Close() error } // NewBlockBuffer creates a new in-memory block buffer @@ -49,7 +52,7 @@ func NewBlockBufferWithBadger(maxSizeMB int64, maxBlocks int) (IBlockBuffer, err } // Add adds blocks to the buffer and returns true if flush is needed -func (b *BlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool { +func (b *BlockBuffer) Add(blocks []common.BlockData) bool { if len(blocks) == 0 { return false } @@ -57,13 +60,27 @@ func (b *BlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool b.mu.Lock() defer b.mu.Unlock() + // Calculate actual size by marshaling the entire batch once + // This gives us accurate size with minimal overhead since we marshal once per Add call + var actualSize int64 + var buf bytes.Buffer + enc := gob.NewEncoder(&buf) + + // Marshal all blocks to get actual serialized size + if err := enc.Encode(blocks); err != nil { + // If encoding fails, use estimation as fallback + log.Warn().Err(err).Msg("Failed to marshal blocks for size calculation, buffer size is not reported correctly") + } else { + actualSize = int64(buf.Len()) + } + // Add to buffer b.data = append(b.data, blocks...) - b.sizeBytes += actualSizeBytes + b.sizeBytes += actualSize log.Debug(). Int("block_count", len(blocks)). - Int64("size_bytes", actualSizeBytes). + Int64("actual_size_bytes", actualSize). Int64("total_size_bytes", b.sizeBytes). Int("total_blocks", len(b.data)). 
Msg("Added blocks to buffer") @@ -248,3 +265,18 @@ func (s BufferStats) String() string { return fmt.Sprintf("BufferStats{blocks=%d, size=%dMB, chains=%d}", s.BlockCount, s.SizeBytes/(1024*1024), s.ChainCount) } + +// Close closes the buffer (no-op for in-memory buffer) +func (b *BlockBuffer) Close() error { + b.mu.Lock() + defer b.mu.Unlock() + + // Clear the buffer to free memory + b.data = nil + b.sizeBytes = 0 + + return nil +} + +// Ensure BlockBuffer implements IBlockBuffer interface +var _ IBlockBuffer = (*BlockBuffer)(nil) diff --git a/internal/storage/block_buffer_badger.go b/internal/storage/block_buffer_badger.go index 39775a8..9d28cc5 100644 --- a/internal/storage/block_buffer_badger.go +++ b/internal/storage/block_buffer_badger.go @@ -20,7 +20,6 @@ type BadgerBlockBuffer struct { mu sync.RWMutex db *badger.DB tempDir string - sizeBytes int64 maxSizeBytes int64 maxBlocks int blockCount int @@ -97,7 +96,7 @@ func NewBadgerBlockBuffer(maxSizeMB int64, maxBlocks int) (*BadgerBlockBuffer, e } // Add adds blocks to the buffer and returns true if flush is needed -func (b *BadgerBlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool { +func (b *BadgerBlockBuffer) Add(blocks []common.BlockData) bool { if len(blocks) == 0 { return false } @@ -128,7 +127,6 @@ func (b *BadgerBlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64 // Update counters b.blockCount += len(blocks) - b.sizeBytes += actualSizeBytes // Update chain metadata for O(1) lookups for _, block := range blocks { @@ -154,8 +152,6 @@ func (b *BadgerBlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64 log.Debug(). Int("block_count", len(blocks)). - Int64("size_bytes", actualSizeBytes). - Int64("total_size_bytes", b.sizeBytes). Int("total_blocks", b.blockCount). Msg("Added blocks to badger buffer") @@ -212,7 +208,6 @@ func (b *BadgerBlockBuffer) Flush() []common.BlockData { // Reset counters and metadata oldCount := b.blockCount b.blockCount = 0 - b.sizeBytes = 0 b.chainMetadata = make(map[uint64]*ChainMetadata) log.Info(). 
@@ -233,7 +228,10 @@ func (b *BadgerBlockBuffer) ShouldFlush() bool { func (b *BadgerBlockBuffer) Size() (int64, int) { b.mu.RLock() defer b.mu.RUnlock() - return b.sizeBytes, b.blockCount + + // Get actual size from Badger's LSM tree + lsm, _ := b.db.Size() + return lsm, b.blockCount } // IsEmpty returns true if the buffer is empty @@ -382,7 +380,6 @@ func (b *BadgerBlockBuffer) Clear() { } b.blockCount = 0 - b.sizeBytes = 0 b.chainMetadata = make(map[uint64]*ChainMetadata) } @@ -391,9 +388,12 @@ func (b *BadgerBlockBuffer) Stats() BufferStats { b.mu.RLock() defer b.mu.RUnlock() + // Get actual size from Badger + lsm, _ := b.db.Size() + stats := BufferStats{ BlockCount: b.blockCount, - SizeBytes: b.sizeBytes, + SizeBytes: lsm, ChainCount: len(b.chainMetadata), ChainStats: make(map[uint64]ChainStats), } @@ -439,9 +439,12 @@ func (b *BadgerBlockBuffer) Close() error { // Private methods func (b *BadgerBlockBuffer) shouldFlushLocked() bool { - // Check size limit - if b.maxSizeBytes > 0 && b.sizeBytes >= b.maxSizeBytes { - return true + // Check size limit using Badger's actual size + if b.maxSizeBytes > 0 { + lsm, _ := b.db.Size() + if lsm >= b.maxSizeBytes { + return true + } } // Check block count limit @@ -474,3 +477,6 @@ func (b *BadgerBlockBuffer) runGC() { } } } + +// Ensure BadgerBlockBuffer implements IBlockBuffer interface +var _ IBlockBuffer = (*BadgerBlockBuffer)(nil) diff --git a/internal/storage/block_buffer_badger_test.go b/internal/storage/block_buffer_badger_test.go index 7901a67..b1c4c83 100644 --- a/internal/storage/block_buffer_badger_test.go +++ b/internal/storage/block_buffer_badger_test.go @@ -43,7 +43,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { }, } - buffer.Add(blocks, 1024) + buffer.Add(blocks) // Test O(1) GetMaxBlockNumber start := time.Now() @@ -82,7 +82,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { }, }, } - buffer.Add(newBlocks, 512) + buffer.Add(newBlocks) maxBlock = buffer.GetMaxBlockNumber(chainId) assert.NotNil(t, maxBlock) @@ -107,7 +107,7 @@ func BenchmarkBadgerBlockBufferGetMaxBlockNumber(b *testing.B) { }, }, } - buffer.Add(blocks, 1024) + buffer.Add(blocks) } b.ResetTimer() @@ -133,7 +133,7 @@ func BenchmarkBadgerBlockBufferStats(b *testing.B) { }, }, } - buffer.Add(blocks, 1024) + buffer.Add(blocks) } } diff --git a/internal/storage/s3.go b/internal/storage/s3.go index d50328b..2e37aa6 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -115,10 +115,11 @@ func NewS3Connector(cfg *config.S3StorageConfig) (*S3Connector, error) { } // Create buffer with configured settings - buffer, err := NewBlockBufferWithBadger(cfg.BufferSize, cfg.MaxBlocksPerFile) + var buffer IBlockBuffer + buffer, err = NewBadgerBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) if err != nil { - // Fall back to in-memory buffer if Badger fails - log.Warn().Err(err).Msg("Failed to create Badger buffer, falling back to in-memory buffer") + // fallback + log.Error().Err(err).Msg("Failed to create Badger buffer, falling back to in-memory buffer") buffer = NewBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) } @@ -144,27 +145,14 @@ func (s *S3Connector) InsertBlockData(data []common.BlockData) error { return nil } - // Calculate actual serialized size for accurate memory tracking - formattedData, err := s.formatter.FormatBlockData(data) - if err != nil { - return fmt.Errorf("failed to format block data for size calculation: %w", err) - } - - // Use actual serialized size for accurate memory tracking - actualSize := 
int64(len(formattedData)) - log.Debug(). - Int("block_count", len(data)). - Int64("size_bytes", actualSize). - Int64("avg_bytes_per_block", actualSize/int64(len(data))). - Msg("Calculated actual block data size") - // Add to buffer and check if flush is needed - shouldFlush := s.buffer.Add(data, actualSize) + shouldFlush := s.buffer.Add(data) // Start or reset timer when first data is added s.timerMu.Lock() - sizeBytes, blockCount := s.buffer.Size() - if sizeBytes == actualSize && blockCount == len(data) && s.config.BufferTimeout > 0 { + _, blockCount := s.buffer.Size() + // Check if this is the first batch added (buffer was empty before) + if blockCount == len(data) && s.config.BufferTimeout > 0 { // First data added to buffer, track time and start timer s.lastAddTime = time.Now() if s.flushTimer != nil { @@ -357,13 +345,11 @@ func (s *S3Connector) Close() error { // Wait for worker to finish s.wg.Wait() - // Clean up buffer resources (especially important for BadgerBlockBuffer) - if badgerBuffer, ok := s.buffer.(*BadgerBlockBuffer); ok { - if err := badgerBuffer.Close(); err != nil { - log.Error().Err(err).Msg("Error closing badger buffer") - if closeErr == nil { - closeErr = err - } + // Clean up buffer resources + if err := s.buffer.Close(); err != nil { + log.Error().Err(err).Msg("Error closing buffer") + if closeErr == nil { + closeErr = err } } }) From 136a346c3892ac07fc242e6ba36f5d5159d7b28b Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 28 Aug 2025 01:05:56 +0000 Subject: [PATCH 40/43] redis tls. erc1155 batch mv --- cmd/root.go | 2 + configs/config.go | 9 +- internal/storage/redis.go | 15 +++- ...7_clickhouse_create_token_transfers_mv.sql | 86 ++++++++++--------- 4 files changed, 64 insertions(+), 48 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index efcd200..391ad78 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -151,6 +151,7 @@ func init() { rootCmd.PersistentFlags().Int("storage-orchestrator-redis-port", 6379, "Redis port for orchestrator storage metadata") rootCmd.PersistentFlags().String("storage-orchestrator-redis-password", "", "Redis password for orchestator storage metadata") rootCmd.PersistentFlags().Int("storage-orchestrator-redis-db", 0, "Redis database number for orchestrator storage metadata") + rootCmd.PersistentFlags().Bool("storage-orchestrator-redis-enableTLS", true, "Enable TLS for Redis connection in orchestrator storage metadata") rootCmd.PersistentFlags().String("storage-staging-type", "auto", "Storage type for staging (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-main-type", "auto", "Storage type for main (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-orchestrator-type", "auto", "Storage type for orchestrator (auto, clickhouse, postgres, badger)") @@ -341,6 +342,7 @@ func init() { viper.BindPFlag("storage.orchestrator.redis.port", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-port")) viper.BindPFlag("storage.orchestrator.redis.password", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-password")) viper.BindPFlag("storage.orchestrator.redis.db", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-db")) + viper.BindPFlag("storage.orchestrator.redis.enableTLS", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-enableTLS")) viper.BindPFlag("storage.orchestrator.badger.path", rootCmd.PersistentFlags().Lookup("storage-orchestrator-badger-path")) 
viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) viper.BindPFlag("storage.staging.postgres.host", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-host")) diff --git a/configs/config.go b/configs/config.go index c8c52e0..395d2f1 100644 --- a/configs/config.go +++ b/configs/config.go @@ -147,10 +147,11 @@ type PostgresConfig struct { } type RedisConfig struct { - Host string `mapstructure:"host"` - Port int `mapstructure:"port"` - Password string `mapstructure:"password"` - DB int `mapstructure:"db"` + Host string `mapstructure:"host"` + Port int `mapstructure:"port"` + Password string `mapstructure:"password"` + DB int `mapstructure:"db"` + EnableTLS bool `mapstructure:"enableTLS"` } type KafkaConfig struct { diff --git a/internal/storage/redis.go b/internal/storage/redis.go index d48b17f..bb71810 100644 --- a/internal/storage/redis.go +++ b/internal/storage/redis.go @@ -2,6 +2,7 @@ package storage import ( "context" + "crypto/tls" "fmt" "math/big" "time" @@ -26,10 +27,18 @@ type RedisConnector struct { func NewRedisConnector(cfg *config.RedisConfig) (*RedisConnector, error) { // Connect to Redis + var tlsConfig *tls.Config + if cfg.EnableTLS { + tlsConfig = &tls.Config{ + MinVersion: tls.VersionTLS12, // Ensure a secure TLS version + } + } + redisClient := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port), - Password: cfg.Password, - DB: cfg.DB, + Addr: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port), + Password: cfg.Password, + DB: cfg.DB, + TLSConfig: tlsConfig, }) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) diff --git a/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql index 7c09aea..30d01a5 100644 --- a/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql +++ b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql @@ -80,52 +80,56 @@ WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0 -- ERC1155 (batch) CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc1155_batch_mv TO token_transfers -AS -SELECT - chain_id, - address AS token_address, - 'erc1155' AS token_type, - reinterpretAsUInt256(reverse(unhex(id_hex))) AS token_id, - concat('0x', substring(topic_2, 27, 40)) AS from_address, - concat('0x', substring(topic_3, 27, 40)) AS to_address, +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(substring(bin, (ids_base + ((i - 1) * 32)) + 1, 32))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(substring(bin, (am_base + ((i - 1) * 32)) + 1, 32))) AS amount, + log_index, + toNullable(toUInt16(i - 1)) AS batch_index, + insert_timestamp, + is_deleted +FROM ( + SELECT + chain_id, + address, + topic_2, + topic_3, block_number, block_timestamp, transaction_hash, transaction_index, - reinterpretAsUInt256(reverse(unhex(amount_hex))) AS amount, log_index, - toNullable(toUInt16(array_index - 1)) AS batch_index, - insert_timestamp, - is_deleted -FROM ( - SELECT - chain_id, - address, - topic_2, - topic_3, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - log_index, - is_deleted, - insert_timestamp, - 
toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64))))) AS ids_offset, - toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64))))) AS amounts_offset, - toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + ids_offset * 2, 64))))) AS ids_length, - toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + amounts_offset * 2, 64))))) AS amounts_length, - arrayMap(i -> substring(data, 3 + ids_offset * 2 + 64 + (i-1)*64, 64), range(1, least(ids_length, 10000) + 1)) AS ids_array, - arrayMap(i -> substring(data, 3 + amounts_offset * 2 + 64 + (i-1)*64, 64), range(1, least(amounts_length, 10000) + 1)) AS amounts_array - FROM logs - WHERE topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb' - AND length(topic_2) = 66 - AND length(topic_3) = 66 - AND ids_length = amounts_length -) -ARRAY JOIN - ids_array AS id_hex, - amounts_array AS amount_hex, - arrayEnumerate(ids_array) AS array_index; + is_deleted, + insert_timestamp, + unhex(substring(data, 3)) AS bin, + length(unhex(substring(data, 3))) AS bin_len, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), 1, 32)))) AS ids_off, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), 33, 32)))) AS am_off, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), ids_off + 1, 32)))) AS ids_len, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), am_off + 1, 32)))) AS am_len, + ids_off + 32 AS ids_base, + am_off + 32 AS am_base +FROM default.logs +WHERE (topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') + AND (length(topic_2) = 66) + AND (length(topic_3) = 66) + AND (ids_len = am_len) + AND (ids_len > 0) + AND ((ids_off + 32) <= bin_len) + AND ((am_off + 32) <= bin_len) + AND ((ids_base + (ids_len * 32)) <= bin_len) + AND ((am_base + (am_len * 32)) <= bin_len) +) ARRAY JOIN range(1, ids_len + 1) AS i; + -- ERC6909 CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc6909_mv From b9828af1d479d1ec9f4ec4640f421ccc515bff61 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 28 Aug 2025 05:08:09 +0000 Subject: [PATCH 41/43] fix projections, use _part_offset projections --- .../0000_clickhouse_create_blocks_table.sql | 16 +++++++ ...1_clickhouse_create_transactions_table.sql | 28 ++++++------ .../0002_clickhouse_create_logs_table.sql | 16 +++---- .../0003_clickhouse_create_traces_table.sql | 6 +-- ...0006_clickhouse_create_token_transfers.sql | 44 +++++++++---------- .../0008_clickhouse_create_token_balances.sql | 9 +++- ...clickhouse_create_address_transactions.sql | 11 ++--- ...12_clickhouse_create_address_transfers.sql | 24 +++++----- 8 files changed, 90 insertions(+), 64 deletions(-) diff --git a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index a1d1979..1bab7b8 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -27,6 +27,22 @@ CREATE TABLE IF NOT EXISTS blocks ( INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, + + PROJECTION chain_state_projection + ( + SELECT + chain_id, + count() AS count, + uniqExact(block_number) AS unique_block_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS 
max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY + chain_id + ) + + ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) diff --git a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql index 11dff13..562f339 100644 --- a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql +++ b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql @@ -45,7 +45,7 @@ CREATE TABLE IF NOT EXISTS transactions ( PROJECTION from_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, from_address, @@ -55,7 +55,7 @@ CREATE TABLE IF NOT EXISTS transactions ( PROJECTION to_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, to_address, @@ -67,11 +67,12 @@ CREATE TABLE IF NOT EXISTS transactions ( SELECT chain_id, from_address, - countState() AS tx_count_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS tx_count, + uniqExact(hash) AS unique_tx_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, from_address @@ -81,11 +82,12 @@ CREATE TABLE IF NOT EXISTS transactions ( SELECT chain_id, to_address, - countState() AS tx_count_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS tx_count, + uniqExact(hash) AS unique_tx_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, to_address @@ -93,4 +95,4 @@ CREATE TABLE IF NOT EXISTS transactions ( ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number, hash) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql index 89f6e1c..d4e202c 100644 --- a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -28,7 +28,7 @@ CREATE TABLE IF NOT EXISTS logs ( PROJECTION chain_address_topic0_projection ( SELECT - * + _part_offset ORDER BY chain_id, address, @@ -40,7 +40,7 @@ CREATE TABLE IF NOT EXISTS logs ( PROJECTION chain_topic0_projection ( SELECT - * + _part_offset ORDER BY chain_id, topic_0, @@ -55,11 +55,11 @@ CREATE TABLE IF NOT EXISTS logs ( chain_id, address, topic_0, - countState() AS log_count_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - 
maxState(block_timestamp) AS max_block_timestamp_state + count() AS log_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, address, @@ -68,4 +68,4 @@ CREATE TABLE IF NOT EXISTS logs ( ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number, transaction_hash, log_index) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; diff --git a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql index 8f69a1f..6b65467 100644 --- a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql +++ b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql @@ -32,7 +32,7 @@ CREATE TABLE IF NOT EXISTS traces ( PROJECTION from_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, from_address, @@ -43,7 +43,7 @@ CREATE TABLE IF NOT EXISTS traces ( PROJECTION to_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, to_address, @@ -55,4 +55,4 @@ CREATE TABLE IF NOT EXISTS traces ( ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, transaction_hash, trace_address) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; diff --git a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql index 9007649..edb92cb 100644 --- a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql +++ b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql @@ -24,7 +24,7 @@ CREATE TABLE IF NOT EXISTS token_transfers PROJECTION from_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, from_address, @@ -34,7 +34,7 @@ CREATE TABLE IF NOT EXISTS token_transfers ), PROJECTION to_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, to_address, @@ -44,7 +44,7 @@ CREATE TABLE IF NOT EXISTS token_transfers ), PROJECTION token_id_projection ( SELECT - * + _part_offset ORDER BY chain_id, token_address, @@ -59,12 +59,12 @@ CREATE TABLE IF NOT EXISTS token_transfers from_address, token_address, token_type, - countState() AS transfer_count_state, - sumState(toInt256(amount)) AS total_amount_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, from_address, @@ -77,12 +77,12 @@ CREATE TABLE IF NOT EXISTS token_transfers to_address, token_address, token_type, - countState() AS transfer_count_state, - sumState(toInt256(amount)) AS 
total_amount_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, to_address, @@ -95,12 +95,12 @@ CREATE TABLE IF NOT EXISTS token_transfers token_address, token_id, token_type, - countState() AS transfer_count_state, - sumState(toInt256(amount)) AS total_volume_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS transfer_count, + sum(toInt256(amount)) AS total_volume, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, token_address, @@ -111,4 +111,4 @@ CREATE TABLE IF NOT EXISTS token_transfers ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) ORDER BY (chain_id, token_address, block_number, transaction_index, log_index) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql index 11e0c6a..49444f1 100644 --- a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql @@ -52,9 +52,16 @@ CREATE TABLE IF NOT EXISTS token_balances maxState(block_number) AS max_block_number_state, maxState(block_timestamp) AS max_block_timestamp_state GROUP BY chain_id, token_address, token_id, owner_address + ), + + PROJECTION token_projection + ( + SELECT + _part_offset + ORDER BY chain_id, token_address, token_id, owner_address ) ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) PARTITION BY chain_id ORDER BY (chain_id, owner_address, token_address, token_id, block_number, transaction_index, log_index, direction) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql index fa9f55a..f546f40 100644 --- a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql +++ b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql @@ -47,11 +47,12 @@ CREATE TABLE IF NOT EXISTS address_transactions ( SELECT chain_id, address, - countState() AS tx_count_state, 
- minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS tx_count, + uniqExact(hash) AS unique_tx_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, address diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql index 3803323..c130e70 100644 --- a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql +++ b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql @@ -30,12 +30,12 @@ CREATE TABLE IF NOT EXISTS address_transfers ( address_type, token_address, token_type, - countState() AS transfer_count_state, - sumState(toInt256(amount)) AS total_amount_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, address, @@ -49,12 +49,12 @@ CREATE TABLE IF NOT EXISTS address_transfers ( address, token_address, token_type, - countState() AS transfer_count_state, - sumState(toInt256(amount)) AS total_amount_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, address, From 69f5f78656253b07702a61cde422a51e967910e4 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 28 Aug 2025 05:19:16 +0000 Subject: [PATCH 42/43] gofmt --- internal/storage/block_buffer_badger.go | 4 ++-- internal/storage/block_buffer_badger_test.go | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/internal/storage/block_buffer_badger.go b/internal/storage/block_buffer_badger.go index 9d28cc5..09469c4 100644 --- a/internal/storage/block_buffer_badger.go +++ b/internal/storage/block_buffer_badger.go @@ -228,7 +228,7 @@ func (b *BadgerBlockBuffer) ShouldFlush() bool { func (b *BadgerBlockBuffer) Size() (int64, int) { b.mu.RLock() defer b.mu.RUnlock() - + // Get actual size from Badger's LSM tree lsm, _ := b.db.Size() return lsm, b.blockCount @@ -390,7 +390,7 @@ func (b *BadgerBlockBuffer) Stats() BufferStats { // Get actual size from Badger lsm, _ := b.db.Size() - + stats := BufferStats{ BlockCount: b.blockCount, SizeBytes: lsm, diff --git a/internal/storage/block_buffer_badger_test.go b/internal/storage/block_buffer_badger_test.go index b1c4c83..b10e8d8 100644 --- a/internal/storage/block_buffer_badger_test.go +++ b/internal/storage/block_buffer_badger_test.go @@ -17,7 +17,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { defer 
buffer.Close() chainId := big.NewInt(1) - + // Add blocks blocks := []common.BlockData{ { @@ -49,7 +49,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { start := time.Now() maxBlock := buffer.GetMaxBlockNumber(chainId) elapsed := time.Since(start) - + assert.NotNil(t, maxBlock) assert.Equal(t, big.NewInt(101), maxBlock) assert.Less(t, elapsed, time.Millisecond, "GetMaxBlockNumber should be O(1) and very fast") @@ -58,7 +58,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { start = time.Now() stats := buffer.Stats() elapsed = time.Since(start) - + assert.Equal(t, 3, stats.BlockCount) assert.Equal(t, 1, stats.ChainCount) chainStats := stats.ChainStats[1] @@ -71,7 +71,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { buffer.Flush() maxBlock = buffer.GetMaxBlockNumber(chainId) assert.Nil(t, maxBlock) - + // Add new blocks and verify metadata is rebuilt newBlocks := []common.BlockData{ { @@ -83,7 +83,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { }, } buffer.Add(newBlocks) - + maxBlock = buffer.GetMaxBlockNumber(chainId) assert.NotNil(t, maxBlock) assert.Equal(t, big.NewInt(200), maxBlock) @@ -95,7 +95,7 @@ func BenchmarkBadgerBlockBufferGetMaxBlockNumber(b *testing.B) { defer buffer.Close() chainId := big.NewInt(1) - + // Add many blocks for i := 0; i < 1000; i++ { blocks := []common.BlockData{ @@ -141,4 +141,4 @@ func BenchmarkBadgerBlockBufferStats(b *testing.B) { for i := 0; i < b.N; i++ { _ = buffer.Stats() } -} \ No newline at end of file +} From e551c17c2cef9618f1480ecf2c9b63dc76cb1371 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 28 Aug 2025 05:31:39 +0000 Subject: [PATCH 43/43] Fix test --- internal/orchestrator/committer_test.go | 2 +- test/mocks/MockIMainStorage.go | 167 +++++++++++++++- test/mocks/MockIOrchestratorStorage.go | 242 +++++++++++++++++------- test/mocks/MockIRPCClient.go | 2 +- test/mocks/MockIStagingStorage.go | 230 +++++++++++++++------- 5 files changed, 500 insertions(+), 143 deletions(-) diff --git a/internal/orchestrator/committer_test.go b/internal/orchestrator/committer_test.go index 8e2cb90..160a748 100644 --- a/internal/orchestrator/committer_test.go +++ b/internal/orchestrator/committer_test.go @@ -426,7 +426,7 @@ func TestHandleGap(t *testing.T) { mockRPC.EXPECT().GetBlocksPerRequest().Return(rpc.BlocksPerRequestConfig{ Blocks: 5, }) - mockRPC.EXPECT().GetChainID().Return(big.NewInt(1)) + // GetChainID is not called in this flow since there are no block failures mockRPC.EXPECT().GetFullBlocks(context.Background(), []*big.Int{big.NewInt(100), big.NewInt(101), big.NewInt(102), big.NewInt(103), big.NewInt(104)}).Return([]rpc.GetFullBlockResult{ {BlockNumber: big.NewInt(100), Data: common.BlockData{Block: common.Block{Number: big.NewInt(100)}}}, {BlockNumber: big.NewInt(101), Data: common.BlockData{Block: common.Block{Number: big.NewInt(101)}}}, diff --git a/test/mocks/MockIMainStorage.go b/test/mocks/MockIMainStorage.go index a77c398..e13e4ee 100644 --- a/test/mocks/MockIMainStorage.go +++ b/test/mocks/MockIMainStorage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. 
//go:build !production @@ -26,6 +26,51 @@ func (_m *MockIMainStorage) EXPECT() *MockIMainStorage_Expecter { return &MockIMainStorage_Expecter{mock: &_m.Mock} } +// Close provides a mock function with no fields +func (_m *MockIMainStorage) Close() error { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for Close") + } + + var r0 error + if rf, ok := ret.Get(0).(func() error); ok { + r0 = rf() + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIMainStorage_Close_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Close' +type MockIMainStorage_Close_Call struct { + *mock.Call +} + +// Close is a helper method to define mock.On call +func (_e *MockIMainStorage_Expecter) Close() *MockIMainStorage_Close_Call { + return &MockIMainStorage_Close_Call{Call: _e.mock.On("Close")} +} + +func (_c *MockIMainStorage_Close_Call) Run(run func()) *MockIMainStorage_Close_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockIMainStorage_Close_Call) Return(_a0 error) *MockIMainStorage_Close_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIMainStorage_Close_Call) RunAndReturn(run func() error) *MockIMainStorage_Close_Call { + _c.Call.Return(run) + return _c +} + // FindMissingBlockNumbers provides a mock function with given fields: chainId, startBlock, endBlock func (_m *MockIMainStorage) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { ret := _m.Called(chainId, startBlock, endBlock) @@ -143,6 +188,66 @@ func (_c *MockIMainStorage_GetAggregations_Call) RunAndReturn(run func(string, s return _c } +// GetBlockCount provides a mock function with given fields: chainId, startBlock, endBlock +func (_m *MockIMainStorage) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + ret := _m.Called(chainId, startBlock, endBlock) + + if len(ret) == 0 { + panic("no return value specified for GetBlockCount") + } + + var r0 *big.Int + var r1 error + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) (*big.Int, error)); ok { + return rf(chainId, startBlock, endBlock) + } + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) *big.Int); ok { + r0 = rf(chainId, startBlock, endBlock) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*big.Int) + } + } + + if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { + r1 = rf(chainId, startBlock, endBlock) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockIMainStorage_GetBlockCount_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlockCount' +type MockIMainStorage_GetBlockCount_Call struct { + *mock.Call +} + +// GetBlockCount is a helper method to define mock.On call +// - chainId *big.Int +// - startBlock *big.Int +// - endBlock *big.Int +func (_e *MockIMainStorage_Expecter) GetBlockCount(chainId interface{}, startBlock interface{}, endBlock interface{}) *MockIMainStorage_GetBlockCount_Call { + return &MockIMainStorage_GetBlockCount_Call{Call: _e.mock.On("GetBlockCount", chainId, startBlock, endBlock)} +} + +func (_c *MockIMainStorage_GetBlockCount_Call) Run(run func(chainId *big.Int, startBlock *big.Int, endBlock *big.Int)) *MockIMainStorage_GetBlockCount_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) + }) + return _c +} + +func (_c *MockIMainStorage_GetBlockCount_Call) Return(blockCount *big.Int, 
err error) *MockIMainStorage_GetBlockCount_Call { + _c.Call.Return(blockCount, err) + return _c +} + +func (_c *MockIMainStorage_GetBlockCount_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) (*big.Int, error)) *MockIMainStorage_GetBlockCount_Call { + _c.Call.Return(run) + return _c +} + // GetBlockHeadersDescending provides a mock function with given fields: chainId, from, to func (_m *MockIMainStorage) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { ret := _m.Called(chainId, from, to) @@ -462,6 +567,66 @@ func (_c *MockIMainStorage_GetMaxBlockNumber_Call) RunAndReturn(run func(*big.In return _c } +// GetMaxBlockNumberInRange provides a mock function with given fields: chainId, startBlock, endBlock +func (_m *MockIMainStorage) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + ret := _m.Called(chainId, startBlock, endBlock) + + if len(ret) == 0 { + panic("no return value specified for GetMaxBlockNumberInRange") + } + + var r0 *big.Int + var r1 error + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) (*big.Int, error)); ok { + return rf(chainId, startBlock, endBlock) + } + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) *big.Int); ok { + r0 = rf(chainId, startBlock, endBlock) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*big.Int) + } + } + + if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { + r1 = rf(chainId, startBlock, endBlock) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockIMainStorage_GetMaxBlockNumberInRange_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetMaxBlockNumberInRange' +type MockIMainStorage_GetMaxBlockNumberInRange_Call struct { + *mock.Call +} + +// GetMaxBlockNumberInRange is a helper method to define mock.On call +// - chainId *big.Int +// - startBlock *big.Int +// - endBlock *big.Int +func (_e *MockIMainStorage_Expecter) GetMaxBlockNumberInRange(chainId interface{}, startBlock interface{}, endBlock interface{}) *MockIMainStorage_GetMaxBlockNumberInRange_Call { + return &MockIMainStorage_GetMaxBlockNumberInRange_Call{Call: _e.mock.On("GetMaxBlockNumberInRange", chainId, startBlock, endBlock)} +} + +func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) Run(run func(chainId *big.Int, startBlock *big.Int, endBlock *big.Int)) *MockIMainStorage_GetMaxBlockNumberInRange_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) + }) + return _c +} + +func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) Return(maxBlockNumber *big.Int, err error) *MockIMainStorage_GetMaxBlockNumberInRange_Call { + _c.Call.Return(maxBlockNumber, err) + return _c +} + +func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) (*big.Int, error)) *MockIMainStorage_GetMaxBlockNumberInRange_Call { + _c.Call.Return(run) + return _c +} + // GetTokenBalances provides a mock function with given fields: qf, fields func (_m *MockIMainStorage) GetTokenBalances(qf storage.BalancesQueryFilter, fields ...string) (storage.QueryResult[common.TokenBalance], error) { _va := make([]interface{}, len(fields)) diff --git a/test/mocks/MockIOrchestratorStorage.go b/test/mocks/MockIOrchestratorStorage.go index fe382f0..c8d0932 100644 --- a/test/mocks/MockIOrchestratorStorage.go +++ b/test/mocks/MockIOrchestratorStorage.go @@ -1,4 +1,4 @@ -// Code generated by mockery 
v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. //go:build !production @@ -8,9 +8,6 @@ import ( big "math/big" mock "github.com/stretchr/testify/mock" - common "github.com/thirdweb-dev/indexer/internal/common" - - storage "github.com/thirdweb-dev/indexer/internal/storage" ) // MockIOrchestratorStorage is an autogenerated mock type for the IOrchestratorStorage type @@ -26,17 +23,17 @@ func (_m *MockIOrchestratorStorage) EXPECT() *MockIOrchestratorStorage_Expecter return &MockIOrchestratorStorage_Expecter{mock: &_m.Mock} } -// DeleteBlockFailures provides a mock function with given fields: failures -func (_m *MockIOrchestratorStorage) DeleteBlockFailures(failures []common.BlockFailure) error { - ret := _m.Called(failures) +// Close provides a mock function with no fields +func (_m *MockIOrchestratorStorage) Close() error { + ret := _m.Called() if len(ret) == 0 { - panic("no return value specified for DeleteBlockFailures") + panic("no return value specified for Close") } var r0 error - if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { - r0 = rf(failures) + if rf, ok := ret.Get(0).(func() error); ok { + r0 = rf() } else { r0 = ret.Error(0) } @@ -44,57 +41,56 @@ func (_m *MockIOrchestratorStorage) DeleteBlockFailures(failures []common.BlockF return r0 } -// MockIOrchestratorStorage_DeleteBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteBlockFailures' -type MockIOrchestratorStorage_DeleteBlockFailures_Call struct { +// MockIOrchestratorStorage_Close_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Close' +type MockIOrchestratorStorage_Close_Call struct { *mock.Call } -// DeleteBlockFailures is a helper method to define mock.On call -// - failures []common.BlockFailure -func (_e *MockIOrchestratorStorage_Expecter) DeleteBlockFailures(failures interface{}) *MockIOrchestratorStorage_DeleteBlockFailures_Call { - return &MockIOrchestratorStorage_DeleteBlockFailures_Call{Call: _e.mock.On("DeleteBlockFailures", failures)} +// Close is a helper method to define mock.On call +func (_e *MockIOrchestratorStorage_Expecter) Close() *MockIOrchestratorStorage_Close_Call { + return &MockIOrchestratorStorage_Close_Call{Call: _e.mock.On("Close")} } -func (_c *MockIOrchestratorStorage_DeleteBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIOrchestratorStorage_DeleteBlockFailures_Call { +func (_c *MockIOrchestratorStorage_Close_Call) Run(run func()) *MockIOrchestratorStorage_Close_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].([]common.BlockFailure)) + run() }) return _c } -func (_c *MockIOrchestratorStorage_DeleteBlockFailures_Call) Return(_a0 error) *MockIOrchestratorStorage_DeleteBlockFailures_Call { +func (_c *MockIOrchestratorStorage_Close_Call) Return(_a0 error) *MockIOrchestratorStorage_Close_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIOrchestratorStorage_DeleteBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIOrchestratorStorage_DeleteBlockFailures_Call { +func (_c *MockIOrchestratorStorage_Close_Call) RunAndReturn(run func() error) *MockIOrchestratorStorage_Close_Call { _c.Call.Return(run) return _c } -// GetBlockFailures provides a mock function with given fields: qf -func (_m *MockIOrchestratorStorage) GetBlockFailures(qf storage.QueryFilter) ([]common.BlockFailure, error) { - ret := _m.Called(qf) +// GetLastCommittedBlockNumber provides a mock function with given fields: chainId 
+func (_m *MockIOrchestratorStorage) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + ret := _m.Called(chainId) if len(ret) == 0 { - panic("no return value specified for GetBlockFailures") + panic("no return value specified for GetLastCommittedBlockNumber") } - var r0 []common.BlockFailure + var r0 *big.Int var r1 error - if rf, ok := ret.Get(0).(func(storage.QueryFilter) ([]common.BlockFailure, error)); ok { - return rf(qf) + if rf, ok := ret.Get(0).(func(*big.Int) (*big.Int, error)); ok { + return rf(chainId) } - if rf, ok := ret.Get(0).(func(storage.QueryFilter) []common.BlockFailure); ok { - r0 = rf(qf) + if rf, ok := ret.Get(0).(func(*big.Int) *big.Int); ok { + r0 = rf(chainId) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).([]common.BlockFailure) + r0 = ret.Get(0).(*big.Int) } } - if rf, ok := ret.Get(1).(func(storage.QueryFilter) error); ok { - r1 = rf(qf) + if rf, ok := ret.Get(1).(func(*big.Int) error); ok { + r1 = rf(chainId) } else { r1 = ret.Error(1) } @@ -102,30 +98,88 @@ func (_m *MockIOrchestratorStorage) GetBlockFailures(qf storage.QueryFilter) ([] return r0, r1 } -// MockIOrchestratorStorage_GetBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlockFailures' -type MockIOrchestratorStorage_GetBlockFailures_Call struct { +// MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetLastCommittedBlockNumber' +type MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call struct { *mock.Call } -// GetBlockFailures is a helper method to define mock.On call -// - qf storage.QueryFilter -func (_e *MockIOrchestratorStorage_Expecter) GetBlockFailures(qf interface{}) *MockIOrchestratorStorage_GetBlockFailures_Call { - return &MockIOrchestratorStorage_GetBlockFailures_Call{Call: _e.mock.On("GetBlockFailures", qf)} +// GetLastCommittedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +func (_e *MockIOrchestratorStorage_Expecter) GetLastCommittedBlockNumber(chainId interface{}) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { + return &MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call{Call: _e.mock.On("GetLastCommittedBlockNumber", chainId)} } -func (_c *MockIOrchestratorStorage_GetBlockFailures_Call) Run(run func(qf storage.QueryFilter)) *MockIOrchestratorStorage_GetBlockFailures_Call { +func (_c *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call) Run(run func(chainId *big.Int)) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(storage.QueryFilter)) + run(args[0].(*big.Int)) }) return _c } -func (_c *MockIOrchestratorStorage_GetBlockFailures_Call) Return(_a0 []common.BlockFailure, _a1 error) *MockIOrchestratorStorage_GetBlockFailures_Call { - _c.Call.Return(_a0, _a1) +func (_c *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call) Return(blockNumber *big.Int, err error) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { + _c.Call.Return(blockNumber, err) + return _c +} + +func (_c *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call) RunAndReturn(run func(*big.Int) (*big.Int, error)) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { + _c.Call.Return(run) + return _c +} + +// GetLastPublishedBlockNumber provides a mock function with given fields: chainId +func (_m *MockIOrchestratorStorage) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + ret := 
_m.Called(chainId) + + if len(ret) == 0 { + panic("no return value specified for GetLastPublishedBlockNumber") + } + + var r0 *big.Int + var r1 error + if rf, ok := ret.Get(0).(func(*big.Int) (*big.Int, error)); ok { + return rf(chainId) + } + if rf, ok := ret.Get(0).(func(*big.Int) *big.Int); ok { + r0 = rf(chainId) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*big.Int) + } + } + + if rf, ok := ret.Get(1).(func(*big.Int) error); ok { + r1 = rf(chainId) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetLastPublishedBlockNumber' +type MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call struct { + *mock.Call +} + +// GetLastPublishedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +func (_e *MockIOrchestratorStorage_Expecter) GetLastPublishedBlockNumber(chainId interface{}) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { + return &MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call{Call: _e.mock.On("GetLastPublishedBlockNumber", chainId)} +} + +func (_c *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int)) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*big.Int)) + }) + return _c +} + +func (_c *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call) Return(blockNumber *big.Int, err error) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { + _c.Call.Return(blockNumber, err) return _c } -func (_c *MockIOrchestratorStorage_GetBlockFailures_Call) RunAndReturn(run func(storage.QueryFilter) ([]common.BlockFailure, error)) *MockIOrchestratorStorage_GetBlockFailures_Call { +func (_c *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int) (*big.Int, error)) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { _c.Call.Return(run) return _c } @@ -188,12 +242,12 @@ func (_c *MockIOrchestratorStorage_GetLastReorgCheckedBlockNumber_Call) RunAndRe return _c } -// SetLastReorgCheckedBlockNumber provides a mock function with given fields: chainId, blockNumber -func (_m *MockIOrchestratorStorage) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { +// SetLastCommittedBlockNumber provides a mock function with given fields: chainId, blockNumber +func (_m *MockIOrchestratorStorage) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { ret := _m.Called(chainId, blockNumber) if len(ret) == 0 { - panic("no return value specified for SetLastReorgCheckedBlockNumber") + panic("no return value specified for SetLastCommittedBlockNumber") } var r0 error @@ -206,46 +260,46 @@ func (_m *MockIOrchestratorStorage) SetLastReorgCheckedBlockNumber(chainId *big. 
return r0 } -// MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastReorgCheckedBlockNumber' -type MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call struct { +// MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastCommittedBlockNumber' +type MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call struct { *mock.Call } -// SetLastReorgCheckedBlockNumber is a helper method to define mock.On call +// SetLastCommittedBlockNumber is a helper method to define mock.On call // - chainId *big.Int // - blockNumber *big.Int -func (_e *MockIOrchestratorStorage_Expecter) SetLastReorgCheckedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { - return &MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call{Call: _e.mock.On("SetLastReorgCheckedBlockNumber", chainId, blockNumber)} +func (_e *MockIOrchestratorStorage_Expecter) SetLastCommittedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { + return &MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call{Call: _e.mock.On("SetLastCommittedBlockNumber", chainId, blockNumber)} } -func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { +func (_c *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { _c.Call.Run(func(args mock.Arguments) { run(args[0].(*big.Int), args[1].(*big.Int)) }) return _c } -func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { +func (_c *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { +func (_c *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { _c.Call.Return(run) return _c } -// StoreBlockFailures provides a mock function with given fields: failures -func (_m *MockIOrchestratorStorage) StoreBlockFailures(failures []common.BlockFailure) error { - ret := _m.Called(failures) +// SetLastPublishedBlockNumber provides a mock function with given fields: chainId, blockNumber +func (_m *MockIOrchestratorStorage) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ret := _m.Called(chainId, blockNumber) if len(ret) == 0 { - panic("no return value specified for StoreBlockFailures") + panic("no return value specified for SetLastPublishedBlockNumber") } var r0 error - if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { - r0 = rf(failures) + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { + r0 = rf(chainId, blockNumber) } else { r0 = ret.Error(0) } @@ -253,30 +307,78 @@ func (_m *MockIOrchestratorStorage) StoreBlockFailures(failures []common.BlockFa return r0 } -// 
MockIOrchestratorStorage_StoreBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'StoreBlockFailures' -type MockIOrchestratorStorage_StoreBlockFailures_Call struct { +// MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastPublishedBlockNumber' +type MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call struct { *mock.Call } -// StoreBlockFailures is a helper method to define mock.On call -// - failures []common.BlockFailure -func (_e *MockIOrchestratorStorage_Expecter) StoreBlockFailures(failures interface{}) *MockIOrchestratorStorage_StoreBlockFailures_Call { - return &MockIOrchestratorStorage_StoreBlockFailures_Call{Call: _e.mock.On("StoreBlockFailures", failures)} +// SetLastPublishedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +// - blockNumber *big.Int +func (_e *MockIOrchestratorStorage_Expecter) SetLastPublishedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { + return &MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call{Call: _e.mock.On("SetLastPublishedBlockNumber", chainId, blockNumber)} } -func (_c *MockIOrchestratorStorage_StoreBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIOrchestratorStorage_StoreBlockFailures_Call { +func (_c *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].([]common.BlockFailure)) + run(args[0].(*big.Int), args[1].(*big.Int)) }) return _c } -func (_c *MockIOrchestratorStorage_StoreBlockFailures_Call) Return(_a0 error) *MockIOrchestratorStorage_StoreBlockFailures_Call { +func (_c *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIOrchestratorStorage_StoreBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIOrchestratorStorage_StoreBlockFailures_Call { +func (_c *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { + _c.Call.Return(run) + return _c +} + +// SetLastReorgCheckedBlockNumber provides a mock function with given fields: chainId, blockNumber +func (_m *MockIOrchestratorStorage) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ret := _m.Called(chainId, blockNumber) + + if len(ret) == 0 { + panic("no return value specified for SetLastReorgCheckedBlockNumber") + } + + var r0 error + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { + r0 = rf(chainId, blockNumber) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastReorgCheckedBlockNumber' +type MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call struct { + *mock.Call +} + +// SetLastReorgCheckedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +// - blockNumber *big.Int +func (_e *MockIOrchestratorStorage_Expecter) SetLastReorgCheckedBlockNumber(chainId interface{}, blockNumber interface{}) 
*MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { + return &MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call{Call: _e.mock.On("SetLastReorgCheckedBlockNumber", chainId, blockNumber)} +} + +func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*big.Int), args[1].(*big.Int)) + }) + return _c +} + +func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { _c.Call.Return(run) return _c } diff --git a/test/mocks/MockIRPCClient.go b/test/mocks/MockIRPCClient.go index 42f37ef..f7045c4 100644 --- a/test/mocks/MockIRPCClient.go +++ b/test/mocks/MockIRPCClient.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. //go:build !production diff --git a/test/mocks/MockIStagingStorage.go b/test/mocks/MockIStagingStorage.go index bd73136..53964d3 100644 --- a/test/mocks/MockIStagingStorage.go +++ b/test/mocks/MockIStagingStorage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. //go:build !production @@ -26,6 +26,97 @@ func (_m *MockIStagingStorage) EXPECT() *MockIStagingStorage_Expecter { return &MockIStagingStorage_Expecter{mock: &_m.Mock} } +// Close provides a mock function with no fields +func (_m *MockIStagingStorage) Close() error { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for Close") + } + + var r0 error + if rf, ok := ret.Get(0).(func() error); ok { + r0 = rf() + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIStagingStorage_Close_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Close' +type MockIStagingStorage_Close_Call struct { + *mock.Call +} + +// Close is a helper method to define mock.On call +func (_e *MockIStagingStorage_Expecter) Close() *MockIStagingStorage_Close_Call { + return &MockIStagingStorage_Close_Call{Call: _e.mock.On("Close")} +} + +func (_c *MockIStagingStorage_Close_Call) Run(run func()) *MockIStagingStorage_Close_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockIStagingStorage_Close_Call) Return(_a0 error) *MockIStagingStorage_Close_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIStagingStorage_Close_Call) RunAndReturn(run func() error) *MockIStagingStorage_Close_Call { + _c.Call.Return(run) + return _c +} + +// DeleteBlockFailures provides a mock function with given fields: failures +func (_m *MockIStagingStorage) DeleteBlockFailures(failures []common.BlockFailure) error { + ret := _m.Called(failures) + + if len(ret) == 0 { + panic("no return value specified for DeleteBlockFailures") + } + + var r0 error + if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { + r0 = rf(failures) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIStagingStorage_DeleteBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteBlockFailures' +type MockIStagingStorage_DeleteBlockFailures_Call struct 
{ + *mock.Call +} + +// DeleteBlockFailures is a helper method to define mock.On call +// - failures []common.BlockFailure +func (_e *MockIStagingStorage_Expecter) DeleteBlockFailures(failures interface{}) *MockIStagingStorage_DeleteBlockFailures_Call { + return &MockIStagingStorage_DeleteBlockFailures_Call{Call: _e.mock.On("DeleteBlockFailures", failures)} +} + +func (_c *MockIStagingStorage_DeleteBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIStagingStorage_DeleteBlockFailures_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].([]common.BlockFailure)) + }) + return _c +} + +func (_c *MockIStagingStorage_DeleteBlockFailures_Call) Return(_a0 error) *MockIStagingStorage_DeleteBlockFailures_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIStagingStorage_DeleteBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIStagingStorage_DeleteBlockFailures_Call { + _c.Call.Return(run) + return _c +} + // DeleteStagingData provides a mock function with given fields: data func (_m *MockIStagingStorage) DeleteStagingData(data []common.BlockData) error { ret := _m.Called(data) @@ -72,107 +163,107 @@ func (_c *MockIStagingStorage_DeleteStagingData_Call) RunAndReturn(run func([]co return _c } -// GetLastPublishedBlockNumber provides a mock function with given fields: chainId -func (_m *MockIStagingStorage) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - ret := _m.Called(chainId) +// DeleteStagingDataOlderThan provides a mock function with given fields: chainId, blockNumber +func (_m *MockIStagingStorage) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { + ret := _m.Called(chainId, blockNumber) if len(ret) == 0 { - panic("no return value specified for GetLastPublishedBlockNumber") - } - - var r0 *big.Int - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int) (*big.Int, error)); ok { - return rf(chainId) - } - if rf, ok := ret.Get(0).(func(*big.Int) *big.Int); ok { - r0 = rf(chainId) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*big.Int) - } + panic("no return value specified for DeleteStagingDataOlderThan") } - if rf, ok := ret.Get(1).(func(*big.Int) error); ok { - r1 = rf(chainId) + var r0 error + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { + r0 = rf(chainId, blockNumber) } else { - r1 = ret.Error(1) + r0 = ret.Error(0) } - return r0, r1 + return r0 } -// MockIStagingStorage_GetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetLastPublishedBlockNumber' -type MockIStagingStorage_GetLastPublishedBlockNumber_Call struct { +// MockIStagingStorage_DeleteStagingDataOlderThan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteStagingDataOlderThan' +type MockIStagingStorage_DeleteStagingDataOlderThan_Call struct { *mock.Call } -// GetLastPublishedBlockNumber is a helper method to define mock.On call +// DeleteStagingDataOlderThan is a helper method to define mock.On call // - chainId *big.Int -func (_e *MockIStagingStorage_Expecter) GetLastPublishedBlockNumber(chainId interface{}) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { - return &MockIStagingStorage_GetLastPublishedBlockNumber_Call{Call: _e.mock.On("GetLastPublishedBlockNumber", chainId)} +// - blockNumber *big.Int +func (_e *MockIStagingStorage_Expecter) DeleteStagingDataOlderThan(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { + 
return &MockIStagingStorage_DeleteStagingDataOlderThan_Call{Call: _e.mock.On("DeleteStagingDataOlderThan", chainId, blockNumber)} } -func (_c *MockIStagingStorage_GetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int)) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int)) + run(args[0].(*big.Int), args[1].(*big.Int)) }) return _c } -func (_c *MockIStagingStorage_GetLastPublishedBlockNumber_Call) Return(maxBlockNumber *big.Int, err error) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { - _c.Call.Return(maxBlockNumber, err) +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Return(_a0 error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { + _c.Call.Return(_a0) return _c } -func (_c *MockIStagingStorage_GetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int) (*big.Int, error)) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Return(run) return _c } -// SetLastPublishedBlockNumber provides a mock function with given fields: chainId, blockNumber -func (_m *MockIStagingStorage) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ret := _m.Called(chainId, blockNumber) +// GetBlockFailures provides a mock function with given fields: qf +func (_m *MockIStagingStorage) GetBlockFailures(qf storage.QueryFilter) ([]common.BlockFailure, error) { + ret := _m.Called(qf) if len(ret) == 0 { - panic("no return value specified for SetLastPublishedBlockNumber") + panic("no return value specified for GetBlockFailures") } - var r0 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { - r0 = rf(chainId, blockNumber) + var r0 []common.BlockFailure + var r1 error + if rf, ok := ret.Get(0).(func(storage.QueryFilter) ([]common.BlockFailure, error)); ok { + return rf(qf) + } + if rf, ok := ret.Get(0).(func(storage.QueryFilter) []common.BlockFailure); ok { + r0 = rf(qf) } else { - r0 = ret.Error(0) + if ret.Get(0) != nil { + r0 = ret.Get(0).([]common.BlockFailure) + } } - return r0 + if rf, ok := ret.Get(1).(func(storage.QueryFilter) error); ok { + r1 = rf(qf) + } else { + r1 = ret.Error(1) + } + + return r0, r1 } -// MockIStagingStorage_SetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastPublishedBlockNumber' -type MockIStagingStorage_SetLastPublishedBlockNumber_Call struct { +// MockIStagingStorage_GetBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlockFailures' +type MockIStagingStorage_GetBlockFailures_Call struct { *mock.Call } -// SetLastPublishedBlockNumber is a helper method to define mock.On call -// - chainId *big.Int -// - blockNumber *big.Int -func (_e *MockIStagingStorage_Expecter) SetLastPublishedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { - return &MockIStagingStorage_SetLastPublishedBlockNumber_Call{Call: _e.mock.On("SetLastPublishedBlockNumber", chainId, blockNumber)} +// GetBlockFailures is a helper method to define mock.On call +// - qf storage.QueryFilter +func (_e 
*MockIStagingStorage_Expecter) GetBlockFailures(qf interface{}) *MockIStagingStorage_GetBlockFailures_Call { + return &MockIStagingStorage_GetBlockFailures_Call{Call: _e.mock.On("GetBlockFailures", qf)} } -func (_c *MockIStagingStorage_SetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_GetBlockFailures_Call) Run(run func(qf storage.QueryFilter)) *MockIStagingStorage_GetBlockFailures_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int)) + run(args[0].(storage.QueryFilter)) }) return _c } -func (_c *MockIStagingStorage_SetLastPublishedBlockNumber_Call) Return(_a0 error) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { - _c.Call.Return(_a0) +func (_c *MockIStagingStorage_GetBlockFailures_Call) Return(_a0 []common.BlockFailure, _a1 error) *MockIStagingStorage_GetBlockFailures_Call { + _c.Call.Return(_a0, _a1) return _c } -func (_c *MockIStagingStorage_SetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_GetBlockFailures_Call) RunAndReturn(run func(storage.QueryFilter) ([]common.BlockFailure, error)) *MockIStagingStorage_GetBlockFailures_Call { _c.Call.Return(run) return _c } @@ -341,17 +432,17 @@ func (_c *MockIStagingStorage_InsertStagingData_Call) RunAndReturn(run func([]co return _c } -// DeleteStagingDataOlderThan provides a mock function with given fields: chainId, blockNumber -func (_m *MockIStagingStorage) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { - ret := _m.Called(chainId, blockNumber) +// StoreBlockFailures provides a mock function with given fields: failures +func (_m *MockIStagingStorage) StoreBlockFailures(failures []common.BlockFailure) error { + ret := _m.Called(failures) if len(ret) == 0 { - panic("no return value specified for DeleteStagingDataOlderThan") + panic("no return value specified for StoreBlockFailures") } var r0 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { - r0 = rf(chainId, blockNumber) + if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { + r0 = rf(failures) } else { r0 = ret.Error(0) } @@ -359,31 +450,30 @@ func (_m *MockIStagingStorage) DeleteStagingDataOlderThan(chainId *big.Int, bloc return r0 } -// MockIStagingStorage_DeleteStagingDataOlderThan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteStagingDataOlderThan' -type MockIStagingStorage_DeleteStagingDataOlderThan_Call struct { +// MockIStagingStorage_StoreBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'StoreBlockFailures' +type MockIStagingStorage_StoreBlockFailures_Call struct { *mock.Call } -// DeleteStagingDataOlderThan is a helper method to define mock.On call -// - chainId *big.Int -// - blockNumber *big.Int -func (_e *MockIStagingStorage_Expecter) DeleteStagingDataOlderThan(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { - return &MockIStagingStorage_DeleteStagingDataOlderThan_Call{Call: _e.mock.On("DeleteStagingDataOlderThan", chainId, blockNumber)} +// StoreBlockFailures is a helper method to define mock.On call +// - failures []common.BlockFailure +func (_e *MockIStagingStorage_Expecter) StoreBlockFailures(failures interface{}) *MockIStagingStorage_StoreBlockFailures_Call { + return 
&MockIStagingStorage_StoreBlockFailures_Call{Call: _e.mock.On("StoreBlockFailures", failures)} } -func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { +func (_c *MockIStagingStorage_StoreBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIStagingStorage_StoreBlockFailures_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int)) + run(args[0].([]common.BlockFailure)) }) return _c } -func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Return(_a0 error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { +func (_c *MockIStagingStorage_StoreBlockFailures_Call) Return(_a0 error) *MockIStagingStorage_StoreBlockFailures_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { +func (_c *MockIStagingStorage_StoreBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIStagingStorage_StoreBlockFailures_Call { _c.Call.Return(run) return _c }
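
For context on how the regenerated mocks above are exercised in tests, here is a minimal sketch using the mockery expecter API that this patch adds (the new Close, GetLastCommittedBlockNumber, SetLastCommittedBlockNumber, and GetLastPublishedBlockNumber methods). This is illustrative only and not part of the patch: the `test/mocks` package name, the import path `github.com/thirdweb-dev/indexer/test/mocks`, and the expected block numbers are assumptions.

    // Illustrative sketch (not part of this patch): exercising the regenerated
    // MockIOrchestratorStorage cursor methods with testify/mockery expectations.
    package orchestrator_test

    import (
    	"math/big"
    	"testing"

    	"github.com/stretchr/testify/mock"
    	"github.com/stretchr/testify/require"

    	mocks "github.com/thirdweb-dev/indexer/test/mocks" // assumed import path
    )

    func TestOrchestratorStorageCursorMocks(t *testing.T) {
    	orchStorage := &mocks.MockIOrchestratorStorage{}

    	// Expect the committer to read the last committed cursor, advance it,
    	// and close the storage when it shuts down (values are illustrative).
    	orchStorage.EXPECT().GetLastCommittedBlockNumber(mock.Anything).Return(big.NewInt(100), nil)
    	orchStorage.EXPECT().SetLastCommittedBlockNumber(mock.Anything, big.NewInt(105)).Return(nil)
    	orchStorage.EXPECT().Close().Return(nil)

    	// In a real test the component under test would make these calls.
    	last, err := orchStorage.GetLastCommittedBlockNumber(big.NewInt(1))
    	require.NoError(t, err)
    	require.Equal(t, int64(100), last.Int64())
    	require.NoError(t, orchStorage.SetLastCommittedBlockNumber(big.NewInt(1), big.NewInt(105)))
    	require.NoError(t, orchStorage.Close())

    	orchStorage.AssertExpectations(t)
    }

The same pattern applies to the new MockIMainStorage methods (Close, GetBlockCount, GetMaxBlockNumberInRange) and to the block-failure methods that moved onto MockIStagingStorage.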