From d3c7146d3e932bdebda6c89609f20c8df14707a5 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 13 Aug 2025 07:44:53 +0000 Subject: [PATCH 01/43] Initial kafka committer --- cmd/root.go | 26 + configs/config.go | 9 + internal/common/block.go | 9 +- internal/orchestrator/committer_test.go | 1 + internal/orchestrator/failure_recoverer.go | 1 + internal/orchestrator/poller.go | 1 + internal/orchestrator/reorg_handler.go | 1 + internal/storage/clickhouse.go | 2 + internal/storage/connector.go | 2 + internal/storage/kafka_postgres.go | 620 +++++++++++++++++++++ internal/storage/kafka_publisher.go | 183 ++++++ 11 files changed, 851 insertions(+), 4 deletions(-) create mode 100644 internal/storage/kafka_postgres.go create mode 100644 internal/storage/kafka_publisher.go diff --git a/cmd/root.go b/cmd/root.go index 6ba9702..88b8428 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -124,6 +124,19 @@ func init() { rootCmd.PersistentFlags().Int("storage-staging-postgres-maxIdleConns", 25, "PostgreSQL max idle connections for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for staging storage") + // Kafka storage flags - only for main storage (where blockchain data is committed) + rootCmd.PersistentFlags().Bool("storage-main-kafka-enabled", false, "Enable Kafka storage for main storage") + rootCmd.PersistentFlags().String("storage-main-kafka-brokers", "", "Kafka brokers for main storage") + rootCmd.PersistentFlags().String("storage-main-kafka-username", "", "Kafka username for main storage") + rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") + rootCmd.PersistentFlags().String("storage-main-kafka-postgres-host", "", "PostgreSQL host for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-port", 5432, "PostgreSQL port for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().String("storage-main-kafka-postgres-username", "", "PostgreSQL username for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().String("storage-main-kafka-postgres-password", "", "PostgreSQL password for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().String("storage-main-kafka-postgres-database", "", "PostgreSQL database for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().String("storage-main-kafka-postgres-sslMode", "require", "PostgreSQL SSL mode for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-maxOpenConns", 25, "PostgreSQL max open connections for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-maxIdleConns", 10, "PostgreSQL max idle connections for Kafka main storage bookkeeping") rootCmd.PersistentFlags().String("api-host", "localhost:3000", "API host") rootCmd.PersistentFlags().String("api-basicAuth-username", "", "API basic auth username") rootCmd.PersistentFlags().String("api-basicAuth-password", "", "API basic auth password") @@ -240,6 +253,19 @@ func init() { viper.BindPFlag("storage.staging.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxIdleConns")) viper.BindPFlag("storage.staging.postgres.maxConnLifetime", 
rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxConnLifetime")) viper.BindPFlag("storage.staging.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-connectTimeout")) + // Bind Kafka storage flags - only for main storage + viper.BindPFlag("storage.main.kafka.enabled", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enabled")) + viper.BindPFlag("storage.main.kafka.brokers", rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) + viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) + viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) + viper.BindPFlag("storage.main.kafka.postgres.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-host")) + viper.BindPFlag("storage.main.kafka.postgres.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-port")) + viper.BindPFlag("storage.main.kafka.postgres.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-username")) + viper.BindPFlag("storage.main.kafka.postgres.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-password")) + viper.BindPFlag("storage.main.kafka.postgres.database", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-database")) + viper.BindPFlag("storage.main.kafka.postgres.sslMode", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-sslMode")) + viper.BindPFlag("storage.main.kafka.postgres.maxOpenConns", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-maxOpenConns")) + viper.BindPFlag("storage.main.kafka.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-maxIdleConns")) viper.BindPFlag("api.host", rootCmd.PersistentFlags().Lookup("api-host")) viper.BindPFlag("api.basicAuth.username", rootCmd.PersistentFlags().Lookup("api-basicAuth-username")) viper.BindPFlag("api.basicAuth.password", rootCmd.PersistentFlags().Lookup("api-basicAuth-password")) diff --git a/configs/config.go b/configs/config.go index 0be0feb..10a824f 100644 --- a/configs/config.go +++ b/configs/config.go @@ -62,6 +62,7 @@ const ( type StorageConnectionConfig struct { Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` Postgres *PostgresConfig `mapstructure:"postgres"` + Kafka *KafkaConfig `mapstructure:"kafka"` } type TableConfig struct { @@ -100,6 +101,14 @@ type PostgresConfig struct { ConnectTimeout int `mapstructure:"connectTimeout"` } +type KafkaConfig struct { + Enabled bool `mapstructure:"enabled"` + Brokers string `mapstructure:"brokers"` + Username string `mapstructure:"username"` + Password string `mapstructure:"password"` + Postgres *PostgresConfig `mapstructure:"postgres"` +} + type RPCBatchRequestConfig struct { BlocksPerRequest int `mapstructure:"blocksPerRequest"` BatchDelay int `mapstructure:"batchDelay"` diff --git a/internal/common/block.go b/internal/common/block.go index 4c9e8dc..eacf1f1 100644 --- a/internal/common/block.go +++ b/internal/common/block.go @@ -59,10 +59,11 @@ type BlockModel struct { } type BlockData struct { - Block Block - Transactions []Transaction - Logs []Log - Traces []Trace + ChainId uint64 `json:"chain_id"` + Block Block `json:"block"` + Transactions []Transaction `json:"transactions"` + Logs []Log `json:"logs"` + Traces []Trace `json:"traces"` } type BlockHeader struct { diff --git a/internal/orchestrator/committer_test.go b/internal/orchestrator/committer_test.go index 0c39ba4..c6d5906 100644 --- 
a/internal/orchestrator/committer_test.go +++ b/internal/orchestrator/committer_test.go @@ -426,6 +426,7 @@ func TestHandleGap(t *testing.T) { mockRPC.EXPECT().GetBlocksPerRequest().Return(rpc.BlocksPerRequestConfig{ Blocks: 5, }) + mockRPC.EXPECT().GetChainID().Return(big.NewInt(1)) mockRPC.EXPECT().GetFullBlocks(context.Background(), []*big.Int{big.NewInt(100), big.NewInt(101), big.NewInt(102), big.NewInt(103), big.NewInt(104)}).Return([]rpc.GetFullBlockResult{ {BlockNumber: big.NewInt(100), Data: common.BlockData{Block: common.Block{Number: big.NewInt(100)}}}, {BlockNumber: big.NewInt(101), Data: common.BlockData{Block: common.Block{Number: big.NewInt(101)}}}, diff --git a/internal/orchestrator/failure_recoverer.go b/internal/orchestrator/failure_recoverer.go index da1ae91..a097034 100644 --- a/internal/orchestrator/failure_recoverer.go +++ b/internal/orchestrator/failure_recoverer.go @@ -110,6 +110,7 @@ func (fr *FailureRecoverer) handleWorkerResults(blockFailures []common.BlockFail }) } else { successfulResults = append(successfulResults, common.BlockData{ + ChainId: fr.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index a1cca21..5e3b313 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -262,6 +262,7 @@ func (p *Poller) convertPollResultsToBlockData(results []rpc.GetFullBlockResult) blockData := make([]common.BlockData, 0, len(successfulResults)) for _, result := range successfulResults { blockData = append(blockData, common.BlockData{ + ChainId: p.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff --git a/internal/orchestrator/reorg_handler.go b/internal/orchestrator/reorg_handler.go index 2de8b95..889801c 100644 --- a/internal/orchestrator/reorg_handler.go +++ b/internal/orchestrator/reorg_handler.go @@ -274,6 +274,7 @@ func (rh *ReorgHandler) handleReorg(ctx context.Context, reorgedBlockNumbers []* return fmt.Errorf("cannot fix reorg: failed block %s: %w", result.BlockNumber.String(), result.Error) } data = append(data, common.BlockData{ + ChainId: rh.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go index c61256b..517a201 100644 --- a/internal/storage/clickhouse.go +++ b/internal/storage/clickhouse.go @@ -1959,6 +1959,7 @@ func (c *ClickHouseConnector) GetValidationBlockData(chainId *big.Int, startBloc for i, block := range blocksResult.blocks { blockNum := block.Number.String() blockData[i] = common.BlockData{ + ChainId: chainId.Uint64(), Block: block, Logs: logsResult.logMap[blockNum], Transactions: txsResult.txMap[blockNum], @@ -2138,6 +2139,7 @@ func (c *ClickHouseConnector) GetFullBlockData(chainId *big.Int, blockNumbers [] for i, block := range blocksResult.blocks { blockNum := block.Number.String() blockData[i] = common.BlockData{ + ChainId: chainId.Uint64(), Block: block, Logs: logsResult.logMap[blockNum], Transactions: txsResult.txMap[blockNum], diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 1253213..e45db44 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -152,6 +152,8 @@ func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { conn, err = NewPostgresConnector(cfg.Postgres) } else if 
cfg.Clickhouse != nil { conn, err = NewClickHouseConnector(cfg.Clickhouse) + } else if cfg.Kafka != nil { + conn, err = NewKafkaPostgresConnector(cfg.Kafka) } else { return *new(T), fmt.Errorf("no storage driver configured") } diff --git a/internal/storage/kafka_postgres.go b/internal/storage/kafka_postgres.go new file mode 100644 index 0000000..74b8714 --- /dev/null +++ b/internal/storage/kafka_postgres.go @@ -0,0 +1,620 @@ +package storage + +import ( + "database/sql" + "encoding/json" + "fmt" + "math/big" + "strings" + "time" + + _ "github.com/lib/pq" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" +) + +// KafkaPostgresConnector uses PostgreSQL for metadata storage and Kafka for block data delivery +type KafkaPostgresConnector struct { + db *sql.DB + cfg *config.KafkaConfig + kafkaPublisher *KafkaPublisher +} + +func NewKafkaPostgresConnector(cfg *config.KafkaConfig) (*KafkaPostgresConnector, error) { + // Connect to PostgreSQL + connStr := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s", + cfg.Postgres.Host, cfg.Postgres.Port, cfg.Postgres.Username, cfg.Postgres.Password, cfg.Postgres.Database) + + // Default to "require" for security if SSL mode not specified + sslMode := cfg.Postgres.SSLMode + if sslMode == "" { + sslMode = "require" + log.Info().Msg("No SSL mode specified, defaulting to 'require' for secure connection") + } + connStr += fmt.Sprintf(" sslmode=%s", sslMode) + + if cfg.Postgres.ConnectTimeout > 0 { + connStr += fmt.Sprintf(" connect_timeout=%d", cfg.Postgres.ConnectTimeout) + } + + db, err := sql.Open("postgres", connStr) + if err != nil { + return nil, fmt.Errorf("failed to connect to postgres: %w", err) + } + + db.SetMaxOpenConns(cfg.Postgres.MaxOpenConns) + db.SetMaxIdleConns(cfg.Postgres.MaxIdleConns) + + if cfg.Postgres.MaxConnLifetime > 0 { + db.SetConnMaxLifetime(time.Duration(cfg.Postgres.MaxConnLifetime) * time.Second) + } + + if err := db.Ping(); err != nil { + return nil, fmt.Errorf("failed to ping postgres: %w", err) + } + + // Initialize Kafka publisher if enabled + var kafkaPublisher *KafkaPublisher + if cfg.Enabled && cfg.Brokers != "" { + kafkaPublisher, err = NewKafkaPublisher(cfg) + if err != nil { + log.Warn().Err(err).Msg("Failed to initialize Kafka publisher, continuing without publishing") + kafkaPublisher = nil + } + } + + return &KafkaPostgresConnector{ + db: db, + cfg: cfg, + kafkaPublisher: kafkaPublisher, + }, nil +} + +// Orchestrator Storage Implementation (PostgreSQL) + +func (kp *KafkaPostgresConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) { + query := `SELECT chain_id, block_number, last_error_timestamp, failure_count, reason + FROM block_failures WHERE 1=1` + + args := []interface{}{} + argCount := 0 + + if qf.ChainId != nil && qf.ChainId.Sign() > 0 { + argCount++ + query += fmt.Sprintf(" AND chain_id = $%d", argCount) + args = append(args, qf.ChainId.String()) + } + + if len(qf.BlockNumbers) > 0 { + placeholders := make([]string, len(qf.BlockNumbers)) + for i, bn := range qf.BlockNumbers { + argCount++ + placeholders[i] = fmt.Sprintf("$%d", argCount) + args = append(args, bn.String()) + } + query += fmt.Sprintf(" AND block_number IN (%s)", strings.Join(placeholders, ",")) + } + + if qf.SortBy != "" { + query += fmt.Sprintf(" ORDER BY %s", qf.SortBy) + if qf.SortOrder != "" { + query += " " + qf.SortOrder + } + } else { + query += " ORDER BY block_number DESC" + } + + if qf.Limit > 0 { + argCount++ + query += 
fmt.Sprintf(" LIMIT $%d", argCount) + args = append(args, qf.Limit) + } + + if qf.Offset > 0 { + argCount++ + query += fmt.Sprintf(" OFFSET $%d", argCount) + args = append(args, qf.Offset) + } + + rows, err := kp.db.Query(query, args...) + if err != nil { + return nil, err + } + defer func() { + if err := rows.Close(); err != nil { + log.Error().Err(err).Msg("Failed to close rows in GetBlockFailures") + } + }() + + var failures []common.BlockFailure + for rows.Next() { + var failure common.BlockFailure + var chainIdStr, blockNumberStr string + var timestamp int64 + var count int + + err := rows.Scan(&chainIdStr, &blockNumberStr, ×tamp, &count, &failure.FailureReason) + if err != nil { + return nil, fmt.Errorf("error scanning block failure: %w", err) + } + + var ok bool + failure.ChainId, ok = new(big.Int).SetString(chainIdStr, 10) + if !ok { + return nil, fmt.Errorf("failed to parse chain_id '%s' as big.Int", chainIdStr) + } + + failure.BlockNumber, ok = new(big.Int).SetString(blockNumberStr, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block_number '%s' as big.Int", blockNumberStr) + } + + failure.FailureTime = time.Unix(timestamp, 0) + failure.FailureCount = count + + failures = append(failures, failure) + } + + return failures, rows.Err() +} + +func (kp *KafkaPostgresConnector) StoreBlockFailures(failures []common.BlockFailure) error { + if len(failures) == 0 { + return nil + } + + valueStrings := make([]string, 0, len(failures)) + valueArgs := make([]interface{}, 0, len(failures)*5) + + for i, failure := range failures { + valueStrings = append(valueStrings, fmt.Sprintf("($%d, $%d, $%d, $%d, $%d)", + i*5+1, i*5+2, i*5+3, i*5+4, i*5+5)) + valueArgs = append(valueArgs, + failure.ChainId.String(), + failure.BlockNumber.String(), + failure.FailureTime.Unix(), + failure.FailureCount, + failure.FailureReason, + ) + } + + query := fmt.Sprintf(`INSERT INTO block_failures (chain_id, block_number, last_error_timestamp, failure_count, reason) + VALUES %s + ON CONFLICT (chain_id, block_number) + DO UPDATE SET + last_error_timestamp = EXCLUDED.last_error_timestamp, + failure_count = EXCLUDED.failure_count, + reason = EXCLUDED.reason, + updated_at = NOW()`, strings.Join(valueStrings, ",")) + + _, err := kp.db.Exec(query, valueArgs...) + return err +} + +func (kp *KafkaPostgresConnector) DeleteBlockFailures(failures []common.BlockFailure) error { + if len(failures) == 0 { + return nil + } + + tuples := make([]string, 0, len(failures)) + args := make([]interface{}, 0, len(failures)*2) + + for i, failure := range failures { + tuples = append(tuples, fmt.Sprintf("($%d, $%d)", i*2+1, i*2+2)) + args = append(args, failure.ChainId.String(), failure.BlockNumber.String()) + } + + query := fmt.Sprintf(`DELETE FROM block_failures + WHERE ctid IN ( + SELECT ctid + FROM block_failures + WHERE (chain_id, block_number) IN (%s) + FOR UPDATE SKIP LOCKED + )`, strings.Join(tuples, ",")) + + _, err := kp.db.Exec(query, args...) 
+ return err +} + +func (kp *KafkaPostgresConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { + query := `SELECT cursor_value FROM cursors + WHERE cursor_type = 'reorg' AND chain_id = $1` + + var blockNumberString string + err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + + return blockNumber, nil +} + +func (kp *KafkaPostgresConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) + VALUES ($1, 'reorg', $2) + ON CONFLICT (chain_id, cursor_type) + DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` + + _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) + return err +} + +// Staging Storage Implementation (PostgreSQL) + +func (kp *KafkaPostgresConnector) InsertStagingData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + valueStrings := make([]string, 0, len(data)) + valueArgs := make([]interface{}, 0, len(data)*3) + + for i, blockData := range data { + blockDataJSON, err := json.Marshal(blockData) + if err != nil { + return err + } + + valueStrings = append(valueStrings, fmt.Sprintf("($%d, $%d, $%d)", + i*3+1, i*3+2, i*3+3)) + valueArgs = append(valueArgs, + blockData.Block.ChainId.String(), + blockData.Block.Number.String(), + string(blockDataJSON), + ) + } + + query := fmt.Sprintf(`INSERT INTO block_data (chain_id, block_number, data) + VALUES %s + ON CONFLICT (chain_id, block_number) + DO UPDATE SET data = EXCLUDED.data, updated_at = NOW()`, strings.Join(valueStrings, ",")) + + _, err := kp.db.Exec(query, valueArgs...) + return err +} + +func (kp *KafkaPostgresConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { + query := `SELECT data FROM block_data WHERE 1=1` + + args := []interface{}{} + argCount := 0 + + if qf.ChainId != nil && qf.ChainId.Sign() > 0 { + argCount++ + query += fmt.Sprintf(" AND chain_id = $%d", argCount) + args = append(args, qf.ChainId.String()) + } + + if len(qf.BlockNumbers) > 0 { + placeholders := make([]string, len(qf.BlockNumbers)) + for i, bn := range qf.BlockNumbers { + argCount++ + placeholders[i] = fmt.Sprintf("$%d", argCount) + args = append(args, bn.String()) + } + query += fmt.Sprintf(" AND block_number IN (%s)", strings.Join(placeholders, ",")) + } else if qf.StartBlock != nil && qf.EndBlock != nil { + argCount++ + query += fmt.Sprintf(" AND block_number BETWEEN $%d AND $%d", argCount, argCount+1) + args = append(args, qf.StartBlock.String(), qf.EndBlock.String()) + argCount++ + } + + query += " ORDER BY block_number ASC" + + if qf.Limit > 0 { + argCount++ + query += fmt.Sprintf(" LIMIT $%d", argCount) + args = append(args, qf.Limit) + } + + rows, err := kp.db.Query(query, args...) 
+ if err != nil { + return nil, err + } + defer func() { + if err := rows.Close(); err != nil { + log.Error().Err(err).Msg("Failed to close rows in GetStagingData") + } + }() + + blockDataList := make([]common.BlockData, 0) + for rows.Next() { + var blockDataJson string + if err := rows.Scan(&blockDataJson); err != nil { + return nil, fmt.Errorf("error scanning block data: %w", err) + } + + var blockData common.BlockData + if err := json.Unmarshal([]byte(blockDataJson), &blockData); err != nil { + return nil, err + } + + blockDataList = append(blockDataList, blockData) + } + + return blockDataList, rows.Err() +} + +func (kp *KafkaPostgresConnector) DeleteStagingData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + tuples := make([]string, 0, len(data)) + args := make([]interface{}, 0, len(data)*2) + + for i, blockData := range data { + tuples = append(tuples, fmt.Sprintf("($%d, $%d)", i*2+1, i*2+2)) + args = append(args, blockData.Block.ChainId.String(), blockData.Block.Number.String()) + } + + query := fmt.Sprintf(`DELETE FROM block_data + WHERE ctid IN ( + SELECT ctid + FROM block_data + WHERE (chain_id, block_number) IN (%s) + FOR UPDATE SKIP LOCKED + )`, strings.Join(tuples, ",")) + + _, err := kp.db.Exec(query, args...) + return err +} + +func (kp *KafkaPostgresConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'publish' AND chain_id = $1` + + var blockNumberString string + err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + return blockNumber, nil +} + +func (kp *KafkaPostgresConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) + VALUES ($1, 'publish', $2) + ON CONFLICT (chain_id, cursor_type) + DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` + + _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) + return err +} + +func (kp *KafkaPostgresConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { + query := `SELECT MAX(block_number) FROM block_data WHERE 1=1` + + args := []interface{}{} + argCount := 0 + + if chainId != nil && chainId.Sign() > 0 { + argCount++ + query += fmt.Sprintf(" AND chain_id = $%d", argCount) + args = append(args, chainId.String()) + } + + if rangeStart != nil && rangeStart.Sign() > 0 { + argCount++ + query += fmt.Sprintf(" AND block_number >= $%d", argCount) + args = append(args, rangeStart.String()) + } + + if rangeEnd != nil && rangeEnd.Sign() > 0 { + argCount++ + query += fmt.Sprintf(" AND block_number <= $%d", argCount) + args = append(args, rangeEnd.String()) + } + + var blockNumberStr sql.NullString + err := kp.db.QueryRow(query, args...).Scan(&blockNumberStr) + if err != nil { + return nil, err + } + + if !blockNumberStr.Valid { + return big.NewInt(0), nil + } + + blockNumber, ok := new(big.Int).SetString(blockNumberStr.String, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberStr.String) + } + + return blockNumber, nil +} + +func (kp *KafkaPostgresConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { + 
query := `DELETE FROM block_data + WHERE ctid IN ( + SELECT ctid + FROM block_data + WHERE chain_id = $1 + AND block_number <= $2 + FOR UPDATE SKIP LOCKED + )` + _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) + return err +} + +// InsertBlockData publishes block data to Kafka instead of storing in database +func (kp *KafkaPostgresConnector) InsertBlockData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + // Publish to Kafka + if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { + return fmt.Errorf("failed to publish block data to kafka: %w", err) + } + log.Debug(). + Int("blocks", len(data)). + Msg("Published block data to Kafka") + + // Update cursor to track the highest block number published + if len(data) > 0 { + // Find the highest block number in the batch + var maxBlock *big.Int + for _, blockData := range data { + if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = blockData.Block.Number + } + } + if maxBlock != nil { + chainId := data[0].Block.ChainId + blockNumber := maxBlock + query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) + VALUES ($1, 'commit', $2) + ON CONFLICT (chain_id, cursor_type) + DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` + if _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()); err != nil { + return err + } + } + } + + return nil +} + +// ReplaceBlockData handles reorg by publishing both old and new data to Kafka +func (kp *KafkaPostgresConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { + if len(data) == 0 { + return nil, nil + } + + oldBlocks := []common.BlockData{} + + // Publish reorg event to Kafka + if kp.kafkaPublisher != nil { + // Publish new blocks (the reorg handler will mark old ones as reverted) + if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { + return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) + } + } + + // Update cursor to track the highest block number + if len(data) > 0 { + var maxBlock *big.Int + for _, blockData := range data { + if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = blockData.Block.Number + } + } + if maxBlock != nil { + if err := kp.SetLastPublishedBlockNumber(data[0].Block.ChainId, maxBlock); err != nil { + return nil, fmt.Errorf("failed to update published block cursor: %w", err) + } + } + } + + return oldBlocks, nil +} + +func (kp *KafkaPostgresConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { + query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'commit' AND chain_id = $1` + + var blockNumberString string + err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + return blockNumber, nil +} + +func (kp *KafkaPostgresConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + // Get the last published block number + lastPublished, err := kp.GetLastPublishedBlockNumber(chainId) + if err != nil { + return nil, err + } + + // Check if it's within the range + if lastPublished.Cmp(startBlock) >= 0 && lastPublished.Cmp(endBlock) <= 0 { + return lastPublished, nil + } + + // If outside range, return appropriate boundary + if 
lastPublished.Cmp(endBlock) > 0 { + return endBlock, nil + } + if lastPublished.Cmp(startBlock) < 0 { + return big.NewInt(0), nil + } + + return lastPublished, nil +} + +func (kp *KafkaPostgresConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { + return []common.BlockHeader{}, nil +} + +func (kp *KafkaPostgresConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { + return QueryResult[common.TokenBalance]{Data: []common.TokenBalance{}}, nil +} + +func (kp *KafkaPostgresConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { + return QueryResult[common.TokenTransfer]{Data: []common.TokenTransfer{}}, nil +} + +func (kp *KafkaPostgresConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { + return []common.BlockData{}, nil +} + +func (kp *KafkaPostgresConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { + return []*big.Int{}, nil +} + +func (kp *KafkaPostgresConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { + return []common.BlockData{}, nil +} + +// Query methods return empty results as this connector uses Kafka for data delivery +func (kp *KafkaPostgresConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { + return QueryResult[common.Block]{Data: []common.Block{}}, nil +} + +func (kp *KafkaPostgresConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { + return QueryResult[common.Transaction]{Data: []common.Transaction{}}, nil +} + +func (kp *KafkaPostgresConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { + return QueryResult[common.Log]{Data: []common.Log{}}, nil +} + +func (kp *KafkaPostgresConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { + return QueryResult[common.Trace]{Data: []common.Trace{}}, nil +} + +func (kp *KafkaPostgresConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { + return QueryResult[interface{}]{Aggregates: []map[string]interface{}{}}, nil +} + +// Close closes the database connection +func (kp *KafkaPostgresConnector) Close() error { + return kp.db.Close() +} diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go new file mode 100644 index 0000000..880ea57 --- /dev/null +++ b/internal/storage/kafka_publisher.go @@ -0,0 +1,183 @@ +package storage + +import ( + "context" + "crypto/tls" + "encoding/json" + "fmt" + "net" + "strings" + "sync" + "time" + + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" + "github.com/twmb/franz-go/pkg/kgo" + "github.com/twmb/franz-go/pkg/sasl/plain" +) + +type KafkaPublisher struct { + client *kgo.Client + mu sync.RWMutex +} + +type PublishableMessage[T common.BlockData] struct { + Data T `json:"data"` + Status string `json:"status"` +} + +// NewKafkaPublisher method for storage connector (public) +func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { + brokers := strings.Split(cfg.Brokers, ",") + opts := []kgo.Opt{ + kgo.SeedBrokers(brokers...), + kgo.AllowAutoTopicCreation(), + kgo.ProducerBatchCompression(kgo.SnappyCompression()), + 
kgo.ClientID(fmt.Sprintf("insight-indexer-kafka-storage-%s", config.Cfg.RPC.ChainID)), + kgo.MaxBufferedRecords(1_000_000), + kgo.ProducerBatchMaxBytes(16_000_000), + kgo.RecordPartitioner(kgo.UniformBytesPartitioner(1_000_000, false, false, nil)), + kgo.MetadataMaxAge(60 * time.Second), + kgo.DialTimeout(10 * time.Second), + } + + if cfg.Username != "" && cfg.Password != "" { + opts = append(opts, kgo.SASL(plain.Auth{ + User: cfg.Username, + Pass: cfg.Password, + }.AsMechanism())) + tlsDialer := &tls.Dialer{NetDialer: &net.Dialer{Timeout: 10 * time.Second}} + opts = append(opts, kgo.Dialer(tlsDialer.DialContext)) + } + + client, err := kgo.NewClient(opts...) + if err != nil { + return nil, fmt.Errorf("failed to create Kafka client: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := client.Ping(ctx); err != nil { + client.Close() + return nil, fmt.Errorf("failed to connect to Kafka: %v", err) + } + + publisher := &KafkaPublisher{ + client: client, + } + return publisher, nil +} + +func (p *KafkaPublisher) PublishBlockData(blockData []common.BlockData) error { + return p.publishBlockData(blockData, false) +} + +func (p *KafkaPublisher) PublishReorg(oldData []common.BlockData, newData []common.BlockData) error { + // TODO: need to revisit how reorg blocks get published to downstream + if err := p.publishBlockData(oldData, true); err != nil { + return fmt.Errorf("failed to publish old block data: %v", err) + } + + if err := p.publishBlockData(newData, false); err != nil { + return fmt.Errorf("failed to publish new block data: %v", err) + } + return nil +} + +func (p *KafkaPublisher) Close() error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.client != nil { + p.client.Close() + log.Debug().Msg("Publisher client closed") + } + return nil +} + +func (p *KafkaPublisher) publishMessages(ctx context.Context, messages []*kgo.Record) error { + if len(messages) == 0 { + return nil + } + + p.mu.RLock() + defer p.mu.RUnlock() + + if p.client == nil { + return nil // Skip if no client configured + } + + var wg sync.WaitGroup + wg.Add(len(messages)) + // Publish to all configured producers + for _, msg := range messages { + p.client.Produce(ctx, msg, func(_ *kgo.Record, err error) { + defer wg.Done() + if err != nil { + log.Error().Err(err).Msg("Failed to publish message to Kafka") + } + }) + } + wg.Wait() + + return nil +} + +func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg bool) error { + if p.client == nil || len(blockData) == 0 { + return nil + } + + publishStart := time.Now() + + // Prepare messages for blocks, events, transactions and traces + blockMessages := make([]*kgo.Record, len(blockData)) + + status := "new" + if isReorg { + status = "reverted" + } + + for i, data := range blockData { + // Block message + if blockMsg, err := p.createBlockDataMessage(data, status); err == nil { + blockMessages[i] = blockMsg + } else { + return fmt.Errorf("failed to create block message: %v", err) + } + } + + if err := p.publishMessages(context.Background(), blockMessages); err != nil { + return fmt.Errorf("failed to publish block messages: %v", err) + } + + log.Debug().Str("metric", "publish_duration").Msgf("Publisher.PublishBlockData duration: %f", time.Since(publishStart).Seconds()) + return nil +} + +func (p *KafkaPublisher) createBlockDataMessage(data common.BlockData, status string) (*kgo.Record, error) { + msg := PublishableMessage[common.BlockData]{ + Data: data, + Status: status, + } + msgJson, err 
:= json.Marshal(msg) + if err != nil { + return nil, fmt.Errorf("failed to marshal block data: %v", err) + } + return &kgo.Record{ + Topic: p.getTopicName("commit", data.ChainId), + Key: []byte(fmt.Sprintf("block-%s-%d-%s", status, data.ChainId, data.Block.Hash)), + Value: msgJson, + }, nil +} + +func (p *KafkaPublisher) getTopicName(entity string, chainId uint64) string { + switch entity { + case "commit": + return fmt.Sprintf("insight.commit.blocks.%d", chainId) + default: + panic(fmt.Errorf("unknown topic entity: %s", entity)) + } +} From cbbea0706f04c2253935300827822d1038e8523a Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 13 Aug 2025 16:03:52 +0000 Subject: [PATCH 02/43] Update config --- cmd/root.go | 3 --- configs/config.go | 1 - internal/storage/connector.go | 6 +++--- internal/storage/kafka_postgres.go | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 88b8428..b18d947 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -125,7 +125,6 @@ func init() { rootCmd.PersistentFlags().Int("storage-staging-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for staging storage") // Kafka storage flags - only for main storage (where blockchain data is committed) - rootCmd.PersistentFlags().Bool("storage-main-kafka-enabled", false, "Enable Kafka storage for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-brokers", "", "Kafka brokers for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-username", "", "Kafka username for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") @@ -253,8 +252,6 @@ func init() { viper.BindPFlag("storage.staging.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxIdleConns")) viper.BindPFlag("storage.staging.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxConnLifetime")) viper.BindPFlag("storage.staging.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-connectTimeout")) - // Bind Kafka storage flags - only for main storage - viper.BindPFlag("storage.main.kafka.enabled", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enabled")) viper.BindPFlag("storage.main.kafka.brokers", rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) diff --git a/configs/config.go b/configs/config.go index 10a824f..1aeb450 100644 --- a/configs/config.go +++ b/configs/config.go @@ -102,7 +102,6 @@ type PostgresConfig struct { } type KafkaConfig struct { - Enabled bool `mapstructure:"enabled"` Brokers string `mapstructure:"brokers"` Username string `mapstructure:"username"` Password string `mapstructure:"password"` diff --git a/internal/storage/connector.go b/internal/storage/connector.go index e45db44..9a90b16 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -148,12 +148,12 @@ func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { var conn interface{} var err error - if 
cfg.Postgres != nil { + if cfg.Kafka != nil { + conn, err = NewKafkaPostgresConnector(cfg.Kafka) + } else if cfg.Postgres != nil { conn, err = NewPostgresConnector(cfg.Postgres) } else if cfg.Clickhouse != nil { conn, err = NewClickHouseConnector(cfg.Clickhouse) - } else if cfg.Kafka != nil { - conn, err = NewKafkaPostgresConnector(cfg.Kafka) } else { return *new(T), fmt.Errorf("no storage driver configured") } diff --git a/internal/storage/kafka_postgres.go b/internal/storage/kafka_postgres.go index 74b8714..9621940 100644 --- a/internal/storage/kafka_postgres.go +++ b/internal/storage/kafka_postgres.go @@ -56,7 +56,7 @@ func NewKafkaPostgresConnector(cfg *config.KafkaConfig) (*KafkaPostgresConnector // Initialize Kafka publisher if enabled var kafkaPublisher *KafkaPublisher - if cfg.Enabled && cfg.Brokers != "" { + if cfg.Brokers != "" { kafkaPublisher, err = NewKafkaPublisher(cfg) if err != nil { log.Warn().Err(err).Msg("Failed to initialize Kafka publisher, continuing without publishing") From 4775d57abc3cfd1a11bce8d30b2a49c9735bda8f Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 13 Aug 2025 16:14:55 +0000 Subject: [PATCH 03/43] Error on uninitialize brokers --- internal/storage/kafka_postgres.go | 18 ++++++------------ internal/storage/kafka_publisher.go | 3 ++- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/internal/storage/kafka_postgres.go b/internal/storage/kafka_postgres.go index 9621940..23e7bfd 100644 --- a/internal/storage/kafka_postgres.go +++ b/internal/storage/kafka_postgres.go @@ -55,13 +55,9 @@ func NewKafkaPostgresConnector(cfg *config.KafkaConfig) (*KafkaPostgresConnector } // Initialize Kafka publisher if enabled - var kafkaPublisher *KafkaPublisher - if cfg.Brokers != "" { - kafkaPublisher, err = NewKafkaPublisher(cfg) - if err != nil { - log.Warn().Err(err).Msg("Failed to initialize Kafka publisher, continuing without publishing") - kafkaPublisher = nil - } + kafkaPublisher, err := NewKafkaPublisher(cfg) + if err != nil { + return nil, err } return &KafkaPostgresConnector{ @@ -502,11 +498,9 @@ func (kp *KafkaPostgresConnector) ReplaceBlockData(data []common.BlockData) ([]c oldBlocks := []common.BlockData{} // Publish reorg event to Kafka - if kp.kafkaPublisher != nil { - // Publish new blocks (the reorg handler will mark old ones as reverted) - if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { - return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) - } + // TODO: Publish new blocks (the reorg handler will mark old ones as reverted) + if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { + return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) } // Update cursor to track the highest block number diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index 880ea57..84aca54 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -126,7 +126,7 @@ func (p *KafkaPublisher) publishMessages(ctx context.Context, messages []*kgo.Re } func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg bool) error { - if p.client == nil || len(blockData) == 0 { + if len(blockData) == 0 { return nil } @@ -135,6 +135,7 @@ func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg // Prepare messages for blocks, events, transactions and traces blockMessages := make([]*kgo.Record, len(blockData)) + // TODO: handle reorg status := "new" if isReorg { 
status = "reverted" From 661b1507b8e2100bc4c55b699e958b0ab0e3f404 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 14 Aug 2025 08:23:05 +0000 Subject: [PATCH 04/43] Update queries --- ...> 0000_clickhouse_create_blocks_table.sql} | 8 +- ...clickhouse_create_block_failures_table.sql | 12 - ..._clickhouse_create_transactions_table.sql} | 46 ++-- .../0002_clickhouse_create_cursors_table.sql | 7 - ... => 0002_clickhouse_create_logs_table.sql} | 42 ++-- .../0003_clickhouse_create_staging_table.sql | 11 - ...> 0003_clickhouse_create_traces_table.sql} | 40 +++- ...4_clickhouse_create_insert_null_table.sql} | 12 +- ...0005_clickhouse_create_insert_data_mv.sql} | 16 +- .../0006_clickhouse_create_logs_transfer.sql | 62 +++++ ...007_clickhouse_create_logs_transfer_mv.sql | 145 ++++++++++++ .../0008_clickhouse_create_token_balance.sql | 44 ++++ ...009_clickhouse_create_token_balance_mv.sql | 157 +++++++++++++ ...09_clickhouse_create_token_balances_mv.sql | 117 ---------- ...0_clickhouse_create_token_transfers_mv.sql | 211 ------------------ 15 files changed, 522 insertions(+), 408 deletions(-) rename internal/tools/clickhouse/{0004_clickhouse_create_blocks_table.sql => 0000_clickhouse_create_blocks_table.sql} (90%) delete mode 100644 internal/tools/clickhouse/0001_clickhouse_create_block_failures_table.sql rename internal/tools/clickhouse/{0005_clickhouse_create_transactions_table.sql => 0001_clickhouse_create_transactions_table.sql} (70%) delete mode 100644 internal/tools/clickhouse/0002_clickhouse_create_cursors_table.sql rename internal/tools/clickhouse/{0006_clickhouse_create_logs_table.sql => 0002_clickhouse_create_logs_table.sql} (54%) delete mode 100644 internal/tools/clickhouse/0003_clickhouse_create_staging_table.sql rename internal/tools/clickhouse/{0007_clickhouse_create_traces_table.sql => 0003_clickhouse_create_traces_table.sql} (64%) rename internal/tools/clickhouse/{0000_clickhouse_create_insert_null_table.sql => 0004_clickhouse_create_insert_null_table.sql} (93%) rename internal/tools/clickhouse/{0008_clickhouse_create_insert_mvs.sql => 0005_clickhouse_create_insert_data_mv.sql} (87%) create mode 100644 internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql create mode 100644 internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql create mode 100644 internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql create mode 100644 internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql delete mode 100644 internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql delete mode 100644 internal/tools/clickhouse/0010_clickhouse_create_token_transfers_mv.sql diff --git a/internal/tools/clickhouse/0004_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql similarity index 90% rename from internal/tools/clickhouse/0004_clickhouse_create_blocks_table.sql rename to internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index 68bee0e..ada0c9d 100644 --- a/internal/tools/clickhouse/0004_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -21,10 +21,12 @@ CREATE TABLE IF NOT EXISTS blocks ( `gas_used` UInt256, `withdrawals_root` FixedString(66), `base_fee_per_gas` Nullable(UInt64), + `insert_timestamp` DateTime DEFAULT now(), `sign` Int8 DEFAULT 1, - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, - INDEX idx_hash hash TYPE bloom_filter GRANULARITY 3, + + INDEX 
idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number) -PARTITION BY chain_id; \ No newline at end of file +PARTITION BY toYYYYMM(block_timestamp); \ No newline at end of file diff --git a/internal/tools/clickhouse/0001_clickhouse_create_block_failures_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_block_failures_table.sql deleted file mode 100644 index 669842d..0000000 --- a/internal/tools/clickhouse/0001_clickhouse_create_block_failures_table.sql +++ /dev/null @@ -1,12 +0,0 @@ -CREATE TABLE IF NOT EXISTS block_failures ( - `chain_id` UInt256, - `block_number` UInt256, - `last_error_timestamp` UInt64 CODEC(Delta, ZSTD), - `count` UInt16, - `reason` String, - `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - INDEX idx_block_number block_number TYPE minmax GRANULARITY 1, -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, block_number) -SETTINGS allow_experimental_replacing_merge_with_cleanup = 1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0005_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql similarity index 70% rename from internal/tools/clickhouse/0005_clickhouse_create_transactions_table.sql rename to internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql index 48f1e0d..c8a31cc 100644 --- a/internal/tools/clickhouse/0005_clickhouse_create_transactions_table.sql +++ b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql @@ -31,26 +31,46 @@ CREATE TABLE IF NOT EXISTS transactions ( `blob_gas_price` Nullable(UInt256), `logs_bloom` Nullable(String), `status` Nullable(UInt64), + `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_hash hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 1, - INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 1, - INDEX idx_function_selector function_selector TYPE bloom_filter GRANULARITY 1, - PROJECTION txs_chainid_from_address + INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_function_selector function_selector TYPE bloom_filter GRANULARITY 2, + + PROJECTION from_address_projection ( - SELECT * + SELECT + chain_id, + block_number, + block_timestamp, + hash, + from_address, + to_address, + value, + data ORDER BY chain_id, from_address, - block_number + block_number, + hash ), - PROJECTION txs_chainid_to_address + PROJECTION to_address_projection ( - SELECT * - ORDER BY + SELECT + chain_id, + block_number, + block_timestamp, + hash, + from_address, + to_address, + value, + data + ORDER BY chain_id, to_address, block_number, @@ -58,5 +78,5 @@ CREATE TABLE IF NOT EXISTS transactions ( ) ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number, hash) -PARTITION BY chain_id -SETTINGS deduplicate_merge_projection_mode = 'drop', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file +PARTITION BY 
toYYYYMM(block_timestamp) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0002_clickhouse_create_cursors_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_cursors_table.sql deleted file mode 100644 index 6574a3b..0000000 --- a/internal/tools/clickhouse/0002_clickhouse_create_cursors_table.sql +++ /dev/null @@ -1,7 +0,0 @@ -CREATE TABLE IF NOT EXISTS cursors ( - `chain_id` UInt256, - `cursor_type` String, - `cursor_value` String, - `insert_timestamp` DateTime DEFAULT now(), -) ENGINE = ReplacingMergeTree(insert_timestamp) -ORDER BY (chain_id, cursor_type); diff --git a/internal/tools/clickhouse/0006_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql similarity index 54% rename from internal/tools/clickhouse/0006_clickhouse_create_logs_table.sql rename to internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql index b1d3db3..f93cb9d 100644 --- a/internal/tools/clickhouse/0006_clickhouse_create_logs_table.sql +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -12,28 +12,44 @@ CREATE TABLE IF NOT EXISTS logs ( `topic_1` String, `topic_2` String, `topic_3` String, - `insert_timestamp` DateTime DEFAULT now(), + `sign` Int8 DEFAULT 1, - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, + `insert_timestamp` DateTime DEFAULT now(), + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_address address TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic0 topic_0 TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic1 topic_1 TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic2 topic_2 TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic3 topic_3 TYPE bloom_filter GRANULARITY 1, - PROJECTION logs_chainid_topic0_address + INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 2, + INDEX idx_address address TYPE bloom_filter GRANULARITY 3, + INDEX idx_topic0 topic_0 TYPE bloom_filter GRANULARITY 3, + INDEX idx_topic1 topic_1 TYPE bloom_filter GRANULARITY 4, + INDEX idx_topic2 topic_2 TYPE bloom_filter GRANULARITY 4, + INDEX idx_topic3 topic_3 TYPE bloom_filter GRANULARITY 4, + + PROJECTION chain_address_topic0_projection ( - SELECT * + SELECT + * ORDER BY chain_id, - topic_0, address, + topic_0, block_number, transaction_index, log_index + ), + PROJECTION chain_topic0_projection + ( + SELECT + * + ORDER BY + chain_id, + topic_0, + block_number, + transaction_index, + log_index, + address ) ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number, transaction_hash, log_index) -PARTITION BY chain_id -SETTINGS deduplicate_merge_projection_mode = 'drop', lightweight_mutation_projection_mode = 'rebuild'; +PARTITION BY toYYYYMM(block_timestamp) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0003_clickhouse_create_staging_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_staging_table.sql deleted file mode 100644 index cd015ac..0000000 --- a/internal/tools/clickhouse/0003_clickhouse_create_staging_table.sql +++ /dev/null @@ -1,11 +0,0 @@ -CREATE TABLE IF NOT EXISTS block_data ( - `chain_id` UInt256, - `block_number` UInt256, - `data` String, - 
`insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - INDEX idx_block_number block_number TYPE minmax GRANULARITY 1, -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, block_number) -PARTITION BY chain_id -SETTINGS allow_experimental_replacing_merge_with_cleanup = 1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0007_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql similarity index 64% rename from internal/tools/clickhouse/0007_clickhouse_create_traces_table.sql rename to internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql index f3dc25d..b07ec88 100644 --- a/internal/tools/clickhouse/0007_clickhouse_create_traces_table.sql +++ b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql @@ -20,13 +20,39 @@ CREATE TABLE IF NOT EXISTS traces ( `author` Nullable(FixedString(42)), `reward_type` LowCardinality(Nullable(String)), `refund_address` Nullable(FixedString(42)), + `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, - INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 1, - INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 1, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 2, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 3, + + PROJECTION from_address_projection + ( + SELECT + * + ORDER BY + chain_id, + from_address, + block_number, + transaction_hash, + trace_address + ), + PROJECTION to_address_projection + ( + SELECT + * + ORDER BY + chain_id, + to_address, + block_number, + transaction_hash, + trace_address + ) + ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) -ORDER BY (chain_id, block_number, transaction_hash, trace_address) -PARTITION BY chain_id; \ No newline at end of file +ORDER BY (chain_id, transaction_hash, trace_address) +PARTITION BY toYYYYMM(block_timestamp) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0000_clickhouse_create_insert_null_table.sql b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql similarity index 93% rename from internal/tools/clickhouse/0000_clickhouse_create_insert_null_table.sql rename to internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql index 25fc90e..46f1541 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_insert_null_table.sql +++ b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql @@ -1,5 +1,6 @@ -CREATE TABLE IF NOT EXISTS inserts_null_table ( +CREATE TABLE IF NOT EXISTS insert_null_block_data ( chain_id UInt256, + block Tuple( block_number UInt256, block_timestamp DateTime, @@ -92,8 +93,7 @@ CREATE TABLE IF NOT EXISTS inserts_null_table ( reward_type LowCardinality(Nullable(String)), refund_address Nullable(FixedString(42)) )), - insert_timestamp DateTime DEFAULT now(), - sign Int8 DEFAULT 1 -) ENGINE = MergeTree -ORDER BY (chain_id, insert_timestamp) -PARTITION BY chain_id; + + sign Int8 DEFAULT 1, + insert_timestamp DateTime DEFAULT now() +) ENGINE = Null; diff --git 
a/internal/tools/clickhouse/0008_clickhouse_create_insert_mvs.sql b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql similarity index 87% rename from internal/tools/clickhouse/0008_clickhouse_create_insert_mvs.sql rename to internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql index 0ea2673..f7c7c46 100644 --- a/internal/tools/clickhouse/0008_clickhouse_create_insert_mvs.sql +++ b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_blocks_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_blocks_mv TO blocks AS SELECT @@ -26,9 +26,9 @@ SELECT block.21 AS base_fee_per_gas, insert_timestamp, sign -FROM inserts_null_table; +FROM insert_null_block_data; -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_transactions_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_transactions_mv TO transactions AS SELECT @@ -66,10 +66,10 @@ SELECT t.31 AS status, insert_timestamp, sign -FROM inserts_null_table +FROM insert_null_block_data ARRAY JOIN transactions AS t; -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_logs_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_logs_mv TO logs AS SELECT @@ -88,10 +88,10 @@ SELECT l.12 AS topic_3, insert_timestamp, sign -FROM inserts_null_table +FROM insert_null_block_data ARRAY JOIN logs AS l; -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_traces_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_traces_mv TO traces AS SELECT @@ -118,5 +118,5 @@ SELECT tr.20 AS refund_address, insert_timestamp, sign -FROM inserts_null_table +FROM insert_null_block_data ARRAY JOIN traces AS tr; diff --git a/internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql b/internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql new file mode 100644 index 0000000..2a985b0 --- /dev/null +++ b/internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql @@ -0,0 +1,62 @@ +CREATE TABLE IF NOT EXISTS logs_transfer +( + `chain_id` UInt256, + `token_type` LowCardinality(String), + `token_address` FixedString(42), + `token_id` UInt256, + `from_address` FixedString(42), + `to_address` FixedString(42), + `block_number` UInt256, + `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), + `transaction_hash` FixedString(66), + `transaction_index` UInt64, + `amount` UInt256, + `log_index` UInt64, + `batch_index` Nullable(UInt16) DEFAULT NULL, + + `sign` Int8 DEFAULT 1, + `insert_timestamp` DateTime DEFAULT now(), + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_token_address token_address TYPE bloom_filter GRANULARITY 2, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 4, + + PROJECTION from_address_projection ( + SELECT + * + ORDER BY + chain_id, + from_address, + block_number, + transaction_index, + log_index + ), + PROJECTION to_address_projection ( + SELECT + * + ORDER BY + chain_id, + to_address, + block_number, + transaction_index, + log_index + + ), + PROJECTION token_id_projection ( + SELECT + * + ORDER BY + chain_id, + token_address, + token_id, + block_number, + transaction_index, + log_index + ) +) +ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +PARTITION BY toYYYYMM(block_timestamp) +ORDER BY (chain_id, token_address, block_number, transaction_index, log_index) +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', 
deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql b/internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql new file mode 100644 index 0000000..9a9b6bc --- /dev/null +++ b/internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql @@ -0,0 +1,145 @@ +-- ERC20 +CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc20_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc20' AS token_type, + 0 AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' -- Transfer + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND topic_3 = '' + AND length(data) = 66; + +-- ERC721 +CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc721_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc721' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + toUInt8(1) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND length(topic_3) = 66 AND startsWith(topic_3, '0x') + AND length(data) = 2; + +-- ERC1155 (single) +CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc1155_single_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + toNullable(toUInt16(0)) AS batch_index, + sign, + insert_timestamp +FROM logs +WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' -- TransferSingle + AND length(topic_2) = 66 AND length(topic_3) = 66 + AND length(data) = (2 + 2*64); + +-- ERC1155 (batch) +CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc1155_batch_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(unhex(id_hex))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(amount_hex))) AS amount, + log_index, + toNullable(toUInt16(array_index - 1)) AS batch_index, + sign, + insert_timestamp +FROM ( + SELECT + chain_id, address, topic_2, topic_3, + block_number, block_timestamp, 
transaction_hash, transaction_index, log_index, sign, insert_timestamp, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64))))) AS ids_offset, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64))))) AS amounts_offset, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + ids_offset * 2, 64))))) AS ids_length, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + amounts_offset * 2, 64))))) AS amounts_length, + arrayMap(i -> substring(data, 3 + ids_offset * 2 + 64 + (i-1)*64, 64), range(1, least(ids_length, 10000) + 1)) AS ids_array, + arrayMap(i -> substring(data, 3 + amounts_offset * 2 + 64 + (i-1)*64, 64), range(1, least(amounts_length, 10000) + 1)) AS amounts_array + FROM logs + WHERE topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb' + AND length(topic_2) = 66 + AND length(topic_3) = 66 + AND ids_length = amounts_length +) +ARRAY JOIN + ids_array AS id_hex, + amounts_array AS amount_hex, + arrayEnumerate(ids_array) AS array_index; + +-- ERC6909 +CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc6909_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc6909' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM logs +WHERE topic_0 = '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' + AND length(topic_1) = 66 + AND length(topic_2) = 66 + AND length(data) == 2 + 128; \ No newline at end of file diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql new file mode 100644 index 0000000..e476123 --- /dev/null +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql @@ -0,0 +1,44 @@ +CREATE TABLE IF NOT EXISTS token_balance +( + `chain_id` UInt256, + `token_type` LowCardinality(String), + `token_address` FixedString(42), + `owner_address` FixedString(42), + `token_id` UInt256, + + `balance_state` AggregateFunction(sum, Int256), + `last_block_number_state` AggregateFunction(max, UInt256), + `last_block_timestamp_state` AggregateFunction(max, DateTime), + + INDEX idx_last_block_number (finalizeAggregation(last_block_number_state)) TYPE minmax GRANULARITY 1, + INDEX idx_last_block_timestamp (finalizeAggregation(last_block_timestamp_state)) TYPE minmax GRANULARITY 1, + + PROJECTION owner_balances_projection + ( + SELECT + chain_id, + owner_address, + token_address, + token_id, + sumMerge(balance_state) AS balance, + maxMerge(last_block_number_state) AS last_block_number, + maxMerge(last_block_timestamp_state) AS last_block_timestamp + GROUP BY chain_id, owner_address, token_address, token_id + ), + PROJECTION token_projection + ( + SELECT + chain_id, + token_address, + token_id, + owner_address, + balance_state, + last_block_number_state, + last_block_timestamp_state + ORDER BY chain_id, token_address, token_id, owner_address + ) +) +ENGINE = AggregatingMergeTree +PARTITION BY chain_id +ORDER BY (chain_id, owner_address, token_address, token_id) +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', 
deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql new file mode 100644 index 0000000..bb039fb --- /dev/null +++ b/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql @@ -0,0 +1,157 @@ +-- ERC20 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc20_mv +TO token_balance +AS +SELECT + chain_id, + token_type, + token_address, + owner_address, + token_id, + sumState(delta) AS balance_state, + maxState(block_number) AS last_block_number_state, + maxState(block_timestamp) AS last_block_timestamp_state +FROM +( + -- FROM side (negative) + SELECT + chain_id, + token_type, + token_address, + token_id, + from_address AS owner_address, + toInt256(amount) * (-1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc20' + UNION ALL + -- TO side (positive) + SELECT + chain_id, + token_type, + token_address, + token_id, + to_address AS owner_address, + toInt256(amount) * (+1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc20' +) +GROUP BY chain_id, token_type, token_address, owner_address, token_id; + +-- ERC721 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc721_mv +TO token_balance +AS +SELECT + chain_id, + token_type, + token_address, + owner_address, + token_id, + sumState(delta) AS balance_state, + maxState(block_number) AS last_block_number_state, + maxState(block_timestamp) AS last_block_timestamp_state +FROM +( + SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + toInt256(1) * (-1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc721' + UNION ALL + SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(1) * (+1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc721' +) +GROUP BY chain_id, token_type, token_address, owner_address, token_id; + +-- ERC1155 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc1155_mv +TO token_balance +AS +SELECT + chain_id, + token_type, + token_address, + owner_address, + token_id, + sumState(delta) AS balance_state, + maxState(block_number) AS last_block_number_state, + maxState(block_timestamp) AS last_block_timestamp_state +FROM +( + SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + toInt256(amount) * (-1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc1155' + UNION ALL + SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) * (+1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc1155' +) +GROUP BY chain_id, token_type, token_address, owner_address, token_id; + +-- ERC6909 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc6909_mv +TO token_balance +AS +SELECT + chain_id, + token_type, + token_address, + owner_address, + token_id, + sumState(delta) AS balance_state, + maxState(block_number) AS last_block_number_state, + maxState(block_timestamp) AS last_block_timestamp_state +FROM +( + SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + toInt256(amount) * (-1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer 
WHERE token_type = 'erc6909' + UNION ALL + SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) * (+1) * sign AS delta, + block_number, + block_timestamp + FROM logs_transfer WHERE token_type = 'erc6909' +) +GROUP BY chain_id, token_type, token_address, owner_address, token_id; \ No newline at end of file diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql deleted file mode 100644 index c9e54cb..0000000 --- a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql +++ /dev/null @@ -1,117 +0,0 @@ -CREATE TABLE IF NOT EXISTS token_balances -( - `token_type` String, - `chain_id` UInt256, - `owner` FixedString(42), - `address` FixedString(42), - `token_id` UInt256, - `balance` Int256, - PROJECTION address_projection - ( - SELECT * - ORDER BY - token_type, - chain_id, - address, - token_id - ) -) -ENGINE = SummingMergeTree -ORDER BY (token_type, chain_id, owner, address, token_id) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; - -CREATE MATERIALIZED VIEW IF NOT EXISTS single_token_transfers_mv TO token_balances AS -SELECT chain_id, owner, address, token_type, token_id, sum(amount) as balance -FROM -( - SELECT - chain_id, - address, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' AND topic_3 = '') as is_erc20, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' AND topic_3 != '') as is_erc721, - (topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62') as is_erc1155, - if(is_erc1155, concat('0x', substring(topic_2, 27, 40)), concat('0x', substring(topic_1, 27, 40))) AS sender_address, -- ERC20 & ERC721 both have topic_1 as sender - if(is_erc1155, concat('0x', substring(topic_3, 27, 40)), concat('0x', substring(topic_2, 27, 40))) AS receiver_address, -- ERC20 & ERC721 both have topic_2 as receiver - multiIf(is_erc20, 'erc20', is_erc721, 'erc721', 'erc1155') as token_type, - multiIf( - is_erc1155, - reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))), - is_erc721, - reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))), - toUInt256(0) -- other - ) AS token_id, - multiIf( - is_erc20 AND length(data) = 66, - reinterpretAsInt256(reverse(unhex(substring(data, 3)))), - is_erc721, - toInt256(1), - is_erc1155, - if(length(data) = 130, reinterpretAsInt256(reverse(unhex(substring(data, 67, 64)))), toInt256(1)), - toInt256(0) -- unknown - ) AS transfer_amount, - (sign * transfer_amount) as amount - FROM logs - WHERE - topic_0 IN ( - '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', - '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' - ) -) -array join - [chain_id, chain_id] AS chain_id, - [sender_address, receiver_address] AS owner, - [-amount, amount] as amount, - [token_type, token_type] AS token_type, - [token_id, token_id] AS token_id, - [address, address] AS address -GROUP BY chain_id, owner, address, token_type, token_id; - -CREATE MATERIALIZED VIEW IF NOT EXISTS erc1155_batch_token_transfers_mv TO token_balances AS -SELECT chain_id, owner, address, token_type, token_id, sum(amount) as balance -FROM ( - WITH - metadata as ( - SELECT - *, - 3 + 2 * 64 as ids_length_idx, - ids_length_idx + 64 as ids_values_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, ids_length_idx, 64)))) AS ids_length, - 
ids_length_idx + 64 + (ids_length * 64) as amounts_length_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, amounts_length_idx, 64)))) AS amounts_length, - amounts_length_idx + 64 as amounts_values_idx - FROM logs - WHERE topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb' AND topic_2 != '' AND topic_3 != '' AND ids_length = amounts_length - ), - decoded AS ( - SELECT - *, - arrayMap( - x -> substring(data, ids_values_idx + (x - 1) * 64, 64), - range(1, ids_length + 1) - ) AS ids_hex, - arrayMap( - x -> substring(data, amounts_values_idx + (x - 1) * 64, 64), - range(1, amounts_length + 1) - ) AS amounts_hex - FROM metadata - ) - SELECT - chain_id, - address, - concat('0x', substring(topic_2, 27, 40)) AS sender_address, - concat('0x', substring(topic_3, 27, 40)) AS receiver_address, - 'erc1155' as token_type, - reinterpretAsUInt256(reverse(unhex(substring(hex_id, 1, 64)))) AS token_id, - reinterpretAsInt256(reverse(unhex(substring(hex_amount, 1, 64)))) AS transfer_amount, - (sign * transfer_amount) as amount - FROM decoded - ARRAY JOIN ids_hex AS hex_id, amounts_hex AS hex_amount -) -array join - [chain_id, chain_id] AS chain_id, - [sender_address, receiver_address] AS owner, - [-amount, amount] as amount, - [token_type, token_type] AS token_type, - [token_id, token_id] AS token_id, - [address, address] AS address -GROUP BY chain_id, owner, address, token_type, token_id; \ No newline at end of file diff --git a/internal/tools/clickhouse/0010_clickhouse_create_token_transfers_mv.sql b/internal/tools/clickhouse/0010_clickhouse_create_token_transfers_mv.sql deleted file mode 100644 index b87e35b..0000000 --- a/internal/tools/clickhouse/0010_clickhouse_create_token_transfers_mv.sql +++ /dev/null @@ -1,211 +0,0 @@ -CREATE TABLE IF NOT EXISTS token_transfers -( - `token_type` LowCardinality(String), - `chain_id` UInt256, - `token_address` FixedString(42), - `from_address` FixedString(42), - `to_address` FixedString(42), - `block_number` UInt256, - `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), - `transaction_hash` FixedString(66), - `token_id` UInt256, - `amount` UInt256, - `log_index` UInt64, - `sign` Int8 DEFAULT 1, - `insert_timestamp` DateTime DEFAULT now(), - - INDEX minmax_block_number block_number TYPE minmax GRANULARITY 16, - INDEX minmax_block_timestamp block_timestamp TYPE minmax GRANULARITY 16, - - PROJECTION from_address_projection - ( - SELECT * - ORDER BY - chain_id, - token_type, - from_address, - block_number, - log_index - ), - PROJECTION to_address_projection - ( - SELECT * - ORDER BY - chain_id, - token_type, - to_address, - block_number, - log_index - ), - PROJECTION transaction_hash_projection - ( - SELECT * - ORDER BY - chain_id, - token_type, - transaction_hash, - block_number, - log_index - ), - PROJECTION token_aggregation_projection - ( - SELECT - chain_id, - token_type, - max(block_number) AS max_block_number, - count() AS total_count - GROUP BY - chain_id, - token_type - ) -) -ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) -PARTITION BY chain_id -ORDER BY (chain_id, token_type, token_address, block_number, log_index) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; - -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_to_token_transfers TO token_transfers -( - `chain_id` UInt256, - `token_address` FixedString(42), - `from_address` String, - `to_address` String, - `token_type` String, - `block_number` UInt256, - `block_timestamp` DateTime, 
- `transaction_hash` FixedString(66), - `log_index` UInt64, - `sign` Int8, - `insert_timestamp` DateTime, - `token_id` UInt256, - `amount` UInt256 -) -AS WITH - transfer_logs AS - ( - SELECT - chain_id, - address AS token_address, - topic_0, - topic_1, - topic_2, - topic_3, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 = '') AS is_erc20, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 != '') AS is_erc721, - topic_0 IN ('0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AS is_erc1155, - multiIf(is_erc20, 'erc20', is_erc721, 'erc721', 'erc1155') AS token_type, - if(is_erc1155, concat('0x', substring(topic_2, 27, 40)), concat('0x', substring(topic_1, 27, 40))) AS from_address, - if(is_erc1155, concat('0x', substring(topic_3, 27, 40)), concat('0x', substring(topic_2, 27, 40))) AS to_address, - data, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp - FROM logs - WHERE topic_0 IN ('0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') - ), - batch_transfer_metadata AS - ( - SELECT - *, - 3 + (2 * 64) AS ids_length_idx, - ids_length_idx + 64 AS ids_values_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, ids_length_idx, 64)))) AS ids_length, - (ids_length_idx + 64) + (ids_length * 64) AS amounts_length_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, amounts_length_idx, 64)))) AS amounts_length, - amounts_length_idx + 64 AS amounts_values_idx - FROM transfer_logs - WHERE (topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AND (length(topic_1) = 66) AND (length(topic_2) = 66) AND (length(topic_3) = 66) AND (length(data) != (258 + ((ids_length + amounts_length) * 64))) AND (ids_length = amounts_length) - ), - batch_transfer_logs AS - ( - SELECT - *, - arrayMap(x -> substring(data, ids_values_idx + ((x - 1) * 64), 64), range(1, toInt32(ids_length) + 1)) AS ids_hex, - arrayMap(x -> substring(data, amounts_values_idx + ((x - 1) * 64), 64), range(1, toInt32(amounts_length) + 1)) AS amounts_hex - FROM batch_transfer_metadata - ) -SELECT - chain_id, - token_address, - from_address, - to_address, - token_type, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp, - multiIf(is_erc1155, reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))), is_erc721, reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))), toUInt256(0)) AS token_id, - multiIf(is_erc20 AND (length(data) = 66), reinterpretAsUInt256(reverse(unhex(substring(data, 3)))), is_erc721, toUInt256(1), is_erc1155, if(length(data) = 130, reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))), toUInt256(1)), toUInt256(0)) AS amount -FROM transfer_logs -WHERE topic_0 IN ('0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62') -UNION ALL -WITH - transfer_logs AS - ( - SELECT - chain_id, - address AS token_address, - topic_0, - topic_1, - topic_2, - topic_3, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 = '') AS is_erc20, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 != '') AS is_erc721, - 
topic_0 IN ('0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AS is_erc1155, - multiIf(is_erc20, 'erc20', is_erc721, 'erc721', 'erc1155') AS token_type, - if(is_erc1155, concat('0x', substring(topic_2, 27, 40)), concat('0x', substring(topic_1, 27, 40))) AS from_address, - if(is_erc1155, concat('0x', substring(topic_3, 27, 40)), concat('0x', substring(topic_2, 27, 40))) AS to_address, - data, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp - FROM logs - WHERE topic_0 IN ('0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') - ), - batch_transfer_metadata AS - ( - SELECT - *, - 3 + (2 * 64) AS ids_length_idx, - ids_length_idx + 64 AS ids_values_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, ids_length_idx, 64)))) AS ids_length, - (ids_length_idx + 64) + (ids_length * 64) AS amounts_length_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, amounts_length_idx, 64)))) AS amounts_length, - amounts_length_idx + 64 AS amounts_values_idx - FROM transfer_logs - WHERE (topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AND (length(topic_1) = 66) AND (length(topic_2) = 66) AND (length(topic_3) = 66) AND (length(data) != (258 + ((ids_length + amounts_length) * 64))) AND (ids_length = amounts_length) - ), - batch_transfer_logs AS - ( - SELECT - *, - arrayMap(x -> substring(data, ids_values_idx + ((x - 1) * 64), 64), range(1, toInt32(ids_length) + 1)) AS ids_hex, - arrayMap(x -> substring(data, amounts_values_idx + ((x - 1) * 64), 64), range(1, toInt32(amounts_length) + 1)) AS amounts_hex - FROM batch_transfer_metadata - ) -SELECT - chain_id, - token_address, - from_address, - to_address, - token_type, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp, - reinterpretAsUInt256(reverse(unhex(substring(hex_id, 1, 64)))) AS token_id, - reinterpretAsUInt256(reverse(unhex(substring(hex_amount, 1, 64)))) AS amount -FROM batch_transfer_logs -ARRAY JOIN - ids_hex AS hex_id, - amounts_hex AS hex_amount \ No newline at end of file From e152d5b9e82cca896214402760c3f71aa125e104 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 14 Aug 2025 08:28:00 +0000 Subject: [PATCH 05/43] Option to disable TLS for kafka --- cmd/root.go | 9 ++++++++- configs/config.go | 10 ++++++---- internal/publisher/publisher.go | 3 +++ internal/storage/kafka_publisher.go | 3 +++ 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index b18d947..8f798a6 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -124,10 +124,10 @@ func init() { rootCmd.PersistentFlags().Int("storage-staging-postgres-maxIdleConns", 25, "PostgreSQL max idle connections for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for staging storage") - // Kafka storage flags - only for main storage (where blockchain data is committed) rootCmd.PersistentFlags().String("storage-main-kafka-brokers", "", "Kafka brokers for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-username", 
"", "Kafka username for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") + rootCmd.PersistentFlags().Bool("storage-main-kafka-enable-tls", true, "Enable TLS for Kafka connection in main storage") rootCmd.PersistentFlags().String("storage-main-kafka-postgres-host", "", "PostgreSQL host for Kafka main storage bookkeeping") rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-port", 5432, "PostgreSQL port for Kafka main storage bookkeeping") rootCmd.PersistentFlags().String("storage-main-kafka-postgres-username", "", "PostgreSQL username for Kafka main storage bookkeeping") @@ -149,6 +149,9 @@ func init() { rootCmd.PersistentFlags().Bool("publisher-enabled", false, "Toggle publisher") rootCmd.PersistentFlags().String("publisher-mode", "default", "Publisher mode: default or parallel") rootCmd.PersistentFlags().String("publisher-brokers", "", "Kafka brokers") + rootCmd.PersistentFlags().String("publisher-username", "", "Kafka username for publisher") + rootCmd.PersistentFlags().String("publisher-password", "", "Kafka password for publisher") + rootCmd.PersistentFlags().Bool("publisher-enable-tls", true, "Enable TLS for Kafka connection in publisher") rootCmd.PersistentFlags().Bool("publisher-blocks-enabled", false, "Toggle block publisher") rootCmd.PersistentFlags().String("publisher-blocks-topicName", "", "Kafka topic name for blocks") rootCmd.PersistentFlags().Bool("publisher-transactions-enabled", false, "Toggle transaction publisher") @@ -255,6 +258,7 @@ func init() { viper.BindPFlag("storage.main.kafka.brokers", rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) + viper.BindPFlag("storage.main.kafka.enable_tls", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) viper.BindPFlag("storage.main.kafka.postgres.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-host")) viper.BindPFlag("storage.main.kafka.postgres.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-port")) viper.BindPFlag("storage.main.kafka.postgres.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-username")) @@ -276,6 +280,9 @@ func init() { viper.BindPFlag("publisher.enabled", rootCmd.PersistentFlags().Lookup("publisher-enabled")) viper.BindPFlag("publisher.mode", rootCmd.PersistentFlags().Lookup("publisher-mode")) viper.BindPFlag("publisher.brokers", rootCmd.PersistentFlags().Lookup("publisher-brokers")) + viper.BindPFlag("publisher.username", rootCmd.PersistentFlags().Lookup("publisher-username")) + viper.BindPFlag("publisher.password", rootCmd.PersistentFlags().Lookup("publisher-password")) + viper.BindPFlag("publisher.enable_tls", rootCmd.PersistentFlags().Lookup("publisher-enable-tls")) viper.BindPFlag("publisher.blocks.enabled", rootCmd.PersistentFlags().Lookup("publisher-blocks-enabled")) viper.BindPFlag("publisher.blocks.topicName", rootCmd.PersistentFlags().Lookup("publisher-blocks-topicName")) viper.BindPFlag("publisher.transactions.enabled", rootCmd.PersistentFlags().Lookup("publisher-transactions-enabled")) diff --git a/configs/config.go b/configs/config.go index 1aeb450..b9d036b 100644 --- a/configs/config.go +++ b/configs/config.go @@ -102,10 +102,11 @@ type PostgresConfig struct { } type KafkaConfig struct { - Brokers string 
`mapstructure:"brokers"` - Username string `mapstructure:"username"` - Password string `mapstructure:"password"` - Postgres *PostgresConfig `mapstructure:"postgres"` + Brokers string `mapstructure:"brokers"` + Username string `mapstructure:"username"` + Password string `mapstructure:"password"` + EnableTLS bool `mapstructure:"enable_tls"` + Postgres *PostgresConfig `mapstructure:"postgres"` } type RPCBatchRequestConfig struct { @@ -184,6 +185,7 @@ type PublisherConfig struct { Brokers string `mapstructure:"brokers"` Username string `mapstructure:"username"` Password string `mapstructure:"password"` + EnableTLS bool `mapstructure:"enable_tls"` Blocks BlockPublisherConfig `mapstructure:"blocks"` Transactions TransactionPublisherConfig `mapstructure:"transactions"` Traces TracePublisherConfig `mapstructure:"traces"` diff --git a/internal/publisher/publisher.go b/internal/publisher/publisher.go index 984115a..0f8a761 100644 --- a/internal/publisher/publisher.go +++ b/internal/publisher/publisher.go @@ -76,6 +76,9 @@ func (p *Publisher) initialize() error { User: config.Cfg.Publisher.Username, Pass: config.Cfg.Publisher.Password, }.AsMechanism())) + } + + if config.Cfg.Publisher.EnableTLS { tlsDialer := &tls.Dialer{NetDialer: &net.Dialer{Timeout: 10 * time.Second}} opts = append(opts, kgo.Dialer(tlsDialer.DialContext)) } diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index 84aca54..b0b82ca 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -47,6 +47,9 @@ func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { User: cfg.Username, Pass: cfg.Password, }.AsMechanism())) + } + + if cfg.EnableTLS { tlsDialer := &tls.Dialer{NetDialer: &net.Dialer{Timeout: 10 * time.Second}} opts = append(opts, kgo.Dialer(tlsDialer.DialContext)) } From e61fae708f4e5497ddbe0e7aaea372519d860fe2 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 14 Aug 2025 09:04:34 +0000 Subject: [PATCH 06/43] Add projection mode in blocks --- .../tools/clickhouse/0000_clickhouse_create_blocks_table.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index ada0c9d..4768462 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -29,4 +29,5 @@ CREATE TABLE IF NOT EXISTS blocks ( INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number) -PARTITION BY toYYYYMM(block_timestamp); \ No newline at end of file +PARTITION BY toYYYYMM(block_timestamp) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file From fc2ae64608837e6a4e80bb59da616fb56ace8bf4 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 14 Aug 2025 17:02:46 +0000 Subject: [PATCH 07/43] Fix publish parallel mode --- internal/orchestrator/committer.go | 60 +++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index d85213a..228812a 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -31,6 +31,7 @@ type Committer struct { 
lastPublishedBlock atomic.Uint64 publisher *publisher.Publisher workMode WorkMode + workModeMutex sync.RWMutex workModeChan chan WorkMode validator *Validator } @@ -101,7 +102,28 @@ func (c *Committer) Start(ctx context.Context) { // corrected by the worker loop. log.Error().Err(err).Msg("failed to get last published block number") } else if lastPublished != nil && lastPublished.Sign() > 0 { - c.lastPublishedBlock.Store(lastPublished.Uint64()) + // Always ensure publisher starts from at least the committed value + if latestCommittedBlockNumber != nil && latestCommittedBlockNumber.Sign() > 0 { + if lastPublished.Cmp(latestCommittedBlockNumber) < 0 { + gap := new(big.Int).Sub(latestCommittedBlockNumber, lastPublished) + log.Warn(). + Str("last_published", lastPublished.String()). + Str("latest_committed", latestCommittedBlockNumber.String()). + Str("gap", gap.String()). + Msg("Publisher is behind committed position, seeking forward to committed value") + + c.lastPublishedBlock.Store(latestCommittedBlockNumber.Uint64()) + if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, latestCommittedBlockNumber); err != nil { + log.Error().Err(err).Msg("Failed to update last published block number after seeking forward") + // Fall back to the stored value on error + c.lastPublishedBlock.Store(lastPublished.Uint64()) + } + } else { + c.lastPublishedBlock.Store(lastPublished.Uint64()) + } + } else { + c.lastPublishedBlock.Store(lastPublished.Uint64()) + } } else { c.lastPublishedBlock.Store(c.lastCommittedBlock.Load()) } @@ -143,13 +165,21 @@ func (c *Committer) runCommitLoop(ctx context.Context, interval time.Duration) { case <-ctx.Done(): return case workMode := <-c.workModeChan: - if workMode != c.workMode && workMode != "" { - log.Info().Msgf("Committer work mode changing from %s to %s", c.workMode, workMode) - c.workMode = workMode + if workMode != "" { + c.workModeMutex.Lock() + oldMode := c.workMode + if workMode != oldMode { + log.Info().Msgf("Committer work mode changing from %s to %s", oldMode, workMode) + c.workMode = workMode + } + c.workModeMutex.Unlock() } default: time.Sleep(interval) - if c.workMode == "" { + c.workModeMutex.RLock() + currentMode := c.workMode + c.workModeMutex.RUnlock() + if currentMode == "" { log.Debug().Msg("Committer work mode not set, skipping commit") continue } @@ -176,7 +206,10 @@ func (c *Committer) runPublishLoop(ctx context.Context, interval time.Duration) return default: time.Sleep(interval) - if c.workMode == "" { + c.workModeMutex.RLock() + currentMode := c.workMode + c.workModeMutex.RUnlock() + if currentMode == "" { log.Debug().Msg("Committer work mode not set, skipping publish") continue } @@ -297,7 +330,10 @@ func (c *Committer) getBlockNumbersToPublish(ctx context.Context) ([]*big.Int, e func (c *Committer) getBlockToCommitUntil(ctx context.Context, latestCommittedBlockNumber *big.Int) (*big.Int, error) { untilBlock := new(big.Int).Add(latestCommittedBlockNumber, big.NewInt(int64(c.blocksPerCommit))) - if c.workMode == WorkModeBackfill { + c.workModeMutex.RLock() + currentMode := c.workMode + c.workModeMutex.RUnlock() + if currentMode == WorkModeBackfill { return untilBlock, nil } else { // get latest block from RPC and if that's less than until block, return that @@ -314,7 +350,10 @@ func (c *Committer) getBlockToCommitUntil(ctx context.Context, latestCommittedBl } func (c *Committer) fetchBlockData(ctx context.Context, blockNumbers []*big.Int) ([]common.BlockData, error) { - if c.workMode == WorkModeBackfill { + 
c.workModeMutex.RLock() + currentMode := c.workMode + c.workModeMutex.RUnlock() + if currentMode == WorkModeBackfill { startTime := time.Now() blocksData, err := c.storage.StagingStorage.GetStagingData(storage.QueryFilter{BlockNumbers: blockNumbers, ChainId: c.rpc.GetChainID()}) log.Debug().Str("metric", "get_staging_data_duration").Msgf("StagingStorage.GetStagingData duration: %f", time.Since(startTime).Seconds()) @@ -489,7 +528,10 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big // record the first missed block number in prometheus metrics.MissedBlockNumbers.Set(float64(expectedStartBlockNumber.Int64())) - if c.workMode == WorkModeLive { + c.workModeMutex.RLock() + currentMode := c.workMode + c.workModeMutex.RUnlock() + if currentMode == WorkModeLive { log.Debug().Msgf("Skipping gap handling in live mode. Expected block %s, actual first block %s", expectedStartBlockNumber.String(), actualFirstBlock.Number.String()) return nil } From 191298b116d6ed9fac44b409aab6b81a7c0524c0 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 14 Aug 2025 17:03:53 +0000 Subject: [PATCH 08/43] Gofmt --- internal/orchestrator/committer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index 228812a..1316f0e 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -111,7 +111,7 @@ func (c *Committer) Start(ctx context.Context) { Str("latest_committed", latestCommittedBlockNumber.String()). Str("gap", gap.String()). Msg("Publisher is behind committed position, seeking forward to committed value") - + c.lastPublishedBlock.Store(latestCommittedBlockNumber.Uint64()) if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, latestCommittedBlockNumber); err != nil { log.Error().Err(err).Msg("Failed to update last published block number after seeking forward") From e45907a25f749ada01715ba19b755fe6c7656e27 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 15 Aug 2025 05:36:50 +0000 Subject: [PATCH 09/43] Update schema --- .../0000_clickhouse_create_blocks_table.sql | 2 +- ...1_clickhouse_create_transactions_table.sql | 2 +- .../0002_clickhouse_create_logs_table.sql | 2 +- .../0003_clickhouse_create_traces_table.sql | 2 +- ...006_clickhouse_create_token_transfers.sql} | 6 +- ..._clickhouse_create_token_transfers_mv.sql} | 20 +- .../0008_clickhouse_create_token_balance.sql | 2 +- ...009_clickhouse_create_token_balance_mv.sql | 32 +-- ...clickhouse_create_address_transactions.sql | 43 ++++ ...ckhouse_create_address_transactions_mv.sql | 42 ++++ ...12_clickhouse_create_address_transfers.sql | 24 +++ ...clickhouse_create_address_transfers_mv.sql | 22 ++ ...0000_clickhouse_backfill_logs_transfer.sql | 202 ++++++++++++++++++ 13 files changed, 366 insertions(+), 35 deletions(-) rename internal/tools/clickhouse/{0006_clickhouse_create_logs_transfer.sql => 0006_clickhouse_create_token_transfers.sql} (91%) rename internal/tools/clickhouse/{0007_clickhouse_create_logs_transfer_mv.sql => 0007_clickhouse_create_token_transfers_mv.sql} (91%) create mode 100644 internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql create mode 100644 internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql create mode 100644 internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql create mode 100644 
internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql create mode 100644 internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql diff --git a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index 4768462..b311f24 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -29,5 +29,5 @@ CREATE TABLE IF NOT EXISTS blocks ( INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number) -PARTITION BY toYYYYMM(block_timestamp) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql index c8a31cc..02a0294 100644 --- a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql +++ b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql @@ -78,5 +78,5 @@ CREATE TABLE IF NOT EXISTS transactions ( ) ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number, hash) -PARTITION BY toYYYYMM(block_timestamp) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql index f93cb9d..e327edb 100644 --- a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -51,5 +51,5 @@ CREATE TABLE IF NOT EXISTS logs ( ) ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, block_number, transaction_hash, log_index) -PARTITION BY toYYYYMM(block_timestamp) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql index b07ec88..17a032b 100644 --- a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql +++ b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql @@ -54,5 +54,5 @@ CREATE TABLE IF NOT EXISTS traces ( ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) ORDER BY (chain_id, transaction_hash, trace_address) -PARTITION BY toYYYYMM(block_timestamp) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql similarity index 91% rename from internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql rename to internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql index 2a985b0..0d6ef92 100644 --- a/internal/tools/clickhouse/0006_clickhouse_create_logs_transfer.sql +++ 
b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql @@ -1,4 +1,4 @@ -CREATE TABLE IF NOT EXISTS logs_transfer +CREATE TABLE IF NOT EXISTS token_transfers ( `chain_id` UInt256, `token_type` LowCardinality(String), @@ -18,7 +18,6 @@ CREATE TABLE IF NOT EXISTS logs_transfer `insert_timestamp` DateTime DEFAULT now(), INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_token_address token_address TYPE bloom_filter GRANULARITY 2, INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 3, INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 4, @@ -42,7 +41,6 @@ CREATE TABLE IF NOT EXISTS logs_transfer block_number, transaction_index, log_index - ), PROJECTION token_id_projection ( SELECT @@ -57,6 +55,6 @@ CREATE TABLE IF NOT EXISTS logs_transfer ) ) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) -PARTITION BY toYYYYMM(block_timestamp) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) ORDER BY (chain_id, token_address, block_number, transaction_index, log_index) SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql similarity index 91% rename from internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql rename to internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql index 9a9b6bc..e03b1a0 100644 --- a/internal/tools/clickhouse/0007_clickhouse_create_logs_transfer_mv.sql +++ b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql @@ -1,6 +1,6 @@ -- ERC20 -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc20_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc20_mv +TO token_transfers AS SELECT chain_id, @@ -26,8 +26,8 @@ WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b AND length(data) = 66; -- ERC721 -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc721_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc721_mv +TO token_transfers AS SELECT chain_id, @@ -53,8 +53,8 @@ WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b AND length(data) = 2; -- ERC1155 (single) -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc1155_single_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc1155_single_mv +TO token_transfers AS SELECT chain_id, @@ -78,8 +78,8 @@ WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0 AND length(data) = (2 + 2*64); -- ERC1155 (batch) -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc1155_batch_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc1155_batch_mv +TO token_transfers AS SELECT chain_id, @@ -119,8 +119,8 @@ ARRAY JOIN arrayEnumerate(ids_array) AS array_index; -- ERC6909 -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_transfer_erc6909_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc6909_mv +TO token_transfers AS SELECT chain_id, diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql index e476123..0cf38c9 100644 --- 
a/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql @@ -1,4 +1,4 @@ -CREATE TABLE IF NOT EXISTS token_balance +CREATE TABLE IF NOT EXISTS token_balances ( `chain_id` UInt256, `token_type` LowCardinality(String), diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql index bb039fb..be000df 100644 --- a/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql +++ b/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql @@ -1,6 +1,6 @@ -- ERC20 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc20_mv -TO token_balance +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_mv +TO token_balances AS SELECT chain_id, @@ -23,7 +23,7 @@ FROM toInt256(amount) * (-1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc20' + FROM token_transfers WHERE token_type = 'erc20' UNION ALL -- TO side (positive) SELECT @@ -35,13 +35,13 @@ FROM toInt256(amount) * (+1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc20' + FROM token_transfers WHERE token_type = 'erc20' ) GROUP BY chain_id, token_type, token_address, owner_address, token_id; -- ERC721 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc721_mv -TO token_balance +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_mv +TO token_balances AS SELECT chain_id, @@ -63,7 +63,7 @@ FROM toInt256(1) * (-1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc721' + FROM token_transfers WHERE token_type = 'erc721' UNION ALL SELECT chain_id, @@ -74,13 +74,13 @@ FROM toInt256(1) * (+1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc721' + FROM token_transfers WHERE token_type = 'erc721' ) GROUP BY chain_id, token_type, token_address, owner_address, token_id; -- ERC1155 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc1155_mv -TO token_balance +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_mv +TO token_balances AS SELECT chain_id, @@ -102,7 +102,7 @@ FROM toInt256(amount) * (-1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc1155' + FROM token_transfers WHERE token_type = 'erc1155' UNION ALL SELECT chain_id, @@ -113,13 +113,13 @@ FROM toInt256(amount) * (+1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc1155' + FROM token_transfers WHERE token_type = 'erc1155' ) GROUP BY chain_id, token_type, token_address, owner_address, token_id; -- ERC6909 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balance_erc6909_mv -TO token_balance +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_mv +TO token_balances AS SELECT chain_id, @@ -141,7 +141,7 @@ FROM toInt256(amount) * (-1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc6909' + FROM token_transfers WHERE token_type = 'erc6909' UNION ALL SELECT chain_id, @@ -152,6 +152,6 @@ FROM toInt256(amount) * (+1) * sign AS delta, block_number, block_timestamp - FROM logs_transfer WHERE token_type = 'erc6909' + FROM token_transfers WHERE token_type = 'erc6909' ) GROUP BY chain_id, token_type, token_address, owner_address, token_id; \ No newline at end of file diff --git a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql 
b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql new file mode 100644 index 0000000..c33638e --- /dev/null +++ b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql @@ -0,0 +1,43 @@ +CREATE TABLE IF NOT EXISTS address_transactions ( + `chain_id` UInt256, + `hash` FixedString(66), + `nonce` UInt64, + `block_hash` FixedString(66), + `block_number` UInt256, + `block_timestamp` DateTime CODEC(Delta, ZSTD), + `transaction_index` UInt64, + `address` FixedString(42), + `address_type` Enum8('from' = 1, 'to' = 2), + `value` UInt256, + `gas` UInt64, + `gas_price` UInt256, + `data` String, + `function_selector` FixedString(10), + `max_fee_per_gas` UInt128, + `max_priority_fee_per_gas` UInt128, + `max_fee_per_blob_gas` UInt256, + `blob_versioned_hashes` Array(String), + `transaction_type` UInt8, + `r` UInt256, + `s` UInt256, + `v` UInt256, + `access_list` Nullable(String), + `authorization_list` Nullable(String), + `contract_address` Nullable(FixedString(42)), + `gas_used` Nullable(UInt64), + `cumulative_gas_used` Nullable(UInt64), + `effective_gas_price` Nullable(UInt256), + `blob_gas_used` Nullable(UInt64), + `blob_gas_price` Nullable(UInt256), + `logs_bloom` Nullable(String), + `status` Nullable(UInt64), + + `sign` Int8 DEFAULT 1, + `insert_timestamp` DateTime DEFAULT now(), + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3 +) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +ORDER BY (chain_id, address, block_number, hash, transaction_index) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql new file mode 100644 index 0000000..46d64d3 --- /dev/null +++ b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql @@ -0,0 +1,42 @@ +CREATE MATERIALIZED VIEW address_transactions_mv +TO address_transactions +AS +SELECT + chain_id, + hash, + nonce, + block_hash, + block_number, + block_timestamp, + transaction_index, + address_tuple.1 AS address, + address_tuple.2 AS address_type, + value, + gas, + gas_price, + data, + function_selector, + max_fee_per_gas, + max_priority_fee_per_gas, + max_fee_per_blob_gas, + blob_versioned_hashes, + transaction_type, + r, + s, + v, + access_list, + authorization_list, + contract_address, + gas_used, + cumulative_gas_used, + effective_gas_price, + blob_gas_used, + blob_gas_price, + logs_bloom, + status, + + sign, + insert_timestamp +FROM transactions +ARRAY JOIN + arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql new file mode 100644 index 0000000..2e8d071 --- /dev/null +++ b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql @@ -0,0 +1,24 @@ +CREATE TABLE IF NOT EXISTS address_transfers ( + `chain_id` UInt256, + `token_type` LowCardinality(String), + `token_address` FixedString(42), + `token_id` UInt256, + `address` FixedString(42), + `address_type` Enum8('from' = 1, 'to' = 2), + `block_number` UInt256, + `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), + 
`transaction_hash` FixedString(66), + `transaction_index` UInt64, + `amount` UInt256, + `log_index` UInt64, + `batch_index` Nullable(UInt16) DEFAULT NULL, + + `sign` Int8 DEFAULT 1, + `insert_timestamp` DateTime DEFAULT now(), + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3 +) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +ORDER BY (chain_id, address, block_number, transaction_hash, transaction_index) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql new file mode 100644 index 0000000..72a3ebb --- /dev/null +++ b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql @@ -0,0 +1,22 @@ +CREATE MATERIALIZED VIEW address_transfers_mv +TO address_transfers +AS +SELECT + chain_id, + token_type, + token_address, + token_id, + address_tuple.1 AS address, + address_tuple.2 AS address_type, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + amount, + log_index, + batch_index, + sign, + insert_timestamp +FROM token_transfers +ARRAY JOIN + arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file diff --git a/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql b/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql new file mode 100644 index 0000000..22b3c17 --- /dev/null +++ b/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql @@ -0,0 +1,202 @@ +CREATE TABLE IF NOT EXISTS backfill_logs +( + `chain_id` UInt256, + `block_number` UInt256, + `block_hash` FixedString(66), + `block_timestamp` DateTime CODEC(Delta, ZSTD), + `transaction_hash` FixedString(66), + `transaction_index` UInt64, + `log_index` UInt64, + `address` FixedString(42), + `data` String, + `topic_0` String, + `topic_1` String, + `topic_2` String, + `topic_3` String, + + `sign` Int8 DEFAULT 1, + `insert_timestamp` DateTime DEFAULT now(), +) ENGINE = Null; + + +--- Materialize view running to the correct tables +-- ERC20 +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc20_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc20' AS token_type, + 0 AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND topic_3 = '' + AND length(data) = 66; + +-- ERC721 +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc721_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc721' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + 
block_number, + block_timestamp, + transaction_hash, + transaction_index, + toUInt8(1) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND length(topic_3) = 66 AND startsWith(topic_3, '0x') + AND length(data) = 2; + +-- ERC1155 (single) +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc1155_single_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + toNullable(toUInt16(0)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' -- TransferSingle + AND length(topic_2) = 66 AND length(topic_3) = 66 + AND length(data) = (2 + 2*64); + +-- ERC1155 (batch) +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc1155_batch_mv +TO logs_transfer +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(unhex(id_hex))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(amount_hex))) AS amount, + log_index, + toNullable(toUInt16(array_index - 1)) AS batch_index, + sign, + insert_timestamp +FROM ( + SELECT + chain_id, address, topic_2, topic_3, + block_number, block_timestamp, transaction_hash, transaction_index, log_index, sign, insert_timestamp, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64))))) AS ids_offset, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64))))) AS amounts_offset, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + ids_offset * 2, 64))))) AS ids_length, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + amounts_offset * 2, 64))))) AS amounts_length, + arrayMap(i -> substring(data, 3 + ids_offset * 2 + 64 + (i-1)*64, 64), range(1, least(ids_length, 10000) + 1)) AS ids_array, + arrayMap(i -> substring(data, 3 + amounts_offset * 2 + 64 + (i-1)*64, 64), range(1, least(amounts_length, 10000) + 1)) AS amounts_array + FROM backfill_logs + WHERE topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb' + AND length(topic_2) = 66 + AND length(topic_3) = 66 + AND ids_length = amounts_length +) +ARRAY JOIN + ids_array AS id_hex, + amounts_array AS amount_hex, + arrayEnumerate(ids_array) AS array_index; + + +-- ERC6909 +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc6909_mv +TO logs_transfer +AS +SELECT + chain_id, + lower(address) AS token_address, + 'erc6909' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + lower(concat('0x', substring(topic_1, 27, 40))) AS from_address, + lower(concat('0x', substring(topic_2, 27, 40))) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + 
reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' + AND length(topic_1) = 66 + AND length(topic_2) = 66 + AND length(data) == 2 + 128; + +--- INITIATE BACKFILL BY RUNNING: +-- INSERT INTO backfill_logs +-- SELECT +-- chain_id, +-- block_number, +-- block_hash, +-- block_timestamp, +-- transaction_hash , +-- transaction_index, +-- log_index, +-- address, +-- data, +-- topic_0, +-- topic_1, +-- topic_2, +-- topic_3, +-- sign, +-- insert_timestamp, +-- FROM logs +-- WHERE 1=1 +-- AND chain_id = 1 +-- AND block_number >= 0 AND block_number < 10000000 +-- AND topic_0 IN ( +-- '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', -- 20/721 +-- '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', -- 1155 single +-- '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb', -- 1155 batch +-- '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' -- 6909 +-- ); + +-- DROP TABLE logs_transfer, token_balance; +-- DROP TABLE bf__logs_transfer_erc20_mv, bf__logs_transfer_erc721_mv, bf__logs_transfer_erc1155_mv, bf__logs_transfer_erc6909_mv; +-- DROP TABLE logs_transfer_erc20_mv, logs_transfer_erc721_mv, logs_transfer_erc1155_mv, logs_transfer_erc6909_mv; +-- DROP TABLE token_balance_erc20_mv, token_balance_erc721_mv, token_balance_erc1155_mv, token_balance_erc6909_mv; \ No newline at end of file From ceeac3b5a5e918e5d0855386413fcf73b474c795 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 15 Aug 2025 07:57:15 +0000 Subject: [PATCH 10/43] Fix backfill table --- ...0000_clickhouse_backfill_logs_transfer.sql | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql b/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql index 22b3c17..c2090e6 100644 --- a/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql +++ b/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql @@ -21,8 +21,8 @@ CREATE TABLE IF NOT EXISTS backfill_logs --- Materialize view running to the correct tables -- ERC20 -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc20_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc20_mv +TO token_transfers AS SELECT chain_id, @@ -48,8 +48,8 @@ WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b AND length(data) = 66; -- ERC721 -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc721_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc721_mv +TO token_transfers AS SELECT chain_id, @@ -75,8 +75,8 @@ WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b AND length(data) = 2; -- ERC1155 (single) -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc1155_single_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc1155_single_mv +TO token_transfers AS SELECT chain_id, @@ -100,8 +100,8 @@ WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0 AND length(data) = (2 + 2*64); -- ERC1155 (batch) -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc1155_batch_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS 
bf__token_transfers_erc1155_batch_mv +TO token_transfers AS SELECT chain_id, @@ -142,8 +142,8 @@ ARRAY JOIN -- ERC6909 -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__logs_transfer_erc6909_mv -TO logs_transfer +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc6909_mv +TO token_transfers AS SELECT chain_id, @@ -196,7 +196,7 @@ WHERE topic_0 = '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728 -- '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' -- 6909 -- ); --- DROP TABLE logs_transfer, token_balance; --- DROP TABLE bf__logs_transfer_erc20_mv, bf__logs_transfer_erc721_mv, bf__logs_transfer_erc1155_mv, bf__logs_transfer_erc6909_mv; --- DROP TABLE logs_transfer_erc20_mv, logs_transfer_erc721_mv, logs_transfer_erc1155_mv, logs_transfer_erc6909_mv; +-- DROP TABLE token_transfers, token_balance; +-- DROP TABLE bf__token_transfers_erc20_mv, bf__token_transfers_erc721_mv, bf__token_transfers_erc1155_mv, bf__token_transfers_erc6909_mv; +-- DROP TABLE token_transfers_erc20_mv, token_transfers_erc721_mv, token_transfers_erc1155_mv, token_transfers_erc6909_mv; -- DROP TABLE token_balance_erc20_mv, token_balance_erc721_mv, token_balance_erc1155_mv, token_balance_erc6909_mv; \ No newline at end of file From 64aaec52d457dce743c2e9c031aa764a7f06448f Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 15 Aug 2025 19:52:11 +0000 Subject: [PATCH 11/43] Update kafka storage producer --- internal/common/block.go | 20 +++++ internal/storage/kafka_publisher.go | 129 ++++++++++++++++++---------- 2 files changed, 102 insertions(+), 47 deletions(-) diff --git a/internal/common/block.go b/internal/common/block.go index eacf1f1..83a5bf0 100644 --- a/internal/common/block.go +++ b/internal/common/block.go @@ -100,3 +100,23 @@ func (b *Block) Serialize() BlockModel { BaseFeePerGas: b.BaseFeePerGas, } } + +func (b *BlockData) Serialize() BlockData { + data := BlockData{ + ChainId: b.ChainId, + Block: b.Block, + Transactions: b.Transactions, + Logs: b.Logs, + Traces: b.Traces, + } + if data.Transactions == nil { + data.Transactions = []Transaction{} + } + if data.Logs == nil { + data.Logs = []Log{} + } + if data.Traces == nil { + data.Traces = []Trace{} + } + return data +} diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index b0b82ca..f7f0f72 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -5,6 +5,8 @@ import ( "crypto/tls" "encoding/json" "fmt" + "hash/fnv" + "math" "net" "strings" "sync" @@ -18,28 +20,37 @@ import ( ) type KafkaPublisher struct { - client *kgo.Client - mu sync.RWMutex + client *kgo.Client + mu sync.RWMutex + chainID string } -type PublishableMessage[T common.BlockData] struct { - Data T `json:"data"` - Status string `json:"status"` +type PublishableBlockMessage struct { + common.BlockData + Sign int8 `json:"sign"` + InsertTimestamp time.Time `json:"insert_timestamp"` } // NewKafkaPublisher method for storage connector (public) func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { brokers := strings.Split(cfg.Brokers, ",") + chainID := config.Cfg.RPC.ChainID + opts := []kgo.Opt{ kgo.SeedBrokers(brokers...), kgo.AllowAutoTopicCreation(), - kgo.ProducerBatchCompression(kgo.SnappyCompression()), - kgo.ClientID(fmt.Sprintf("insight-indexer-kafka-storage-%s", config.Cfg.RPC.ChainID)), + kgo.ProducerBatchCompression(kgo.ZstdCompression()), + kgo.ClientID(fmt.Sprintf("insight-indexer-kafka-storage-%s", chainID)), + 
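// Illustrative aside, not from the patch itself: kgo.ManualPartitioner() a few lines below
// tells franz-go to take the destination partition verbatim from each kgo.Record's Partition
// field instead of hashing the key, so every record built by this publisher sets Partition
// explicitly. Under that assumption, producing looks roughly like (values illustrative):
//
//	rec := &kgo.Record{
//		Topic:     "insight.commit.blocks",         // topic name used at this point in the series
//		Partition: 7,                               // honored only because of ManualPartitioner
//		Key:       []byte("blockdata-7-123-0xabc-1"),
//		Value:     payloadJSON,
//	}
//	client.Produce(ctx, rec, nil)
//
// Note too that kgo.TransactionalID should be unique per live producer; two producers sharing
// one ID for the same chain will fence each other's transactions.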
kgo.TransactionalID(fmt.Sprintf("insight-producer-%s", chainID)), + kgo.MaxBufferedBytes(2 * 1024 * 1024 * 1024), // 2GB kgo.MaxBufferedRecords(1_000_000), kgo.ProducerBatchMaxBytes(16_000_000), - kgo.RecordPartitioner(kgo.UniformBytesPartitioner(1_000_000, false, false, nil)), + kgo.RecordPartitioner(kgo.ManualPartitioner()), + kgo.ProduceRequestTimeout(30 * time.Second), kgo.MetadataMaxAge(60 * time.Second), kgo.DialTimeout(10 * time.Second), + kgo.RequiredAcks(kgo.AllISRAcks()), + kgo.RequestRetries(5), } if cfg.Username != "" && cfg.Password != "" { @@ -68,8 +79,10 @@ func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { } publisher := &KafkaPublisher{ - client: client, + client: client, + chainID: chainID, } + return publisher, nil } @@ -78,7 +91,6 @@ func (p *KafkaPublisher) PublishBlockData(blockData []common.BlockData) error { } func (p *KafkaPublisher) PublishReorg(oldData []common.BlockData, newData []common.BlockData) error { - // TODO: need to revisit how reorg blocks get published to downstream if err := p.publishBlockData(oldData, true); err != nil { return fmt.Errorf("failed to publish old block data: %v", err) } @@ -105,30 +117,39 @@ func (p *KafkaPublisher) publishMessages(ctx context.Context, messages []*kgo.Re return nil } - p.mu.RLock() - defer p.mu.RUnlock() + // Lock for the entire transaction lifecycle to ensure thread safety + p.mu.Lock() + defer p.mu.Unlock() if p.client == nil { - return nil // Skip if no client configured + return fmt.Errorf("no kafka client configured") + } + + // Start a new transaction + if err := p.client.BeginTransaction(); err != nil { + return fmt.Errorf("failed to begin transaction: %v", err) } - var wg sync.WaitGroup - wg.Add(len(messages)) - // Publish to all configured producers + // Produce all messages in the transaction for _, msg := range messages { - p.client.Produce(ctx, msg, func(_ *kgo.Record, err error) { - defer wg.Done() - if err != nil { - log.Error().Err(err).Msg("Failed to publish message to Kafka") - } - }) + p.client.Produce(ctx, msg, nil) + } + + // Flush all messages + if err := p.client.Flush(ctx); err != nil { + p.client.EndTransaction(ctx, kgo.TryAbort) + return fmt.Errorf("failed to flush messages: %v", err) + } + + // Commit the transaction + if err := p.client.EndTransaction(ctx, kgo.TryCommit); err != nil { + return fmt.Errorf("failed to commit transaction: %v", err) } - wg.Wait() return nil } -func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg bool) error { +func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isDeleted bool) error { if len(blockData) == 0 { return nil } @@ -138,15 +159,9 @@ func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg // Prepare messages for blocks, events, transactions and traces blockMessages := make([]*kgo.Record, len(blockData)) - // TODO: handle reorg - status := "new" - if isReorg { - status = "reverted" - } - for i, data := range blockData { // Block message - if blockMsg, err := p.createBlockDataMessage(data, status); err == nil { + if blockMsg, err := p.createBlockDataMessage(data, isDeleted); err == nil { blockMessages[i] = blockMsg } else { return fmt.Errorf("failed to create block message: %v", err) @@ -161,27 +176,47 @@ func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isReorg return nil } -func (p *KafkaPublisher) createBlockDataMessage(data common.BlockData, status string) (*kgo.Record, error) { - msg := PublishableMessage[common.BlockData]{ - Data: 
data, - Status: status, +func (p *KafkaPublisher) createBlockDataMessage(data common.BlockData, isDeleted bool) (*kgo.Record, error) { + insertTimestamp := time.Now() + msg := PublishableBlockMessage{ + BlockData: data.Serialize(), + Sign: 1, + InsertTimestamp: insertTimestamp, + } + if isDeleted { + msg.Sign = -1 // Indicate deletion with a negative sign } msgJson, err := json.Marshal(msg) if err != nil { return nil, fmt.Errorf("failed to marshal block data: %v", err) } - return &kgo.Record{ - Topic: p.getTopicName("commit", data.ChainId), - Key: []byte(fmt.Sprintf("block-%s-%d-%s", status, data.ChainId, data.Block.Hash)), - Value: msgJson, - }, nil -} -func (p *KafkaPublisher) getTopicName(entity string, chainId uint64) string { - switch entity { - case "commit": - return fmt.Sprintf("insight.commit.blocks.%d", chainId) - default: - panic(fmt.Errorf("unknown topic entity: %s", entity)) + // Determine partition based on chainID + var partition int32 + if data.ChainId <= math.MaxInt32 { + // Direct assignment for chain IDs that fit in int32 + partition = int32(data.ChainId) + } else { + // Hash for larger chain IDs to avoid overflow + h := fnv.New32a() + fmt.Fprintf(h, "%d", data.ChainId) + partition = int32(h.Sum32() & 0x7FFFFFFF) // Ensure positive } + + // Create headers with metadata + headers := []kgo.RecordHeader{ + {Key: "chain_id", Value: []byte(fmt.Sprintf("%d", data.ChainId))}, + {Key: "block_number", Value: []byte(fmt.Sprintf("%d", data.Block.Number))}, + {Key: "sign", Value: []byte(fmt.Sprintf("%d", msg.Sign))}, + {Key: "insert_timestamp", Value: []byte(insertTimestamp.Format(time.RFC3339Nano))}, + {Key: "schema_version", Value: []byte("1")}, + } + + return &kgo.Record{ + Topic: "insight.commit.blocks", + Key: []byte(fmt.Sprintf("blockdata-%d-%d-%s-%d", data.ChainId, data.Block.Number, data.Block.Hash, msg.Sign)), + Value: msgJson, + Headers: headers, + Partition: partition, + }, nil } From 20dc4712d4522d77a5d183732830d07396f9a81d Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 15 Aug 2025 21:05:31 +0000 Subject: [PATCH 12/43] Kafka + Redis --- configs/config.go | 17 +- go.mod | 2 + go.sum | 4 + internal/storage/connector.go | 2 +- internal/storage/kafka_postgres.go | 614 ----------------------------- internal/storage/kafka_redis.go | 300 ++++++++++++++ 6 files changed, 319 insertions(+), 620 deletions(-) delete mode 100644 internal/storage/kafka_postgres.go create mode 100644 internal/storage/kafka_redis.go diff --git a/configs/config.go b/configs/config.go index b9d036b..b94669f 100644 --- a/configs/config.go +++ b/configs/config.go @@ -101,12 +101,19 @@ type PostgresConfig struct { ConnectTimeout int `mapstructure:"connectTimeout"` } +type RedisConfig struct { + Host string `mapstructure:"host"` + Port int `mapstructure:"port"` + Password string `mapstructure:"password"` + DB int `mapstructure:"db"` +} + type KafkaConfig struct { - Brokers string `mapstructure:"brokers"` - Username string `mapstructure:"username"` - Password string `mapstructure:"password"` - EnableTLS bool `mapstructure:"enable_tls"` - Postgres *PostgresConfig `mapstructure:"postgres"` + Brokers string `mapstructure:"brokers"` + Username string `mapstructure:"username"` + Password string `mapstructure:"password"` + EnableTLS bool `mapstructure:"enable_tls"` + Redis *RedisConfig `mapstructure:"redis"` } type RPCBatchRequestConfig struct { diff --git a/go.mod b/go.mod index f5e6788..66d4ef5 100644 --- a/go.mod +++ b/go.mod @@ -39,6 +39,7 @@ require ( 
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/deckarep/golang-set/v2 v2.6.0 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/ethereum/c-kzg-4844/v2 v2.1.0 // indirect github.com/ethereum/go-verkle v0.2.2 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect @@ -85,6 +86,7 @@ require ( github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect + github.com/redis/go-redis/v9 v9.12.1 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect diff --git a/go.sum b/go.sum index 2d7c778..6302502 100644 --- a/go.sum +++ b/go.sum @@ -63,6 +63,8 @@ github.com/decred/dcrd/crypto/blake256 v1.0.0 h1:/8DMNYp9SGi5f0w7uCm6d6M4OU2rGFK github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/ethereum/c-kzg-4844/v2 v2.1.0 h1:gQropX9YFBhl3g4HYhwE70zq3IHFRgbbNPw0Shwzf5w= github.com/ethereum/c-kzg-4844/v2 v2.1.0/go.mod h1:TC48kOKjJKPbN7C++qIgt0TJzZ70QznYR7Ob+WXl57E= github.com/ethereum/go-ethereum v1.15.11 h1:JK73WKeu0WC0O1eyX+mdQAVHUV+UR1a9VB/domDngBU= @@ -237,6 +239,8 @@ github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/redis/go-redis/v9 v9.12.1 h1:k5iquqv27aBtnTm2tIkROUDp8JBXhXZIVu1InSgvovg= +github.com/redis/go-redis/v9 v9.12.1/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 9a90b16..0b5d743 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -149,7 +149,7 @@ func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { var conn interface{} var err error if cfg.Kafka != nil { - conn, err = NewKafkaPostgresConnector(cfg.Kafka) + conn, err = NewKafkaRedisConnector(cfg.Kafka) } else if cfg.Postgres != nil { conn, err = NewPostgresConnector(cfg.Postgres) } else if cfg.Clickhouse != nil { diff --git a/internal/storage/kafka_postgres.go b/internal/storage/kafka_postgres.go deleted file mode 100644 index 23e7bfd..0000000 --- a/internal/storage/kafka_postgres.go +++ /dev/null @@ -1,614 +0,0 @@ -package storage - -import ( - "database/sql" - "encoding/json" - "fmt" - "math/big" - "strings" - "time" - - _ "github.com/lib/pq" - "github.com/rs/zerolog/log" - config 
"github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" -) - -// KafkaPostgresConnector uses PostgreSQL for metadata storage and Kafka for block data delivery -type KafkaPostgresConnector struct { - db *sql.DB - cfg *config.KafkaConfig - kafkaPublisher *KafkaPublisher -} - -func NewKafkaPostgresConnector(cfg *config.KafkaConfig) (*KafkaPostgresConnector, error) { - // Connect to PostgreSQL - connStr := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s", - cfg.Postgres.Host, cfg.Postgres.Port, cfg.Postgres.Username, cfg.Postgres.Password, cfg.Postgres.Database) - - // Default to "require" for security if SSL mode not specified - sslMode := cfg.Postgres.SSLMode - if sslMode == "" { - sslMode = "require" - log.Info().Msg("No SSL mode specified, defaulting to 'require' for secure connection") - } - connStr += fmt.Sprintf(" sslmode=%s", sslMode) - - if cfg.Postgres.ConnectTimeout > 0 { - connStr += fmt.Sprintf(" connect_timeout=%d", cfg.Postgres.ConnectTimeout) - } - - db, err := sql.Open("postgres", connStr) - if err != nil { - return nil, fmt.Errorf("failed to connect to postgres: %w", err) - } - - db.SetMaxOpenConns(cfg.Postgres.MaxOpenConns) - db.SetMaxIdleConns(cfg.Postgres.MaxIdleConns) - - if cfg.Postgres.MaxConnLifetime > 0 { - db.SetConnMaxLifetime(time.Duration(cfg.Postgres.MaxConnLifetime) * time.Second) - } - - if err := db.Ping(); err != nil { - return nil, fmt.Errorf("failed to ping postgres: %w", err) - } - - // Initialize Kafka publisher if enabled - kafkaPublisher, err := NewKafkaPublisher(cfg) - if err != nil { - return nil, err - } - - return &KafkaPostgresConnector{ - db: db, - cfg: cfg, - kafkaPublisher: kafkaPublisher, - }, nil -} - -// Orchestrator Storage Implementation (PostgreSQL) - -func (kp *KafkaPostgresConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) { - query := `SELECT chain_id, block_number, last_error_timestamp, failure_count, reason - FROM block_failures WHERE 1=1` - - args := []interface{}{} - argCount := 0 - - if qf.ChainId != nil && qf.ChainId.Sign() > 0 { - argCount++ - query += fmt.Sprintf(" AND chain_id = $%d", argCount) - args = append(args, qf.ChainId.String()) - } - - if len(qf.BlockNumbers) > 0 { - placeholders := make([]string, len(qf.BlockNumbers)) - for i, bn := range qf.BlockNumbers { - argCount++ - placeholders[i] = fmt.Sprintf("$%d", argCount) - args = append(args, bn.String()) - } - query += fmt.Sprintf(" AND block_number IN (%s)", strings.Join(placeholders, ",")) - } - - if qf.SortBy != "" { - query += fmt.Sprintf(" ORDER BY %s", qf.SortBy) - if qf.SortOrder != "" { - query += " " + qf.SortOrder - } - } else { - query += " ORDER BY block_number DESC" - } - - if qf.Limit > 0 { - argCount++ - query += fmt.Sprintf(" LIMIT $%d", argCount) - args = append(args, qf.Limit) - } - - if qf.Offset > 0 { - argCount++ - query += fmt.Sprintf(" OFFSET $%d", argCount) - args = append(args, qf.Offset) - } - - rows, err := kp.db.Query(query, args...) 
- if err != nil { - return nil, err - } - defer func() { - if err := rows.Close(); err != nil { - log.Error().Err(err).Msg("Failed to close rows in GetBlockFailures") - } - }() - - var failures []common.BlockFailure - for rows.Next() { - var failure common.BlockFailure - var chainIdStr, blockNumberStr string - var timestamp int64 - var count int - - err := rows.Scan(&chainIdStr, &blockNumberStr, ×tamp, &count, &failure.FailureReason) - if err != nil { - return nil, fmt.Errorf("error scanning block failure: %w", err) - } - - var ok bool - failure.ChainId, ok = new(big.Int).SetString(chainIdStr, 10) - if !ok { - return nil, fmt.Errorf("failed to parse chain_id '%s' as big.Int", chainIdStr) - } - - failure.BlockNumber, ok = new(big.Int).SetString(blockNumberStr, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block_number '%s' as big.Int", blockNumberStr) - } - - failure.FailureTime = time.Unix(timestamp, 0) - failure.FailureCount = count - - failures = append(failures, failure) - } - - return failures, rows.Err() -} - -func (kp *KafkaPostgresConnector) StoreBlockFailures(failures []common.BlockFailure) error { - if len(failures) == 0 { - return nil - } - - valueStrings := make([]string, 0, len(failures)) - valueArgs := make([]interface{}, 0, len(failures)*5) - - for i, failure := range failures { - valueStrings = append(valueStrings, fmt.Sprintf("($%d, $%d, $%d, $%d, $%d)", - i*5+1, i*5+2, i*5+3, i*5+4, i*5+5)) - valueArgs = append(valueArgs, - failure.ChainId.String(), - failure.BlockNumber.String(), - failure.FailureTime.Unix(), - failure.FailureCount, - failure.FailureReason, - ) - } - - query := fmt.Sprintf(`INSERT INTO block_failures (chain_id, block_number, last_error_timestamp, failure_count, reason) - VALUES %s - ON CONFLICT (chain_id, block_number) - DO UPDATE SET - last_error_timestamp = EXCLUDED.last_error_timestamp, - failure_count = EXCLUDED.failure_count, - reason = EXCLUDED.reason, - updated_at = NOW()`, strings.Join(valueStrings, ",")) - - _, err := kp.db.Exec(query, valueArgs...) - return err -} - -func (kp *KafkaPostgresConnector) DeleteBlockFailures(failures []common.BlockFailure) error { - if len(failures) == 0 { - return nil - } - - tuples := make([]string, 0, len(failures)) - args := make([]interface{}, 0, len(failures)*2) - - for i, failure := range failures { - tuples = append(tuples, fmt.Sprintf("($%d, $%d)", i*2+1, i*2+2)) - args = append(args, failure.ChainId.String(), failure.BlockNumber.String()) - } - - query := fmt.Sprintf(`DELETE FROM block_failures - WHERE ctid IN ( - SELECT ctid - FROM block_failures - WHERE (chain_id, block_number) IN (%s) - FOR UPDATE SKIP LOCKED - )`, strings.Join(tuples, ",")) - - _, err := kp.db.Exec(query, args...) 
- return err -} - -func (kp *KafkaPostgresConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { - query := `SELECT cursor_value FROM cursors - WHERE cursor_type = 'reorg' AND chain_id = $1` - - var blockNumberString string - err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - - return blockNumber, nil -} - -func (kp *KafkaPostgresConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) - VALUES ($1, 'reorg', $2) - ON CONFLICT (chain_id, cursor_type) - DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` - - _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) - return err -} - -// Staging Storage Implementation (PostgreSQL) - -func (kp *KafkaPostgresConnector) InsertStagingData(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - valueStrings := make([]string, 0, len(data)) - valueArgs := make([]interface{}, 0, len(data)*3) - - for i, blockData := range data { - blockDataJSON, err := json.Marshal(blockData) - if err != nil { - return err - } - - valueStrings = append(valueStrings, fmt.Sprintf("($%d, $%d, $%d)", - i*3+1, i*3+2, i*3+3)) - valueArgs = append(valueArgs, - blockData.Block.ChainId.String(), - blockData.Block.Number.String(), - string(blockDataJSON), - ) - } - - query := fmt.Sprintf(`INSERT INTO block_data (chain_id, block_number, data) - VALUES %s - ON CONFLICT (chain_id, block_number) - DO UPDATE SET data = EXCLUDED.data, updated_at = NOW()`, strings.Join(valueStrings, ",")) - - _, err := kp.db.Exec(query, valueArgs...) - return err -} - -func (kp *KafkaPostgresConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { - query := `SELECT data FROM block_data WHERE 1=1` - - args := []interface{}{} - argCount := 0 - - if qf.ChainId != nil && qf.ChainId.Sign() > 0 { - argCount++ - query += fmt.Sprintf(" AND chain_id = $%d", argCount) - args = append(args, qf.ChainId.String()) - } - - if len(qf.BlockNumbers) > 0 { - placeholders := make([]string, len(qf.BlockNumbers)) - for i, bn := range qf.BlockNumbers { - argCount++ - placeholders[i] = fmt.Sprintf("$%d", argCount) - args = append(args, bn.String()) - } - query += fmt.Sprintf(" AND block_number IN (%s)", strings.Join(placeholders, ",")) - } else if qf.StartBlock != nil && qf.EndBlock != nil { - argCount++ - query += fmt.Sprintf(" AND block_number BETWEEN $%d AND $%d", argCount, argCount+1) - args = append(args, qf.StartBlock.String(), qf.EndBlock.String()) - argCount++ - } - - query += " ORDER BY block_number ASC" - - if qf.Limit > 0 { - argCount++ - query += fmt.Sprintf(" LIMIT $%d", argCount) - args = append(args, qf.Limit) - } - - rows, err := kp.db.Query(query, args...) 
- if err != nil { - return nil, err - } - defer func() { - if err := rows.Close(); err != nil { - log.Error().Err(err).Msg("Failed to close rows in GetStagingData") - } - }() - - blockDataList := make([]common.BlockData, 0) - for rows.Next() { - var blockDataJson string - if err := rows.Scan(&blockDataJson); err != nil { - return nil, fmt.Errorf("error scanning block data: %w", err) - } - - var blockData common.BlockData - if err := json.Unmarshal([]byte(blockDataJson), &blockData); err != nil { - return nil, err - } - - blockDataList = append(blockDataList, blockData) - } - - return blockDataList, rows.Err() -} - -func (kp *KafkaPostgresConnector) DeleteStagingData(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - tuples := make([]string, 0, len(data)) - args := make([]interface{}, 0, len(data)*2) - - for i, blockData := range data { - tuples = append(tuples, fmt.Sprintf("($%d, $%d)", i*2+1, i*2+2)) - args = append(args, blockData.Block.ChainId.String(), blockData.Block.Number.String()) - } - - query := fmt.Sprintf(`DELETE FROM block_data - WHERE ctid IN ( - SELECT ctid - FROM block_data - WHERE (chain_id, block_number) IN (%s) - FOR UPDATE SKIP LOCKED - )`, strings.Join(tuples, ",")) - - _, err := kp.db.Exec(query, args...) - return err -} - -func (kp *KafkaPostgresConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'publish' AND chain_id = $1` - - var blockNumberString string - err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - return blockNumber, nil -} - -func (kp *KafkaPostgresConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) - VALUES ($1, 'publish', $2) - ON CONFLICT (chain_id, cursor_type) - DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` - - _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) - return err -} - -func (kp *KafkaPostgresConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { - query := `SELECT MAX(block_number) FROM block_data WHERE 1=1` - - args := []interface{}{} - argCount := 0 - - if chainId != nil && chainId.Sign() > 0 { - argCount++ - query += fmt.Sprintf(" AND chain_id = $%d", argCount) - args = append(args, chainId.String()) - } - - if rangeStart != nil && rangeStart.Sign() > 0 { - argCount++ - query += fmt.Sprintf(" AND block_number >= $%d", argCount) - args = append(args, rangeStart.String()) - } - - if rangeEnd != nil && rangeEnd.Sign() > 0 { - argCount++ - query += fmt.Sprintf(" AND block_number <= $%d", argCount) - args = append(args, rangeEnd.String()) - } - - var blockNumberStr sql.NullString - err := kp.db.QueryRow(query, args...).Scan(&blockNumberStr) - if err != nil { - return nil, err - } - - if !blockNumberStr.Valid { - return big.NewInt(0), nil - } - - blockNumber, ok := new(big.Int).SetString(blockNumberStr.String, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberStr.String) - } - - return blockNumber, nil -} - -func (kp *KafkaPostgresConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { - 
query := `DELETE FROM block_data - WHERE ctid IN ( - SELECT ctid - FROM block_data - WHERE chain_id = $1 - AND block_number <= $2 - FOR UPDATE SKIP LOCKED - )` - _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()) - return err -} - -// InsertBlockData publishes block data to Kafka instead of storing in database -func (kp *KafkaPostgresConnector) InsertBlockData(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - // Publish to Kafka - if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { - return fmt.Errorf("failed to publish block data to kafka: %w", err) - } - log.Debug(). - Int("blocks", len(data)). - Msg("Published block data to Kafka") - - // Update cursor to track the highest block number published - if len(data) > 0 { - // Find the highest block number in the batch - var maxBlock *big.Int - for _, blockData := range data { - if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { - maxBlock = blockData.Block.Number - } - } - if maxBlock != nil { - chainId := data[0].Block.ChainId - blockNumber := maxBlock - query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) - VALUES ($1, 'commit', $2) - ON CONFLICT (chain_id, cursor_type) - DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` - if _, err := kp.db.Exec(query, chainId.String(), blockNumber.String()); err != nil { - return err - } - } - } - - return nil -} - -// ReplaceBlockData handles reorg by publishing both old and new data to Kafka -func (kp *KafkaPostgresConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { - if len(data) == 0 { - return nil, nil - } - - oldBlocks := []common.BlockData{} - - // Publish reorg event to Kafka - // TODO: Publish new blocks (the reorg handler will mark old ones as reverted) - if err := kp.kafkaPublisher.PublishBlockData(data); err != nil { - return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) - } - - // Update cursor to track the highest block number - if len(data) > 0 { - var maxBlock *big.Int - for _, blockData := range data { - if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { - maxBlock = blockData.Block.Number - } - } - if maxBlock != nil { - if err := kp.SetLastPublishedBlockNumber(data[0].Block.ChainId, maxBlock); err != nil { - return nil, fmt.Errorf("failed to update published block cursor: %w", err) - } - } - } - - return oldBlocks, nil -} - -func (kp *KafkaPostgresConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { - query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'commit' AND chain_id = $1` - - var blockNumberString string - err := kp.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - return blockNumber, nil -} - -func (kp *KafkaPostgresConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - // Get the last published block number - lastPublished, err := kp.GetLastPublishedBlockNumber(chainId) - if err != nil { - return nil, err - } - - // Check if it's within the range - if lastPublished.Cmp(startBlock) >= 0 && lastPublished.Cmp(endBlock) <= 0 { - return lastPublished, nil - } - - // If outside range, return appropriate boundary - if lastPublished.Cmp(endBlock) > 0 { - 
return endBlock, nil - } - if lastPublished.Cmp(startBlock) < 0 { - return big.NewInt(0), nil - } - - return lastPublished, nil -} - -func (kp *KafkaPostgresConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { - return []common.BlockHeader{}, nil -} - -func (kp *KafkaPostgresConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { - return QueryResult[common.TokenBalance]{Data: []common.TokenBalance{}}, nil -} - -func (kp *KafkaPostgresConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { - return QueryResult[common.TokenTransfer]{Data: []common.TokenTransfer{}}, nil -} - -func (kp *KafkaPostgresConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { - return []common.BlockData{}, nil -} - -func (kp *KafkaPostgresConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { - return []*big.Int{}, nil -} - -func (kp *KafkaPostgresConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { - return []common.BlockData{}, nil -} - -// Query methods return empty results as this connector uses Kafka for data delivery -func (kp *KafkaPostgresConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { - return QueryResult[common.Block]{Data: []common.Block{}}, nil -} - -func (kp *KafkaPostgresConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { - return QueryResult[common.Transaction]{Data: []common.Transaction{}}, nil -} - -func (kp *KafkaPostgresConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { - return QueryResult[common.Log]{Data: []common.Log{}}, nil -} - -func (kp *KafkaPostgresConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { - return QueryResult[common.Trace]{Data: []common.Trace{}}, nil -} - -func (kp *KafkaPostgresConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { - return QueryResult[interface{}]{Aggregates: []map[string]interface{}{}}, nil -} - -// Close closes the database connection -func (kp *KafkaPostgresConnector) Close() error { - return kp.db.Close() -} diff --git a/internal/storage/kafka_redis.go b/internal/storage/kafka_redis.go new file mode 100644 index 0000000..9c1a0ea --- /dev/null +++ b/internal/storage/kafka_redis.go @@ -0,0 +1,300 @@ +package storage + +import ( + "context" + "fmt" + "math/big" + "time" + + "github.com/redis/go-redis/v9" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" +) + +// Redis key namespace constants for better organization and maintainability +const ( + // Cursor keys for tracking positions + KeyCursorReorg = "cursor:reorg" // String: cursor:reorg:{chainId} + KeyCursorPublish = "cursor:publish" // String: cursor:publish:{chainId} + KeyCursorCommit = "cursor:commit" // String: cursor:commit:{chainId} +) + +// KafkaRedisConnector uses Redis for metadata storage and Kafka for block data delivery +type KafkaRedisConnector struct { + redisClient *redis.Client + cfg *config.KafkaConfig + kafkaPublisher *KafkaPublisher +} + +func NewKafkaRedisConnector(cfg *config.KafkaConfig) (*KafkaRedisConnector, error) { + // Connect to Redis + 
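// Illustrative aside, not from the patch itself: all bookkeeping for this connector lives in
// three Redis string keys per chain, each holding a decimal block number with no TTL:
//
//	cursor:reorg:{chainId}    reorg-check cursor (Get/SetLastReorgCheckedBlockNumber)
//	cursor:publish:{chainId}  publish cursor (Get/SetLastPublishedBlockNumber)
//	cursor:commit:{chainId}   commit high-water mark (InsertBlockData / GetMaxBlockNumber)
//
// which keeps the state trivial to inspect or reset by hand, for example:
//
//	redis-cli GET cursor:commit:1
//	redis-cli SET cursor:commit:1 18500000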
redisClient := redis.NewClient(&redis.Options{ + Addr: fmt.Sprintf("%s:%d", cfg.Redis.Host, cfg.Redis.Port), + Password: cfg.Redis.Password, + DB: cfg.Redis.DB, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := redisClient.Ping(ctx).Err(); err != nil { + return nil, fmt.Errorf("failed to connect to redis: %w", err) + } + + // Initialize Kafka publisher + kafkaPublisher, err := NewKafkaPublisher(cfg) + if err != nil { + return nil, err + } + + return &KafkaRedisConnector{ + redisClient: redisClient, + cfg: cfg, + kafkaPublisher: kafkaPublisher, + }, nil +} + +// Orchestrator Storage Implementation - Block failures not supported + +func (kr *KafkaRedisConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) { + return nil, fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") +} + +func (kr *KafkaRedisConnector) StoreBlockFailures(failures []common.BlockFailure) error { + return fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") +} + +func (kr *KafkaRedisConnector) DeleteBlockFailures(failures []common.BlockFailure) error { + return fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") +} + +func (kr *KafkaRedisConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + + return blockNumber, nil +} + +func (kr *KafkaRedisConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +// Staging Storage Implementation - Not supported for KafkaRedis connector + +func (kr *KafkaRedisConnector) InsertStagingData(data []common.BlockData) error { + return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") +} + +func (kr *KafkaRedisConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { + return nil, fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") +} + +func (kr *KafkaRedisConnector) DeleteStagingData(data []common.BlockData) error { + return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") +} + +func (kr *KafkaRedisConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *KafkaRedisConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := 
context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +func (kr *KafkaRedisConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") +} + +func (kr *KafkaRedisConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { + return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") +} + +// InsertBlockData publishes block data to Kafka instead of storing in database +func (kr *KafkaRedisConnector) InsertBlockData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + // Publish to Kafka + if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { + return fmt.Errorf("failed to publish block data to kafka: %w", err) + } + log.Debug(). + Int("blocks", len(data)). + Msg("Published block data to Kafka") + + // Update cursor to track the highest block number published + if len(data) > 0 { + // Find the highest block number in the batch + var maxBlock *big.Int + for _, blockData := range data { + if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = blockData.Block.Number + } + } + if maxBlock != nil { + ctx := context.Background() + chainId := data[0].Block.ChainId + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + if err := kr.redisClient.Set(ctx, key, maxBlock.String(), 0).Err(); err != nil { + return err + } + } + } + + return nil +} + +// ReplaceBlockData handles reorg by publishing both old and new data to Kafka +func (kr *KafkaRedisConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { + if len(data) == 0 { + return nil, nil + } + + oldBlocks := []common.BlockData{} + + // Publish reorg event to Kafka + // TODO: Publish new blocks (the reorg handler will mark old ones as reverted) + if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { + return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) + } + + // Update cursor to track the highest block number + if len(data) > 0 { + var maxBlock *big.Int + for _, blockData := range data { + if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = blockData.Block.Number + } + } + if maxBlock != nil { + if err := kr.SetLastPublishedBlockNumber(data[0].Block.ChainId, maxBlock); err != nil { + return nil, fmt.Errorf("failed to update published block cursor: %w", err) + } + } + } + + return oldBlocks, nil +} + +func (kr *KafkaRedisConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *KafkaRedisConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + // Get the last published block number + lastPublished, err := kr.GetLastPublishedBlockNumber(chainId) + if err != nil { + return nil, err + } + + // Check if it's within the range + if 
lastPublished.Cmp(startBlock) >= 0 && lastPublished.Cmp(endBlock) <= 0 { + return lastPublished, nil + } + + // If outside range, return appropriate boundary + if lastPublished.Cmp(endBlock) > 0 { + return endBlock, nil + } + if lastPublished.Cmp(startBlock) < 0 { + return big.NewInt(0), nil + } + + return lastPublished, nil +} + +func (kr *KafkaRedisConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { + return []common.BlockHeader{}, nil +} + +func (kr *KafkaRedisConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { + return QueryResult[common.TokenBalance]{Data: []common.TokenBalance{}}, nil +} + +func (kr *KafkaRedisConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { + return QueryResult[common.TokenTransfer]{Data: []common.TokenTransfer{}}, nil +} + +func (kr *KafkaRedisConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { + return []common.BlockData{}, nil +} + +func (kr *KafkaRedisConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { + return []*big.Int{}, nil +} + +func (kr *KafkaRedisConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { + return []common.BlockData{}, nil +} + +// Query methods return empty results as this connector uses Kafka for data delivery +func (kr *KafkaRedisConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { + return QueryResult[common.Block]{Data: []common.Block{}}, nil +} + +func (kr *KafkaRedisConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { + return QueryResult[common.Transaction]{Data: []common.Transaction{}}, nil +} + +func (kr *KafkaRedisConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { + return QueryResult[common.Log]{Data: []common.Log{}}, nil +} + +func (kr *KafkaRedisConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { + return QueryResult[common.Trace]{Data: []common.Trace{}}, nil +} + +func (kr *KafkaRedisConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { + return QueryResult[interface{}]{Aggregates: []map[string]interface{}{}}, nil +} + +// Close closes the Redis connection +func (kr *KafkaRedisConnector) Close() error { + return kr.redisClient.Close() +} From 0bf3097f136cddc5ce80adea57944b395596987c Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Mon, 18 Aug 2025 15:51:51 +0000 Subject: [PATCH 13/43] Update schema payload --- internal/storage/kafka_publisher.go | 128 ++++++++++++++++++++++------ internal/storage/kafka_redis.go | 45 ++++------ 2 files changed, 117 insertions(+), 56 deletions(-) diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index f7f0f72..d3376ce 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -5,8 +5,6 @@ import ( "crypto/tls" "encoding/json" "fmt" - "hash/fnv" - "math" "net" "strings" "sync" @@ -25,12 +23,39 @@ type KafkaPublisher struct { chainID string } -type PublishableBlockMessage struct { +type MessageType string + +type PublishableData interface { + GetType() MessageType +} + +type PublishableMessagePayload struct { + Data 
PublishableData `json:"data"` + Type MessageType `json:"type"` + Timestamp time.Time `json:"timestamp"` +} + +type PublishableMessageBlockData struct { common.BlockData Sign int8 `json:"sign"` InsertTimestamp time.Time `json:"insert_timestamp"` } +type PublishableMessageRevert struct { + ChainId uint64 `json:"chain_id"` + BlockNumber uint64 `json:"block_number"` + Sign int8 `json:"sign"` + InsertTimestamp time.Time `json:"insert_timestamp"` +} + +func (b PublishableMessageBlockData) GetType() MessageType { + return "block_data" +} + +func (b PublishableMessageRevert) GetType() MessageType { + return "revert" +} + // NewKafkaPublisher method for storage connector (public) func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { brokers := strings.Split(cfg.Brokers, ",") @@ -91,6 +116,12 @@ func (p *KafkaPublisher) PublishBlockData(blockData []common.BlockData) error { } func (p *KafkaPublisher) PublishReorg(oldData []common.BlockData, newData []common.BlockData) error { + newHead := uint64(newData[0].Block.Number.Uint64()) + // Publish revert the revert to the new head - 1, so that the new updated block data can be re-processed + if err := p.publishBlockRevert(newData[0].ChainId, newHead-1); err != nil { + return fmt.Errorf("failed to revert: %v", err) + } + if err := p.publishBlockData(oldData, true); err != nil { return fmt.Errorf("failed to publish old block data: %v", err) } @@ -149,6 +180,27 @@ func (p *KafkaPublisher) publishMessages(ctx context.Context, messages []*kgo.Re return nil } +func (p *KafkaPublisher) publishBlockRevert(chainId uint64, blockNumber uint64) error { + publishStart := time.Now() + + // Prepare messages for blocks, events, transactions and traces + blockMessages := make([]*kgo.Record, 1) + + // Block message + if blockMsg, err := p.createBlockRevertMessage(chainId, blockNumber); err == nil { + blockMessages[0] = blockMsg + } else { + return fmt.Errorf("failed to create block revert message: %v", err) + } + + if err := p.publishMessages(context.Background(), blockMessages); err != nil { + return fmt.Errorf("failed to publish block revert messages: %v", err) + } + + log.Debug().Str("metric", "publish_duration").Msgf("Publisher.PublishBlockData duration: %f", time.Since(publishStart).Seconds()) + return nil +} + func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isDeleted bool) error { if len(blockData) == 0 { return nil @@ -176,47 +228,71 @@ func (p *KafkaPublisher) publishBlockData(blockData []common.BlockData, isDelete return nil } -func (p *KafkaPublisher) createBlockDataMessage(data common.BlockData, isDeleted bool) (*kgo.Record, error) { - insertTimestamp := time.Now() - msg := PublishableBlockMessage{ - BlockData: data.Serialize(), +func (p *KafkaPublisher) createBlockDataMessage(block common.BlockData, isDeleted bool) (*kgo.Record, error) { + timestamp := time.Now() + + data := PublishableMessageBlockData{ + BlockData: block, Sign: 1, - InsertTimestamp: insertTimestamp, + InsertTimestamp: timestamp, } if isDeleted { - msg.Sign = -1 // Indicate deletion with a negative sign + data.Sign = -1 } + + msg := PublishableMessagePayload{ + Data: data, + Type: data.GetType(), + Timestamp: timestamp, + } + msgJson, err := json.Marshal(msg) if err != nil { return nil, fmt.Errorf("failed to marshal block data: %v", err) } - // Determine partition based on chainID - var partition int32 - if data.ChainId <= math.MaxInt32 { - // Direct assignment for chain IDs that fit in int32 - partition = int32(data.ChainId) - } else { - // Hash for 
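// Illustrative aside, not from the patch itself: a downstream consumer of this envelope could
// be sketched as below (group name, broker address and chain id are assumptions; read-committed
// isolation matters because the producer above is transactional):
//
//	cl, err := kgo.NewClient(
//		kgo.SeedBrokers("localhost:9092"),
//		kgo.ConsumerGroup("insight-committer"),        // assumed group name
//		kgo.ConsumeTopics("insight.commit.blocks.1"),  // per-chain topic for chain id 1
//		kgo.FetchIsolationLevel(kgo.ReadCommitted()),  // skip records from aborted transactions
//	)
//	if err != nil {
//		panic(err)
//	}
//	for {
//		fetches := cl.PollFetches(context.Background())
//		fetches.EachRecord(func(r *kgo.Record) {
//			var env struct {
//				Type string          `json:"type"`
//				Data json.RawMessage `json:"data"`
//			}
//			if err := json.Unmarshal(r.Value, &env); err != nil {
//				return
//			}
//			switch env.Type {
//			case "block_data": // decode env.Data as block data and upsert it
//			case "revert":     // roll back rows above env.Data's block_number, then expect republished blocks
//			}
//		})
//	}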
larger chain IDs to avoid overflow - h := fnv.New32a() - fmt.Fprintf(h, "%d", data.ChainId) - partition = int32(h.Sum32() & 0x7FFFFFFF) // Ensure positive + return p.createRecord(data.GetType(), block.ChainId, block.Block.Number.Uint64(), timestamp, msgJson) +} + +func (p *KafkaPublisher) createBlockRevertMessage(chainId uint64, blockNumber uint64) (*kgo.Record, error) { + timestamp := time.Now() + + data := PublishableMessageRevert{ + ChainId: chainId, + BlockNumber: blockNumber, + Sign: 1, + InsertTimestamp: timestamp, + } + + msg := PublishableMessagePayload{ + Data: data, + Type: data.GetType(), + Timestamp: timestamp, } + msgJson, err := json.Marshal(msg) + if err != nil { + return nil, fmt.Errorf("failed to marshal block data: %v", err) + } + + return p.createRecord(data.GetType(), chainId, blockNumber, timestamp, msgJson) +} + +func (p *KafkaPublisher) createRecord(msgType MessageType, chainId uint64, blockNumber uint64, timestamp time.Time, msgJson []byte) (*kgo.Record, error) { // Create headers with metadata headers := []kgo.RecordHeader{ - {Key: "chain_id", Value: []byte(fmt.Sprintf("%d", data.ChainId))}, - {Key: "block_number", Value: []byte(fmt.Sprintf("%d", data.Block.Number))}, - {Key: "sign", Value: []byte(fmt.Sprintf("%d", msg.Sign))}, - {Key: "insert_timestamp", Value: []byte(insertTimestamp.Format(time.RFC3339Nano))}, + {Key: "chain_id", Value: []byte(fmt.Sprintf("%d", chainId))}, + {Key: "block_number", Value: []byte(fmt.Sprintf("%d", blockNumber))}, + {Key: "type", Value: []byte(fmt.Sprintf("%s", msgType))}, + {Key: "timestamp", Value: []byte(timestamp.Format(time.RFC3339Nano))}, {Key: "schema_version", Value: []byte("1")}, } return &kgo.Record{ - Topic: "insight.commit.blocks", - Key: []byte(fmt.Sprintf("blockdata-%d-%d-%s-%d", data.ChainId, data.Block.Number, data.Block.Hash, msg.Sign)), + Topic: fmt.Sprintf("insight.commit.blocks.%d", chainId), + Key: []byte(fmt.Sprintf("%d:%s:%d", chainId, msgType, blockNumber)), Value: msgJson, Headers: headers, - Partition: partition, + Partition: 0, }, nil } diff --git a/internal/storage/kafka_redis.go b/internal/storage/kafka_redis.go index 9c1a0ea..05d294c 100644 --- a/internal/storage/kafka_redis.go +++ b/internal/storage/kafka_redis.go @@ -184,27 +184,12 @@ func (kr *KafkaRedisConnector) ReplaceBlockData(data []common.BlockData) ([]comm oldBlocks := []common.BlockData{} - // Publish reorg event to Kafka - // TODO: Publish new blocks (the reorg handler will mark old ones as reverted) - if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { + // TODO: We need to fetch the old blocks from the primary data store + if err := kr.kafkaPublisher.PublishReorg(data, data); err != nil { return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) } - // Update cursor to track the highest block number - if len(data) > 0 { - var maxBlock *big.Int - for _, blockData := range data { - if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { - maxBlock = blockData.Block.Number - } - } - if maxBlock != nil { - if err := kr.SetLastPublishedBlockNumber(data[0].Block.ChainId, maxBlock); err != nil { - return nil, fmt.Errorf("failed to update published block cursor: %w", err) - } - } - } - + // save cursor return oldBlocks, nil } @@ -250,48 +235,48 @@ func (kr *KafkaRedisConnector) GetMaxBlockNumberInRange(chainId *big.Int, startB } func (kr *KafkaRedisConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { - return []common.BlockHeader{}, nil + return 
nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { - return QueryResult[common.TokenBalance]{Data: []common.TokenBalance{}}, nil + return QueryResult[common.TokenBalance]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { - return QueryResult[common.TokenTransfer]{Data: []common.TokenTransfer{}}, nil + return QueryResult[common.TokenTransfer]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { - return []common.BlockData{}, nil + return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { - return []*big.Int{}, nil + return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { - return []common.BlockData{}, nil + return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } -// Query methods return empty results as this connector uses Kafka for data delivery +// Query methods return errors as this is a write-only connector for streaming func (kr *KafkaRedisConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { - return QueryResult[common.Block]{Data: []common.Block{}}, nil + return QueryResult[common.Block]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { - return QueryResult[common.Transaction]{Data: []common.Transaction{}}, nil + return QueryResult[common.Transaction]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { - return QueryResult[common.Log]{Data: []common.Log{}}, nil + return QueryResult[common.Log]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { - return QueryResult[common.Trace]{Data: []common.Trace{}}, nil + return QueryResult[common.Trace]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { - return QueryResult[interface{}]{Aggregates: []map[string]interface{}{}}, nil + return 
QueryResult[interface{}]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } // Close closes the Redis connection From cd434a2d7054b4baf6e86e70105458b7c1f41fc1 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 19 Aug 2025 05:53:35 +0000 Subject: [PATCH 14/43] Update kafka-postgres -> kafka-redis config --- cmd/root.go | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 7eecdd0..61e10bc 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -131,14 +131,10 @@ func init() { rootCmd.PersistentFlags().String("storage-main-kafka-username", "", "Kafka username for main storage") rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") rootCmd.PersistentFlags().Bool("storage-main-kafka-enable-tls", true, "Enable TLS for Kafka connection in main storage") - rootCmd.PersistentFlags().String("storage-main-kafka-postgres-host", "", "PostgreSQL host for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-port", 5432, "PostgreSQL port for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().String("storage-main-kafka-postgres-username", "", "PostgreSQL username for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().String("storage-main-kafka-postgres-password", "", "PostgreSQL password for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().String("storage-main-kafka-postgres-database", "", "PostgreSQL database for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().String("storage-main-kafka-postgres-sslMode", "require", "PostgreSQL SSL mode for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-maxOpenConns", 25, "PostgreSQL max open connections for Kafka main storage bookkeeping") - rootCmd.PersistentFlags().Int("storage-main-kafka-postgres-maxIdleConns", 10, "PostgreSQL max idle connections for Kafka main storage bookkeeping") + rootCmd.PersistentFlags().String("storage-main-kafka-redis-host", "", "Redis host for Kafka main storage metadata") + rootCmd.PersistentFlags().Int("storage-main-kafka-redis-port", 6379, "Redis port for Kafka main storage metadata") + rootCmd.PersistentFlags().String("storage-main-kafka-redis-password", "", "Redis password for Kafka main storage metadata") + rootCmd.PersistentFlags().Int("storage-main-kafka-redis-db", 0, "Redis database number for Kafka main storage metadata") rootCmd.PersistentFlags().String("api-host", "localhost:3000", "API host") rootCmd.PersistentFlags().String("api-basicAuth-username", "", "API basic auth username") rootCmd.PersistentFlags().String("api-basicAuth-password", "", "API basic auth password") @@ -265,14 +261,10 @@ func init() { viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) viper.BindPFlag("storage.main.kafka.enable_tls", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) - viper.BindPFlag("storage.main.kafka.postgres.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-host")) - viper.BindPFlag("storage.main.kafka.postgres.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-port")) - viper.BindPFlag("storage.main.kafka.postgres.username", 
rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-username")) - viper.BindPFlag("storage.main.kafka.postgres.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-password")) - viper.BindPFlag("storage.main.kafka.postgres.database", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-database")) - viper.BindPFlag("storage.main.kafka.postgres.sslMode", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-sslMode")) - viper.BindPFlag("storage.main.kafka.postgres.maxOpenConns", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-maxOpenConns")) - viper.BindPFlag("storage.main.kafka.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-main-kafka-postgres-maxIdleConns")) + viper.BindPFlag("storage.main.kafka.redis.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-host")) + viper.BindPFlag("storage.main.kafka.redis.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-port")) + viper.BindPFlag("storage.main.kafka.redis.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-password")) + viper.BindPFlag("storage.main.kafka.redis.db", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-db")) viper.BindPFlag("api.host", rootCmd.PersistentFlags().Lookup("api-host")) viper.BindPFlag("api.basicAuth.username", rootCmd.PersistentFlags().Lookup("api-basicAuth-username")) viper.BindPFlag("api.basicAuth.password", rootCmd.PersistentFlags().Lookup("api-basicAuth-password")) From 4fd141d2e386795e9c972f2d4f7671dcf44da1c6 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 20 Aug 2025 06:56:12 +0000 Subject: [PATCH 15/43] Update schema to use replacing merge tree --- internal/storage/kafka_publisher.go | 10 +- .../0000_clickhouse_create_blocks_table.sql | 4 +- ...1_clickhouse_create_transactions_table.sql | 50 ++++-- .../0002_clickhouse_create_logs_table.sql | 20 ++- .../0003_clickhouse_create_traces_table.sql | 4 +- ...04_clickhouse_create_insert_null_table.sql | 4 +- .../0005_clickhouse_create_insert_data_mv.sql | 6 +- ...0006_clickhouse_create_token_transfers.sql | 4 +- ...7_clickhouse_create_token_transfers_mv.sql | 33 ++-- .../0008_clickhouse_create_token_balance.sql | 44 ----- .../0008_clickhouse_create_token_balances.sql | 64 +++++++ ...009_clickhouse_create_token_balance_mv.sql | 157 ----------------- ...09_clickhouse_create_token_balances_mv.sql | 161 ++++++++++++++++++ ...clickhouse_create_address_transactions.sql | 21 ++- ...ckhouse_create_address_transactions_mv.sql | 6 +- ...12_clickhouse_create_address_transfers.sql | 4 +- ...clickhouse_create_address_transfers_mv.sql | 4 +- 17 files changed, 337 insertions(+), 259 deletions(-) delete mode 100644 internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql create mode 100644 internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql delete mode 100644 internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql create mode 100644 internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index d3376ce..90f3ca3 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -37,14 +37,14 @@ type PublishableMessagePayload struct { type PublishableMessageBlockData struct { common.BlockData - Sign int8 `json:"sign"` + IsDeleted int8 `json:"is_deleted"` InsertTimestamp time.Time `json:"insert_timestamp"` } type PublishableMessageRevert 
struct { ChainId uint64 `json:"chain_id"` BlockNumber uint64 `json:"block_number"` - Sign int8 `json:"sign"` + IsDeleted int8 `json:"is_deleted"` InsertTimestamp time.Time `json:"insert_timestamp"` } @@ -233,11 +233,11 @@ func (p *KafkaPublisher) createBlockDataMessage(block common.BlockData, isDelete data := PublishableMessageBlockData{ BlockData: block, - Sign: 1, + IsDeleted: 0, InsertTimestamp: timestamp, } if isDeleted { - data.Sign = -1 + data.IsDeleted = 1 } msg := PublishableMessagePayload{ @@ -260,7 +260,7 @@ func (p *KafkaPublisher) createBlockRevertMessage(chainId uint64, blockNumber ui data := PublishableMessageRevert{ ChainId: chainId, BlockNumber: blockNumber, - Sign: 1, + IsDeleted: 0, InsertTimestamp: timestamp, } diff --git a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index b311f24..fa349c6 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -23,11 +23,11 @@ CREATE TABLE IF NOT EXISTS blocks ( `base_fee_per_gas` Nullable(UInt64), `insert_timestamp` DateTime DEFAULT now(), - `sign` Int8 DEFAULT 1, + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql index 02a0294..eb5787c 100644 --- a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql +++ b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql @@ -32,8 +32,8 @@ CREATE TABLE IF NOT EXISTS transactions ( `logs_bloom` Nullable(String), `status` Nullable(UInt64), - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, @@ -45,14 +45,7 @@ CREATE TABLE IF NOT EXISTS transactions ( PROJECTION from_address_projection ( SELECT - chain_id, - block_number, - block_timestamp, - hash, - from_address, - to_address, - value, - data + * ORDER BY chain_id, from_address, @@ -62,21 +55,42 @@ CREATE TABLE IF NOT EXISTS transactions ( PROJECTION to_address_projection ( SELECT - chain_id, - block_number, - block_timestamp, - hash, - from_address, - to_address, - value, - data + * ORDER BY chain_id, to_address, block_number, hash + ), + PROJECTION from_address_state_projection + ( + SELECT + chain_id, + from_address, + countState() AS tx_count_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + from_address + ), + PROJECTION to_address_state_projection + ( + SELECT + chain_id, + to_address, + countState() AS tx_count_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + 
maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + to_address ) -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number, hash) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql index e327edb..139d7dd 100644 --- a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -13,8 +13,8 @@ CREATE TABLE IF NOT EXISTS logs ( `topic_2` String, `topic_3` String, - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, @@ -48,8 +48,24 @@ CREATE TABLE IF NOT EXISTS logs ( transaction_index, log_index, address + ), + PROJECTION address_topic0_state_projection + ( + SELECT + chain_id, + address, + topic_0, + countState() AS log_count_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + address, + topic_0 ) -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number, transaction_hash, log_index) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql index 17a032b..289f690 100644 --- a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql +++ b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql @@ -21,8 +21,8 @@ CREATE TABLE IF NOT EXISTS traces ( `reward_type` LowCardinality(Nullable(String)), `refund_address` Nullable(FixedString(42)), - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 2, @@ -52,7 +52,7 @@ CREATE TABLE IF NOT EXISTS traces ( trace_address ) -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, transaction_hash, trace_address) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql index 46f1541..3cc7b1a 100644 --- a/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql +++ b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql @@ -94,6 +94,6 @@ CREATE TABLE IF NOT EXISTS insert_null_block_data ( refund_address Nullable(FixedString(42)) )), - 
sign Int8 DEFAULT 1, - insert_timestamp DateTime DEFAULT now() + insert_timestamp DateTime DEFAULT now(), + is_deleted Int8 DEFAULT 0 ) ENGINE = Null; diff --git a/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql index f7c7c46..1a067f8 100644 --- a/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql +++ b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql @@ -65,7 +65,7 @@ SELECT t.30 AS logs_bloom, t.31 AS status, insert_timestamp, - sign + is_deleted FROM insert_null_block_data ARRAY JOIN transactions AS t; @@ -87,7 +87,7 @@ SELECT l.11 AS topic_2, l.12 AS topic_3, insert_timestamp, - sign + is_deleted FROM insert_null_block_data ARRAY JOIN logs AS l; @@ -117,6 +117,6 @@ SELECT tr.19 AS reward_type, tr.20 AS refund_address, insert_timestamp, - sign + is_deleted FROM insert_null_block_data ARRAY JOIN traces AS tr; diff --git a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql index 0d6ef92..4afdcda 100644 --- a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql +++ b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql @@ -14,8 +14,8 @@ CREATE TABLE IF NOT EXISTS token_transfers `log_index` UInt64, `batch_index` Nullable(UInt16) DEFAULT NULL, - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, @@ -54,7 +54,7 @@ CREATE TABLE IF NOT EXISTS token_transfers log_index ) ) -ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) ORDER BY (chain_id, token_address, block_number, transaction_index, log_index) SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql index e03b1a0..7c09aea 100644 --- a/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql +++ b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql @@ -16,8 +16,8 @@ SELECT reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS amount, log_index, CAST(NULL AS Nullable(UInt16)) AS batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM logs WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' -- Transfer AND length(topic_1) = 66 AND startsWith(topic_1, '0x') @@ -43,8 +43,8 @@ SELECT toUInt8(1) AS amount, log_index, CAST(NULL AS Nullable(UInt16)) AS batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM logs WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' AND length(topic_1) = 66 AND startsWith(topic_1, '0x') @@ -70,8 +70,8 @@ SELECT reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, log_index, toNullable(toUInt16(0)) AS batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM logs WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' -- TransferSingle AND length(topic_2) = 66 AND length(topic_3) = 66 @@ 
-95,12 +95,21 @@ SELECT reinterpretAsUInt256(reverse(unhex(amount_hex))) AS amount, log_index, toNullable(toUInt16(array_index - 1)) AS batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM ( SELECT - chain_id, address, topic_2, topic_3, - block_number, block_timestamp, transaction_hash, transaction_index, log_index, sign, insert_timestamp, + chain_id, + address, + topic_2, + topic_3, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + is_deleted, + insert_timestamp, toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64))))) AS ids_offset, toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64))))) AS amounts_offset, toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + ids_offset * 2, 64))))) AS ids_length, @@ -136,8 +145,8 @@ SELECT reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, log_index, CAST(NULL AS Nullable(UInt16)) AS batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM logs WHERE topic_0 = '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' AND length(topic_1) = 66 diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql deleted file mode 100644 index 0cf38c9..0000000 --- a/internal/tools/clickhouse/0008_clickhouse_create_token_balance.sql +++ /dev/null @@ -1,44 +0,0 @@ -CREATE TABLE IF NOT EXISTS token_balances -( - `chain_id` UInt256, - `token_type` LowCardinality(String), - `token_address` FixedString(42), - `owner_address` FixedString(42), - `token_id` UInt256, - - `balance_state` AggregateFunction(sum, Int256), - `last_block_number_state` AggregateFunction(max, UInt256), - `last_block_timestamp_state` AggregateFunction(max, DateTime), - - INDEX idx_last_block_number (finalizeAggregation(last_block_number_state)) TYPE minmax GRANULARITY 1, - INDEX idx_last_block_timestamp (finalizeAggregation(last_block_timestamp_state)) TYPE minmax GRANULARITY 1, - - PROJECTION owner_balances_projection - ( - SELECT - chain_id, - owner_address, - token_address, - token_id, - sumMerge(balance_state) AS balance, - maxMerge(last_block_number_state) AS last_block_number, - maxMerge(last_block_timestamp_state) AS last_block_timestamp - GROUP BY chain_id, owner_address, token_address, token_id - ), - PROJECTION token_projection - ( - SELECT - chain_id, - token_address, - token_id, - owner_address, - balance_state, - last_block_number_state, - last_block_timestamp_state - ORDER BY chain_id, token_address, token_id, owner_address - ) -) -ENGINE = AggregatingMergeTree -PARTITION BY chain_id -ORDER BY (chain_id, owner_address, token_address, token_id) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql new file mode 100644 index 0000000..a0ed08b --- /dev/null +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql @@ -0,0 +1,64 @@ +CREATE TABLE IF NOT EXISTS token_balances +( + `chain_id` UInt256, + `token_type` LowCardinality(String), + `token_address` FixedString(42), + `owner_address` FixedString(42), + `token_id` UInt256, + + -- Normalized delta: positive for incoming, negative for outgoing + `balance_delta` Int256, + + -- Transaction details for ordering and deduplication + 
`block_number` UInt256, + `block_timestamp` DateTime, + `transaction_hash` FixedString(66), + `transaction_index` UInt64, + `log_index` UInt64, + `direction` Enum8('from' = 1, 'to' = 2), -- To make each transfer create 2 unique rows + + `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_token_address token_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_owner_address owner_address TYPE bloom_filter GRANULARITY 3, + + -- Projection for efficient balance queries by owner + PROJECTION owner_balances_projection + ( + SELECT + chain_id, + owner_address, + token_address, + token_id, + sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY chain_id, owner_address, token_address, token_id + ORDER BY chain_id, owner_address, token_address, token_id + ), + + -- Projection for efficient balance queries by token + PROJECTION token_balances_projection + ( + SELECT + chain_id, + token_address, + token_id, + owner_address, + sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY chain_id, token_address, token_id, owner_address + ORDER BY chain_id, token_address, token_id, owner_address + ) +) +ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) +PARTITION BY chain_id +ORDER BY (chain_id, owner_address, token_address, token_id, block_number, transaction_index, log_index, direction) +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql deleted file mode 100644 index be000df..0000000 --- a/internal/tools/clickhouse/0009_clickhouse_create_token_balance_mv.sql +++ /dev/null @@ -1,157 +0,0 @@ --- ERC20 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - owner_address, - token_id, - sumState(delta) AS balance_state, - maxState(block_number) AS last_block_number_state, - maxState(block_timestamp) AS last_block_timestamp_state -FROM -( - -- FROM side (negative) - SELECT - chain_id, - token_type, - token_address, - token_id, - from_address AS owner_address, - toInt256(amount) * (-1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc20' - UNION ALL - -- TO side (positive) - SELECT - chain_id, - token_type, - token_address, - token_id, - to_address AS owner_address, - toInt256(amount) * (+1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc20' -) -GROUP BY chain_id, token_type, token_address, owner_address, token_id; - --- ERC721 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - owner_address, - token_id, - sumState(delta) AS balance_state, - maxState(block_number) AS last_block_number_state, - 
maxState(block_timestamp) AS last_block_timestamp_state -FROM -( - SELECT - chain_id, - token_type, - token_address, - from_address AS owner_address, - token_id, - toInt256(1) * (-1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc721' - UNION ALL - SELECT - chain_id, - token_type, - token_address, - to_address AS owner_address, - token_id, - toInt256(1) * (+1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc721' -) -GROUP BY chain_id, token_type, token_address, owner_address, token_id; - --- ERC1155 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - owner_address, - token_id, - sumState(delta) AS balance_state, - maxState(block_number) AS last_block_number_state, - maxState(block_timestamp) AS last_block_timestamp_state -FROM -( - SELECT - chain_id, - token_type, - token_address, - from_address AS owner_address, - token_id, - toInt256(amount) * (-1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc1155' - UNION ALL - SELECT - chain_id, - token_type, - token_address, - to_address AS owner_address, - token_id, - toInt256(amount) * (+1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc1155' -) -GROUP BY chain_id, token_type, token_address, owner_address, token_id; - --- ERC6909 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - owner_address, - token_id, - sumState(delta) AS balance_state, - maxState(block_number) AS last_block_number_state, - maxState(block_timestamp) AS last_block_timestamp_state -FROM -( - SELECT - chain_id, - token_type, - token_address, - from_address AS owner_address, - token_id, - toInt256(amount) * (-1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc6909' - UNION ALL - SELECT - chain_id, - token_type, - token_address, - to_address AS owner_address, - token_id, - toInt256(amount) * (+1) * sign AS delta, - block_number, - block_timestamp - FROM token_transfers WHERE token_type = 'erc6909' -) -GROUP BY chain_id, token_type, token_address, owner_address, token_id; \ No newline at end of file diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql new file mode 100644 index 0000000..63e523e --- /dev/null +++ b/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql @@ -0,0 +1,161 @@ +-- ERC20 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_mv +TO token_balances +AS +-- FROM side (outgoing, negative delta) +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc20' +UNION ALL +-- TO side (incoming, positive delta) +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc20'; + +-- ERC721 +CREATE 
MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_mv +TO token_balances +AS +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -1 AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc721' +UNION ALL +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + 1 AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc721'; + +-- ERC1155 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_mv +TO token_balances +AS +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc1155' +UNION ALL +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc1155'; + +-- ERC6909 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_mv +TO token_balances +AS +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc6909' +UNION ALL +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc6909'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql index c33638e..55ed9f9 100644 --- a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql +++ b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql @@ -32,12 +32,27 @@ CREATE TABLE IF NOT EXISTS address_transactions ( `logs_bloom` Nullable(String), `status` Nullable(UInt64), - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3 -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) + INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, + + PROJECTION address_total_count_projection + ( + SELECT + chain_id, + address, + countState() AS tx_count_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + address + ) +) ENGINE = 
ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, address, block_number, hash, transaction_index) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql index 46d64d3..48c4cb2 100644 --- a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql +++ b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql @@ -34,9 +34,9 @@ SELECT blob_gas_price, logs_bloom, status, - - sign, - insert_timestamp + + insert_timestamp, + is_deleted FROM transactions ARRAY JOIN arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql index 2e8d071..2600e59 100644 --- a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql +++ b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql @@ -13,12 +13,12 @@ CREATE TABLE IF NOT EXISTS address_transfers ( `log_index` UInt64, `batch_index` Nullable(UInt16) DEFAULT NULL, - `sign` Int8 DEFAULT 1, `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` Int8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3 -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, address, block_number, transaction_hash, transaction_index) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql index 72a3ebb..9256143 100644 --- a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql +++ b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql @@ -15,8 +15,8 @@ SELECT amount, log_index, batch_index, - sign, - insert_timestamp + insert_timestamp, + is_deleted FROM token_transfers ARRAY JOIN arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file From 3b9f6943e1fc5ef7cb8264d8ec6b77953a5c3bc4 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 20 Aug 2025 20:32:14 +0000 Subject: [PATCH 16/43] Fix schema --- .../0000_clickhouse_create_blocks_table.sql | 2 +- ...1_clickhouse_create_transactions_table.sql | 2 +- .../0002_clickhouse_create_logs_table.sql | 2 +- .../0003_clickhouse_create_traces_table.sql | 2 +- ...04_clickhouse_create_insert_null_table.sql | 2 +- .../0005_clickhouse_create_insert_data_mv.sql | 2 +- ...0006_clickhouse_create_token_transfers.sql | 56 ++++++++++++++++++- .../0008_clickhouse_create_token_balances.sql | 10 +--- ...clickhouse_create_address_transactions.sql | 2 +- ...12_clickhouse_create_address_transfers.sql | 43 +++++++++++++- 10 files changed, 106 insertions(+), 17 deletions(-) diff --git a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql 
b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index fa349c6..a1d1979 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -23,7 +23,7 @@ CREATE TABLE IF NOT EXISTS blocks ( `base_fee_per_gas` Nullable(UInt64), `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, diff --git a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql index eb5787c..11dff13 100644 --- a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql +++ b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql @@ -33,7 +33,7 @@ CREATE TABLE IF NOT EXISTS transactions ( `status` Nullable(UInt64), `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, diff --git a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql index 139d7dd..89f6e1c 100644 --- a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -14,7 +14,7 @@ CREATE TABLE IF NOT EXISTS logs ( `topic_3` String, `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, diff --git a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql index 289f690..8f69a1f 100644 --- a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql +++ b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql @@ -22,7 +22,7 @@ CREATE TABLE IF NOT EXISTS traces ( `refund_address` Nullable(FixedString(42)), `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 2, diff --git a/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql index 3cc7b1a..8597fcd 100644 --- a/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql +++ b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql @@ -95,5 +95,5 @@ CREATE TABLE IF NOT EXISTS insert_null_block_data ( )), insert_timestamp DateTime DEFAULT now(), - is_deleted Int8 DEFAULT 0 + is_deleted UInt8 DEFAULT 0 ) ENGINE = Null; diff --git a/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql index 1a067f8..b10c379 100644 --- a/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql +++ b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql @@ -25,7 +25,7 @@ SELECT block.20 AS withdrawals_root, block.21 AS base_fee_per_gas, insert_timestamp, - sign + is_deleted FROM insert_null_block_data; CREATE MATERIALIZED VIEW IF 
NOT EXISTS insert_transactions_mv diff --git a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql index 4afdcda..9007649 100644 --- a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql +++ b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql @@ -15,7 +15,7 @@ CREATE TABLE IF NOT EXISTS token_transfers `batch_index` Nullable(UInt16) DEFAULT NULL, `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, @@ -52,6 +52,60 @@ CREATE TABLE IF NOT EXISTS token_transfers block_number, transaction_index, log_index + ), + PROJECTION from_address_state_projection ( + SELECT + chain_id, + from_address, + token_address, + token_type, + countState() AS transfer_count_state, + sumState(toInt256(amount)) AS total_amount_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + from_address, + token_address, + token_type + ), + PROJECTION to_address_state_projection ( + SELECT + chain_id, + to_address, + token_address, + token_type, + countState() AS transfer_count_state, + sumState(toInt256(amount)) AS total_amount_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + to_address, + token_address, + token_type + ), + PROJECTION token_state_projection ( + SELECT + chain_id, + token_address, + token_id, + token_type, + countState() AS transfer_count_state, + sumState(toInt256(amount)) AS total_volume_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + token_address, + token_id, + token_type ) ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql index a0ed08b..11e0c6a 100644 --- a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql @@ -18,13 +18,12 @@ CREATE TABLE IF NOT EXISTS token_balances `direction` Enum8('from' = 1, 'to' = 2), -- To make each transfer create 2 unique rows `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_token_address token_address TYPE bloom_filter GRANULARITY 3, INDEX idx_owner_address owner_address TYPE bloom_filter GRANULARITY 3, - -- Projection for efficient balance queries by owner PROJECTION owner_balances_projection ( SELECT @@ -32,16 +31,14 @@ CREATE TABLE IF NOT EXISTS token_balances owner_address, token_address, token_id, - sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state + sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, minState(block_number) AS min_block_number_state, 
minState(block_timestamp) AS min_block_timestamp_state, maxState(block_number) AS max_block_number_state, maxState(block_timestamp) AS max_block_timestamp_state GROUP BY chain_id, owner_address, token_address, token_id - ORDER BY chain_id, owner_address, token_address, token_id ), - -- Projection for efficient balance queries by token PROJECTION token_balances_projection ( SELECT @@ -49,13 +46,12 @@ CREATE TABLE IF NOT EXISTS token_balances token_address, token_id, owner_address, - sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state + sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, minState(block_number) AS min_block_number_state, minState(block_timestamp) AS min_block_timestamp_state, maxState(block_number) AS max_block_number_state, maxState(block_timestamp) AS max_block_timestamp_state GROUP BY chain_id, token_address, token_id, owner_address - ORDER BY chain_id, token_address, token_id, owner_address ) ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) diff --git a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql index 55ed9f9..11179d7 100644 --- a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql +++ b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql @@ -33,7 +33,7 @@ CREATE TABLE IF NOT EXISTS address_transactions ( `status` Nullable(UInt64), `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql index 2600e59..4b9b864 100644 --- a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql +++ b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql @@ -14,10 +14,49 @@ CREATE TABLE IF NOT EXISTS address_transfers ( `batch_index` Nullable(UInt16) DEFAULT NULL, `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` Int8 DEFAULT 0, + `is_deleted` UInt8 DEFAULT 0, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3 + INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, + + PROJECTION address_state_projection ( + SELECT + chain_id, + address, + address_type, + token_address, + token_type, + countState() AS transfer_count_state, + sumState(toInt256(amount)) AS total_amount_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + address, + address_type, + token_address, + token_type + ), + PROJECTION address_total_state_projection ( + SELECT + chain_id, + address, + token_address, + token_type, + countState() AS transfer_count_state, + sumState(toInt256(amount)) AS total_amount_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY + chain_id, + address, + token_address, + token_type + ) ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY 
(chain_id, address, block_number, transaction_hash, transaction_index) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) From 92a35ab4878372f5890e05537d084e26af63e018 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 22 Aug 2025 21:46:23 +0000 Subject: [PATCH 17/43] Badger & S3 --- cmd/orchestrator.go | 9 +- cmd/root.go | 45 +- configs/config.go | 32 +- go.mod | 51 +- go.sum | 120 ++- internal/orchestrator/orchestrator.go | 42 +- internal/orchestrator/poller.go | 2 +- internal/orchestrator/validator.go | 5 +- internal/storage/badger.go | 479 +++++++++++ internal/storage/clickhouse.go | 8 + internal/storage/connector.go | 94 ++- internal/storage/s3.go | 1071 +++++++++++++++++++++++++ 12 files changed, 1880 insertions(+), 78 deletions(-) create mode 100644 internal/storage/badger.go create mode 100644 internal/storage/s3.go diff --git a/cmd/orchestrator.go b/cmd/orchestrator.go index 84665df..6d8a357 100644 --- a/cmd/orchestrator.go +++ b/cmd/orchestrator.go @@ -32,12 +32,19 @@ func RunOrchestrator(cmd *cobra.Command, args []string) { if err != nil { log.Fatal().Err(err).Msg("Failed to create orchestrator") } + // Start Prometheus metrics server log.Info().Msg("Starting Metrics Server on port 2112") go func() { http.Handle("/metrics", promhttp.Handler()) - http.ListenAndServe(":2112", nil) + if err := http.ListenAndServe(":2112", nil); err != nil { + log.Error().Err(err).Msg("Metrics server error") + } }() + // Start orchestrator (blocks until shutdown) + // The orchestrator handles signals internally and coordinates shutdown orchestrator.Start() + + log.Info().Msg("Shutdown complete") } diff --git a/cmd/root.go b/cmd/root.go index 61e10bc..d9548fb 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -135,6 +135,29 @@ func init() { rootCmd.PersistentFlags().Int("storage-main-kafka-redis-port", 6379, "Redis port for Kafka main storage metadata") rootCmd.PersistentFlags().String("storage-main-kafka-redis-password", "", "Redis password for Kafka main storage metadata") rootCmd.PersistentFlags().Int("storage-main-kafka-redis-db", 0, "Redis database number for Kafka main storage metadata") + // Storage type selection flags + rootCmd.PersistentFlags().String("storage-staging-type", "auto", "Storage type for staging (auto, clickhouse, postgres, kafka, badger, s3)") + rootCmd.PersistentFlags().String("storage-main-type", "auto", "Storage type for main (auto, clickhouse, postgres, kafka, badger, s3)") + rootCmd.PersistentFlags().String("storage-orchestrator-type", "auto", "Storage type for orchestrator (auto, clickhouse, postgres, badger)") + // BadgerDB flags for staging storage + rootCmd.PersistentFlags().String("storage-staging-badger-path", "", "BadgerDB path for staging storage") + // BadgerDB flags for orchestrator storage + rootCmd.PersistentFlags().String("storage-orchestrator-badger-path", "", "BadgerDB path for orchestrator storage") + // S3 flags for main storage + rootCmd.PersistentFlags().String("storage-main-s3-bucket", "", "S3 bucket for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-region", "", "S3 region for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-prefix", "", "S3 key prefix for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-accessKeyId", "", "S3 access key ID for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-secretAccessKey", "", "S3 secret access key for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-endpoint", "", "S3 endpoint 
URL for main storage (for S3-compatible services)") + rootCmd.PersistentFlags().String("storage-main-s3-format", "parquet", "S3 storage format for main storage (parquet or json)") + rootCmd.PersistentFlags().Int64("storage-main-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for main storage") + rootCmd.PersistentFlags().Int("storage-main-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for main storage") + rootCmd.PersistentFlags().Int("storage-main-s3-maxBlocksPerFile", 0, "S3 max blocks per file for main storage (0 = no limit)") + // S3 Parquet configuration + rootCmd.PersistentFlags().String("storage-main-s3-parquet-compression", "snappy", "Parquet compression type for S3 main storage") + rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-rowGroupSize", 256, "Parquet row group size in MB for S3 main storage") + rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-pageSize", 8192, "Parquet page size in KB for S3 main storage") rootCmd.PersistentFlags().String("api-host", "localhost:3000", "API host") rootCmd.PersistentFlags().String("api-basicAuth-username", "", "API basic auth username") rootCmd.PersistentFlags().String("api-basicAuth-password", "", "API basic auth password") @@ -260,11 +283,29 @@ func init() { viper.BindPFlag("storage.main.kafka.brokers", rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) - viper.BindPFlag("storage.main.kafka.enable_tls", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) + viper.BindPFlag("storage.main.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) viper.BindPFlag("storage.main.kafka.redis.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-host")) viper.BindPFlag("storage.main.kafka.redis.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-port")) viper.BindPFlag("storage.main.kafka.redis.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-password")) viper.BindPFlag("storage.main.kafka.redis.db", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-db")) + viper.BindPFlag("storage.staging.type", rootCmd.PersistentFlags().Lookup("storage-staging-type")) + viper.BindPFlag("storage.main.type", rootCmd.PersistentFlags().Lookup("storage-main-type")) + viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) + viper.BindPFlag("storage.staging.badger.path", rootCmd.PersistentFlags().Lookup("storage-staging-badger-path")) + viper.BindPFlag("storage.orchestrator.badger.path", rootCmd.PersistentFlags().Lookup("storage-orchestrator-badger-path")) + viper.BindPFlag("storage.main.s3.bucket", rootCmd.PersistentFlags().Lookup("storage-main-s3-bucket")) + viper.BindPFlag("storage.main.s3.region", rootCmd.PersistentFlags().Lookup("storage-main-s3-region")) + viper.BindPFlag("storage.main.s3.prefix", rootCmd.PersistentFlags().Lookup("storage-main-s3-prefix")) + viper.BindPFlag("storage.main.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("storage-main-s3-accessKeyId")) + viper.BindPFlag("storage.main.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("storage-main-s3-secretAccessKey")) + viper.BindPFlag("storage.main.s3.endpoint", rootCmd.PersistentFlags().Lookup("storage-main-s3-endpoint")) + 
viper.BindPFlag("storage.main.s3.format", rootCmd.PersistentFlags().Lookup("storage-main-s3-format")) + viper.BindPFlag("storage.main.s3.bufferSizeMB", rootCmd.PersistentFlags().Lookup("storage-main-s3-bufferSizeMB")) + viper.BindPFlag("storage.main.s3.bufferTimeoutSeconds", rootCmd.PersistentFlags().Lookup("storage-main-s3-bufferTimeoutSeconds")) + viper.BindPFlag("storage.main.s3.maxBlocksPerFile", rootCmd.PersistentFlags().Lookup("storage-main-s3-maxBlocksPerFile")) + viper.BindPFlag("storage.main.s3.parquet.compression", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-compression")) + viper.BindPFlag("storage.main.s3.parquet.rowGroupSize", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-rowGroupSize")) + viper.BindPFlag("storage.main.s3.parquet.pageSize", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-pageSize")) viper.BindPFlag("api.host", rootCmd.PersistentFlags().Lookup("api-host")) viper.BindPFlag("api.basicAuth.username", rootCmd.PersistentFlags().Lookup("api-basicAuth-username")) viper.BindPFlag("api.basicAuth.password", rootCmd.PersistentFlags().Lookup("api-basicAuth-password")) @@ -280,7 +321,7 @@ func init() { viper.BindPFlag("publisher.brokers", rootCmd.PersistentFlags().Lookup("publisher-brokers")) viper.BindPFlag("publisher.username", rootCmd.PersistentFlags().Lookup("publisher-username")) viper.BindPFlag("publisher.password", rootCmd.PersistentFlags().Lookup("publisher-password")) - viper.BindPFlag("publisher.enable_tls", rootCmd.PersistentFlags().Lookup("publisher-enable-tls")) + viper.BindPFlag("publisher.enableTLS", rootCmd.PersistentFlags().Lookup("publisher-enable-tls")) viper.BindPFlag("publisher.blocks.enabled", rootCmd.PersistentFlags().Lookup("publisher-blocks-enabled")) viper.BindPFlag("publisher.blocks.topicName", rootCmd.PersistentFlags().Lookup("publisher-blocks-topicName")) viper.BindPFlag("publisher.transactions.enabled", rootCmd.PersistentFlags().Lookup("publisher-transactions-enabled")) diff --git a/configs/config.go b/configs/config.go index 3777928..29703bb 100644 --- a/configs/config.go +++ b/configs/config.go @@ -60,9 +60,37 @@ const ( ) type StorageConnectionConfig struct { + Type string `mapstructure:"type"` // "auto", "clickhouse", "postgres", "kafka", "badger", "s3" Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` Postgres *PostgresConfig `mapstructure:"postgres"` Kafka *KafkaConfig `mapstructure:"kafka"` + Badger *BadgerConfig `mapstructure:"badger"` + S3 *S3Config `mapstructure:"s3"` +} + +type BadgerConfig struct { + Path string `mapstructure:"path"` +} + +type S3Config struct { + Bucket string `mapstructure:"bucket"` + Region string `mapstructure:"region"` + Prefix string `mapstructure:"prefix"` + AccessKeyID string `mapstructure:"accessKeyId"` + SecretAccessKey string `mapstructure:"secretAccessKey"` + Endpoint string `mapstructure:"endpoint"` + Format string `mapstructure:"format"` + Parquet *ParquetConfig `mapstructure:"parquet"` + // Buffering configuration + BufferSize int64 `mapstructure:"bufferSizeMB"` // Target buffer size in MB before flush (default 1024 MB = 1GB) + BufferTimeout int `mapstructure:"bufferTimeoutSeconds"` // Max time in seconds before flush (default 300 = 5 min) + MaxBlocksPerFile int `mapstructure:"maxBlocksPerFile"` // Max blocks per parquet file (0 = no limit, only size/timeout triggers) +} + +type ParquetConfig struct { + Compression string `mapstructure:"compression"` + RowGroupSize int64 `mapstructure:"rowGroupSize"` + PageSize int64 `mapstructure:"pageSize"` } type 
TableConfig struct { @@ -113,7 +141,7 @@ type KafkaConfig struct { Brokers string `mapstructure:"brokers"` Username string `mapstructure:"username"` Password string `mapstructure:"password"` - EnableTLS bool `mapstructure:"enable_tls"` + EnableTLS bool `mapstructure:"enableTLS"` Redis *RedisConfig `mapstructure:"redis"` } @@ -193,7 +221,7 @@ type PublisherConfig struct { Brokers string `mapstructure:"brokers"` Username string `mapstructure:"username"` Password string `mapstructure:"password"` - EnableTLS bool `mapstructure:"enable_tls"` + EnableTLS bool `mapstructure:"enableTLS"` Blocks BlockPublisherConfig `mapstructure:"blocks"` Transactions TransactionPublisherConfig `mapstructure:"transactions"` Traces TracePublisherConfig `mapstructure:"traces"` diff --git a/go.mod b/go.mod index 66d4ef5..68052a9 100644 --- a/go.mod +++ b/go.mod @@ -4,14 +4,20 @@ go 1.23.0 require ( github.com/ClickHouse/clickhouse-go/v2 v2.36.0 + github.com/aws/aws-sdk-go-v2 v1.38.0 + github.com/aws/aws-sdk-go-v2/config v1.31.0 + github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0 + github.com/dgraph-io/badger/v4 v4.8.0 github.com/ethereum/go-ethereum v1.15.11 github.com/gin-gonic/gin v1.10.0 github.com/gorilla/schema v1.4.1 github.com/holiman/uint256 v1.3.2 github.com/lib/pq v1.10.9 + github.com/parquet-go/parquet-go v0.25.1 github.com/prometheus/client_golang v1.20.4 + github.com/redis/go-redis/v9 v9.12.1 github.com/rs/zerolog v1.33.0 - github.com/spf13/cobra v1.8.1 + github.com/spf13/cobra v1.9.1 github.com/spf13/viper v1.18.0 github.com/stretchr/testify v1.10.0 github.com/swaggo/files v1.0.1 @@ -25,6 +31,21 @@ require ( github.com/KyleBanks/depth v1.2.1 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/andybalholm/brotli v1.1.1 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.18.4 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.3 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.28.0 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.0 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.37.0 // indirect + github.com/aws/smithy-go v1.22.5 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bits-and-blooms/bitset v1.20.0 // indirect github.com/bytedance/sonic v1.12.6 // indirect @@ -39,7 +60,9 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/deckarep/golang-set/v2 v2.6.0 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 // indirect + github.com/dgraph-io/ristretto/v2 v2.2.0 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/ethereum/c-kzg-4844/v2 v2.1.0 // indirect github.com/ethereum/go-verkle v0.2.2 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect @@ -47,6 +70,8 @@ require ( github.com/gin-contrib/sse 
v0.1.0 // indirect github.com/go-faster/city v1.0.1 // indirect github.com/go-faster/errors v0.7.1 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect @@ -58,6 +83,7 @@ require ( github.com/goccy/go-json v0.10.4 // indirect github.com/gofrs/flock v0.8.1 // indirect github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect + github.com/google/flatbuffers v25.2.10+incompatible // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/websocket v1.4.2 // indirect github.com/hashicorp/hcl v1.0.0 // indirect @@ -86,7 +112,6 @@ require ( github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect - github.com/redis/go-redis/v9 v9.12.1 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect @@ -96,7 +121,7 @@ require ( github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.6.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect + github.com/spf13/pflag v1.0.6 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/supranational/blst v0.3.14 // indirect @@ -106,18 +131,20 @@ require ( github.com/twmb/franz-go/pkg/kmsg v1.9.0 // indirect github.com/ugorji/go/codec v1.2.12 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect - go.opentelemetry.io/otel v1.36.0 // indirect - go.opentelemetry.io/otel/trace v1.36.0 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel v1.37.0 // indirect + go.opentelemetry.io/otel/metric v1.37.0 // indirect + go.opentelemetry.io/otel/trace v1.37.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/arch v0.12.0 // indirect - golang.org/x/crypto v0.38.0 // indirect + golang.org/x/crypto v0.39.0 // indirect golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 // indirect - golang.org/x/net v0.40.0 // indirect - golang.org/x/sync v0.14.0 // indirect - golang.org/x/sys v0.33.0 // indirect - golang.org/x/text v0.25.0 // indirect - golang.org/x/tools v0.30.0 // indirect - google.golang.org/protobuf v1.36.1 // indirect + golang.org/x/net v0.41.0 // indirect + golang.org/x/sync v0.15.0 // indirect + golang.org/x/sys v0.34.0 // indirect + golang.org/x/text v0.26.0 // indirect + golang.org/x/tools v0.33.0 // indirect + google.golang.org/protobuf v1.36.6 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect rsc.io/tmplfunc v0.0.3 // indirect diff --git a/go.sum b/go.sum index 6302502..92d6bd5 100644 --- a/go.sum +++ b/go.sum @@ -12,10 +12,50 @@ github.com/VictoriaMetrics/fastcache v1.12.2 h1:N0y9ASrJ0F6h0QaC3o6uJb3NIZ9VKLjC github.com/VictoriaMetrics/fastcache v1.12.2/go.mod h1:AmC+Nzz1+3G2eCPapF6UcsnkThDcMsQicp4xDukwJYI= github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= +github.com/aws/aws-sdk-go-v2 v1.38.0 h1:UCRQ5mlqcFk9HJDIqENSLR3wiG1VTWlyUfLDEvY7RxU= +github.com/aws/aws-sdk-go-v2 v1.38.0/go.mod h1:9Q0OoGQoboYIAJyslFyF1f5K1Ryddop8gqMhWx/n4Wg= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0 
h1:6GMWV6CNpA/6fbFHnoAjrv4+LGfyTqZz2LtCHnspgDg= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0/go.mod h1:/mXlTIVG9jbxkqDnr5UQNQxW1HRYxeGklkM9vAFeabg= +github.com/aws/aws-sdk-go-v2/config v1.31.0 h1:9yH0xiY5fUnVNLRWO0AtayqwU1ndriZdN78LlhruJR4= +github.com/aws/aws-sdk-go-v2/config v1.31.0/go.mod h1:VeV3K72nXnhbe4EuxxhzsDc/ByrCSlZwUnWH52Nde/I= +github.com/aws/aws-sdk-go-v2/credentials v1.18.4 h1:IPd0Algf1b+Qy9BcDp0sCUcIWdCQPSzDoMK3a8pcbUM= +github.com/aws/aws-sdk-go-v2/credentials v1.18.4/go.mod h1:nwg78FjH2qvsRM1EVZlX9WuGUJOL5od+0qvm0adEzHk= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.3 h1:GicIdnekoJsjq9wqnvyi2elW6CGMSYKhdozE7/Svh78= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.3/go.mod h1:R7BIi6WNC5mc1kfRM7XM/VHC3uRWkjc396sfabq4iOo= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.3 h1:o9RnO+YZ4X+kt5Z7Nvcishlz0nksIt2PIzDglLMP0vA= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.3/go.mod h1:+6aLJzOG1fvMOyzIySYjOFjcguGvVRL68R+uoRencN4= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.3 h1:joyyUFhiTQQmVK6ImzNU9TQSNRNeD9kOklqTzyk5v6s= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.3/go.mod h1:+vNIyZQP3b3B1tSLI0lxvrU9cfM7gpdRXMFfm67ZcPc= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.3 h1:ZV2XK2L3HBq9sCKQiQ/MdhZJppH/rH0vddEAamsHUIs= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.3/go.mod h1:b9F9tk2HdHpbf3xbN7rUZcfmJI26N6NcJu/8OsBFI/0= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 h1:6+lZi2JeGKtCraAj1rpoZfKqnQ9SptseRZioejfUOLM= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0/go.mod h1:eb3gfbVIxIoGgJsi9pGne19dhCBpK6opTYpQqAmdy44= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.3 h1:3ZKmesYBaFX33czDl6mbrcHb6jeheg6LqjJhQdefhsY= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.3/go.mod h1:7ryVb78GLCnjq7cw45N6oUb9REl7/vNUwjvIqC5UgdY= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.3 h1:ieRzyHXypu5ByllM7Sp4hC5f/1Fy5wqxqY0yB85hC7s= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.3/go.mod h1:O5ROz8jHiOAKAwx179v+7sHMhfobFVi6nZt8DEyiYoM= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.3 h1:SE/e52dq9a05RuxzLcjT+S5ZpQobj3ie3UTaSf2NnZc= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.3/go.mod h1:zkpvBTsR020VVr8TOrwK2TrUW9pOir28sH5ECHpnAfo= +github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0 h1:egoDf+Geuuntmw79Mz6mk9gGmELCPzg5PFEABOHB+6Y= +github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0/go.mod h1:t9MDi29H+HDbkolTSQtbI0HP9DemAWQzUjmWC7LGMnE= +github.com/aws/aws-sdk-go-v2/service/sso v1.28.0 h1:Mc/MKBf2m4VynyJkABoVEN+QzkfLqGj0aiJuEe7cMeM= +github.com/aws/aws-sdk-go-v2/service/sso v1.28.0/go.mod h1:iS5OmxEcN4QIPXARGhavH7S8kETNL11kym6jhoS7IUQ= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.0 h1:6csaS/aJmqZQbKhi1EyEMM7yBW653Wy/B9hnBofW+sw= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.0/go.mod h1:59qHWaY5B+Rs7HGTuVGaC32m0rdpQ68N8QCN3khYiqs= +github.com/aws/aws-sdk-go-v2/service/sts v1.37.0 h1:MG9VFW43M4A8BYeAfaJJZWrroinxeTi2r3+SnmLQfSA= +github.com/aws/aws-sdk-go-v2/service/sts v1.37.0/go.mod h1:JdeBDPgpJfuS6rU/hNglmOigKhyEZtBmbraLE4GK1J8= +github.com/aws/smithy-go v1.22.5 h1:P9ATCXPMb2mPjYBgueqJNCA5S9UfktsW0tTxi+a7eqw= +github.com/aws/smithy-go v1.22.5/go.mod 
h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bits-and-blooms/bitset v1.20.0 h1:2F+rfL86jE2d/bmw7OhqUg2Sj/1rURkBn3MdfoPyRVU= github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= github.com/bytedance/sonic v1.12.6 h1:/isNmCUF2x3Sh8RAp/4mh4ZGkcFAX/hLrzrK3AvpRzk= github.com/bytedance/sonic v1.12.6/go.mod h1:B8Gt/XvtZ3Fqj+iSKMypzymZxw/FVwgIGKzMzT9r/rk= github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= @@ -44,9 +84,8 @@ github.com/consensys/bavard v0.1.27/go.mod h1:k/zVjHHC4B+PQy1Pg7fgvG3ALicQw540Cr github.com/consensys/gnark-crypto v0.16.0 h1:8Dl4eYmUWK9WmlP1Bj6je688gBRJCJbT8Mw4KoTAawo= github.com/consensys/gnark-crypto v0.16.0/go.mod h1:Ke3j06ndtPTVvo++PhGNgvm+lgpLvzbcE2MqljY7diU= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= -github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/crate-crypto/go-eth-kzg v1.3.0 h1:05GrhASN9kDAidaFJOda6A4BEvgvuXbazXg/0E3OOdI= github.com/crate-crypto/go-eth-kzg v1.3.0/go.mod h1:J9/u5sWfznSObptgfa92Jq8rTswn6ahQWEuiLHOjCUI= github.com/crate-crypto/go-ipa v0.0.0-20240724233137-53bbb0ceb27a h1:W8mUrRp6NOVl3J+MYp5kPMoUZPp7aOYHtaua31lwRHg= @@ -63,8 +102,16 @@ github.com/decred/dcrd/crypto/blake256 v1.0.0 h1:/8DMNYp9SGi5f0w7uCm6d6M4OU2rGFK github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs= +github.com/dgraph-io/badger/v4 v4.8.0 h1:JYph1ChBijCw8SLeybvPINizbDKWZ5n/GYbz2yhN/bs= +github.com/dgraph-io/badger/v4 v4.8.0/go.mod h1:U6on6e8k/RTbUWxqKR0MvugJuVmkxSNc79ap4917h4w= +github.com/dgraph-io/ristretto/v2 v2.2.0 h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM= +github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod 
h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/ethereum/c-kzg-4844/v2 v2.1.0 h1:gQropX9YFBhl3g4HYhwE70zq3IHFRgbbNPw0Shwzf5w= github.com/ethereum/c-kzg-4844/v2 v2.1.0/go.mod h1:TC48kOKjJKPbN7C++qIgt0TJzZ70QznYR7Ob+WXl57E= github.com/ethereum/go-ethereum v1.15.11 h1:JK73WKeu0WC0O1eyX+mdQAVHUV+UR1a9VB/domDngBU= @@ -89,6 +136,11 @@ github.com/go-faster/city v1.0.1 h1:4WAxSZ3V2Ws4QRDrscLEDcibJY8uf41H6AhXDrNDcGw= github.com/go-faster/city v1.0.1/go.mod h1:jKcUJId49qdW3L1qKHH/3wPeUstCVpVSXTM6vO3VcTw= github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AYg= github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= @@ -121,6 +173,8 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb h1:PBC98N2aIaM3XXiurYmW7fx4GZkL8feAMVq7nEjURHk= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= +github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= @@ -139,6 +193,8 @@ github.com/hashicorp/go-bexpr v0.1.10 h1:9kuI5PFotCboP3dkDYFr/wi0gg0QVbSNz5oFRpx github.com/hashicorp/go-bexpr v0.1.10/go.mod h1:oxlubA2vC/gFVfX1A6JGp7ls7uCDlfJn732ehYYg+g0= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/holiman/billy v0.0.0-20240216141850-2abb0c79d3c4 h1:X4egAf/gcS1zATw6wn4Ej8vjuVGxeHdan+bRb2ebyv4= github.com/holiman/billy v0.0.0-20240216141850-2abb0c79d3c4/go.mod h1:5GuXa7vkL8u9FkFuWdVvfR5ix8hRB7DbOAaYULamFpc= github.com/holiman/bloomfilter/v2 v2.0.3 h1:73e0e/V0tCydx14a0SCYS/EWCxgwLZ18CZcZKVu0fao= @@ -209,6 +265,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= +github.com/parquet-go/parquet-go v0.25.1 
h1:l7jJwNM0xrk0cnIIptWMtnSnuxRkwq53S+Po3KG8Xgo= +github.com/parquet-go/parquet-go v0.25.1/go.mod h1:AXBuotO1XiBtcqJb/FKFyjBG4aqa3aQAAWF3ZPzCanY= github.com/paulmach/orb v0.11.1 h1:3koVegMC4X/WeiXYz9iswopaTwMem53NzTJuTF20JzU= github.com/paulmach/orb v0.11.1/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/EnuLaLU= github.com/paulmach/protoscan v0.2.1/go.mod h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY= @@ -243,8 +301,8 @@ github.com/redis/go-redis/v9 v9.12.1 h1:k5iquqv27aBtnTm2tIkROUDp8JBXhXZIVu1InSgv github.com/redis/go-redis/v9 v9.12.1/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= @@ -268,10 +326,10 @@ github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= -github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= -github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.18.0 h1:pN6W1ub/G4OfnM+NR9p7xP9R6TltLUzp5JG9yZD3Qg0= github.com/spf13/viper v1.18.0/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -328,10 +386,14 @@ github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5t github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g= -go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= -go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= -go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= -go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel 
v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= +go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= +go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= +go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= +go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= golang.org/x/arch v0.12.0 h1:UsYJhbzPYGsT0HbEdmYcqtCv8UNGvnaL561NnIUvaKg= @@ -341,15 +403,15 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= -golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= +golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= +golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 h1:aAcj0Da7eBAtrTp03QXWvm88pSyOt+UgdZw2BFZ+lEw= golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8/go.mod h1:CQ1k9gNrJ50XIzaKCRR2hssIjF07kZFEiieALBM/ARQ= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM= -golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w= +golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -358,15 +420,15 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= -golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= +golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= +golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync 
v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= -golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= +golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -383,8 +445,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -393,8 +455,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= -golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= +golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= +golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -402,16 +464,16 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY= -golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY= +golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= +golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= golang.org/x/xerrors 
v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk= -google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index 154dc89..1d879dc 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -21,6 +21,8 @@ type Orchestrator struct { committerEnabled bool reorgHandlerEnabled bool cancel context.CancelFunc + wg sync.WaitGroup + shutdownOnce sync.Once } func NewOrchestrator(rpc rpc.IRPCClient) (*Orchestrator, error) { @@ -43,8 +45,6 @@ func (o *Orchestrator) Start() { ctx, cancel := context.WithCancel(context.Background()) o.cancel = cancel - var wg sync.WaitGroup - sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT) @@ -58,30 +58,32 @@ func (o *Orchestrator) Start() { workModeMonitor := NewWorkModeMonitor(o.rpc, o.storage) if o.pollerEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() pollerWorkModeChan := make(chan WorkMode, 1) workModeMonitor.RegisterChannel(pollerWorkModeChan) defer workModeMonitor.UnregisterChannel(pollerWorkModeChan) + poller := NewPoller(o.rpc, o.storage, WithPollerWorkModeChan(pollerWorkModeChan)) poller.Start(ctx) + log.Info().Msg("Poller completed") }() } if o.failureRecovererEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() failureRecoverer := NewFailureRecoverer(o.rpc, o.storage) failureRecoverer.Start(ctx) }() } if o.committerEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() committerWorkModeChan := make(chan WorkMode, 1) workModeMonitor.RegisterChannel(committerWorkModeChan) defer workModeMonitor.UnregisterChannel(committerWorkModeChan) @@ -92,33 +94,35 @@ func (o *Orchestrator) Start() { } if o.reorgHandlerEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() reorgHandler := NewReorgHandler(o.rpc, o.storage) reorgHandler.Start(ctx) }() } - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() workModeMonitor.Start(ctx) }() // The chain tracker is always running - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() chainTracker := NewChainTracker(o.rpc) chainTracker.Start(ctx) }() - wg.Wait() -} + o.wg.Wait() -func (o *Orchestrator) Shutdown() { - if o.cancel != nil { - 
o.cancel() + // Waiting for all goroutines to complete + + if err := o.storage.Close(); err != nil { + log.Error().Err(err).Msg("Error closing storage connections") } + + log.Info().Msg("Orchestrator shutdown complete") } diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index 5e3b313..31a64ae 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -158,7 +158,7 @@ func (p *Poller) Start(ctx context.Context) { lastPolledBlock := p.Poll(pollCtx, blockNumbers) if p.reachedPollLimit(lastPolledBlock) { - log.Debug().Msg("Reached poll limit, exiting poller") + log.Info().Msgf("Reached poll limit at block %s, completing poller", lastPolledBlock.String()) cancel() return } diff --git a/internal/orchestrator/validator.go b/internal/orchestrator/validator.go index db03cbe..b37b986 100644 --- a/internal/orchestrator/validator.go +++ b/internal/orchestrator/validator.go @@ -98,8 +98,11 @@ func (v *Validator) ValidateBlock(blockData common.BlockData) (valid bool, err e return true, nil } - // TODO: remove this once we know how to validate all tx types for _, tx := range blockData.Transactions { + if tx.TransactionType == 0x7E { + // TODO: Need to properly validate op-stack deposit transaction + return true, nil + } if tx.TransactionType > 4 { // Currently supported types are 0-4 log.Warn().Msgf("Skipping transaction root validation for block %s due to unsupported transaction type %d", blockData.Block.Number, tx.TransactionType) return true, nil diff --git a/internal/storage/badger.go b/internal/storage/badger.go new file mode 100644 index 0000000..1ffd431 --- /dev/null +++ b/internal/storage/badger.go @@ -0,0 +1,479 @@ +package storage + +import ( + "bytes" + "encoding/gob" + "fmt" + "math/big" + "sort" + "strings" + "sync" + "time" + + "github.com/dgraph-io/badger/v4" + "github.com/dgraph-io/badger/v4/options" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" +) + +type BadgerConnector struct { + db *badger.DB + mu sync.RWMutex + gcTicker *time.Ticker + stopGC chan struct{} +} + +func NewBadgerConnector(cfg *config.BadgerConfig) (*BadgerConnector, error) { + opts := badger.DefaultOptions(cfg.Path) + + opts.ValueLogFileSize = 1024 * 1024 * 1024 // 1GB + opts.BaseTableSize = 128 * 1024 * 1024 // 128MB + opts.BaseLevelSize = 128 * 1024 * 1024 // 128MB + opts.LevelSizeMultiplier = 10 // Aggressive growth + opts.NumMemtables = 10 // ~1.28GB + opts.MemTableSize = opts.BaseTableSize // 128MB per memtable + opts.NumLevelZeroTables = 10 + opts.NumLevelZeroTablesStall = 30 + opts.SyncWrites = false // Faster but less durable + opts.DetectConflicts = false // No need for ACID in staging + opts.NumCompactors = 4 // More compactors for parallel compaction + opts.CompactL0OnClose = true // Compact L0 tables on close + opts.ValueLogMaxEntries = 1000000 // More entries per value log + opts.ValueThreshold = 1024 // Store values > 1024 bytes in value log + opts.IndexCacheSize = 512 * 1024 * 1024 // 512MB index cache + opts.BlockCacheSize = 256 * 1024 * 1024 // 256MB block cache + opts.Compression = options.Snappy + + opts.Logger = nil // Disable badger's internal logging + + db, err := badger.Open(opts) + if err != nil { + return nil, fmt.Errorf("failed to open badger db: %w", err) + } + + bc := &BadgerConnector{ + db: db, + stopGC: make(chan struct{}), + } + + // Start GC routine + bc.gcTicker = time.NewTicker(time.Duration(60) * time.Second) + go bc.runGC() + + return bc, nil 
+} + +func (bc *BadgerConnector) runGC() { + for { + select { + case <-bc.gcTicker.C: + err := bc.db.RunValueLogGC(0.5) + if err != nil && err != badger.ErrNoRewrite { + log.Debug().Err(err).Msg("BadgerDB GC error") + } + case <-bc.stopGC: + return + } + } +} + +func (bc *BadgerConnector) Close() error { + if bc.gcTicker != nil { + bc.gcTicker.Stop() + close(bc.stopGC) + } + return bc.db.Close() +} + +// Key construction helpers +func blockKey(chainId *big.Int, blockNumber *big.Int) []byte { + return []byte(fmt.Sprintf("b:%d:%s", chainId.Uint64(), blockNumber.String())) +} + +func blockFailureKey(chainId *big.Int, blockNumber *big.Int, timestamp int64) []byte { + return []byte(fmt.Sprintf("f:%d:%s:%d", chainId.Uint64(), blockNumber.String(), timestamp)) +} + +func lastReorgKey(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("reorg:%d", chainId.Uint64())) +} + +func lastPublishedKey(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("published:%d", chainId.Uint64())) +} + +// IOrchestratorStorage implementation +func (bc *BadgerConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var failures []common.BlockFailure + prefix := fmt.Sprintf("f:%d:", qf.ChainId.Uint64()) + + err := bc.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var failure common.BlockFailure + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&failure); err != nil { + return err + } + + // Apply filters + if qf.StartBlock != nil && failure.BlockNumber.Cmp(qf.StartBlock) < 0 { + return nil + } + if qf.EndBlock != nil && failure.BlockNumber.Cmp(qf.EndBlock) > 0 { + return nil + } + + failures = append(failures, failure) + return nil + }) + if err != nil { + return err + } + + if qf.Limit > 0 && len(failures) >= qf.Limit { + break + } + } + return nil + }) + + return failures, err +} + +func (bc *BadgerConnector) StoreBlockFailures(failures []common.BlockFailure) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, failure := range failures { + key := blockFailureKey(failure.ChainId, failure.BlockNumber, time.Now().Unix()) + + var buf bytes.Buffer + if err := gob.NewEncoder(&buf).Encode(failure); err != nil { + return err + } + + if err := txn.Set(key, buf.Bytes()); err != nil { + return err + } + } + return nil + }) +} + +func (bc *BadgerConnector) DeleteBlockFailures(failures []common.BlockFailure) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, failure := range failures { + // Delete all failure entries for this block + prefix := fmt.Sprintf("f:%d:%s:", failure.ChainId.Uint64(), failure.BlockNumber.String()) + + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + if err := txn.Delete(it.Item().Key()); err != nil { + return err + } + } + } + return nil + }) +} + +func (bc *BadgerConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var blockNumber *big.Int + err := bc.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(lastReorgKey(chainId)) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return 
err + } + + return item.Value(func(val []byte) error { + blockNumber = new(big.Int).SetBytes(val) + return nil + }) + }) + + if blockNumber == nil { + return big.NewInt(0), nil + } + return blockNumber, err +} + +func (bc *BadgerConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + return txn.Set(lastReorgKey(chainId), blockNumber.Bytes()) + }) +} + +// IStagingStorage implementation +func (bc *BadgerConnector) InsertStagingData(data []common.BlockData) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, blockData := range data { + key := blockKey(big.NewInt(int64(blockData.ChainId)), blockData.Block.Number) + + var buf bytes.Buffer + if err := gob.NewEncoder(&buf).Encode(blockData); err != nil { + return err + } + + if err := txn.Set(key, buf.Bytes()); err != nil { + return err + } + } + return nil + }) +} + +func (bc *BadgerConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var results []common.BlockData + + if len(qf.BlockNumbers) > 0 { + // Fetch specific blocks + err := bc.db.View(func(txn *badger.Txn) error { + for _, blockNum := range qf.BlockNumbers { + key := blockKey(qf.ChainId, blockNum) + item, err := txn.Get(key) + if err == badger.ErrKeyNotFound { + continue + } + if err != nil { + return err + } + + err = item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + results = append(results, blockData) + return nil + }) + if err != nil { + return err + } + } + return nil + }) + return results, err + } + + // Range query + prefix := fmt.Sprintf("b:%d:", qf.ChainId.Uint64()) + + err := bc.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + count := 0 + for it.Rewind(); it.Valid(); it.Next() { + if qf.Offset > 0 && count < qf.Offset { + count++ + continue + } + + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + + // Apply filters + if qf.StartBlock != nil && blockData.Block.Number.Cmp(qf.StartBlock) < 0 { + return nil + } + if qf.EndBlock != nil && blockData.Block.Number.Cmp(qf.EndBlock) > 0 { + return nil + } + + results = append(results, blockData) + return nil + }) + if err != nil { + return err + } + + count++ + if qf.Limit > 0 && len(results) >= qf.Limit { + break + } + } + return nil + }) + + // Sort by block number + sort.Slice(results, func(i, j int) bool { + return results[i].Block.Number.Cmp(results[j].Block.Number) < 0 + }) + + return results, err +} + +func (bc *BadgerConnector) DeleteStagingData(data []common.BlockData) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, blockData := range data { + key := blockKey(big.NewInt(int64(blockData.ChainId)), blockData.Block.Number) + if err := txn.Delete(key); err != nil && err != badger.ErrKeyNotFound { + return err + } + } + return nil + }) +} + +func (bc *BadgerConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var maxBlock *big.Int + prefix := 
fmt.Sprintf("b:%d:", chainId.Uint64()) + + err := bc.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + opts.Reverse = true // Iterate in reverse to find max quickly + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + key := string(it.Item().Key()) + parts := strings.Split(key, ":") + if len(parts) != 3 { + continue + } + + blockNum, ok := new(big.Int).SetString(parts[2], 10) + if !ok { + continue + } + + // Apply range filters if provided + if rangeStart != nil && rangeStart.Sign() > 0 && blockNum.Cmp(rangeStart) < 0 { + continue + } + if rangeEnd != nil && rangeEnd.Sign() > 0 && blockNum.Cmp(rangeEnd) > 0 { + continue + } + + maxBlock = blockNum + break // Found the maximum since we're iterating in reverse + } + return nil + }) + + if maxBlock == nil { + return big.NewInt(0), nil + } + return maxBlock, err +} + +func (bc *BadgerConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var blockNumber *big.Int + err := bc.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(lastPublishedKey(chainId)) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return err + } + + return item.Value(func(val []byte) error { + blockNumber = new(big.Int).SetBytes(val) + return nil + }) + }) + + if blockNumber == nil { + return big.NewInt(0), nil + } + return blockNumber, err +} + +func (bc *BadgerConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + return txn.Set(lastPublishedKey(chainId), blockNumber.Bytes()) + }) +} + +func (bc *BadgerConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + prefix := fmt.Sprintf("b:%d:", chainId.Uint64()) + + return bc.db.Update(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + var keysToDelete [][]byte + for it.Rewind(); it.Valid(); it.Next() { + key := string(it.Item().Key()) + parts := strings.Split(key, ":") + if len(parts) != 3 { + continue + } + + blockNum, ok := new(big.Int).SetString(parts[2], 10) + if !ok { + continue + } + + if blockNum.Cmp(blockNumber) <= 0 { + keysToDelete = append(keysToDelete, it.Item().KeyCopy(nil)) + } + } + + for _, key := range keysToDelete { + if err := txn.Delete(key); err != nil { + return err + } + } + + return nil + }) +} diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go index 1f95536..9ea97ce 100644 --- a/internal/storage/clickhouse.go +++ b/internal/storage/clickhouse.go @@ -78,6 +78,14 @@ func NewClickHouseConnector(cfg *config.ClickhouseConfig) (*ClickHouseConnector, }, nil } +// Close closes the ClickHouse connection +func (c *ClickHouseConnector) Close() error { + if c.conn != nil { + return c.conn.Close() + } + return nil +} + func connectDB(cfg *config.ClickhouseConfig) (clickhouse.Conn, error) { port := cfg.Port if port == 0 { diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 0b5d743..4b962af 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -72,12 +72,37 @@ type IStorage struct { StagingStorage IStagingStorage } +// Close closes all storage connections +func (s *IStorage) Close() error { + var errs []error + + // Close each storage that implements 
Closer interface + if err := s.OrchestratorStorage.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close orchestrator storage: %w", err)) + } + + if err := s.MainStorage.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close main storage: %w", err)) + } + + if err := s.StagingStorage.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close staging storage: %w", err)) + } + + if len(errs) > 0 { + return fmt.Errorf("errors closing storage: %v", errs) + } + + return nil +} + type IOrchestratorStorage interface { GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) StoreBlockFailures(failures []common.BlockFailure) error DeleteBlockFailures(failures []common.BlockFailure) error GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error + Close() error } type IStagingStorage interface { @@ -88,6 +113,7 @@ type IStagingStorage interface { GetLastPublishedBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error + Close() error } type IMainStorage interface { @@ -99,16 +125,15 @@ type IMainStorage interface { GetLogs(qf QueryFilter, fields ...string) (logs QueryResult[common.Log], err error) GetTraces(qf QueryFilter, fields ...string) (traces QueryResult[common.Trace], err error) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) + GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) + GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) + GetMaxBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (maxBlockNumber *big.Int, err error) /** * Get block headers ordered from latest to oldest. */ GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) (blockHeaders []common.BlockHeader, err error) - - GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) - GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) - /** * Gets only the data required for validation. */ @@ -121,6 +146,8 @@ type IMainStorage interface { * Gets full block data with transactions, logs and traces. 
*/ GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) (blocks []common.BlockData, err error) + + Close() error } func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { @@ -148,14 +175,59 @@ func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { var conn interface{} var err error - if cfg.Kafka != nil { - conn, err = NewKafkaRedisConnector(cfg.Kafka) - } else if cfg.Postgres != nil { - conn, err = NewPostgresConnector(cfg.Postgres) - } else if cfg.Clickhouse != nil { - conn, err = NewClickHouseConnector(cfg.Clickhouse) + + // Default to "auto" if Type is not specified + storageType := cfg.Type + if storageType == "" { + storageType = "auto" + } + + // Handle explicit type selection + if storageType != "auto" { + switch storageType { + case "kafka": + if cfg.Kafka == nil { + return *new(T), fmt.Errorf("kafka storage type specified but kafka config is nil") + } + conn, err = NewKafkaRedisConnector(cfg.Kafka) + case "postgres": + if cfg.Postgres == nil { + return *new(T), fmt.Errorf("postgres storage type specified but postgres config is nil") + } + conn, err = NewPostgresConnector(cfg.Postgres) + case "clickhouse": + if cfg.Clickhouse == nil { + return *new(T), fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") + } + conn, err = NewClickHouseConnector(cfg.Clickhouse) + case "badger": + if cfg.Badger == nil { + return *new(T), fmt.Errorf("badger storage type specified but badger config is nil") + } + conn, err = NewBadgerConnector(cfg.Badger) + case "s3": + if cfg.S3 == nil { + return *new(T), fmt.Errorf("s3 storage type specified but s3 config is nil") + } + conn, err = NewS3Connector(cfg.S3) + default: + return *new(T), fmt.Errorf("unknown storage type: %s", storageType) + } } else { - return *new(T), fmt.Errorf("no storage driver configured") + // Auto mode: use the first non-nil config (existing behavior) + if cfg.Kafka != nil { + conn, err = NewKafkaRedisConnector(cfg.Kafka) + } else if cfg.Postgres != nil { + conn, err = NewPostgresConnector(cfg.Postgres) + } else if cfg.Clickhouse != nil { + conn, err = NewClickHouseConnector(cfg.Clickhouse) + } else if cfg.Badger != nil { + conn, err = NewBadgerConnector(cfg.Badger) + } else if cfg.S3 != nil { + conn, err = NewS3Connector(cfg.S3) + } else { + return *new(T), fmt.Errorf("no storage driver configured") + } } if err != nil { diff --git a/internal/storage/s3.go b/internal/storage/s3.go new file mode 100644 index 0000000..8a75c65 --- /dev/null +++ b/internal/storage/s3.go @@ -0,0 +1,1071 @@ +package storage + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "math/big" + "sort" + "strings" + "sync" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + awsconfig "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/parquet-go/parquet-go" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" +) + +type S3Connector struct { + client *s3.Client + config *config.S3Config + formatter DataFormatter + + // Buffering + buffer []common.BlockData + bufferMu sync.Mutex + bufferSize int64 // Current buffer size in bytes + bufferTimer *time.Timer + stopCh chan struct{} + flushCh chan struct{} + flushDoneCh chan struct{} // Signals when flush is complete + wg sync.WaitGroup +} + +// DataFormatter interface for different file formats +type 
DataFormatter interface { + FormatBlockData(data []common.BlockData) ([]byte, error) + GetFileExtension() string + GetContentType() string +} + +// ParquetBlockData represents the complete block data in Parquet format +type ParquetBlockData struct { + ChainID uint64 `parquet:"chain_id"` + BlockNumber uint64 `parquet:"block_number"` // Numeric for efficient min/max queries + BlockHash string `parquet:"block_hash"` + BlockTimestamp int64 `parquet:"block_timestamp"` + Block []byte `parquet:"block_json"` + Transactions []byte `parquet:"transactions_json"` + Logs []byte `parquet:"logs_json"` + Traces []byte `parquet:"traces_json"` +} + +func NewS3Connector(cfg *config.S3Config) (*S3Connector, error) { + awsCfg, err := awsconfig.LoadDefaultConfig(context.Background(), + awsconfig.WithRegion(cfg.Region), + ) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + // Override with explicit credentials if provided + if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { + awsCfg.Credentials = aws.CredentialsProviderFunc(func(ctx context.Context) (aws.Credentials, error) { + return aws.Credentials{ + AccessKeyID: cfg.AccessKeyID, + SecretAccessKey: cfg.SecretAccessKey, + }, nil + }) + } + + s3Client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { + if cfg.Endpoint != "" { + o.BaseEndpoint = aws.String(cfg.Endpoint) + } + }) + + // Set defaults + if cfg.Format == "" { + cfg.Format = "parquet" + } + + // Initialize parquet config with defaults if using parquet + if cfg.Format == "parquet" && cfg.Parquet == nil { + cfg.Parquet = &config.ParquetConfig{ + Compression: "snappy", + RowGroupSize: 256, // MB + PageSize: 8192, // KB + } + } + + // Set buffer defaults + if cfg.BufferSize == 0 { + cfg.BufferSize = 1024 // 1GB default + } + if cfg.BufferTimeout == 0 { + cfg.BufferTimeout = 300 // 5 minutes default + } + + // Create formatter based on format + var formatter DataFormatter + switch cfg.Format { + case "parquet": + formatter = &ParquetFormatter{config: cfg.Parquet} + default: + return nil, fmt.Errorf("unsupported format: %s", cfg.Format) + } + + s3c := &S3Connector{ + client: s3Client, + config: cfg, + formatter: formatter, + buffer: make([]common.BlockData, 0), + stopCh: make(chan struct{}), + flushCh: make(chan struct{}, 1), + flushDoneCh: make(chan struct{}), + } + + // Start background flush worker + s3c.wg.Add(1) + go s3c.flushWorker() + + return s3c, nil +} + +func (s *S3Connector) InsertBlockData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + s.bufferMu.Lock() + defer s.bufferMu.Unlock() + + // Calculate actual serialized size for accurate memory tracking + formattedData, err := s.formatter.FormatBlockData(data) + if err != nil { + return fmt.Errorf("failed to format block data for size calculation: %w", err) + } + + // Use actual serialized size for accurate memory tracking + actualSize := int64(len(formattedData)) + s.bufferSize += actualSize + log.Debug(). + Int("block_count", len(data)). + Int64("size_bytes", actualSize). + Int64("avg_bytes_per_block", actualSize/int64(len(data))). + Msg("Calculated actual block data size") + + // Add to buffer + s.buffer = append(s.buffer, data...) 
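+
+	// Flush triggers for the buffered data above (defaults are set in NewS3Connector):
+	//   - the buffered serialized size reaches bufferSizeMB (default 1024 MB)
+	//   - bufferTimeoutSeconds elapses after the buffer first becomes non-empty (default 300s)
+	//   - maxBlocksPerFile is configured (> 0) and the buffered block count reaches it
+	// The timer below is armed only when the buffer transitions from empty to non-empty.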
+ + // Reset timer if this is the first data in buffer + if len(s.buffer) == len(data) && s.bufferTimer == nil { + s.bufferTimer = time.AfterFunc(time.Duration(s.config.BufferTimeout)*time.Second, func() { + select { + case s.flushCh <- struct{}{}: + default: + } + }) + } + + // Check if we should flush based on size or block count + shouldFlush := s.bufferSize >= s.config.BufferSize*1024*1024 // Convert MB to bytes + + // Only check block count if MaxBlocksPerFile is set (> 0) + if s.config.MaxBlocksPerFile > 0 && len(s.buffer) >= s.config.MaxBlocksPerFile { + shouldFlush = true + } + + if shouldFlush { + // Stop timer if running + if s.bufferTimer != nil { + s.bufferTimer.Stop() + s.bufferTimer = nil + } + + // Trigger flush + select { + case s.flushCh <- struct{}{}: + default: + } + } + + return nil +} + +// flushWorker runs in background and handles buffer flushes +func (s *S3Connector) flushWorker() { + defer s.wg.Done() + + for { + select { + case <-s.stopCh: + // Final flush before stopping + s.flushBuffer() + return + case <-s.flushCh: + s.flushBuffer() + // Signal flush completion + select { + case s.flushDoneCh <- struct{}{}: + default: + } + } + } +} + +// flushBuffer writes buffered data to S3 +func (s *S3Connector) flushBuffer() error { + s.bufferMu.Lock() + if len(s.buffer) == 0 { + s.bufferMu.Unlock() + return nil + } + + // Take ownership of buffer + data := s.buffer + s.buffer = make([]common.BlockData, 0) + s.bufferSize = 0 + + // Stop timer if running + if s.bufferTimer != nil { + s.bufferTimer.Stop() + s.bufferTimer = nil + } + s.bufferMu.Unlock() + + // Group blocks by chain to generate appropriate keys + chainGroups := make(map[uint64][]common.BlockData) + for _, block := range data { + chainGroups[block.ChainId] = append(chainGroups[block.ChainId], block) + } + + for _, blocks := range chainGroups { + // Sort blocks by number + sort.Slice(blocks, func(i, j int) bool { + return blocks[i].Block.Number.Cmp(blocks[j].Block.Number) < 0 + }) + + // Process in chunks if MaxBlocksPerFile is set, otherwise upload all at once + if s.config.MaxBlocksPerFile > 0 { + // Split into chunks based on MaxBlocksPerFile + for i := 0; i < len(blocks); i += s.config.MaxBlocksPerFile { + end := i + s.config.MaxBlocksPerFile + if end > len(blocks) { + end = len(blocks) + } + + chunk := blocks[i:end] + if err := s.uploadBatch(chunk); err != nil { + log.Error().Err(err).Msg("Failed to upload batch to S3") + return err + } + } + } else { + // No block limit, upload entire buffer as one file + if err := s.uploadBatch(blocks); err != nil { + log.Error().Err(err).Msg("Failed to upload batch to S3") + return err + } + } + } + + return nil +} + +// Flush manually triggers a buffer flush and waits for completion +func (s *S3Connector) Flush() error { + // Check if buffer has data + s.bufferMu.Lock() + hasData := len(s.buffer) > 0 + s.bufferMu.Unlock() + + if !hasData { + return nil + } + + // Clear any pending flush completion signals + select { + case <-s.flushDoneCh: + default: + } + + // Trigger flush + select { + case s.flushCh <- struct{}{}: + // Wait for flush to complete + select { + case <-s.flushDoneCh: + return nil + case <-time.After(30 * time.Second): + return fmt.Errorf("flush timeout after 30 seconds") + } + default: + // Flush channel is full, likely a flush is already in progress + // Wait for it to complete + select { + case <-s.flushDoneCh: + return nil + case <-time.After(30 * time.Second): + return fmt.Errorf("flush timeout after 30 seconds") + } + } +} + +// Close closes the S3 
connector and flushes any remaining data +func (s *S3Connector) Close() error { + // First, ensure any pending data is flushed + if err := s.Flush(); err != nil { + log.Error().Err(err).Msg("Error flushing buffer during close") + } + + // Signal stop + close(s.stopCh) + + // Wait for worker to finish + s.wg.Wait() + + return nil +} + +func (s *S3Connector) uploadBatch(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + chainID := data[0].ChainId + startBlock := data[0].Block.Number + endBlock := data[len(data)-1].Block.Number + // Use the first block's timestamp for year partitioning + blockTimestamp := data[0].Block.Timestamp + + // Format data using the configured formatter + formattedData, err := s.formatter.FormatBlockData(data) + if err != nil { + return fmt.Errorf("failed to format block data: %w", err) + } + + // Generate S3 key with chain_id/year partitioning based on block timestamp + key := s.generateS3Key(chainID, startBlock, endBlock, blockTimestamp) + + // Upload to S3 + ctx := context.Background() + _, err = s.client.PutObject(ctx, &s3.PutObjectInput{ + Bucket: aws.String(s.config.Bucket), + Key: aws.String(key), + Body: bytes.NewReader(formattedData), + ContentType: aws.String(s.formatter.GetContentType()), + Metadata: map[string]string{ + "chain_id": fmt.Sprintf("%d", chainID), + "start_block": startBlock.String(), + "end_block": endBlock.String(), + "block_count": fmt.Sprintf("%d", len(data)), + "timestamp": blockTimestamp.Format(time.RFC3339), + "checksum": s.calculateChecksum(formattedData), + "file_size": fmt.Sprintf("%d", len(formattedData)), + }, + }) + + if err != nil { + return fmt.Errorf("failed to upload to S3: %w", err) + } + + log.Info(). + Uint64("chain_id", chainID). + Str("min_block", startBlock.String()). + Str("max_block", endBlock.String()). + Int("block_count", len(data)). + Int("file_size_mb", len(formattedData)/(1024*1024)). + Str("s3_key", key). 
+ Msg("Successfully uploaded buffered blocks to S3") + + return nil +} + +func (s *S3Connector) generateS3Key(chainID uint64, startBlock, endBlock *big.Int, blockTimestamp time.Time) string { + // Use the block's timestamp for year partitioning + year := blockTimestamp.Year() + if len(s.config.Prefix) > 0 { + return fmt.Sprintf("%s/chain_%d/year=%d/blocks_%s_%s%s", + s.config.Prefix, + chainID, + year, + startBlock.String(), + endBlock.String(), + s.formatter.GetFileExtension(), + ) + } + return fmt.Sprintf("chain_%d/year=%d/blocks_%s_%s%s", + chainID, + year, + startBlock.String(), + endBlock.String(), + s.formatter.GetFileExtension(), + ) +} + +// ParquetFormatter implements DataFormatter for Parquet format +type ParquetFormatter struct { + config *config.ParquetConfig +} + +func (f *ParquetFormatter) FormatBlockData(data []common.BlockData) ([]byte, error) { + var parquetData []ParquetBlockData + + for _, d := range data { + // Serialize each component to JSON + blockJSON, err := json.Marshal(d.Block) + if err != nil { + return nil, fmt.Errorf("failed to marshal block: %w", err) + } + + // Default transactions to empty array if nil + var txJSON []byte + if d.Transactions == nil { + txJSON, err = json.Marshal([]common.Transaction{}) + } else { + txJSON, err = json.Marshal(d.Transactions) + } + if err != nil { + return nil, fmt.Errorf("failed to marshal transactions: %w", err) + } + + // Default logs to empty array if nil + var logsJSON []byte + if d.Logs == nil { + logsJSON, err = json.Marshal([]common.Log{}) + } else { + logsJSON, err = json.Marshal(d.Logs) + } + if err != nil { + return nil, fmt.Errorf("failed to marshal logs: %w", err) + } + + // Default traces to empty array if nil + var tracesJSON []byte + if d.Traces == nil { + tracesJSON, err = json.Marshal([]common.Trace{}) + } else { + tracesJSON, err = json.Marshal(d.Traces) + } + if err != nil { + return nil, fmt.Errorf("failed to marshal traces: %w", err) + } + + // Convert block number to uint64 for efficient queries + // If block number is too large for uint64, use MaxUint64 + blockNum := d.Block.Number.Uint64() + if d.Block.Number.BitLen() > 64 { + return nil, fmt.Errorf("block number exceeds uint64 is not supported") + } + + pd := ParquetBlockData{ + ChainID: d.ChainId, + BlockNumber: blockNum, + BlockHash: d.Block.Hash, + BlockTimestamp: d.Block.Timestamp.Unix(), + Block: blockJSON, + Transactions: txJSON, + Logs: logsJSON, + Traces: tracesJSON, + } + + parquetData = append(parquetData, pd) + } + + var buf bytes.Buffer + + // Configure writer with compression and statistics for efficient queries + writerOptions := []parquet.WriterOption{ + f.getCompressionCodec(), + // Enable page statistics for query optimization (min/max per page) + parquet.DataPageStatistics(true), + // Set page buffer size for better statistics granularity + parquet.PageBufferSize(8 * 1024 * 1024), // 8MB pages + // Configure sorting for optimal query performance + // Sort by block_number first, then block_timestamp for efficient range queries + parquet.SortingWriterConfig( + parquet.SortingColumns( + parquet.Ascending("block_number"), + parquet.Ascending("block_timestamp"), + ), + ), + // Set column index size limit (enables column indexes for all columns) + parquet.ColumnIndexSizeLimit(16 * 1024), // 16KB limit for column index + } + + writer := parquet.NewGenericWriter[ParquetBlockData](&buf, writerOptions...) 
+ + // Write all data at once for better compression and statistics + if _, err := writer.Write(parquetData); err != nil { + return nil, fmt.Errorf("failed to write parquet data: %w", err) + } + + if err := writer.Close(); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func (f *ParquetFormatter) GetFileExtension() string { + return ".parquet" +} + +func (f *ParquetFormatter) GetContentType() string { + return "application/octet-stream" +} + +func (f *ParquetFormatter) getCompressionCodec() parquet.WriterOption { + switch f.config.Compression { + case "gzip": + return parquet.Compression(&parquet.Gzip) + case "zstd": + return parquet.Compression(&parquet.Zstd) + default: + return parquet.Compression(&parquet.Snappy) + } +} + +func (s *S3Connector) calculateChecksum(data []byte) string { + hash := sha256.Sum256(data) + return hex.EncodeToString(hash[:]) +} + +// Implement remaining IMainStorage methods with empty implementations +// These will return errors indicating they're not supported + +func (s *S3Connector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { + return nil, fmt.Errorf("ReplaceBlockData not supported by S3 connector") +} + +func (s *S3Connector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { + return QueryResult[common.Block]{}, fmt.Errorf("GetBlocks not supported by S3 connector - use Athena or similar") +} + +func (s *S3Connector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { + return QueryResult[common.Transaction]{}, fmt.Errorf("GetTransactions not supported by S3 connector - use Athena or similar") +} + +func (s *S3Connector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { + return QueryResult[common.Log]{}, fmt.Errorf("GetLogs not supported by S3 connector - use Athena or similar") +} + +func (s *S3Connector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { + return QueryResult[common.Trace]{}, fmt.Errorf("GetTraces not supported by S3 connector") +} + +func (s *S3Connector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { + return QueryResult[interface{}]{}, fmt.Errorf("GetAggregations not supported by S3 connector") +} + +func (s *S3Connector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { + return QueryResult[common.TokenBalance]{}, fmt.Errorf("GetTokenBalances not supported by S3 connector") +} + +func (s *S3Connector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { + return QueryResult[common.TokenTransfer]{}, fmt.Errorf("GetTokenTransfers not supported by S3 connector") +} + +func (s *S3Connector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { + maxBlock := big.NewInt(0) + + // First check the buffer for blocks from this chain + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() && block.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(block.Block.Number) + } + } + s.bufferMu.Unlock() + + // Then check S3 for the maximum block number + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for 
paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list S3 objects: %w", err) + } + + for _, obj := range page.Contents { + // Extract block range from filename: blocks_{start}_{end}.parquet + if obj.Key == nil { + continue + } + _, endBlock := s.extractBlockRangeFromKey(*obj.Key) + if endBlock != nil && endBlock.Cmp(maxBlock) > 0 { + maxBlock = endBlock + } + } + } + + return maxBlock, nil +} + +func (s *S3Connector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + maxBlock := big.NewInt(0) + + // First check the buffer for blocks in this range + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() { + blockNum := block.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 && blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) + } + } + } + s.bufferMu.Unlock() + + // Then check S3 files + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil { + continue + } + fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) + if fileStart == nil || fileEnd == nil { + continue + } + + // Check if this file overlaps with our range + if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { + // File overlaps with our range + effectiveEnd := new(big.Int).Set(fileEnd) + if effectiveEnd.Cmp(endBlock) > 0 { + effectiveEnd = endBlock + } + if effectiveEnd.Cmp(maxBlock) > 0 { + maxBlock = effectiveEnd + } + } + } + } + + return maxBlock, nil +} + +func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { + var headers []common.BlockHeader + + // First get headers from buffer + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() { + // Check if block is in range (if from is specified) + if from != nil && block.Block.Number.Cmp(from) > 0 { + continue + } + // Apply limit if specified + if to != nil && len(headers) >= int(to.Int64()) { + break + } + headers = append(headers, common.BlockHeader{ + Number: block.Block.Number, + Hash: block.Block.Hash, + ParentHash: block.Block.ParentHash, + }) + } + } + s.bufferMu.Unlock() + + // If we need more headers, get from S3 + if to == nil || len(headers) < int(to.Int64()) { + // Download relevant parquet files and extract block headers + files, err := s.findFilesInRange(chainId, big.NewInt(0), from) // from 0 to 'from' block + if err != nil { + return nil, err + } + + for _, file := range files { + fileHeaders, err := s.extractBlockHeadersFromFile(file, chainId, from, to) + if err != nil { + log.Warn().Err(err).Str("file", file).Msg("Failed to extract headers from file") + continue + } + headers = append(headers, fileHeaders...) 
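// Note: the two GetMaxBlockNumber helpers above and findFilesInRange below never read
// object contents; they rely on the key layout produced by generateS3Key, e.g.
// chain_1/year=2024/blocks_1000_2000.parquet (with the optional configured prefix
// prepended), and recover each file's block range from the file name via
// extractBlockRangeFromKey.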
+ } + } + + // Sort in descending order + sort.Slice(headers, func(i, j int) bool { + return headers[i].Number.Cmp(headers[j].Number) > 0 + }) + + // Apply limit if specified + if to != nil && len(headers) > int(to.Int64()) { + headers = headers[:to.Int64()] + } + + return headers, nil +} + +func (s *S3Connector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { + if startBlock == nil || endBlock == nil { + return nil, fmt.Errorf("start block and end block must not be nil") + } + + if startBlock.Cmp(endBlock) > 0 { + return nil, fmt.Errorf("start block must be less than or equal to end block") + } + + var blockData []common.BlockData + + // First check buffer for blocks in range + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() { + blockNum := block.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + blockData = append(blockData, block) + } + } + } + s.bufferMu.Unlock() + + // Then find and download relevant files from S3 + files, err := s.findFilesInRange(chainId, startBlock, endBlock) + if err != nil { + return nil, err + } + + for _, file := range files { + fileData, err := s.downloadAndParseFile(file, chainId, startBlock, endBlock) + if err != nil { + log.Warn().Err(err).Str("file", file).Msg("Failed to parse file") + continue + } + blockData = append(blockData, fileData...) + } + + // Sort by block number + sort.Slice(blockData, func(i, j int) bool { + return blockData[i].Block.Number.Cmp(blockData[j].Block.Number) < 0 + }) + + return blockData, nil +} + +func (s *S3Connector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { + // Build a set of all block numbers we have + blockSet := make(map[string]bool) + + // First add blocks from buffer + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() { + blockNum := block.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + blockSet[blockNum.String()] = true + } + } + } + s.bufferMu.Unlock() + + // Then check S3 files in range + files, err := s.findFilesInRange(chainId, startBlock, endBlock) + if err != nil { + return nil, err + } + + for _, file := range files { + fileStart, fileEnd := s.extractBlockRangeFromKey(file) + if fileStart == nil || fileEnd == nil { + continue + } + + // Add all blocks in this file's range to our set + for i := new(big.Int).Set(fileStart); i.Cmp(fileEnd) <= 0; i.Add(i, big.NewInt(1)) { + if i.Cmp(startBlock) >= 0 && i.Cmp(endBlock) <= 0 { + blockSet[i.String()] = true + } + } + } + + // Find missing blocks + var missing []*big.Int + for i := new(big.Int).Set(startBlock); i.Cmp(endBlock) <= 0; i.Add(i, big.NewInt(1)) { + if !blockSet[i.String()] { + missing = append(missing, new(big.Int).Set(i)) + } + } + + return missing, nil +} + +func (s *S3Connector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { + if len(blockNumbers) == 0 { + return nil, nil + } + + // Create a map for quick lookup + blockNumMap := make(map[string]bool) + for _, bn := range blockNumbers { + blockNumMap[bn.String()] = true + } + + var result []common.BlockData + + // First check buffer for requested blocks + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.ChainId == chainId.Uint64() { + if blockNumMap[block.Block.Number.String()] { + result = append(result, block) + // Remove from map so we don't fetch it from S3 + 
delete(blockNumMap, block.Block.Number.String()) + } + } + } + s.bufferMu.Unlock() + + // If all blocks were in buffer, return early + if len(blockNumMap) == 0 { + return result, nil + } + + // Sort remaining block numbers to optimize file access + var remainingBlocks []*big.Int + for blockStr := range blockNumMap { + bn, _ := new(big.Int).SetString(blockStr, 10) + remainingBlocks = append(remainingBlocks, bn) + } + sort.Slice(remainingBlocks, func(i, j int) bool { + return remainingBlocks[i].Cmp(remainingBlocks[j]) < 0 + }) + + if len(remainingBlocks) == 0 { + return result, nil + } + + minBlock := remainingBlocks[0] + maxBlock := remainingBlocks[len(remainingBlocks)-1] + + // Find relevant files for remaining blocks + files, err := s.findFilesInRange(chainId, minBlock, maxBlock) + if err != nil { + return nil, err + } + + for _, file := range files { + fileData, err := s.downloadAndParseFile(file, chainId, minBlock, maxBlock) + if err != nil { + log.Warn().Err(err).Str("file", file).Msg("Failed to parse file") + continue + } + + // Filter to only requested blocks + for _, bd := range fileData { + if blockNumMap[bd.Block.Number.String()] { + result = append(result, bd) + } + } + } + + return result, nil +} + +// Helper functions + +func (s *S3Connector) extractBlockRangeFromKey(key string) (*big.Int, *big.Int) { + // Extract block range from key like: chain_1/year=2024/blocks_1000_2000.parquet + parts := strings.Split(key, "/") + if len(parts) == 0 { + return nil, nil + } + + filename := parts[len(parts)-1] + if !strings.HasPrefix(filename, "blocks_") || !strings.HasSuffix(filename, s.formatter.GetFileExtension()) { + return nil, nil + } + + // Remove prefix and extension + rangeStr := strings.TrimPrefix(filename, "blocks_") + rangeStr = strings.TrimSuffix(rangeStr, s.formatter.GetFileExtension()) + + // Split by underscore to get start and end + rangeParts := strings.Split(rangeStr, "_") + if len(rangeParts) != 2 { + return nil, nil + } + + startBlock, ok1 := new(big.Int).SetString(rangeParts[0], 10) + endBlock, ok2 := new(big.Int).SetString(rangeParts[1], 10) + if !ok1 || !ok2 { + return nil, nil + } + + return startBlock, endBlock +} + +func (s *S3Connector) findFilesInRange(chainId *big.Int, startBlock, endBlock *big.Int) ([]string, error) { + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + var relevantFiles []string + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil { + continue + } + + fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) + if fileStart == nil || fileEnd == nil { + continue + } + + // Check if this file's range overlaps with our query range + if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { + relevantFiles = append(relevantFiles, *obj.Key) + } + } + } + + return relevantFiles, nil +} + +func (s *S3Connector) downloadAndParseFile(key string, chainId *big.Int, startBlock, endBlock *big.Int) ([]common.BlockData, error) { + ctx := context.Background() + + // Download the file + result, err := s.client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(s.config.Bucket), + Key: aws.String(key), + }) + if err 
!= nil { + return nil, fmt.Errorf("failed to download file: %w", err) + } + defer result.Body.Close() + + // Read entire file into memory (required for parquet reader) + data, err := io.ReadAll(result.Body) + if err != nil { + return nil, fmt.Errorf("failed to read file data: %w", err) + } + + // Read the parquet file + reader := parquet.NewGenericReader[ParquetBlockData](bytes.NewReader(data)) + defer reader.Close() + + var blockData []common.BlockData + parquetRows := make([]ParquetBlockData, 100) // Read in batches + + for { + n, err := reader.Read(parquetRows) + if err != nil && err.Error() != "EOF" { + return nil, fmt.Errorf("failed to read parquet: %w", err) + } + if n == 0 { + break + } + + for i := 0; i < n; i++ { + pd := parquetRows[i] + + // Convert uint64 block number to big.Int + blockNum := new(big.Int).SetUint64(pd.BlockNumber) + + // Filter by range if specified + if startBlock != nil && blockNum.Cmp(startBlock) < 0 { + continue + } + if endBlock != nil && blockNum.Cmp(endBlock) > 0 { + continue + } + + // Unmarshal JSON data + var block common.Block + if err := json.Unmarshal(pd.Block, &block); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal block") + continue + } + + var transactions []common.Transaction + if len(pd.Transactions) > 0 { + if err := json.Unmarshal(pd.Transactions, &transactions); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal transactions") + } + } + + var logs []common.Log + if len(pd.Logs) > 0 { + if err := json.Unmarshal(pd.Logs, &logs); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal logs") + } + } + + var traces []common.Trace + if len(pd.Traces) > 0 { + if err := json.Unmarshal(pd.Traces, &traces); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal traces") + } + } + + blockData = append(blockData, common.BlockData{ + ChainId: pd.ChainID, + Block: block, + Transactions: transactions, + Logs: logs, + Traces: traces, + }) + } + } + + return blockData, nil +} + +func (s *S3Connector) extractBlockHeadersFromFile(key string, chainId *big.Int, from, to *big.Int) ([]common.BlockHeader, error) { + // Download and parse only the block headers + blockData, err := s.downloadAndParseFile(key, chainId, from, to) + if err != nil { + return nil, err + } + + headers := make([]common.BlockHeader, 0, len(blockData)) + for _, bd := range blockData { + headers = append(headers, common.BlockHeader{ + Number: bd.Block.Number, + Hash: bd.Block.Hash, + ParentHash: bd.Block.ParentHash, + }) + } + + return headers, nil +} From eea71f4de66fb5e1463a303773e5ff78fab3ae9f Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Fri, 22 Aug 2025 23:02:25 +0000 Subject: [PATCH 18/43] Until block for committer --- configs/config.go | 1 + internal/orchestrator/committer.go | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/configs/config.go b/configs/config.go index 29703bb..e92a4b6 100644 --- a/configs/config.go +++ b/configs/config.go @@ -30,6 +30,7 @@ type CommitterConfig struct { Interval int `mapstructure:"interval"` BlocksPerCommit int `mapstructure:"blocksPerCommit"` FromBlock int `mapstructure:"fromBlock"` + UntilBlock int `mapstructure:"untilBlock"` } type ReorgHandlerConfig struct { diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index 47d93ba..53ff864 100644 --- a/internal/orchestrator/committer.go +++ 
b/internal/orchestrator/committer.go @@ -26,6 +26,7 @@ type Committer struct { blocksPerCommit int storage storage.IStorage commitFromBlock *big.Int + commitUntilBlock *big.Int rpc rpc.IRPCClient lastCommittedBlock atomic.Uint64 lastPublishedBlock atomic.Uint64 @@ -60,12 +61,23 @@ func NewCommitter(rpc rpc.IRPCClient, storage storage.IStorage, opts ...Committe blocksPerCommit = DEFAULT_BLOCKS_PER_COMMIT } + commitUntilBlock := config.Cfg.Committer.UntilBlock + if commitUntilBlock == 0 { + // default to match the poller.untilBlock + if config.Cfg.Poller.UntilBlock != 0 { + commitUntilBlock = config.Cfg.Poller.UntilBlock + } else { + commitUntilBlock = -1 + } + } + commitFromBlock := big.NewInt(int64(config.Cfg.Committer.FromBlock)) committer := &Committer{ triggerIntervalMs: triggerInterval, blocksPerCommit: blocksPerCommit, storage: storage, commitFromBlock: commitFromBlock, + commitUntilBlock: big.NewInt(int64(commitUntilBlock)), rpc: rpc, publisher: publisher.GetInstance(), workMode: "", @@ -204,6 +216,7 @@ func (c *Committer) Start(ctx context.Context) { } c.runCommitLoop(ctx, interval) + log.Info().Msg("Committer shutting down") c.publisher.Close() } @@ -232,6 +245,11 @@ func (c *Committer) runCommitLoop(ctx context.Context, interval time.Duration) { log.Debug().Msg("Committer work mode not set, skipping commit") continue } + if c.commitUntilBlock.Sign() > 0 && c.lastCommittedBlock.Load() > c.commitUntilBlock.Uint64() { + // Completing the commit loop if we've committed more than commit until block + log.Info().Msgf("Committer reached configured untilBlock %s, the last commit block is %d, stopping commits", c.commitUntilBlock.String(), c.lastCommittedBlock.Load()) + return + } blockDataToCommit, err := c.getSequentialBlockDataToCommit(ctx) if err != nil { log.Error().Err(err).Msg("Error getting block data to commit") From 68087a0ec65df602aa51637e91a1dde68da16eec Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Mon, 25 Aug 2025 22:52:39 +0000 Subject: [PATCH 19/43] terminate when poller or committer exit --- internal/orchestrator/orchestrator.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index 1d879dc..f412dff 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -67,7 +67,10 @@ func (o *Orchestrator) Start() { poller := NewPoller(o.rpc, o.storage, WithPollerWorkModeChan(pollerWorkModeChan)) poller.Start(ctx) + log.Info().Msg("Poller completed") + // If the poller is terminated, cancel the orchestrator + o.cancel() }() } @@ -77,6 +80,8 @@ func (o *Orchestrator) Start() { defer o.wg.Done() failureRecoverer := NewFailureRecoverer(o.rpc, o.storage) failureRecoverer.Start(ctx) + + log.Info().Msg("Failure recoverer completed") }() } @@ -90,6 +95,10 @@ func (o *Orchestrator) Start() { validator := NewValidator(o.rpc, o.storage) committer := NewCommitter(o.rpc, o.storage, WithCommitterWorkModeChan(committerWorkModeChan), WithValidator(validator)) committer.Start(ctx) + + // If the committer is terminated, cancel the orchestrator + log.Info().Msg("Committer completed") + o.cancel() }() } @@ -99,6 +108,8 @@ func (o *Orchestrator) Start() { defer o.wg.Done() reorgHandler := NewReorgHandler(o.rpc, o.storage) reorgHandler.Start(ctx) + + log.Info().Msg("Reorg handler completed") }() } @@ -106,6 +117,8 @@ func (o *Orchestrator) Start() { go func() { defer o.wg.Done() workModeMonitor.Start(ctx) + + 
log.Info().Msg("Work mode monitor completed") }() // The chain tracker is always running @@ -114,11 +127,12 @@ func (o *Orchestrator) Start() { defer o.wg.Done() chainTracker := NewChainTracker(o.rpc) chainTracker.Start(ctx) - }() - o.wg.Wait() + log.Info().Msg("Chain tracker completed") + }() // Waiting for all goroutines to complete + o.wg.Wait() if err := o.storage.Close(); err != nil { log.Error().Err(err).Msg("Error closing storage connections") From c0ba962f39fdc16b2c2981525e95d4cb0eaf9502 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 01:56:33 +0000 Subject: [PATCH 20/43] Fix commit until block --- internal/orchestrator/committer.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index 53ff864..bde4854 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -245,7 +245,7 @@ func (c *Committer) runCommitLoop(ctx context.Context, interval time.Duration) { log.Debug().Msg("Committer work mode not set, skipping commit") continue } - if c.commitUntilBlock.Sign() > 0 && c.lastCommittedBlock.Load() > c.commitUntilBlock.Uint64() { + if c.commitUntilBlock.Sign() > 0 && c.lastCommittedBlock.Load() >= c.commitUntilBlock.Uint64() { // Completing the commit loop if we've committed more than commit until block log.Info().Msgf("Committer reached configured untilBlock %s, the last commit block is %d, stopping commits", c.commitUntilBlock.String(), c.lastCommittedBlock.Load()) return @@ -399,9 +399,16 @@ func (c *Committer) getBlockNumbersToPublish(ctx context.Context) ([]*big.Int, e func (c *Committer) getBlockToCommitUntil(ctx context.Context, latestCommittedBlockNumber *big.Int) (*big.Int, error) { untilBlock := new(big.Int).Add(latestCommittedBlockNumber, big.NewInt(int64(c.blocksPerCommit))) + + // If a commit until block is set, then set a limit on the commit until block + if c.commitUntilBlock.Sign() > 0 && untilBlock.Cmp(c.commitUntilBlock) > 0 { + return new(big.Int).Set(c.commitUntilBlock), nil + } + c.workModeMutex.RLock() currentMode := c.workMode c.workModeMutex.RUnlock() + if currentMode == WorkModeBackfill { return untilBlock, nil } else { From 51f1398e92546714e7f1ef14ba29af4f8c3324eb Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 02:36:10 +0000 Subject: [PATCH 21/43] Don't cancel active tasks in poller --- internal/orchestrator/poller.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index 31a64ae..b46fc34 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -159,7 +159,6 @@ func (p *Poller) Start(ctx context.Context) { lastPolledBlock := p.Poll(pollCtx, blockNumbers) if p.reachedPollLimit(lastPolledBlock) { log.Info().Msgf("Reached poll limit at block %s, completing poller", lastPolledBlock.String()) - cancel() return } } From debc231eb7f150d4e8da748bd0f9c6dcd424e44f Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 08:05:30 +0000 Subject: [PATCH 22/43] migrate with destination storage --- cmd/migrate_valid.go | 304 +++++++++++++-------- cmd/root.go | 93 ++++++- configs/config.go | 17 +- internal/common/block.go | 2 - internal/orchestrator/failure_recoverer.go | 1 - internal/orchestrator/poller.go | 1 - internal/orchestrator/reorg_handler.go | 1 - internal/storage/badger.go | 4 +- 
internal/storage/clickhouse.go | 23 +- internal/storage/connector.go | 6 +- internal/storage/kafka_publisher.go | 15 +- internal/storage/kafka_redis.go | 23 +- internal/storage/s3.go | 154 +++++++++-- test/mocks/MockIMainStorage.go | 60 ---- 14 files changed, 462 insertions(+), 242 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index cc3e912..d8d34db 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -4,7 +4,9 @@ import ( "context" "math/big" "os" - "strconv" + "os/signal" + "syscall" + "time" "github.com/rs/zerolog/log" "github.com/spf13/cobra" @@ -13,6 +15,7 @@ import ( "github.com/thirdweb-dev/indexer/internal/orchestrator" "github.com/thirdweb-dev/indexer/internal/rpc" "github.com/thirdweb-dev/indexer/internal/storage" + "github.com/thirdweb-dev/indexer/internal/worker" ) var ( @@ -27,12 +30,18 @@ var ( ) const ( - TARGET_STORAGE_DATABASE = "temp" - DEFAULT_RPC_BATCH_SIZE = 200 - DEFAULT_BATCH_SIZE = 1000 + DEFAULT_RPC_BATCH_SIZE = 100 + DEFAULT_BATCH_SIZE = 2000 ) func RunValidationMigration(cmd *cobra.Command, args []string) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Set up signal handling for graceful shutdown + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + migrator := NewMigrator() defer migrator.Close() @@ -40,88 +49,159 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { log.Info().Msgf("Migrating blocks from %s to %s (both ends inclusive)", rangeStartBlock.String(), rangeEndBlock.String()) - // 2. Start going in loops - for currentBlock := rangeStartBlock; currentBlock.Cmp(rangeEndBlock) <= 0; { - endBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.migrationBatchSize-1))) - if endBlock.Cmp(rangeEndBlock) > 0 { - endBlock = rangeEndBlock - } + // Run migration in a goroutine + done := make(chan struct{}) + var migrationErr error - blockNumbers := generateBlockNumbersForRange(currentBlock, endBlock) - log.Info().Msgf("Processing blocks %s to %s", blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) + go func() { + defer close(done) - validBlocksForRange := migrator.GetValidBlocksForRange(blockNumbers) + // 2. 
Start going in loops + for currentBlock := rangeStartBlock; currentBlock.Cmp(rangeEndBlock) <= 0; { + batchStartTime := time.Now() - blocksToInsertMap := make(map[string]common.BlockData) - for _, blockData := range validBlocksForRange { - blocksToInsertMap[blockData.Block.Number.String()] = blockData - } + // Check for cancellation + select { + case <-ctx.Done(): + log.Info().Msgf("Migration interrupted at block %s", currentBlock.String()) + return + default: + } - // Loop over block numbers to find missing blocks - missingBlocks := make([]*big.Int, 0) - for _, blockNum := range blockNumbers { - if _, exists := blocksToInsertMap[blockNum.String()]; !exists { - missingBlocks = append(missingBlocks, blockNum) + endBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.migrationBatchSize-1))) + if endBlock.Cmp(rangeEndBlock) > 0 { + endBlock = rangeEndBlock } - } - validMissingBlocks := migrator.GetValidBlocksFromRPC(missingBlocks) - for _, blockData := range validMissingBlocks { - blocksToInsertMap[blockData.Block.Number.String()] = blockData - } + blockNumbers := generateBlockNumbersForRange(currentBlock, endBlock) + log.Info().Msgf("Processing blocks %s to %s", blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) + + // Fetch valid blocks from source + fetchStartTime := time.Now() + validBlocksForRange, err := migrator.GetValidBlocksForRange(blockNumbers) + fetchDuration := time.Since(fetchStartTime) + if err != nil { + // If we got an error fetching valid blocks, we'll continue + log.Error().Err(err).Msg("Failed to get valid blocks for range") + time.Sleep(3 * time.Second) + continue + } + log.Debug().Dur("duration", fetchDuration).Int("blocks_fetched", len(validBlocksForRange)).Msg("Fetched valid blocks from source") - blocksToInsert := make([]common.BlockData, 0) - for _, blockData := range blocksToInsertMap { - blocksToInsert = append(blocksToInsert, blockData) - } + // Build map of fetched blocks + mapBuildStartTime := time.Now() + blocksToInsertMap := make(map[string]common.BlockData) + for _, blockData := range validBlocksForRange { + blocksToInsertMap[blockData.Block.Number.String()] = blockData + } - err := migrator.targetConn.InsertBlockData(blocksToInsert) - if err != nil { - log.Fatal().Err(err).Msg("Failed to insert blocks to target storage") + // Loop over block numbers to find missing blocks + missingBlocks := make([]*big.Int, 0) + for _, blockNum := range blockNumbers { + if _, exists := blocksToInsertMap[blockNum.String()]; !exists { + missingBlocks = append(missingBlocks, blockNum) + } + } + mapBuildDuration := time.Since(mapBuildStartTime) + log.Debug().Dur("duration", mapBuildDuration).Int("missing_blocks", len(missingBlocks)).Msg("Identified missing blocks") + + // Fetch missing blocks from RPC + if len(missingBlocks) > 0 { + rpcFetchStartTime := time.Now() + validMissingBlocks := migrator.GetValidBlocksFromRPC(missingBlocks) + rpcFetchDuration := time.Since(rpcFetchStartTime) + log.Debug().Dur("duration", rpcFetchDuration).Int("blocks_fetched", len(validMissingBlocks)).Msg("Fetched missing blocks from RPC") + + for _, blockData := range validMissingBlocks { + if blockData.Block.ChainId.Sign() == 0 { + log.Fatal().Msgf("Block %s has chain ID 0, %+v", blockData.Block.Number.String(), blockData.Block) + } + blocksToInsertMap[blockData.Block.Number.String()] = blockData + } + } + + // Prepare blocks for insertion + prepStartTime := time.Now() + blocksToInsert := make([]common.BlockData, 0, len(blocksToInsertMap)) + for _, blockData := range 
blocksToInsertMap { + blocksToInsert = append(blocksToInsert, blockData) + } + prepDuration := time.Since(prepStartTime) + log.Debug().Dur("duration", prepDuration).Int("blocks_to_insert", len(blocksToInsert)).Msg("Prepared blocks for insertion") + + // Insert blocks to destination + insertStartTime := time.Now() + err = migrator.destination.InsertBlockData(blocksToInsert) + insertDuration := time.Since(insertStartTime) + if err != nil { + migrationErr = err + log.Error().Err(err).Dur("duration", insertDuration).Msg("Failed to insert blocks to target storage") + time.Sleep(3 * time.Second) + continue + } + + batchDuration := time.Since(batchStartTime) + log.Info(). + Dur("total_duration", batchDuration). + Dur("fetch_duration", fetchDuration). + Dur("insert_duration", insertDuration). + Int("blocks_processed", len(blocksToInsert)). + Msg("Batch processed successfully") + + currentBlock = new(big.Int).Add(endBlock, big.NewInt(1)) } - currentBlock = new(big.Int).Add(endBlock, big.NewInt(1)) - } + // 3. then finally copy partitions from target table to main tables + log.Info().Msg("Migration completed successfully") + }() - // 3. then finally copy partitions from target table to main tables - log.Info().Msg("Done") + // Wait for either completion or interrupt signal + select { + case <-done: + if migrationErr != nil { + log.Fatal().Err(migrationErr).Msg("Migration failed") + } + log.Info().Msg("Done") + case sig := <-sigChan: + log.Info().Msgf("Received signal: %s, initiating graceful shutdown...", sig) + cancel() + <-done + log.Info().Msg("Migration stopped gracefully") + } } type Migrator struct { rpcClient rpc.IRPCClient - storage storage.IStorage + worker *worker.Worker + source storage.IStorage + destination storage.IMainStorage validator *orchestrator.Validator - targetConn *storage.ClickHouseConnector migrationBatchSize int rpcBatchSize int } func NewMigrator() *Migrator { - targetDBName := os.Getenv("TARGET_STORAGE_DATABASE") - if targetDBName == "" { - targetDBName = TARGET_STORAGE_DATABASE - } batchSize := DEFAULT_BATCH_SIZE - batchSizeEnvInt, err := strconv.Atoi(os.Getenv("MIGRATION_BATCH_SIZE")) - if err == nil && batchSizeEnvInt > 0 { - batchSize = batchSizeEnvInt + if config.Cfg.Migrator.StorageBatchSize > 0 { + batchSize = int(config.Cfg.Migrator.StorageBatchSize) } rpcBatchSize := DEFAULT_RPC_BATCH_SIZE - rpcBatchSizeEnvInt, err := strconv.Atoi(os.Getenv("MIGRATION_RPC_BATCH_SIZE")) - if err == nil && rpcBatchSizeEnvInt > 0 { - rpcBatchSize = rpcBatchSizeEnvInt + if config.Cfg.Migrator.RpcBatchSize > 0 { + rpcBatchSize = int(config.Cfg.Migrator.RpcBatchSize) } + rpcClient, err := rpc.Initialize() if err != nil { log.Fatal().Err(err).Msg("Failed to initialize RPC") } - s, err := storage.NewStorageConnector(&config.Cfg.Storage) + + sourceConnector, err := storage.NewStorageConnector(&config.Cfg.Storage) if err != nil { log.Fatal().Err(err).Msg("Failed to initialize storage") } // check if chain was indexed with block receipts. 
If it was, then the current RPC must support block receipts - validRpc, err := validateRPC(rpcClient, s) + validRpc, err := validateRPC(rpcClient, sourceConnector) if err != nil { log.Fatal().Err(err).Msg("Failed to validate RPC") } @@ -129,114 +209,112 @@ func NewMigrator() *Migrator { log.Fatal().Msg("RPC does not support block receipts, but transactions were indexed with receipts") } - validator := orchestrator.NewValidator(rpcClient, s) + validator := orchestrator.NewValidator(rpcClient, sourceConnector) - targetStorageConfig := *config.Cfg.Storage.Main.Clickhouse - targetStorageConfig.Database = targetDBName - targetConn, err := storage.NewClickHouseConnector(&targetStorageConfig) + destinationConnector, err := storage.NewConnector[storage.IMainStorage](&config.Cfg.Migrator.Destination) if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize target storage") + log.Fatal().Err(err).Msg("Failed to initialize storage") } return &Migrator{ migrationBatchSize: batchSize, rpcBatchSize: rpcBatchSize, rpcClient: rpcClient, - storage: s, + source: sourceConnector, + destination: destinationConnector, validator: validator, - targetConn: targetConn, + worker: worker.NewWorker(rpcClient), } } func (m *Migrator) Close() { m.rpcClient.Close() + + if err := m.source.Close(); err != nil { + log.Fatal().Err(err).Msg("Failed to close source storage") + } + + if err := m.destination.Close(); err != nil { + log.Fatal().Err(err).Msg("Failed to close destination storage") + } } func (m *Migrator) DetermineMigrationBoundaries() (*big.Int, *big.Int) { // get latest block from main storage - latestBlockStored, err := m.storage.MainStorage.GetMaxBlockNumber(m.rpcClient.GetChainID()) + latestBlockStored, err := m.source.MainStorage.GetMaxBlockNumber(m.rpcClient.GetChainID()) if err != nil { log.Fatal().Err(err).Msg("Failed to get latest block from main storage") } log.Info().Msgf("Latest block in main storage: %d", latestBlockStored) endBlock := latestBlockStored - // set range end from env instead if configured - endBlockEnv := os.Getenv("END_BLOCK") - if endBlockEnv != "" { - configuredEndBlock, ok := new(big.Int).SetString(endBlockEnv, 10) - if !ok { - log.Fatal().Msgf("Failed to parse end block %s", endBlockEnv) - } - log.Info().Msgf("Configured end block: %s", configuredEndBlock.String()) - // set configured end block only if it's greater than 0 and less than latest block in main storage - if configuredEndBlock.Sign() > 0 && configuredEndBlock.Cmp(latestBlockStored) < 0 { - endBlock = configuredEndBlock - } + endBlockEnv := big.NewInt(int64(config.Cfg.Migrator.EndBlock)) + if endBlockEnv.Sign() > 0 && endBlockEnv.Cmp(latestBlockStored) < 0 { + endBlock = endBlockEnv } - startBlock := big.NewInt(0) // default start block is 0 - // if start block is configured, use it - startBlockEnv := os.Getenv("START_BLOCK") - if startBlockEnv != "" { - configuredStartBlock, ok := new(big.Int).SetString(startBlockEnv, 10) - if !ok { - log.Fatal().Msgf("Failed to parse start block %s", startBlockEnv) - } - log.Info().Msgf("Configured start block: %s", configuredStartBlock.String()) - startBlock = configuredStartBlock - } + startBlock := big.NewInt(int64(config.Cfg.Migrator.StartBlock)) // default start block is 0 - latestMigratedBlock, err := m.targetConn.GetMaxBlockNumberInRange(m.rpcClient.GetChainID(), startBlock, endBlock) + blockCount, err := m.destination.GetBlockCount(m.rpcClient.GetChainID(), startBlock, endBlock) if err != nil { log.Fatal().Err(err).Msg("Failed to get latest block from target storage") } 
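The migrator now takes its tuning and destination from config.Cfg.Migrator instead of raw environment variables. That config block is added in configs/config.go in this patch but the struct itself is not shown in this hunk; judging from the fields referenced here (StorageBatchSize, RpcBatchSize, StartBlock, EndBlock, Destination) and the migrator.* viper keys bound in cmd/root.go below, it plausibly looks something like this sketch. Field types, mapstructure tags, and the Destination type are assumptions.

// Sketch only: assumed shape of the new migrator configuration.
type MigratorConfig struct {
	StorageBatchSize uint                    `mapstructure:"storageBatchSize"` // blocks per migration batch
	RpcBatchSize     uint                    `mapstructure:"rpcBatchSize"`     // blocks per RPC request
	StartBlock       uint                    `mapstructure:"startBlock"`
	EndBlock         uint                    `mapstructure:"endBlock"`
	Destination      StorageConnectionConfig `mapstructure:"destination"` // assumed: same shape as the main/staging storage config
}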
- log.Info().Msgf("Latest block in target storage: %d", latestMigratedBlock) + log.Info().Msgf("Block count in the target storage for range %s to %s: count=%s", startBlock.String(), endBlock.String(), blockCount.String()) - if latestMigratedBlock.Cmp(endBlock) >= 0 { + expectedCount := new(big.Int).Sub(endBlock, startBlock) + expectedCount = expectedCount.Add(expectedCount, big.NewInt(1)) + if expectedCount.Cmp(blockCount) == 0 { log.Fatal().Msgf("Full range is already migrated") + return nil, nil } - // if configured start block is less than or equal to already migrated and migrated block is not 0, start from last migrated + 1 - if startBlock.Cmp(latestMigratedBlock) <= 0 && latestMigratedBlock.Sign() > 0 { - startBlock = new(big.Int).Add(latestMigratedBlock, big.NewInt(1)) + maxStoredBlock, err := m.destination.GetMaxBlockNumberInRange(m.rpcClient.GetChainID(), startBlock, endBlock) + if err != nil { + log.Fatal().Err(err).Msg("Failed to get max block from destination storage") + return nil, nil + } + + log.Info().Msgf("Block in the target storage for range %s to %s: count=%s, max=%s", startBlock.String(), endBlock.String(), blockCount.String(), maxStoredBlock.String()) + if maxStoredBlock != nil && maxStoredBlock.Cmp(startBlock) >= 0 { + startBlock = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) } return startBlock, endBlock } func (m *Migrator) FetchBlocksFromRPC(blockNumbers []*big.Int) ([]common.BlockData, error) { - allBlockData := make([]common.BlockData, 0) - for i := 0; i < len(blockNumbers); i += m.rpcBatchSize { - end := i + m.rpcBatchSize - if end > len(blockNumbers) { - end = len(blockNumbers) - } - batch := blockNumbers[i:end] - blockData := m.rpcClient.GetFullBlocks(context.Background(), batch) + allBlockData := make([]common.BlockData, 0, len(blockNumbers)) - for _, block := range blockData { - if block.Error != nil { - log.Warn().Err(block.Error).Msgf("Failed to fetch block %s from RPC", block.BlockNumber.String()) - continue - } - allBlockData = append(allBlockData, block.Data) + blockData := m.worker.Run(context.Background(), blockNumbers) + for _, block := range blockData { + if block.Error != nil { + log.Warn().Err(block.Error).Msgf("Failed to fetch block %s from RPC", block.BlockNumber.String()) + continue } + allBlockData = append(allBlockData, block.Data) } return allBlockData, nil } -func (m *Migrator) GetValidBlocksForRange(blockNumbers []*big.Int) []common.BlockData { - blockData, err := m.storage.MainStorage.GetFullBlockData(m.rpcClient.GetChainID(), blockNumbers) +func (m *Migrator) GetValidBlocksForRange(blockNumbers []*big.Int) ([]common.BlockData, error) { + getFullBlockTime := time.Now() + blockData, err := m.source.MainStorage.GetFullBlockData(m.rpcClient.GetChainID(), blockNumbers) + getFullBlockDuration := time.Since(getFullBlockTime) if err != nil { - log.Fatal().Err(err).Msg("Failed to get full block data") + log.Error().Err(err).Msg("Failed to get full block data") + return nil, err } + validateBlockTime := time.Now() validBlocks, _, err := m.validator.ValidateBlocks(blockData) + validateBlockDuration := time.Since(validateBlockTime) if err != nil { - log.Fatal().Err(err).Msg("Failed to validate blocks") + log.Error().Err(err).Msg("Failed to validate blocks") + return nil, err } - return validBlocks + + log.Debug().Dur("get_full_block", getFullBlockDuration).Dur("validate_block", validateBlockDuration).Int("count", len(blockNumbers)).Msg("Get valid blocks for range") + return validBlocks, nil } func (m *Migrator) GetValidBlocksFromRPC(blockNumbers 
[]*big.Int) []common.BlockData { @@ -282,7 +360,15 @@ func validateRPC(rpcClient rpc.IRPCClient, s storage.IStorage) (bool, error) { } func generateBlockNumbersForRange(startBlock, endBlock *big.Int) []*big.Int { - blockNumbers := make([]*big.Int, 0) + if startBlock.Cmp(endBlock) > 0 { + return []*big.Int{} + } + + // Pre-calculate capacity to avoid slice growth + length := new(big.Int).Sub(endBlock, startBlock) + length.Add(length, big.NewInt(1)) + + blockNumbers := make([]*big.Int, 0, length.Int64()) for i := new(big.Int).Set(startBlock); i.Cmp(endBlock) <= 0; i.Add(i, big.NewInt(1)) { blockNumbers = append(blockNumbers, new(big.Int).Set(i)) } diff --git a/cmd/root.go b/cmd/root.go index d9548fb..479d4f4 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -83,6 +83,7 @@ func init() { rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for orchestrator storage") rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-disableTLS", false, "Clickhouse disableTLS for orchestrator storage") rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for orchestrator storage") + rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-enableCompression", false, "Clickhouse enableCompression for orchestrator storage") rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxQueryTime", 60, "Clickhouse max query time for orchestrator storage") rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxMemoryUsage", 1000000000, "Clickhouse max memory usage in bytes for orchestrator storage") rootCmd.PersistentFlags().String("storage-orchestrator-postgres-host", "", "PostgreSQL host for orchestrator storage") @@ -105,6 +106,7 @@ func init() { rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for main storage") rootCmd.PersistentFlags().Bool("storage-main-clickhouse-disableTLS", false, "Clickhouse disableTLS for main storage") rootCmd.PersistentFlags().Bool("storage-main-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for main storage") + rootCmd.PersistentFlags().Bool("storage-main-clickhouse-enableCompression", false, "Clickhouse enableCompression for main storage") rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxQueryTime", 60, "Clickhouse max query time for main storage") rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxMemoryUsage", 1000000000, "Clickhouse max memory usage in bytes for main storage") rootCmd.PersistentFlags().String("storage-staging-clickhouse-username", "", "Clickhouse username for staging storage") @@ -115,6 +117,7 @@ func init() { rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for staging storage") rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-disableTLS", false, "Clickhouse disableTLS for staging storage") rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for staging storage") + rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-enableCompression", false, "Clickhouse enableCompression for staging storage") rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxQueryTime", 60, "Clickhouse max query time for staging storage") rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxMemoryUsage", 1000000000, 
"Clickhouse max memory usage in bytes for staging storage") rootCmd.PersistentFlags().String("storage-staging-postgres-host", "", "PostgreSQL host for staging storage") @@ -135,15 +138,11 @@ func init() { rootCmd.PersistentFlags().Int("storage-main-kafka-redis-port", 6379, "Redis port for Kafka main storage metadata") rootCmd.PersistentFlags().String("storage-main-kafka-redis-password", "", "Redis password for Kafka main storage metadata") rootCmd.PersistentFlags().Int("storage-main-kafka-redis-db", 0, "Redis database number for Kafka main storage metadata") - // Storage type selection flags rootCmd.PersistentFlags().String("storage-staging-type", "auto", "Storage type for staging (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-main-type", "auto", "Storage type for main (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-orchestrator-type", "auto", "Storage type for orchestrator (auto, clickhouse, postgres, badger)") - // BadgerDB flags for staging storage rootCmd.PersistentFlags().String("storage-staging-badger-path", "", "BadgerDB path for staging storage") - // BadgerDB flags for orchestrator storage rootCmd.PersistentFlags().String("storage-orchestrator-badger-path", "", "BadgerDB path for orchestrator storage") - // S3 flags for main storage rootCmd.PersistentFlags().String("storage-main-s3-bucket", "", "S3 bucket for main storage") rootCmd.PersistentFlags().String("storage-main-s3-region", "", "S3 region for main storage") rootCmd.PersistentFlags().String("storage-main-s3-prefix", "", "S3 key prefix for main storage") @@ -154,7 +153,6 @@ func init() { rootCmd.PersistentFlags().Int64("storage-main-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for main storage") rootCmd.PersistentFlags().Int("storage-main-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for main storage") rootCmd.PersistentFlags().Int("storage-main-s3-maxBlocksPerFile", 0, "S3 max blocks per file for main storage (0 = no limit)") - // S3 Parquet configuration rootCmd.PersistentFlags().String("storage-main-s3-parquet-compression", "snappy", "Parquet compression type for S3 main storage") rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-rowGroupSize", 256, "Parquet row group size in MB for S3 main storage") rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-pageSize", 8192, "Parquet page size in KB for S3 main storage") @@ -189,6 +187,47 @@ func init() { rootCmd.PersistentFlags().Int("workMode-checkIntervalMinutes", 10, "How often to check work mode in minutes") rootCmd.PersistentFlags().Int64("workMode-liveModeThreshold", 500, "How many blocks the indexer can be behind before switching to live mode") rootCmd.PersistentFlags().String("validation-mode", "strict", "Validation mode. Strict will validate logsBloom and transactionsRoot. 
Minimal will validate transaction count and logs existence.") + rootCmd.PersistentFlags().String("migrator-destination-type", "auto", "Storage type for migrator destination (auto, clickhouse, postgres, kafka, badger, s3)") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-host", "", "Clickhouse host for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-port", 0, "Clickhouse port for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-username", "", "Clickhouse username for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-password", "", "Clickhouse password for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-database", "", "Clickhouse database for migrator destination") + rootCmd.PersistentFlags().Bool("migrator-destination-clickhouse-disableTLS", false, "Clickhouse disableTLS for migrator destination") + rootCmd.PersistentFlags().Bool("migrator-destination-clickhouse-asyncInsert", false, "Clickhouse async insert for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxRowsPerInsert", 100000, "Clickhouse max rows per insert for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxOpenConns", 30, "Clickhouse max open connections for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-host", "", "PostgreSQL host for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-port", 5432, "PostgreSQL port for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-username", "", "PostgreSQL username for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-password", "", "PostgreSQL password for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-database", "", "PostgreSQL database for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-sslMode", "require", "PostgreSQL SSL mode for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxOpenConns", 50, "PostgreSQL max open connections for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxIdleConns", 25, "PostgreSQL max idle connections for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-kafka-brokers", "", "Kafka brokers for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-kafka-username", "", "Kafka username for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-kafka-password", "", "Kafka password for migrator destination") + rootCmd.PersistentFlags().Bool("migrator-destination-kafka-enableTLS", true, "Enable TLS for Kafka connection in migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-badger-path", "", "BadgerDB path for migrator destination") + 
rootCmd.PersistentFlags().String("migrator-destination-s3-bucket", "", "S3 bucket for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-region", "", "S3 region for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-prefix", "", "S3 key prefix for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-accessKeyId", "", "S3 access key ID for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-secretAccessKey", "", "S3 secret access key for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-endpoint", "", "S3 endpoint URL for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-format", "parquet", "S3 storage format for migrator destination") + rootCmd.PersistentFlags().Int64("migrator-destination-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-s3-maxBlocksPerFile", 0, "S3 max blocks per file for migrator destination") + rootCmd.PersistentFlags().Uint("migrator-storageBatchSize", 2000, "Batch size for storage operations in migrator") + rootCmd.PersistentFlags().Uint("migrator-rpcBatchSize", 100, "Batch size for RPC operations in migrator") + rootCmd.PersistentFlags().Uint("migrator-startBlock", 0, "Start block for migration") + rootCmd.PersistentFlags().Uint("migrator-endBlock", 0, "End block for migration") + viper.BindPFlag("rpc.url", rootCmd.PersistentFlags().Lookup("rpc-url")) viper.BindPFlag("rpc.blocks.blocksPerRequest", rootCmd.PersistentFlags().Lookup("rpc-blocks-blocksPerRequest")) viper.BindPFlag("rpc.blocks.batchDelay", rootCmd.PersistentFlags().Lookup("rpc-blocks-batchDelay")) @@ -232,6 +271,7 @@ func init() { viper.BindPFlag("storage.staging.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxIdleConns")) viper.BindPFlag("storage.staging.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-disableTLS")) viper.BindPFlag("storage.staging.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-enableParallelViewProcessing")) + viper.BindPFlag("storage.staging.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-enableCompression")) viper.BindPFlag("storage.staging.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxQueryTime")) viper.BindPFlag("storage.staging.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxMemoryUsage")) viper.BindPFlag("storage.main.clickhouse.database", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-database")) @@ -245,6 +285,7 @@ func init() { viper.BindPFlag("storage.main.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxIdleConns")) viper.BindPFlag("storage.main.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-disableTLS")) viper.BindPFlag("storage.main.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-enableParallelViewProcessing")) + viper.BindPFlag("storage.main.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-enableCompression")) 
viper.BindPFlag("storage.main.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxQueryTime")) viper.BindPFlag("storage.main.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxMemoryUsage")) viper.BindPFlag("storage.orchestrator.clickhouse.database", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-database")) @@ -258,6 +299,7 @@ func init() { viper.BindPFlag("storage.orchestrator.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxIdleConns")) viper.BindPFlag("storage.orchestrator.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-disableTLS")) viper.BindPFlag("storage.orchestrator.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-enableParallelViewProcessing")) + viper.BindPFlag("storage.orchestrator.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-enableCompression")) viper.BindPFlag("storage.orchestrator.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxQueryTime")) viper.BindPFlag("storage.orchestrator.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxMemoryUsage")) viper.BindPFlag("storage.orchestrator.postgres.host", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-host")) @@ -337,6 +379,47 @@ func init() { viper.BindPFlag("workMode.checkIntervalMinutes", rootCmd.PersistentFlags().Lookup("workMode-checkIntervalMinutes")) viper.BindPFlag("workMode.liveModeThreshold", rootCmd.PersistentFlags().Lookup("workMode-liveModeThreshold")) viper.BindPFlag("validation.mode", rootCmd.PersistentFlags().Lookup("validation-mode")) + // Migrator viper bindings + viper.BindPFlag("migrator.destination.type", rootCmd.PersistentFlags().Lookup("migrator-destination-type")) + viper.BindPFlag("migrator.destination.clickhouse.host", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-host")) + viper.BindPFlag("migrator.destination.clickhouse.port", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-port")) + viper.BindPFlag("migrator.destination.clickhouse.username", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-username")) + viper.BindPFlag("migrator.destination.clickhouse.password", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-password")) + viper.BindPFlag("migrator.destination.clickhouse.database", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-database")) + viper.BindPFlag("migrator.destination.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-disableTLS")) + viper.BindPFlag("migrator.destination.clickhouse.asyncInsert", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-asyncInsert")) + viper.BindPFlag("migrator.destination.clickhouse.maxRowsPerInsert", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxRowsPerInsert")) + viper.BindPFlag("migrator.destination.clickhouse.maxOpenConns", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxOpenConns")) + viper.BindPFlag("migrator.destination.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxIdleConns")) + viper.BindPFlag("migrator.destination.postgres.host", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-host")) + 
viper.BindPFlag("migrator.destination.postgres.port", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-port")) + viper.BindPFlag("migrator.destination.postgres.username", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-username")) + viper.BindPFlag("migrator.destination.postgres.password", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-password")) + viper.BindPFlag("migrator.destination.postgres.database", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-database")) + viper.BindPFlag("migrator.destination.postgres.sslMode", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-sslMode")) + viper.BindPFlag("migrator.destination.postgres.maxOpenConns", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxOpenConns")) + viper.BindPFlag("migrator.destination.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxIdleConns")) + viper.BindPFlag("migrator.destination.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxConnLifetime")) + viper.BindPFlag("migrator.destination.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-connectTimeout")) + viper.BindPFlag("migrator.destination.kafka.brokers", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-brokers")) + viper.BindPFlag("migrator.destination.kafka.username", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-username")) + viper.BindPFlag("migrator.destination.kafka.password", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-password")) + viper.BindPFlag("migrator.destination.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-enableTLS")) + viper.BindPFlag("migrator.destination.badger.path", rootCmd.PersistentFlags().Lookup("migrator-destination-badger-path")) + viper.BindPFlag("migrator.destination.s3.bucket", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bucket")) + viper.BindPFlag("migrator.destination.s3.region", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-region")) + viper.BindPFlag("migrator.destination.s3.prefix", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-prefix")) + viper.BindPFlag("migrator.destination.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-accessKeyId")) + viper.BindPFlag("migrator.destination.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-secretAccessKey")) + viper.BindPFlag("migrator.destination.s3.endpoint", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-endpoint")) + viper.BindPFlag("migrator.destination.s3.format", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-format")) + viper.BindPFlag("migrator.destination.s3.bufferSizeMB", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bufferSizeMB")) + viper.BindPFlag("migrator.destination.s3.bufferTimeoutSeconds", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bufferTimeoutSeconds")) + viper.BindPFlag("migrator.destination.s3.maxBlocksPerFile", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-maxBlocksPerFile")) + viper.BindPFlag("migrator.startBlock", rootCmd.PersistentFlags().Lookup("migrator-startBlock")) + viper.BindPFlag("migrator.endBlock", rootCmd.PersistentFlags().Lookup("migrator-endBlock")) + viper.BindPFlag("migrator.storageBatchSize", rootCmd.PersistentFlags().Lookup("migrator-storageBatchSize")) + viper.BindPFlag("migrator.rpcBatchSize", 
rootCmd.PersistentFlags().Lookup("migrator-rpcBatchSize")) rootCmd.AddCommand(orchestratorCmd) rootCmd.AddCommand(apiCmd) rootCmd.AddCommand(validateAndFixCmd) diff --git a/configs/config.go b/configs/config.go index e92a4b6..226fa9d 100644 --- a/configs/config.go +++ b/configs/config.go @@ -52,13 +52,6 @@ type StorageConfig struct { Main StorageConnectionConfig `mapstructure:"main"` Orchestrator StorageConnectionConfig `mapstructure:"orchestrator"` } -type StorageType string - -const ( - StorageTypeMain StorageType = "main" - StorageTypeStaging StorageType = "staging" - StorageTypeOrchestrator StorageType = "orchestrator" -) type StorageConnectionConfig struct { Type string `mapstructure:"type"` // "auto", "clickhouse", "postgres", "kafka", "badger", "s3" @@ -116,6 +109,7 @@ type ClickhouseConfig struct { EnableParallelViewProcessing bool `mapstructure:"enableParallelViewProcessing"` MaxQueryTime int `mapstructure:"maxQueryTime"` MaxMemoryUsage int `mapstructure:"maxMemoryUsage"` + EnableCompression bool `mapstructure:"enableCompression"` } type PostgresConfig struct { @@ -238,6 +232,14 @@ type ValidationConfig struct { Mode string `mapstructure:"mode"` // "disabled", "minimal", "strict" } +type MigratorConfig struct { + Destination StorageConnectionConfig `mapstructure:"destination"` + StartBlock uint `mapstructure:"startBlock"` + EndBlock uint `mapstructure:"endBlock"` + StorageBatchSize uint `mapstructure:"storageBatchSize"` + RpcBatchSize uint `mapstructure:"rpcBatchSize"` +} + type Config struct { RPC RPCConfig `mapstructure:"rpc"` Log LogConfig `mapstructure:"log"` @@ -250,6 +252,7 @@ type Config struct { Publisher PublisherConfig `mapstructure:"publisher"` WorkMode WorkModeConfig `mapstructure:"workMode"` Validation ValidationConfig `mapstructure:"validation"` + Migrator MigratorConfig `mapstructure:"migrator"` } var Cfg Config diff --git a/internal/common/block.go b/internal/common/block.go index 83a5bf0..f0cd019 100644 --- a/internal/common/block.go +++ b/internal/common/block.go @@ -59,7 +59,6 @@ type BlockModel struct { } type BlockData struct { - ChainId uint64 `json:"chain_id"` Block Block `json:"block"` Transactions []Transaction `json:"transactions"` Logs []Log `json:"logs"` @@ -103,7 +102,6 @@ func (b *Block) Serialize() BlockModel { func (b *BlockData) Serialize() BlockData { data := BlockData{ - ChainId: b.ChainId, Block: b.Block, Transactions: b.Transactions, Logs: b.Logs, diff --git a/internal/orchestrator/failure_recoverer.go b/internal/orchestrator/failure_recoverer.go index a097034..da1ae91 100644 --- a/internal/orchestrator/failure_recoverer.go +++ b/internal/orchestrator/failure_recoverer.go @@ -110,7 +110,6 @@ func (fr *FailureRecoverer) handleWorkerResults(blockFailures []common.BlockFail }) } else { successfulResults = append(successfulResults, common.BlockData{ - ChainId: fr.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index b46fc34..5045dc1 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -261,7 +261,6 @@ func (p *Poller) convertPollResultsToBlockData(results []rpc.GetFullBlockResult) blockData := make([]common.BlockData, 0, len(successfulResults)) for _, result := range successfulResults { blockData = append(blockData, common.BlockData{ - ChainId: p.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff 
--git a/internal/orchestrator/reorg_handler.go b/internal/orchestrator/reorg_handler.go index 889801c..2de8b95 100644 --- a/internal/orchestrator/reorg_handler.go +++ b/internal/orchestrator/reorg_handler.go @@ -274,7 +274,6 @@ func (rh *ReorgHandler) handleReorg(ctx context.Context, reorgedBlockNumbers []* return fmt.Errorf("cannot fix reorg: failed block %s: %w", result.BlockNumber.String(), result.Error) } data = append(data, common.BlockData{ - ChainId: rh.rpc.GetChainID().Uint64(), Block: result.Data.Block, Logs: result.Data.Logs, Transactions: result.Data.Transactions, diff --git a/internal/storage/badger.go b/internal/storage/badger.go index 1ffd431..3c1305d 100644 --- a/internal/storage/badger.go +++ b/internal/storage/badger.go @@ -237,7 +237,7 @@ func (bc *BadgerConnector) InsertStagingData(data []common.BlockData) error { return bc.db.Update(func(txn *badger.Txn) error { for _, blockData := range data { - key := blockKey(big.NewInt(int64(blockData.ChainId)), blockData.Block.Number) + key := blockKey(blockData.Block.ChainId, blockData.Block.Number) var buf bytes.Buffer if err := gob.NewEncoder(&buf).Encode(blockData); err != nil { @@ -348,7 +348,7 @@ func (bc *BadgerConnector) DeleteStagingData(data []common.BlockData) error { return bc.db.Update(func(txn *badger.Txn) error { for _, blockData := range data { - key := blockKey(big.NewInt(int64(blockData.ChainId)), blockData.Block.Number) + key := blockKey(blockData.Block.ChainId, blockData.Block.Number) if err := txn.Delete(key); err != nil && err != badger.ErrKeyNotFound { return err } diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go index 9ea97ce..d3319c6 100644 --- a/internal/storage/clickhouse.go +++ b/internal/storage/clickhouse.go @@ -107,6 +107,14 @@ func connectDB(cfg *config.ClickhouseConfig) (clickhouse.Conn, error) { }, MaxOpenConns: cfg.MaxOpenConns, MaxIdleConns: cfg.MaxIdleConns, + Compression: func() *clickhouse.Compression { + c := &clickhouse.Compression{} + if cfg.EnableCompression { + zLog.Debug().Msg("ClickHouse LZ4 compression is enabled") + c.Method = clickhouse.CompressionLZ4 + } + return c + }(), Settings: func() clickhouse.Settings { settings := clickhouse.Settings{ "do_not_merge_across_partitions_select_final": "1", @@ -901,6 +909,19 @@ func (c *ClickHouseConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBl return maxBlockNumber, nil } +func (c *ClickHouseConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blockCount *big.Int, err error) { + tableName := c.getTableName(chainId, "blocks") + query := fmt.Sprintf("SELECT COUNT(DISTINCT block_number) FROM %s.%s WHERE chain_id = ? AND block_number >= ? AND block_number <= ?", c.cfg.Database, tableName) + err = c.conn.QueryRow(context.Background(), query, chainId, startBlock, endBlock).Scan(&blockCount) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + return blockCount, nil +} + func (c *ClickHouseConnector) getMaxBlockNumberConsistent(chainId *big.Int) (maxBlockNumber *big.Int, err error) { tableName := c.getTableName(chainId, "blocks") query := fmt.Sprintf("SELECT block_number FROM %s.%s WHERE chain_id = ? 
ORDER BY block_number DESC LIMIT 1 SETTINGS select_sequential_consistency = 1", c.cfg.Database, tableName) @@ -1976,7 +1997,6 @@ func (c *ClickHouseConnector) GetValidationBlockData(chainId *big.Int, startBloc for i, block := range blocksResult.blocks { blockNum := block.Number.String() blockData[i] = common.BlockData{ - ChainId: chainId.Uint64(), Block: block, Logs: logsResult.logMap[blockNum], Transactions: txsResult.txMap[blockNum], @@ -2156,7 +2176,6 @@ func (c *ClickHouseConnector) GetFullBlockData(chainId *big.Int, blockNumbers [] for i, block := range blocksResult.blocks { blockNum := block.Number.String() blockData[i] = common.BlockData{ - ChainId: chainId.Uint64(), Block: block, Logs: logsResult.logMap[blockNum], Transactions: txsResult.txMap[blockNum], diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 4b962af..dc23d9b 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -130,6 +130,8 @@ type IMainStorage interface { GetMaxBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (maxBlockNumber *big.Int, err error) + GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blockCount *big.Int, err error) + /** * Get block headers ordered from latest to oldest. */ @@ -175,13 +177,13 @@ func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { var conn interface{} var err error - + // Default to "auto" if Type is not specified storageType := cfg.Type if storageType == "" { storageType = "auto" } - + // Handle explicit type selection if storageType != "auto" { switch storageType { diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go index 90f3ca3..72dc96f 100644 --- a/internal/storage/kafka_publisher.go +++ b/internal/storage/kafka_publisher.go @@ -18,9 +18,8 @@ import ( ) type KafkaPublisher struct { - client *kgo.Client - mu sync.RWMutex - chainID string + client *kgo.Client + mu sync.RWMutex } type MessageType string @@ -37,6 +36,7 @@ type PublishableMessagePayload struct { type PublishableMessageBlockData struct { common.BlockData + ChainId uint64 `json:"chain_id"` IsDeleted int8 `json:"is_deleted"` InsertTimestamp time.Time `json:"insert_timestamp"` } @@ -104,8 +104,7 @@ func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { } publisher := &KafkaPublisher{ - client: client, - chainID: chainID, + client: client, } return publisher, nil @@ -116,9 +115,10 @@ func (p *KafkaPublisher) PublishBlockData(blockData []common.BlockData) error { } func (p *KafkaPublisher) PublishReorg(oldData []common.BlockData, newData []common.BlockData) error { + chainId := newData[0].Block.ChainId.Uint64() newHead := uint64(newData[0].Block.Number.Uint64()) // Publish revert the revert to the new head - 1, so that the new updated block data can be re-processed - if err := p.publishBlockRevert(newData[0].ChainId, newHead-1); err != nil { + if err := p.publishBlockRevert(chainId, newHead-1); err != nil { return fmt.Errorf("failed to revert: %v", err) } @@ -233,6 +233,7 @@ func (p *KafkaPublisher) createBlockDataMessage(block common.BlockData, isDelete data := PublishableMessageBlockData{ BlockData: block, + ChainId: block.Block.ChainId.Uint64(), IsDeleted: 0, InsertTimestamp: timestamp, } @@ -251,7 +252,7 @@ func (p *KafkaPublisher) createBlockDataMessage(block common.BlockData, isDelete return nil, 
fmt.Errorf("failed to marshal block data: %v", err) } - return p.createRecord(data.GetType(), block.ChainId, block.Block.Number.Uint64(), timestamp, msgJson) + return p.createRecord(data.GetType(), data.ChainId, block.Block.Number.Uint64(), timestamp, msgJson) } func (p *KafkaPublisher) createBlockRevertMessage(chainId uint64, blockNumber uint64) (*kgo.Record, error) { diff --git a/internal/storage/kafka_redis.go b/internal/storage/kafka_redis.go index 05d294c..030fa21 100644 --- a/internal/storage/kafka_redis.go +++ b/internal/storage/kafka_redis.go @@ -212,26 +212,11 @@ func (kr *KafkaRedisConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, er } func (kr *KafkaRedisConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - // Get the last published block number - lastPublished, err := kr.GetLastPublishedBlockNumber(chainId) - if err != nil { - return nil, err - } - - // Check if it's within the range - if lastPublished.Cmp(startBlock) >= 0 && lastPublished.Cmp(endBlock) <= 0 { - return lastPublished, nil - } - - // If outside range, return appropriate boundary - if lastPublished.Cmp(endBlock) > 0 { - return endBlock, nil - } - if lastPublished.Cmp(startBlock) < 0 { - return big.NewInt(0), nil - } + return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") +} - return lastPublished, nil +func (kr *KafkaRedisConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") } func (kr *KafkaRedisConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { diff --git a/internal/storage/s3.go b/internal/storage/s3.go index 8a75c65..e3e4038 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -48,7 +48,7 @@ type DataFormatter interface { // ParquetBlockData represents the complete block data in Parquet format type ParquetBlockData struct { - ChainID uint64 `parquet:"chain_id"` + ChainId uint64 `parquet:"chain_id"` BlockNumber uint64 `parquet:"block_number"` // Numeric for efficient min/max queries BlockHash string `parquet:"block_hash"` BlockTimestamp int64 `parquet:"block_timestamp"` @@ -235,7 +235,8 @@ func (s *S3Connector) flushBuffer() error { // Group blocks by chain to generate appropriate keys chainGroups := make(map[uint64][]common.BlockData) for _, block := range data { - chainGroups[block.ChainId] = append(chainGroups[block.ChainId], block) + chainId := block.Block.ChainId.Uint64() + chainGroups[chainId] = append(chainGroups[chainId], block) } for _, blocks := range chainGroups { @@ -295,8 +296,8 @@ func (s *S3Connector) Flush() error { select { case <-s.flushDoneCh: return nil - case <-time.After(30 * time.Second): - return fmt.Errorf("flush timeout after 30 seconds") + case <-time.After(60 * time.Second): + return fmt.Errorf("flush timeout after 60 seconds") } default: // Flush channel is full, likely a flush is already in progress @@ -304,8 +305,8 @@ func (s *S3Connector) Flush() error { select { case <-s.flushDoneCh: return nil - case <-time.After(30 * time.Second): - return fmt.Errorf("flush timeout after 30 seconds") + case <-time.After(60 * time.Second): + return fmt.Errorf("flush timeout after 60 seconds") } } } @@ -331,7 +332,7 @@ func (s *S3Connector) uploadBatch(data []common.BlockData) error { 
return nil } - chainID := data[0].ChainId + chainId := data[0].Block.ChainId.Uint64() startBlock := data[0].Block.Number endBlock := data[len(data)-1].Block.Number // Use the first block's timestamp for year partitioning @@ -344,7 +345,7 @@ func (s *S3Connector) uploadBatch(data []common.BlockData) error { } // Generate S3 key with chain_id/year partitioning based on block timestamp - key := s.generateS3Key(chainID, startBlock, endBlock, blockTimestamp) + key := s.generateS3Key(chainId, startBlock, endBlock, blockTimestamp) // Upload to S3 ctx := context.Background() @@ -354,7 +355,7 @@ func (s *S3Connector) uploadBatch(data []common.BlockData) error { Body: bytes.NewReader(formattedData), ContentType: aws.String(s.formatter.GetContentType()), Metadata: map[string]string{ - "chain_id": fmt.Sprintf("%d", chainID), + "chain_id": fmt.Sprintf("%d", chainId), "start_block": startBlock.String(), "end_block": endBlock.String(), "block_count": fmt.Sprintf("%d", len(data)), @@ -369,7 +370,7 @@ func (s *S3Connector) uploadBatch(data []common.BlockData) error { } log.Info(). - Uint64("chain_id", chainID). + Uint64("chain_id", chainId). Str("min_block", startBlock.String()). Str("max_block", endBlock.String()). Int("block_count", len(data)). @@ -458,7 +459,7 @@ func (f *ParquetFormatter) FormatBlockData(data []common.BlockData) ([]byte, err } pd := ParquetBlockData{ - ChainID: d.ChainId, + ChainId: d.Block.ChainId.Uint64(), BlockNumber: blockNum, BlockHash: d.Block.Hash, BlockTimestamp: d.Block.Timestamp.Unix(), @@ -571,7 +572,7 @@ func (s *S3Connector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { // First check the buffer for blocks from this chain s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() && block.Block.Number.Cmp(maxBlock) > 0 { + if block.Block.ChainId.Cmp(chainId) == 0 && block.Block.Number.Cmp(maxBlock) > 0 { maxBlock = new(big.Int).Set(block.Block.Number) } } @@ -612,14 +613,99 @@ func (s *S3Connector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { func (s *S3Connector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { maxBlock := big.NewInt(0) + foundAny := false // First check the buffer for blocks in this range s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() { + if block.Block.ChainId.Cmp(chainId) == 0 { blockNum := block.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 && blockNum.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(blockNum) + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + if !foundAny || blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) + foundAny = true + } + } + } + } + s.bufferMu.Unlock() + + // Then check S3 files + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil { + continue + } + fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) + if fileStart == nil || fileEnd == nil { + continue + } + + // Check if this file overlaps with our range + if 
fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { + // The maximum block in this file that's within our range + maxInFile := new(big.Int).Set(fileEnd) + if maxInFile.Cmp(endBlock) > 0 { + maxInFile = endBlock + } + + if !foundAny || maxInFile.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(maxInFile) + foundAny = true + } + } + } + } + + if !foundAny { + return big.NewInt(0), nil + } + + return maxBlock, nil +} + +func (s *S3Connector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + minBlock := big.NewInt(0) + maxBlock := big.NewInt(0) + count := big.NewInt(0) + foundAny := false + + // First check the buffer for blocks in this range + s.bufferMu.Lock() + for _, block := range s.buffer { + if block.Block.ChainId.Cmp(chainId) == 0 { + blockNum := block.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + count.Add(count, big.NewInt(1)) + + if !foundAny { + minBlock = new(big.Int).Set(blockNum) + maxBlock = new(big.Int).Set(blockNum) + foundAny = true + } else { + if blockNum.Cmp(minBlock) < 0 { + minBlock = new(big.Int).Set(blockNum) + } + if blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) + } + } } } } @@ -654,19 +740,40 @@ func (s *S3Connector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big // Check if this file overlaps with our range if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { - // File overlaps with our range + // Calculate the effective range within our query bounds + effectiveStart := new(big.Int).Set(fileStart) + if effectiveStart.Cmp(startBlock) < 0 { + effectiveStart = startBlock + } effectiveEnd := new(big.Int).Set(fileEnd) if effectiveEnd.Cmp(endBlock) > 0 { effectiveEnd = endBlock } - if effectiveEnd.Cmp(maxBlock) > 0 { - maxBlock = effectiveEnd + + // Update min/max blocks + if !foundAny { + minBlock = new(big.Int).Set(effectiveStart) + maxBlock = new(big.Int).Set(effectiveEnd) + foundAny = true + } else { + if effectiveStart.Cmp(minBlock) < 0 { + minBlock = new(big.Int).Set(effectiveStart) + } + if effectiveEnd.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(effectiveEnd) + } } + + // Add the count of blocks in this file's overlapping range + // Note: This assumes contiguous blocks in the file + blocksInRange := new(big.Int).Sub(effectiveEnd, effectiveStart) + blocksInRange.Add(blocksInRange, big.NewInt(1)) // Add 1 because range is inclusive + count.Add(count, blocksInRange) } } } - return maxBlock, nil + return count, nil } func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { @@ -675,7 +782,7 @@ func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, // First get headers from buffer s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() { + if block.Block.ChainId.Cmp(chainId) == 0 { // Check if block is in range (if from is specified) if from != nil && block.Block.Number.Cmp(from) > 0 { continue @@ -738,7 +845,7 @@ func (s *S3Connector) GetValidationBlockData(chainId *big.Int, startBlock *big.I // First check buffer for blocks in range s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() { + if block.Block.ChainId.Cmp(chainId) == 0 { blockNum := block.Block.Number if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { blockData = append(blockData, block) @@ -777,7 +884,7 @@ func (s *S3Connector) FindMissingBlockNumbers(chainId *big.Int, startBlock 
*big. // First add blocks from buffer s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() { + if block.Block.ChainId.Cmp(chainId) == 0 { blockNum := block.Block.Number if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { blockSet[blockNum.String()] = true @@ -833,7 +940,7 @@ func (s *S3Connector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int // First check buffer for requested blocks s.bufferMu.Lock() for _, block := range s.buffer { - if block.ChainId == chainId.Uint64() { + if block.Block.ChainId.Cmp(chainId) == 0 { if blockNumMap[block.Block.Number.String()] { result = append(result, block) // Remove from map so we don't fetch it from S3 @@ -1039,7 +1146,6 @@ func (s *S3Connector) downloadAndParseFile(key string, chainId *big.Int, startBl } blockData = append(blockData, common.BlockData{ - ChainId: pd.ChainID, Block: block, Transactions: transactions, Logs: logs, diff --git a/test/mocks/MockIMainStorage.go b/test/mocks/MockIMainStorage.go index 679345c..a77c398 100644 --- a/test/mocks/MockIMainStorage.go +++ b/test/mocks/MockIMainStorage.go @@ -462,66 +462,6 @@ func (_c *MockIMainStorage_GetMaxBlockNumber_Call) RunAndReturn(run func(*big.In return _c } -// GetMaxBlockNumberInRange provides a mock function with given fields: chainId, startBlock, endBlock -func (_m *MockIMainStorage) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - ret := _m.Called(chainId, startBlock, endBlock) - - if len(ret) == 0 { - panic("no return value specified for GetMaxBlockNumberInRange") - } - - var r0 *big.Int - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) (*big.Int, error)); ok { - return rf(chainId, startBlock, endBlock) - } - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) *big.Int); ok { - r0 = rf(chainId, startBlock, endBlock) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*big.Int) - } - } - - if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { - r1 = rf(chainId, startBlock, endBlock) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetMaxBlockNumberInRange_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetMaxBlockNumberInRange' -type MockIMainStorage_GetMaxBlockNumberInRange_Call struct { - *mock.Call -} - -// GetMaxBlockNumberInRange is a helper method to define mock.On call -// - chainId *big.Int -// - startBlock *big.Int -// - endBlock *big.Int -func (_e *MockIMainStorage_Expecter) GetMaxBlockNumberInRange(chainId interface{}, startBlock interface{}, endBlock interface{}) *MockIMainStorage_GetMaxBlockNumberInRange_Call { - return &MockIMainStorage_GetMaxBlockNumberInRange_Call{Call: _e.mock.On("GetMaxBlockNumberInRange", chainId, startBlock, endBlock)} -} - -func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) Run(run func(chainId *big.Int, startBlock *big.Int, endBlock *big.Int)) *MockIMainStorage_GetMaxBlockNumberInRange_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) - }) - return _c -} - -func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) Return(maxBlockNumber *big.Int, err error) *MockIMainStorage_GetMaxBlockNumberInRange_Call { - _c.Call.Return(maxBlockNumber, err) - return _c -} - -func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) (*big.Int, error)) 
*MockIMainStorage_GetMaxBlockNumberInRange_Call { - _c.Call.Return(run) - return _c -} - // GetTokenBalances provides a mock function with given fields: qf, fields func (_m *MockIMainStorage) GetTokenBalances(qf storage.BalancesQueryFilter, fields ...string) (storage.QueryResult[common.TokenBalance], error) { _va := make([]interface{}, len(fields)) From bddbf54ebaa2af0ae69d084229cb4b1aeacbea73 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 08:08:58 +0000 Subject: [PATCH 23/43] Remove RPC batch config in migrate --- cmd/migrate_valid.go | 12 +++--------- cmd/root.go | 6 ++---- configs/config.go | 9 ++++----- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index d8d34db..28088ed 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -30,8 +30,7 @@ var ( ) const ( - DEFAULT_RPC_BATCH_SIZE = 100 - DEFAULT_BATCH_SIZE = 2000 + DEFAULT_BATCH_SIZE = 2000 ) func RunValidationMigration(cmd *cobra.Command, args []string) { @@ -182,12 +181,8 @@ type Migrator struct { func NewMigrator() *Migrator { batchSize := DEFAULT_BATCH_SIZE - if config.Cfg.Migrator.StorageBatchSize > 0 { - batchSize = int(config.Cfg.Migrator.StorageBatchSize) - } - rpcBatchSize := DEFAULT_RPC_BATCH_SIZE - if config.Cfg.Migrator.RpcBatchSize > 0 { - rpcBatchSize = int(config.Cfg.Migrator.RpcBatchSize) + if config.Cfg.Migrator.BatchSize > 0 { + batchSize = int(config.Cfg.Migrator.BatchSize) } rpcClient, err := rpc.Initialize() @@ -218,7 +213,6 @@ func NewMigrator() *Migrator { return &Migrator{ migrationBatchSize: batchSize, - rpcBatchSize: rpcBatchSize, rpcClient: rpcClient, source: sourceConnector, destination: destinationConnector, diff --git a/cmd/root.go b/cmd/root.go index 479d4f4..a5ac7c9 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -223,8 +223,7 @@ func init() { rootCmd.PersistentFlags().Int64("migrator-destination-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for migrator destination") rootCmd.PersistentFlags().Int("migrator-destination-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for migrator destination") rootCmd.PersistentFlags().Int("migrator-destination-s3-maxBlocksPerFile", 0, "S3 max blocks per file for migrator destination") - rootCmd.PersistentFlags().Uint("migrator-storageBatchSize", 2000, "Batch size for storage operations in migrator") - rootCmd.PersistentFlags().Uint("migrator-rpcBatchSize", 100, "Batch size for RPC operations in migrator") + rootCmd.PersistentFlags().Uint("migrator-batchSize", 2000, "Batch size for storage operations in migrator") rootCmd.PersistentFlags().Uint("migrator-startBlock", 0, "Start block for migration") rootCmd.PersistentFlags().Uint("migrator-endBlock", 0, "End block for migration") @@ -418,8 +417,7 @@ func init() { viper.BindPFlag("migrator.destination.s3.maxBlocksPerFile", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-maxBlocksPerFile")) viper.BindPFlag("migrator.startBlock", rootCmd.PersistentFlags().Lookup("migrator-startBlock")) viper.BindPFlag("migrator.endBlock", rootCmd.PersistentFlags().Lookup("migrator-endBlock")) - viper.BindPFlag("migrator.storageBatchSize", rootCmd.PersistentFlags().Lookup("migrator-storageBatchSize")) - viper.BindPFlag("migrator.rpcBatchSize", rootCmd.PersistentFlags().Lookup("migrator-rpcBatchSize")) + viper.BindPFlag("migrator.batchSize", rootCmd.PersistentFlags().Lookup("migrator-batchSize")) rootCmd.AddCommand(orchestratorCmd) rootCmd.AddCommand(apiCmd) 
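Patch 23 above collapses storageBatchSize and rpcBatchSize into a single migrator.batchSize, which the migration loop later uses to walk the block range in fixed-size chunks. A minimal sketch of that chunked walk over an inclusive [start, end] range with math/big (the function and callback names are illustrative, not the migrator's API):

package main

import (
    "fmt"
    "math/big"
)

// walkRange invokes fn for each [from, to] batch of at most batchSize blocks,
// covering the inclusive range [start, end].
func walkRange(start, end *big.Int, batchSize int64, fn func(from, to *big.Int) error) error {
    for cur := new(big.Int).Set(start); cur.Cmp(end) <= 0; {
        batchEnd := new(big.Int).Add(cur, big.NewInt(batchSize-1))
        if batchEnd.Cmp(end) > 0 {
            batchEnd = new(big.Int).Set(end)
        }
        if err := fn(cur, batchEnd); err != nil {
            return err
        }
        cur = new(big.Int).Add(batchEnd, big.NewInt(1))
    }
    return nil
}

func main() {
    // With batchSize 2000 this visits 0-1999, 2000-3999 and 4000-4999.
    _ = walkRange(big.NewInt(0), big.NewInt(4999), 2000, func(from, to *big.Int) error {
        fmt.Printf("migrating blocks %s to %s\n", from, to)
        return nil
    })
}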
rootCmd.AddCommand(validateAndFixCmd) diff --git a/configs/config.go b/configs/config.go index 226fa9d..f92a3e2 100644 --- a/configs/config.go +++ b/configs/config.go @@ -233,11 +233,10 @@ type ValidationConfig struct { } type MigratorConfig struct { - Destination StorageConnectionConfig `mapstructure:"destination"` - StartBlock uint `mapstructure:"startBlock"` - EndBlock uint `mapstructure:"endBlock"` - StorageBatchSize uint `mapstructure:"storageBatchSize"` - RpcBatchSize uint `mapstructure:"rpcBatchSize"` + Destination StorageConnectionConfig `mapstructure:"destination"` + StartBlock uint `mapstructure:"startBlock"` + EndBlock uint `mapstructure:"endBlock"` + BatchSize uint `mapstructure:"batchSize"` } type Config struct { From 43dad9cb4a1ba5f7084dfececfbb5d91ab6de8b6 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 08:12:11 +0000 Subject: [PATCH 24/43] Cleanup --- cmd/migrate_valid.go | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index 28088ed..f06a43c 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -67,7 +67,7 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { default: } - endBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.migrationBatchSize-1))) + endBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.batchSize-1))) if endBlock.Cmp(rangeEndBlock) > 0 { endBlock = rangeEndBlock } @@ -170,13 +170,12 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { } type Migrator struct { - rpcClient rpc.IRPCClient - worker *worker.Worker - source storage.IStorage - destination storage.IMainStorage - validator *orchestrator.Validator - migrationBatchSize int - rpcBatchSize int + rpcClient rpc.IRPCClient + worker *worker.Worker + source storage.IStorage + destination storage.IMainStorage + validator *orchestrator.Validator + batchSize int } func NewMigrator() *Migrator { @@ -212,12 +211,12 @@ func NewMigrator() *Migrator { } return &Migrator{ - migrationBatchSize: batchSize, - rpcClient: rpcClient, - source: sourceConnector, - destination: destinationConnector, - validator: validator, - worker: worker.NewWorker(rpcClient), + batchSize: batchSize, + rpcClient: rpcClient, + source: sourceConnector, + destination: destinationConnector, + validator: validator, + worker: worker.NewWorker(rpcClient), } } From da31422e11e08c4802355bef7fb9ad1cd11abc06 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 09:06:54 +0000 Subject: [PATCH 25/43] Add from_address, to_address to schema --- .../0010_clickhouse_create_address_transactions.sql | 4 ++++ .../0011_clickhouse_create_address_transactions_mv.sql | 2 ++ .../clickhouse/0012_clickhouse_create_address_transfers.sql | 6 +++++- .../0013_clickhouse_create_address_transfers_mv.sql | 2 ++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql index 11179d7..fa9f55a 100644 --- a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql +++ b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql @@ -8,6 +8,8 @@ CREATE TABLE IF NOT EXISTS address_transactions ( `transaction_index` UInt64, `address` FixedString(42), `address_type` Enum8('from' = 1, 'to' = 2), + `from_address` FixedString(42), + 
`to_address` FixedString(42), `value` UInt256, `gas` UInt64, `gas_price` UInt256, @@ -37,6 +39,8 @@ CREATE TABLE IF NOT EXISTS address_transactions ( INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, PROJECTION address_total_count_projection ( diff --git a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql index 48c4cb2..c5b5ac7 100644 --- a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql +++ b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql @@ -11,6 +11,8 @@ SELECT transaction_index, address_tuple.1 AS address, address_tuple.2 AS address_type, + from_address, + to_address, value, gas, gas_price, diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql index 4b9b864..3803323 100644 --- a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql +++ b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql @@ -5,6 +5,8 @@ CREATE TABLE IF NOT EXISTS address_transfers ( `token_id` UInt256, `address` FixedString(42), `address_type` Enum8('from' = 1, 'to' = 2), + `from_address` FixedString(42), + `to_address` FixedString(42), `block_number` UInt256, `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), `transaction_hash` FixedString(66), @@ -18,7 +20,9 @@ CREATE TABLE IF NOT EXISTS address_transfers ( INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, - + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, + PROJECTION address_state_projection ( SELECT chain_id, diff --git a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql index 9256143..0a7d2cc 100644 --- a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql +++ b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql @@ -8,6 +8,8 @@ SELECT token_id, address_tuple.1 AS address, address_tuple.2 AS address_type, + from_address, + to_address, block_number, block_timestamp, transaction_hash, From b698c18076abdacd9134b95d40c742c473d9ba01 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 17:27:01 +0000 Subject: [PATCH 26/43] Retry with RPC batch size reduction --- cmd/migrate_valid.go | 371 ++++++++++++++++++++++++++++------------ cmd/root.go | 4 +- configs/config.go | 2 +- internal/rpc/batcher.go | 116 +++++++++++++ internal/rpc/rpc.go | 8 +- internal/storage/s3.go | 24 ++- 6 files changed, 400 insertions(+), 125 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index f06a43c..43f2988 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -2,9 +2,11 @@ package cmd import ( "context" + "fmt" "math/big" "os" "os/signal" + "sync" "syscall" "time" @@ -31,6 +33,7 @@ var ( const ( DEFAULT_BATCH_SIZE = 2000 + DEFAULT_WORKERS = 1 ) func RunValidationMigration(cmd *cobra.Command, args []string) { @@ -44,129 +47,235 @@ func RunValidationMigration(cmd 
*cobra.Command, args []string) { migrator := NewMigrator() defer migrator.Close() - rangeStartBlock, rangeEndBlock := migrator.DetermineMigrationBoundaries() + targetEndBlock := big.NewInt(int64(config.Cfg.Migrator.EndBlock)) + targetStartBlock := big.NewInt(int64(config.Cfg.Migrator.StartBlock)) + rangeStartBlock, rangeEndBlock := migrator.DetermineMigrationBoundaries(targetStartBlock, targetEndBlock) log.Info().Msgf("Migrating blocks from %s to %s (both ends inclusive)", rangeStartBlock.String(), rangeEndBlock.String()) - // Run migration in a goroutine - done := make(chan struct{}) - var migrationErr error + // Calculate work distribution for workers + numWorkers := DEFAULT_WORKERS + workRanges := divideBlockRange(rangeStartBlock, rangeEndBlock, numWorkers) + log.Info().Msgf("Starting %d workers to process migration", len(workRanges)) + + // Create error channel and wait group + errChan := make(chan error, numWorkers) + var wg sync.WaitGroup + + // Start workers + for workerID, workRange := range workRanges { + wg.Add(1) + go func(id int, startBlock, endBlock *big.Int) { + defer wg.Done() + + // Only check boundaries per-worker if we have multiple workers + // For single worker, we already determined boundaries globally + var actualStart, actualEnd *big.Int + if numWorkers > 1 { + // Multiple workers: each needs to check their specific range + actualStart, actualEnd = migrator.DetermineMigrationBoundariesForRange(startBlock, endBlock) + if actualStart == nil || actualEnd == nil { + log.Info().Msgf("Worker %d: Range %s to %s already fully migrated", id, startBlock.String(), endBlock.String()) + return + } + log.Info().Msgf("Worker %d starting: blocks %s to %s (adjusted from %s to %s)", + id, actualStart.String(), actualEnd.String(), startBlock.String(), endBlock.String()) + } else { + // Single worker: use the already-determined boundaries + actualStart, actualEnd = startBlock, endBlock + log.Info().Msgf("Worker %d starting: blocks %s to %s", id, actualStart.String(), actualEnd.String()) + } + + if err := processBlockRange(ctx, migrator, id, actualStart, actualEnd); err != nil { + errChan <- err + log.Error().Err(err).Msgf("Worker %d failed", id) + return + } + log.Info().Msgf("Worker %d completed successfully", id) + }(workerID, workRange.start, workRange.end) + } + + // Monitor for completion or interruption + done := make(chan struct{}) go func() { - defer close(done) + wg.Wait() + close(done) + }() - // 2. Start going in loops - for currentBlock := rangeStartBlock; currentBlock.Cmp(rangeEndBlock) <= 0; { - batchStartTime := time.Now() + // Wait for either completion, error, or interrupt signal + select { + case <-done: + log.Info().Msg("All workers completed successfully") + // 3. 
then finally copy partitions from target table to main tables + log.Info().Msg("Migration completed successfully") + case err := <-errChan: + log.Error().Err(err).Msg("Migration failed due to worker error") + cancel() + wg.Wait() + log.Fatal().Msg("Migration stopped due to error") + case sig := <-sigChan: + log.Info().Msgf("Received signal: %s, initiating graceful shutdown...", sig) + cancel() + wg.Wait() + log.Info().Msg("Migration stopped gracefully") + } +} - // Check for cancellation - select { - case <-ctx.Done(): - log.Info().Msgf("Migration interrupted at block %s", currentBlock.String()) - return - default: - } +type blockRange struct { + start *big.Int + end *big.Int +} - endBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.batchSize-1))) - if endBlock.Cmp(rangeEndBlock) > 0 { - endBlock = rangeEndBlock - } +func divideBlockRange(startBlock, endBlock *big.Int, numWorkers int) []blockRange { + ranges := make([]blockRange, 0, numWorkers) - blockNumbers := generateBlockNumbersForRange(currentBlock, endBlock) - log.Info().Msgf("Processing blocks %s to %s", blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) - - // Fetch valid blocks from source - fetchStartTime := time.Now() - validBlocksForRange, err := migrator.GetValidBlocksForRange(blockNumbers) - fetchDuration := time.Since(fetchStartTime) - if err != nil { - // If we got an error fetching valid blocks, we'll continue - log.Error().Err(err).Msg("Failed to get valid blocks for range") - time.Sleep(3 * time.Second) - continue - } - log.Debug().Dur("duration", fetchDuration).Int("blocks_fetched", len(validBlocksForRange)).Msg("Fetched valid blocks from source") + // Calculate total blocks + totalBlocks := new(big.Int).Sub(endBlock, startBlock) + totalBlocks.Add(totalBlocks, big.NewInt(1)) // inclusive range - // Build map of fetched blocks - mapBuildStartTime := time.Now() - blocksToInsertMap := make(map[string]common.BlockData) - for _, blockData := range validBlocksForRange { - blocksToInsertMap[blockData.Block.Number.String()] = blockData - } + // Calculate blocks per worker + blocksPerWorker := new(big.Int).Div(totalBlocks, big.NewInt(int64(numWorkers))) + remainder := new(big.Int).Mod(totalBlocks, big.NewInt(int64(numWorkers))) - // Loop over block numbers to find missing blocks - missingBlocks := make([]*big.Int, 0) - for _, blockNum := range blockNumbers { - if _, exists := blocksToInsertMap[blockNum.String()]; !exists { - missingBlocks = append(missingBlocks, blockNum) - } - } - mapBuildDuration := time.Since(mapBuildStartTime) - log.Debug().Dur("duration", mapBuildDuration).Int("missing_blocks", len(missingBlocks)).Msg("Identified missing blocks") - - // Fetch missing blocks from RPC - if len(missingBlocks) > 0 { - rpcFetchStartTime := time.Now() - validMissingBlocks := migrator.GetValidBlocksFromRPC(missingBlocks) - rpcFetchDuration := time.Since(rpcFetchStartTime) - log.Debug().Dur("duration", rpcFetchDuration).Int("blocks_fetched", len(validMissingBlocks)).Msg("Fetched missing blocks from RPC") - - for _, blockData := range validMissingBlocks { - if blockData.Block.ChainId.Sign() == 0 { - log.Fatal().Msgf("Block %s has chain ID 0, %+v", blockData.Block.Number.String(), blockData.Block) - } - blocksToInsertMap[blockData.Block.Number.String()] = blockData - } - } + currentStart := new(big.Int).Set(startBlock) - // Prepare blocks for insertion - prepStartTime := time.Now() - blocksToInsert := make([]common.BlockData, 0, len(blocksToInsertMap)) - for _, blockData := range blocksToInsertMap { 
- blocksToInsert = append(blocksToInsert, blockData) - } - prepDuration := time.Since(prepStartTime) - log.Debug().Dur("duration", prepDuration).Int("blocks_to_insert", len(blocksToInsert)).Msg("Prepared blocks for insertion") - - // Insert blocks to destination - insertStartTime := time.Now() - err = migrator.destination.InsertBlockData(blocksToInsert) - insertDuration := time.Since(insertStartTime) - if err != nil { - migrationErr = err - log.Error().Err(err).Dur("duration", insertDuration).Msg("Failed to insert blocks to target storage") - time.Sleep(3 * time.Second) - continue - } + for i := 0; i < numWorkers; i++ { + // Calculate end block for this worker + workerBlockCount := new(big.Int).Set(blocksPerWorker) - batchDuration := time.Since(batchStartTime) - log.Info(). - Dur("total_duration", batchDuration). - Dur("fetch_duration", fetchDuration). - Dur("insert_duration", insertDuration). - Int("blocks_processed", len(blocksToInsert)). - Msg("Batch processed successfully") + // Distribute remainder blocks to first workers + if big.NewInt(int64(i)).Cmp(remainder) < 0 { + workerBlockCount.Add(workerBlockCount, big.NewInt(1)) + } - currentBlock = new(big.Int).Add(endBlock, big.NewInt(1)) + // Skip if no blocks for this worker + if workerBlockCount.Sign() == 0 { + continue } - // 3. then finally copy partitions from target table to main tables - log.Info().Msg("Migration completed successfully") - }() + currentEnd := new(big.Int).Add(currentStart, workerBlockCount) + currentEnd.Sub(currentEnd, big.NewInt(1)) // inclusive range - // Wait for either completion or interrupt signal - select { - case <-done: - if migrationErr != nil { - log.Fatal().Err(migrationErr).Msg("Migration failed") + // Ensure we don't exceed the end block + if currentEnd.Cmp(endBlock) > 0 { + currentEnd = new(big.Int).Set(endBlock) } - log.Info().Msg("Done") - case sig := <-sigChan: - log.Info().Msgf("Received signal: %s, initiating graceful shutdown...", sig) - cancel() - <-done - log.Info().Msg("Migration stopped gracefully") + + ranges = append(ranges, blockRange{ + start: new(big.Int).Set(currentStart), + end: new(big.Int).Set(currentEnd), + }) + + // Move to next range + currentStart = new(big.Int).Add(currentEnd, big.NewInt(1)) + + // Stop if we've covered all blocks + if currentStart.Cmp(endBlock) > 0 { + break + } + } + + return ranges +} + +func processBlockRange(ctx context.Context, migrator *Migrator, workerID int, startBlock, endBlock *big.Int) error { + currentBlock := new(big.Int).Set(startBlock) + + for currentBlock.Cmp(endBlock) <= 0 { + batchStartTime := time.Now() + + // Check for cancellation + select { + case <-ctx.Done(): + log.Info().Msgf("Worker %d: Migration interrupted at block %s", workerID, currentBlock.String()) + return nil + default: + } + + batchEndBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.batchSize-1))) + if batchEndBlock.Cmp(endBlock) > 0 { + batchEndBlock = endBlock + } + + blockNumbers := generateBlockNumbersForRange(currentBlock, batchEndBlock) + log.Info().Msgf("Worker %d: Processing blocks %s to %s", workerID, blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) + + // Fetch valid blocks from source + fetchStartTime := time.Now() + validBlocksForRange, err := migrator.GetValidBlocksForRange(blockNumbers) + fetchDuration := time.Since(fetchStartTime) + if err != nil { + // If we got an error fetching valid blocks, we'll continue + log.Error().Err(err).Msgf("Worker %d: Failed to get valid blocks for range", workerID) + time.Sleep(3 * 
time.Second) + continue + } + log.Debug().Dur("duration", fetchDuration).Int("blocks_fetched", len(validBlocksForRange)).Msgf("Worker %d: Fetched valid blocks from source", workerID) + + // Build map of fetched blocks + mapBuildStartTime := time.Now() + blocksToInsertMap := make(map[string]common.BlockData) + for _, blockData := range validBlocksForRange { + blocksToInsertMap[blockData.Block.Number.String()] = blockData + } + + // Loop over block numbers to find missing blocks + missingBlocks := make([]*big.Int, 0) + for _, blockNum := range blockNumbers { + if _, exists := blocksToInsertMap[blockNum.String()]; !exists { + missingBlocks = append(missingBlocks, blockNum) + } + } + mapBuildDuration := time.Since(mapBuildStartTime) + log.Debug().Dur("duration", mapBuildDuration).Int("missing_blocks", len(missingBlocks)).Msgf("Worker %d: Identified missing blocks", workerID) + + // Fetch missing blocks from RPC + if len(missingBlocks) > 0 { + rpcFetchStartTime := time.Now() + validMissingBlocks := migrator.GetValidBlocksFromRPC(missingBlocks) + rpcFetchDuration := time.Since(rpcFetchStartTime) + log.Debug().Dur("duration", rpcFetchDuration).Int("blocks_fetched", len(validMissingBlocks)).Msgf("Worker %d: Fetched missing blocks from RPC", workerID) + + for _, blockData := range validMissingBlocks { + if blockData.Block.ChainId.Sign() == 0 { + return fmt.Errorf("worker %d: block %s has chain ID 0", workerID, blockData.Block.Number.String()) + } + blocksToInsertMap[blockData.Block.Number.String()] = blockData + } + } + + // Prepare blocks for insertion + prepStartTime := time.Now() + blocksToInsert := make([]common.BlockData, 0, len(blocksToInsertMap)) + for _, blockData := range blocksToInsertMap { + blocksToInsert = append(blocksToInsert, blockData) + } + prepDuration := time.Since(prepStartTime) + log.Debug().Dur("duration", prepDuration).Int("blocks_to_insert", len(blocksToInsert)).Msgf("Worker %d: Prepared blocks for insertion", workerID) + + // Insert blocks to destination + insertStartTime := time.Now() + err = migrator.destination.InsertBlockData(blocksToInsert) + insertDuration := time.Since(insertStartTime) + if err != nil { + log.Error().Err(err).Dur("duration", insertDuration).Msgf("Worker %d: Failed to insert blocks to target storage", workerID) + time.Sleep(3 * time.Second) + continue + } + + batchDuration := time.Since(batchStartTime) + log.Info(). + Dur("total_duration", batchDuration). + Dur("fetch_duration", fetchDuration). + Dur("insert_duration", insertDuration). + Int("blocks_processed", len(blocksToInsert)). 
+ Msgf("Worker %d: Batch processed successfully", workerID) + + currentBlock = new(big.Int).Add(batchEndBlock, big.NewInt(1)) } + + return nil } type Migrator struct { @@ -232,7 +341,7 @@ func (m *Migrator) Close() { } } -func (m *Migrator) DetermineMigrationBoundaries() (*big.Int, *big.Int) { +func (m *Migrator) DetermineMigrationBoundaries(targetStartBlock, targetEndBlock *big.Int) (*big.Int, *big.Int) { // get latest block from main storage latestBlockStored, err := m.source.MainStorage.GetMaxBlockNumber(m.rpcClient.GetChainID()) if err != nil { @@ -241,12 +350,11 @@ func (m *Migrator) DetermineMigrationBoundaries() (*big.Int, *big.Int) { log.Info().Msgf("Latest block in main storage: %d", latestBlockStored) endBlock := latestBlockStored - endBlockEnv := big.NewInt(int64(config.Cfg.Migrator.EndBlock)) - if endBlockEnv.Sign() > 0 && endBlockEnv.Cmp(latestBlockStored) < 0 { - endBlock = endBlockEnv + if targetEndBlock.Sign() > 0 && targetEndBlock.Cmp(latestBlockStored) < 0 { + endBlock = targetEndBlock } - startBlock := big.NewInt(int64(config.Cfg.Migrator.StartBlock)) // default start block is 0 + startBlock := targetStartBlock blockCount, err := m.destination.GetBlockCount(m.rpcClient.GetChainID(), startBlock, endBlock) if err != nil { @@ -275,6 +383,51 @@ func (m *Migrator) DetermineMigrationBoundaries() (*big.Int, *big.Int) { return startBlock, endBlock } +// DetermineMigrationBoundariesForRange determines the actual migration boundaries for a worker's specific range +// Returns nil, nil if the range is already fully migrated +// Fails fatally if it cannot determine boundaries (to ensure data correctness) +func (m *Migrator) DetermineMigrationBoundariesForRange(rangeStart, rangeEnd *big.Int) (*big.Int, *big.Int) { + // Check how many blocks we have in this specific range + blockCount, err := m.destination.GetBlockCount(m.rpcClient.GetChainID(), rangeStart, rangeEnd) + if err != nil { + log.Fatal().Err(err).Msgf("Worker failed to get block count for range %s to %s", rangeStart.String(), rangeEnd.String()) + return nil, nil + } + + expectedCount := new(big.Int).Sub(rangeEnd, rangeStart) + expectedCount = expectedCount.Add(expectedCount, big.NewInt(1)) + + // If all blocks are already migrated, return nil + if expectedCount.Cmp(blockCount) == 0 { + log.Debug().Msgf("Range %s to %s already fully migrated (%s blocks)", rangeStart.String(), rangeEnd.String(), blockCount.String()) + return nil, nil + } + + // Find the actual starting point by checking what blocks we already have + maxStoredBlock, err := m.destination.GetMaxBlockNumberInRange(m.rpcClient.GetChainID(), rangeStart, rangeEnd) + if err != nil { + log.Fatal().Err(err).Msgf("Worker failed to get max block in range %s to %s", rangeStart.String(), rangeEnd.String()) + return nil, nil + } + + actualStart := rangeStart + if maxStoredBlock != nil && maxStoredBlock.Cmp(rangeStart) >= 0 { + // We have some blocks already, start from the next one + actualStart = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) + + // If the new start is beyond our range end, the range is fully migrated + if actualStart.Cmp(rangeEnd) > 0 { + log.Debug().Msgf("Range %s to %s already fully migrated (max block: %s)", rangeStart.String(), rangeEnd.String(), maxStoredBlock.String()) + return nil, nil + } + } + + log.Debug().Msgf("Range %s-%s: found %s blocks, max stored: %v, will migrate from %s", + rangeStart.String(), rangeEnd.String(), blockCount.String(), maxStoredBlock, actualStart.String()) + + return actualStart, rangeEnd +} + func (m *Migrator) 
FetchBlocksFromRPC(blockNumbers []*big.Int) ([]common.BlockData, error) { allBlockData := make([]common.BlockData, 0, len(blockNumbers)) diff --git a/cmd/root.go b/cmd/root.go index a5ac7c9..1afa037 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -150,7 +150,7 @@ func init() { rootCmd.PersistentFlags().String("storage-main-s3-secretAccessKey", "", "S3 secret access key for main storage") rootCmd.PersistentFlags().String("storage-main-s3-endpoint", "", "S3 endpoint URL for main storage (for S3-compatible services)") rootCmd.PersistentFlags().String("storage-main-s3-format", "parquet", "S3 storage format for main storage (parquet or json)") - rootCmd.PersistentFlags().Int64("storage-main-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for main storage") + rootCmd.PersistentFlags().Int64("storage-main-s3-bufferSizeMB", 512, "S3 buffer size in MB before flush for main storage") rootCmd.PersistentFlags().Int("storage-main-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for main storage") rootCmd.PersistentFlags().Int("storage-main-s3-maxBlocksPerFile", 0, "S3 max blocks per file for main storage (0 = no limit)") rootCmd.PersistentFlags().String("storage-main-s3-parquet-compression", "snappy", "Parquet compression type for S3 main storage") @@ -220,7 +220,7 @@ func init() { rootCmd.PersistentFlags().String("migrator-destination-s3-secretAccessKey", "", "S3 secret access key for migrator destination") rootCmd.PersistentFlags().String("migrator-destination-s3-endpoint", "", "S3 endpoint URL for migrator destination") rootCmd.PersistentFlags().String("migrator-destination-s3-format", "parquet", "S3 storage format for migrator destination") - rootCmd.PersistentFlags().Int64("migrator-destination-s3-bufferSizeMB", 1024, "S3 buffer size in MB before flush for migrator destination") + rootCmd.PersistentFlags().Int64("migrator-destination-s3-bufferSizeMB", 512, "S3 buffer size in MB before flush for migrator destination") rootCmd.PersistentFlags().Int("migrator-destination-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for migrator destination") rootCmd.PersistentFlags().Int("migrator-destination-s3-maxBlocksPerFile", 0, "S3 max blocks per file for migrator destination") rootCmd.PersistentFlags().Uint("migrator-batchSize", 2000, "Batch size for storage operations in migrator") diff --git a/configs/config.go b/configs/config.go index f92a3e2..7c2bfce 100644 --- a/configs/config.go +++ b/configs/config.go @@ -76,7 +76,7 @@ type S3Config struct { Format string `mapstructure:"format"` Parquet *ParquetConfig `mapstructure:"parquet"` // Buffering configuration - BufferSize int64 `mapstructure:"bufferSizeMB"` // Target buffer size in MB before flush (default 1024 MB = 1GB) + BufferSize int64 `mapstructure:"bufferSizeMB"` // Target buffer size in MB before flush (default 512 MB) BufferTimeout int `mapstructure:"bufferTimeoutSeconds"` // Max time in seconds before flush (default 300 = 5 min) MaxBlocksPerFile int `mapstructure:"maxBlocksPerFile"` // Max blocks per parquet file (0 = no limit, only size/timeout triggers) } diff --git a/internal/rpc/batcher.go b/internal/rpc/batcher.go index c34fd13..2589e0d 100644 --- a/internal/rpc/batcher.go +++ b/internal/rpc/batcher.go @@ -2,6 +2,7 @@ package rpc import ( "context" + "strings" "sync" "time" @@ -50,6 +51,121 @@ func RPCFetchInBatches[K any, T any](rpc *Client, ctx context.Context, keys []K, return results } +func RPCFetchInBatchesWithRetry[K any, T any](rpc *Client, ctx context.Context, keys []K, 
batchSize int, batchDelay int, method string, argsFunc func(K) []interface{}) []RPCFetchBatchResult[K, T] { + if len(keys) <= batchSize { + return RPCFetchSingleBatchWithRetry[K, T](rpc, ctx, keys, method, argsFunc) + } + chunks := common.SliceToChunks[K](keys, batchSize) + + log.Debug().Msgf("Fetching %s for %d blocks in %d chunks of max %d requests", method, len(keys), len(chunks), batchSize) + + var wg sync.WaitGroup + resultsCh := make(chan []RPCFetchBatchResult[K, T], len(chunks)) + + for _, chunk := range chunks { + wg.Add(1) + go func(chunk []K) { + defer wg.Done() + resultsCh <- RPCFetchSingleBatchWithRetry[K, T](rpc, ctx, chunk, method, argsFunc) + if batchDelay > 0 { + time.Sleep(time.Duration(batchDelay) * time.Millisecond) + } + }(chunk) + } + go func() { + wg.Wait() + close(resultsCh) + }() + + results := make([]RPCFetchBatchResult[K, T], 0, len(keys)) + for batchResults := range resultsCh { + results = append(results, batchResults...) + } + + return results +} + +func RPCFetchSingleBatchWithRetry[K any, T any](rpc *Client, ctx context.Context, keys []K, method string, argsFunc func(K) []interface{}) []RPCFetchBatchResult[K, T] { + currentBatchSize := len(keys) + minBatchSize := 1 + + // First try with the full batch + results := RPCFetchSingleBatch[K, T](rpc, ctx, keys, method, argsFunc) + if !hasBatchError(results) { + return results + } + + // If we got 413, start retrying with smaller batches + newBatchSize := len(keys) / 2 + if newBatchSize < minBatchSize { + newBatchSize = minBatchSize + } + log.Debug().Msgf("Got error for batch size %d, retrying with batch size %d", currentBatchSize, newBatchSize) + + // Start with half the size + currentBatchSize = newBatchSize + + // Keep retrying with smaller batch sizes + for currentBatchSize >= minBatchSize { + chunks := common.SliceToChunks[K](keys, currentBatchSize) + allResults := make([]RPCFetchBatchResult[K, T], 0, len(keys)) + hasError := false + + // Process chunks sequentially to maintain order + for _, chunk := range chunks { + chunkResults := RPCFetchSingleBatch[K, T](rpc, ctx, chunk, method, argsFunc) + + if hasBatchError(chunkResults) { + hasError = true + break + } + allResults = append(allResults, chunkResults...) + } + + if !hasError { + // Successfully processed all chunks, return results in original order + return allResults + } + + // Still getting error, reduce batch size further + newBatchSize := currentBatchSize / 2 + if newBatchSize < minBatchSize { + newBatchSize = minBatchSize + } + log.Debug().Msgf("Got error for batch size %d, retrying with batch size %d", currentBatchSize, newBatchSize) + currentBatchSize = newBatchSize + + // If we're already at minimum batch size and still failing, try one more time + if currentBatchSize == minBatchSize && hasError { + // Process items one by one as last resort + finalResults := make([]RPCFetchBatchResult[K, T], 0, len(keys)) + for _, key := range keys { + singleResult := RPCFetchSingleBatch[K, T](rpc, ctx, []K{key}, method, argsFunc) + finalResults = append(finalResults, singleResult...) 
+ } + return finalResults + } + } + + // Should not reach here, but return error results as fallback + log.Fatal().Msgf("Unable to process batch even with size 1, returning errors") + return nil +} + +func hasBatchError[K any, T any](results []RPCFetchBatchResult[K, T]) bool { + for _, result := range results { + if result.Error != nil { + if httpErr, ok := result.Error.(gethRpc.HTTPError); ok && httpErr.StatusCode == 413 { + return true + } + if strings.Contains(result.Error.Error(), "413") { + return true + } + } + } + return false +} + func RPCFetchSingleBatch[K any, T any](rpc *Client, ctx context.Context, keys []K, method string, argsFunc func(K) []interface{}) []RPCFetchBatchResult[K, T] { batch := make([]gethRpc.BatchElem, len(keys)) results := make([]RPCFetchBatchResult[K, T], len(keys)) diff --git a/internal/rpc/rpc.go b/internal/rpc/rpc.go index d148418..67295df 100644 --- a/internal/rpc/rpc.go +++ b/internal/rpc/rpc.go @@ -238,20 +238,20 @@ func (rpc *Client) GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) [ go func() { defer wg.Done() - result := RPCFetchSingleBatch[*big.Int, common.RawBlock](rpc, ctx, blockNumbers, "eth_getBlockByNumber", GetBlockWithTransactionsParams) + result := RPCFetchSingleBatchWithRetry[*big.Int, common.RawBlock](rpc, ctx, blockNumbers, "eth_getBlockByNumber", GetBlockWithTransactionsParams) blocks = result }() if rpc.supportsBlockReceipts { go func() { defer wg.Done() - result := RPCFetchInBatches[*big.Int, common.RawReceipts](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Receipts, config.Cfg.RPC.BlockReceipts.BatchDelay, "eth_getBlockReceipts", GetBlockReceiptsParams) + result := RPCFetchInBatchesWithRetry[*big.Int, common.RawReceipts](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Receipts, config.Cfg.RPC.BlockReceipts.BatchDelay, "eth_getBlockReceipts", GetBlockReceiptsParams) receipts = result }() } else { go func() { defer wg.Done() - result := RPCFetchInBatches[*big.Int, common.RawLogs](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Logs, config.Cfg.RPC.Logs.BatchDelay, "eth_getLogs", GetLogsParams) + result := RPCFetchInBatchesWithRetry[*big.Int, common.RawLogs](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Logs, config.Cfg.RPC.Logs.BatchDelay, "eth_getLogs", GetLogsParams) logs = result }() } @@ -260,7 +260,7 @@ func (rpc *Client) GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) [ wg.Add(1) go func() { defer wg.Done() - result := RPCFetchInBatches[*big.Int, common.RawTraces](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Traces, config.Cfg.RPC.Traces.BatchDelay, "trace_block", TraceBlockParams) + result := RPCFetchInBatchesWithRetry[*big.Int, common.RawTraces](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Traces, config.Cfg.RPC.Traces.BatchDelay, "trace_block", TraceBlockParams) traces = result }() } diff --git a/internal/storage/s3.go b/internal/storage/s3.go index e3e4038..2ed0a74 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -37,6 +37,7 @@ type S3Connector struct { flushCh chan struct{} flushDoneCh chan struct{} // Signals when flush is complete wg sync.WaitGroup + closeOnce sync.Once } // DataFormatter interface for different file formats @@ -313,18 +314,23 @@ func (s *S3Connector) Flush() error { // Close closes the S3 connector and flushes any remaining data func (s *S3Connector) Close() error { - // First, ensure any pending data is flushed - if err := s.Flush(); err != nil { - log.Error().Err(err).Msg("Error flushing buffer during close") - } + var closeErr error + + s.closeOnce.Do(func() { + // 
First, ensure any pending data is flushed + if err := s.Flush(); err != nil { + log.Error().Err(err).Msg("Error flushing buffer during close") + closeErr = err + } - // Signal stop - close(s.stopCh) + // Signal stop + close(s.stopCh) - // Wait for worker to finish - s.wg.Wait() + // Wait for worker to finish + s.wg.Wait() + }) - return nil + return closeErr } func (s *S3Connector) uploadBatch(data []common.BlockData) error { From 96dc60ba4d7a6fd55cbdf04f5ea28ea9e375b8f8 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 19:10:41 +0000 Subject: [PATCH 27/43] Shuffle Orchestrator and Staging interface --- internal/orchestrator/committer.go | 14 +++---- internal/orchestrator/committer_test.go | 10 ++--- internal/orchestrator/failure_recoverer.go | 6 +-- internal/orchestrator/poller.go | 2 +- internal/storage/badger.go | 49 +++++++++++++++++++--- internal/storage/clickhouse.go | 27 +++++++++++- internal/storage/connector.go | 23 ++++++---- internal/storage/kafka_redis.go | 26 +++++++++++- internal/storage/postgres.go | 31 +++++++++++++- test/mocks/MockIStagingStorage.go | 22 +++++----- 10 files changed, 168 insertions(+), 42 deletions(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index bde4854..6b4df18 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -109,7 +109,7 @@ func (c *Committer) Start(ctx context.Context) { } // Initialize publisher position - always use max(lastPublished, lastCommitted) to prevent double publishing - lastPublished, err := c.storage.StagingStorage.GetLastPublishedBlockNumber(chainID) + lastPublished, err := c.storage.OrchestratorStorage.GetLastPublishedBlockNumber(chainID) if err != nil { // It's okay to fail silently here; it's only used for staging cleanup and will be // corrected by the worker loop. 
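A minimal sketch, not part of the patch, of the publisher-position rule the committer hunk above describes: on startup the cursor is taken as max(lastPublished, lastCommitted) so already-committed blocks are never re-published, and the stored cursor is seeked forward when the publisher is behind. The file name, package placement, and the helper initialPublishPosition are hypothetical; only the IOrchestratorStorage methods come from this series.

// publish_position_sketch.go - illustrative only; helper name and placement are assumptions.
package orchestrator

import (
	"math/big"

	"github.com/thirdweb-dev/indexer/internal/storage"
)

// initialPublishPosition returns max(lastPublished, lastCommitted). When the publisher
// cursor is behind the committed cursor, it seeks the stored cursor forward so
// already-committed blocks are not published a second time.
func initialPublishPosition(chainID *big.Int, store storage.IOrchestratorStorage, lastCommitted *big.Int) (*big.Int, error) {
	lastPublished, err := store.GetLastPublishedBlockNumber(chainID)
	if err != nil {
		return nil, err
	}
	if lastPublished == nil || lastPublished.Cmp(lastCommitted) < 0 {
		if err := store.SetLastPublishedBlockNumber(chainID, lastCommitted); err != nil {
			// Fall back to the stored value if the cursor cannot be advanced.
			return lastPublished, err
		}
		return lastCommitted, nil
	}
	return lastPublished, nil
}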
@@ -126,7 +126,7 @@ func (c *Committer) Start(ctx context.Context) { Msg("Publisher is behind committed position, seeking forward to committed value") c.lastPublishedBlock.Store(latestCommittedBlockNumber.Uint64()) - if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, latestCommittedBlockNumber); err != nil { + if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, latestCommittedBlockNumber); err != nil { log.Error().Err(err).Msg("Failed to update last published block number after seeking forward") // Fall back to the stored value on error c.lastPublishedBlock.Store(lastPublished.Uint64()) @@ -167,7 +167,7 @@ func (c *Committer) Start(ctx context.Context) { // Only update storage if we're changing the position if lastPublished == nil || targetPublishBlock.Cmp(lastPublished) != 0 { - if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, targetPublishBlock); err != nil { + if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, targetPublishBlock); err != nil { log.Error().Err(err).Msg("Failed to update published block number in storage") // If we can't update storage, use what was there originally to avoid issues if lastPublished != nil { @@ -303,11 +303,11 @@ func (c *Committer) cleanupProcessedStagingBlocks() { chainID := c.rpc.GetChainID() blockNumber := new(big.Int).SetUint64(limit) stagingDeleteStart := time.Now() - if err := c.storage.StagingStorage.DeleteOlderThan(chainID, blockNumber); err != nil { + if err := c.storage.StagingStorage.DeleteStagingDataOlderThan(chainID, blockNumber); err != nil { log.Error().Err(err).Msg("Failed to delete staging data") return } - log.Debug().Str("metric", "staging_delete_duration").Msgf("StagingStorage.DeleteOlderThan duration: %f", time.Since(stagingDeleteStart).Seconds()) + log.Debug().Str("metric", "staging_delete_duration").Msgf("StagingStorage.DeleteStagingDataOlderThan duration: %f", time.Since(stagingDeleteStart).Seconds()) metrics.StagingDeleteDuration.Observe(time.Since(stagingDeleteStart).Seconds()) } @@ -358,7 +358,7 @@ func (c *Committer) getBlockNumbersToCommit(ctx context.Context) ([]*big.Int, er func (c *Committer) getBlockNumbersToPublish(ctx context.Context) ([]*big.Int, error) { // Get the last published block from storage (which was already corrected in Start) - latestPublishedBlockNumber, err := c.storage.StagingStorage.GetLastPublishedBlockNumber(c.rpc.GetChainID()) + latestPublishedBlockNumber, err := c.storage.OrchestratorStorage.GetLastPublishedBlockNumber(c.rpc.GetChainID()) if err != nil { return nil, fmt.Errorf("failed to get last published block number: %v", err) } @@ -550,7 +550,7 @@ func (c *Committer) publish(ctx context.Context) error { chainID := c.rpc.GetChainID() highest := blockData[len(blockData)-1].Block.Number - if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, highest); err != nil { + if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, highest); err != nil { return err } c.lastPublishedBlock.Store(highest.Uint64()) diff --git a/internal/orchestrator/committer_test.go b/internal/orchestrator/committer_test.go index c6d5906..8e2cb90 100644 --- a/internal/orchestrator/committer_test.go +++ b/internal/orchestrator/committer_test.go @@ -336,7 +336,7 @@ func TestCommitDeletesAfterPublish(t *testing.T) { mockRPC.EXPECT().GetChainID().Return(chainID) mockMainStorage.EXPECT().InsertBlockData(blockData).Return(nil) - mockStagingStorage.EXPECT().DeleteOlderThan(chainID, 
big.NewInt(102)).RunAndReturn(func(*big.Int, *big.Int) error { + mockStagingStorage.EXPECT().DeleteStagingDataOlderThan(chainID, big.NewInt(102)).RunAndReturn(func(*big.Int, *big.Int) error { close(deleteDone) return nil }) @@ -347,7 +347,7 @@ func TestCommitDeletesAfterPublish(t *testing.T) { select { case <-deleteDone: case <-time.After(2 * time.Second): - t.Fatal("DeleteOlderThan was not called within timeout period") + t.Fatal("DeleteStagingDataOlderThan was not called within timeout period") } } @@ -380,7 +380,7 @@ func TestCommitParallelPublisherMode(t *testing.T) { mockStagingStorage.AssertNotCalled(t, "GetLastPublishedBlockNumber", mock.Anything) mockStagingStorage.AssertNotCalled(t, "SetLastPublishedBlockNumber", mock.Anything, mock.Anything) - mockStagingStorage.AssertNotCalled(t, "DeleteOlderThan", mock.Anything, mock.Anything) + mockStagingStorage.AssertNotCalled(t, "DeleteStagingDataOlderThan", mock.Anything, mock.Anything) } func TestCleanupProcessedStagingBlocks(t *testing.T) { @@ -400,11 +400,11 @@ func TestCleanupProcessedStagingBlocks(t *testing.T) { committer.lastPublishedBlock.Store(0) committer.cleanupProcessedStagingBlocks() - mockStagingStorage.AssertNotCalled(t, "DeleteOlderThan", mock.Anything, mock.Anything) + mockStagingStorage.AssertNotCalled(t, "DeleteStagingDataOlderThan", mock.Anything, mock.Anything) committer.lastPublishedBlock.Store(90) mockRPC.EXPECT().GetChainID().Return(chainID) - mockStagingStorage.EXPECT().DeleteOlderThan(chainID, big.NewInt(90)).Return(nil) + mockStagingStorage.EXPECT().DeleteStagingDataOlderThan(chainID, big.NewInt(90)).Return(nil) committer.cleanupProcessedStagingBlocks() } func TestHandleGap(t *testing.T) { diff --git a/internal/orchestrator/failure_recoverer.go b/internal/orchestrator/failure_recoverer.go index da1ae91..8ca110f 100644 --- a/internal/orchestrator/failure_recoverer.go +++ b/internal/orchestrator/failure_recoverer.go @@ -55,7 +55,7 @@ func (fr *FailureRecoverer) Start(ctx context.Context) { log.Info().Msg("Failure recoverer shutting down") return case <-ticker.C: - blockFailures, err := fr.storage.OrchestratorStorage.GetBlockFailures(storage.QueryFilter{ + blockFailures, err := fr.storage.StagingStorage.GetBlockFailures(storage.QueryFilter{ ChainId: fr.rpc.GetChainID(), Limit: fr.failuresPerPoll, }) @@ -122,11 +122,11 @@ func (fr *FailureRecoverer) handleWorkerResults(blockFailures []common.BlockFail log.Error().Err(fmt.Errorf("error inserting block data in failure recoverer: %v", err)) return } - if err := fr.storage.OrchestratorStorage.StoreBlockFailures(newBlockFailures); err != nil { + if err := fr.storage.StagingStorage.StoreBlockFailures(newBlockFailures); err != nil { log.Error().Err(err).Msg("Error storing block failures") return } - if err := fr.storage.OrchestratorStorage.DeleteBlockFailures(failuresToDelete); err != nil { + if err := fr.storage.StagingStorage.DeleteBlockFailures(failuresToDelete); err != nil { log.Error().Err(err).Msg("Error deleting block failures") return } diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index 5045dc1..331f00c 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -352,7 +352,7 @@ func (p *Poller) handleBlockFailures(results []rpc.GetFullBlockResult) { }) } } - err := p.storage.OrchestratorStorage.StoreBlockFailures(blockFailures) + err := p.storage.StagingStorage.StoreBlockFailures(blockFailures) if err != nil { // TODO: exiting if this fails, but should handle this better log.Error().Err(err).Msg("Error 
saving block failures") diff --git a/internal/storage/badger.go b/internal/storage/badger.go index 3c1305d..e649040 100644 --- a/internal/storage/badger.go +++ b/internal/storage/badger.go @@ -88,19 +88,23 @@ func (bc *BadgerConnector) Close() error { // Key construction helpers func blockKey(chainId *big.Int, blockNumber *big.Int) []byte { - return []byte(fmt.Sprintf("b:%d:%s", chainId.Uint64(), blockNumber.String())) + return []byte(fmt.Sprintf("blockdata:%s:%s", chainId.String(), blockNumber.String())) } func blockFailureKey(chainId *big.Int, blockNumber *big.Int, timestamp int64) []byte { - return []byte(fmt.Sprintf("f:%d:%s:%d", chainId.Uint64(), blockNumber.String(), timestamp)) + return []byte(fmt.Sprintf("blockfailure:%s:%s:%d", chainId.String(), blockNumber.String(), timestamp)) } func lastReorgKey(chainId *big.Int) []byte { - return []byte(fmt.Sprintf("reorg:%d", chainId.Uint64())) + return []byte(fmt.Sprintf("reorg:%s", chainId.String())) } func lastPublishedKey(chainId *big.Int) []byte { - return []byte(fmt.Sprintf("published:%d", chainId.Uint64())) + return []byte(fmt.Sprintf("publish:%s", chainId.String())) +} + +func lastCommittedKey(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("commit:%s", chainId.String())) } // IOrchestratorStorage implementation @@ -438,7 +442,42 @@ func (bc *BadgerConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNu }) } -func (bc *BadgerConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +func (bc *BadgerConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var blockNumber *big.Int + err := bc.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(lastCommittedKey(chainId)) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return err + } + + return item.Value(func(val []byte) error { + blockNumber = new(big.Int).SetBytes(val) + return nil + }) + }) + + if blockNumber == nil { + return big.NewInt(0), nil + } + return blockNumber, err +} + +func (bc *BadgerConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + return txn.Set(lastCommittedKey(chainId), blockNumber.Bytes()) + }) +} + +func (bc *BadgerConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { bc.mu.Lock() defer bc.mu.Unlock() diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go index d3319c6..013e917 100644 --- a/internal/storage/clickhouse.go +++ b/internal/storage/clickhouse.go @@ -1147,6 +1147,31 @@ func (c *ClickHouseConnector) SetLastPublishedBlockNumber(chainId *big.Int, bloc return c.conn.Exec(context.Background(), query) } +func (c *ClickHouseConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + query := fmt.Sprintf("SELECT cursor_value FROM %s.cursors FINAL WHERE cursor_type = 'commit'", c.cfg.Database) + if chainId.Sign() > 0 { + query += fmt.Sprintf(" AND chain_id = %s", chainId.String()) + } + var blockNumberString string + err := c.conn.QueryRow(context.Background(), query).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + return blockNumber, nil +} + +func (c *ClickHouseConnector) 
SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + query := fmt.Sprintf("INSERT INTO %s.cursors (chain_id, cursor_type, cursor_value) VALUES (%s, 'commit', '%s')", c.cfg.Database, chainId, blockNumber.String()) + return c.conn.Exec(context.Background(), query) +} + func (c *ClickHouseConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { query := fmt.Sprintf("SELECT cursor_value FROM %s.cursors FINAL WHERE cursor_type = 'reorg'", c.cfg.Database) if chainId.Sign() > 0 { @@ -2186,7 +2211,7 @@ func (c *ClickHouseConnector) GetFullBlockData(chainId *big.Int, blockNumbers [] return blockData, nil } -func (c *ClickHouseConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +func (c *ClickHouseConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { query := fmt.Sprintf(` INSERT INTO %s.block_data (chain_id, block_number, is_deleted) SELECT chain_id, block_number, 1 diff --git a/internal/storage/connector.go b/internal/storage/connector.go index dc23d9b..60e5cfb 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -96,23 +96,32 @@ func (s *IStorage) Close() error { return nil } +// The orchestrator storage is a persisted key/value store type IOrchestratorStorage interface { - GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) - StoreBlockFailures(failures []common.BlockFailure) error - DeleteBlockFailures(failures []common.BlockFailure) error GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error + GetLastPublishedBlockNumber(chainId *big.Int) (blockNumber *big.Int, err error) + SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error + GetLastCommittedBlockNumber(chainId *big.Int) (blockNumber *big.Int, err error) + SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error + Close() error } +// The staging storage is an ephemeral block data store type IStagingStorage interface { + // Staging block data InsertStagingData(data []common.BlockData) error GetStagingData(qf QueryFilter) (data []common.BlockData, err error) - DeleteStagingData(data []common.BlockData) error GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (maxBlockNumber *big.Int, err error) - GetLastPublishedBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) - SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error - DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error + DeleteStagingData(data []common.BlockData) error + DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error + + // Block failures + GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) + StoreBlockFailures(failures []common.BlockFailure) error + DeleteBlockFailures(failures []common.BlockFailure) error + Close() error } diff --git a/internal/storage/kafka_redis.go b/internal/storage/kafka_redis.go index 030fa21..e76a8e9 100644 --- a/internal/storage/kafka_redis.go +++ b/internal/storage/kafka_redis.go @@ -132,11 +132,35 @@ func (kr *KafkaRedisConnector) SetLastPublishedBlockNumber(chainId *big.Int, blo return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() } +func (kr *KafkaRedisConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + + val, err := kr.redisClient.Get(ctx, 
key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *KafkaRedisConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + func (kr *KafkaRedisConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { return nil, fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") } -func (kr *KafkaRedisConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +func (kr *KafkaRedisConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") } diff --git a/internal/storage/postgres.go b/internal/storage/postgres.go index 1476c44..fb0748d 100644 --- a/internal/storage/postgres.go +++ b/internal/storage/postgres.go @@ -388,6 +388,35 @@ func (p *PostgresConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockN return err } +func (p *PostgresConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'commit' AND chain_id = $1` + + var blockNumberString string + err := p.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + return blockNumber, nil +} + +func (p *PostgresConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) + VALUES ($1, 'commit', $2) + ON CONFLICT (chain_id, cursor_type) + DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` + + _, err := p.db.Exec(query, chainId.String(), blockNumber.String()) + return err +} + func (p *PostgresConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { query := `SELECT MAX(block_number) FROM block_data WHERE 1=1` @@ -431,7 +460,7 @@ func (p *PostgresConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStar return blockNumber, nil } -func (p *PostgresConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +func (p *PostgresConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { query := `DELETE FROM block_data WHERE ctid IN ( SELECT ctid diff --git a/test/mocks/MockIStagingStorage.go b/test/mocks/MockIStagingStorage.go index 14f8e68..bd73136 100644 --- a/test/mocks/MockIStagingStorage.go +++ b/test/mocks/MockIStagingStorage.go @@ -341,12 +341,12 @@ func (_c *MockIStagingStorage_InsertStagingData_Call) RunAndReturn(run func([]co return _c } -// DeleteOlderThan provides a mock function with given fields: chainId, blockNumber -func (_m *MockIStagingStorage) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +// DeleteStagingDataOlderThan provides a 
mock function with given fields: chainId, blockNumber +func (_m *MockIStagingStorage) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { ret := _m.Called(chainId, blockNumber) if len(ret) == 0 { - panic("no return value specified for DeleteOlderThan") + panic("no return value specified for DeleteStagingDataOlderThan") } var r0 error @@ -359,31 +359,31 @@ func (_m *MockIStagingStorage) DeleteOlderThan(chainId *big.Int, blockNumber *bi return r0 } -// MockIStagingStorage_DeleteOlderThan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteOlderThan' -type MockIStagingStorage_DeleteOlderThan_Call struct { +// MockIStagingStorage_DeleteStagingDataOlderThan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteStagingDataOlderThan' +type MockIStagingStorage_DeleteStagingDataOlderThan_Call struct { *mock.Call } -// DeleteOlderThan is a helper method to define mock.On call +// DeleteStagingDataOlderThan is a helper method to define mock.On call // - chainId *big.Int // - blockNumber *big.Int -func (_e *MockIStagingStorage_Expecter) DeleteOlderThan(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_DeleteOlderThan_Call { - return &MockIStagingStorage_DeleteOlderThan_Call{Call: _e.mock.On("DeleteOlderThan", chainId, blockNumber)} +func (_e *MockIStagingStorage_Expecter) DeleteStagingDataOlderThan(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { + return &MockIStagingStorage_DeleteStagingDataOlderThan_Call{Call: _e.mock.On("DeleteStagingDataOlderThan", chainId, blockNumber)} } -func (_c *MockIStagingStorage_DeleteOlderThan_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_DeleteOlderThan_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Run(func(args mock.Arguments) { run(args[0].(*big.Int), args[1].(*big.Int)) }) return _c } -func (_c *MockIStagingStorage_DeleteOlderThan_Call) Return(_a0 error) *MockIStagingStorage_DeleteOlderThan_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Return(_a0 error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIStagingStorage_DeleteOlderThan_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_DeleteOlderThan_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Return(run) return _c } From 41cc98d610865a68c53bc7b3eeaf682d23287661 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 19:55:04 +0000 Subject: [PATCH 28/43] store poller in committer --- internal/orchestrator/committer.go | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index 6b4df18..1e00602 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -31,6 +31,7 @@ type Committer struct { lastCommittedBlock atomic.Uint64 lastPublishedBlock atomic.Uint64 publisher *publisher.Publisher + poller *Poller workMode WorkMode workModeMutex sync.RWMutex workModeChan chan WorkMode @@ -80,6 +81,7 @@ func NewCommitter(rpc rpc.IRPCClient, storage 
storage.IStorage, opts ...Committe commitUntilBlock: big.NewInt(int64(commitUntilBlock)), rpc: rpc, publisher: publisher.GetInstance(), + poller: NewBoundlessPoller(rpc, storage), workMode: "", } cfb := commitFromBlock.Uint64() @@ -445,8 +447,7 @@ func (c *Committer) fetchBlockData(ctx context.Context, blockNumbers []*big.Int) } return blocksData, nil } else { - poller := NewBoundlessPoller(c.rpc, c.storage) - blocksData, err := poller.PollWithoutSaving(ctx, blockNumbers) + blocksData, err := c.poller.PollWithoutSaving(ctx, blockNumbers) if err != nil { return nil, fmt.Errorf("poller error: %v", err) } @@ -612,13 +613,11 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big return nil } - poller := NewBoundlessPoller(c.rpc, c.storage) - missingBlockCount := new(big.Int).Sub(actualFirstBlock.Number, expectedStartBlockNumber).Int64() log.Debug().Msgf("Detected %d missing blocks between blocks %s and %s", missingBlockCount, expectedStartBlockNumber.String(), actualFirstBlock.Number.String()) - if missingBlockCount > poller.blocksPerPoll { - log.Debug().Msgf("Limiting polling missing blocks to %d blocks due to config", poller.blocksPerPoll) - missingBlockCount = poller.blocksPerPoll + if missingBlockCount > c.poller.blocksPerPoll { + log.Debug().Msgf("Limiting polling missing blocks to %d blocks due to config", c.poller.blocksPerPoll) + missingBlockCount = c.poller.blocksPerPoll } missingBlockNumbers := make([]*big.Int, missingBlockCount) for i := int64(0); i < missingBlockCount; i++ { @@ -627,7 +626,7 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big } log.Debug().Msgf("Polling %d blocks while handling gap: %v", len(missingBlockNumbers), missingBlockNumbers) - poller.Poll(ctx, missingBlockNumbers) + c.poller.Poll(ctx, missingBlockNumbers) return fmt.Errorf("first block number (%s) in commit batch does not match expected (%s)", actualFirstBlock.Number.String(), expectedStartBlockNumber.String()) } @@ -644,11 +643,10 @@ func (c *Committer) handleMissingStagingData(ctx context.Context, blocksToCommit } log.Debug().Msgf("Detected missing blocks in staging data starting from %s.", blocksToCommit[0].String()) - poller := NewBoundlessPoller(c.rpc, c.storage) blocksToPoll := blocksToCommit - if len(blocksToCommit) > int(poller.blocksPerPoll) { - blocksToPoll = blocksToCommit[:int(poller.blocksPerPoll)] + if len(blocksToCommit) > int(c.poller.blocksPerPoll) { + blocksToPoll = blocksToCommit[:int(c.poller.blocksPerPoll)] } - poller.Poll(ctx, blocksToPoll) + c.poller.Poll(ctx, blocksToPoll) log.Debug().Msgf("Polled %d blocks due to committer detecting them as missing. Range: %s - %s", len(blocksToPoll), blocksToPoll[0].String(), blocksToPoll[len(blocksToPoll)-1].String()) } From 70ea8714f028a1ca4f38ee85aa963fd6a2d41754 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 20:15:23 +0000 Subject: [PATCH 29/43] Simplified storage. 
Split kafka and redis --- cmd/migrate_valid.go | 12 +- cmd/root.go | 16 +- configs/config.go | 42 +++-- internal/handlers/logs_handlers.go | 2 +- internal/storage/connector.go | 163 +++++++++++++--- internal/storage/kafka.go | 125 ++++++++++++ internal/storage/kafka_redis.go | 294 ----------------------------- internal/storage/redis.go | 125 ++++++++++++ 8 files changed, 434 insertions(+), 345 deletions(-) create mode 100644 internal/storage/kafka.go delete mode 100644 internal/storage/kafka_redis.go create mode 100644 internal/storage/redis.go diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index 43f2988..04dcc60 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -67,7 +67,7 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { wg.Add(1) go func(id int, startBlock, endBlock *big.Int) { defer wg.Done() - + // Only check boundaries per-worker if we have multiple workers // For single worker, we already determined boundaries globally var actualStart, actualEnd *big.Int @@ -78,7 +78,7 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { log.Info().Msgf("Worker %d: Range %s to %s already fully migrated", id, startBlock.String(), endBlock.String()) return } - log.Info().Msgf("Worker %d starting: blocks %s to %s (adjusted from %s to %s)", + log.Info().Msgf("Worker %d starting: blocks %s to %s (adjusted from %s to %s)", id, actualStart.String(), actualEnd.String(), startBlock.String(), endBlock.String()) } else { // Single worker: use the already-determined boundaries @@ -314,7 +314,7 @@ func NewMigrator() *Migrator { validator := orchestrator.NewValidator(rpcClient, sourceConnector) - destinationConnector, err := storage.NewConnector[storage.IMainStorage](&config.Cfg.Migrator.Destination) + destinationConnector, err := storage.NewMainConnector(&config.Cfg.Migrator.Destination) if err != nil { log.Fatal().Err(err).Msg("Failed to initialize storage") } @@ -396,7 +396,7 @@ func (m *Migrator) DetermineMigrationBoundariesForRange(rangeStart, rangeEnd *bi expectedCount := new(big.Int).Sub(rangeEnd, rangeStart) expectedCount = expectedCount.Add(expectedCount, big.NewInt(1)) - + // If all blocks are already migrated, return nil if expectedCount.Cmp(blockCount) == 0 { log.Debug().Msgf("Range %s to %s already fully migrated (%s blocks)", rangeStart.String(), rangeEnd.String(), blockCount.String()) @@ -414,7 +414,7 @@ func (m *Migrator) DetermineMigrationBoundariesForRange(rangeStart, rangeEnd *bi if maxStoredBlock != nil && maxStoredBlock.Cmp(rangeStart) >= 0 { // We have some blocks already, start from the next one actualStart = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) - + // If the new start is beyond our range end, the range is fully migrated if actualStart.Cmp(rangeEnd) > 0 { log.Debug().Msgf("Range %s to %s already fully migrated (max block: %s)", rangeStart.String(), rangeEnd.String(), maxStoredBlock.String()) @@ -422,7 +422,7 @@ func (m *Migrator) DetermineMigrationBoundariesForRange(rangeStart, rangeEnd *bi } } - log.Debug().Msgf("Range %s-%s: found %s blocks, max stored: %v, will migrate from %s", + log.Debug().Msgf("Range %s-%s: found %s blocks, max stored: %v, will migrate from %s", rangeStart.String(), rangeEnd.String(), blockCount.String(), maxStoredBlock, actualStart.String()) return actualStart, rangeEnd diff --git a/cmd/root.go b/cmd/root.go index 1afa037..6aa04c7 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -134,10 +134,10 @@ func init() { rootCmd.PersistentFlags().String("storage-main-kafka-username", "", "Kafka username for main 
storage") rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") rootCmd.PersistentFlags().Bool("storage-main-kafka-enable-tls", true, "Enable TLS for Kafka connection in main storage") - rootCmd.PersistentFlags().String("storage-main-kafka-redis-host", "", "Redis host for Kafka main storage metadata") - rootCmd.PersistentFlags().Int("storage-main-kafka-redis-port", 6379, "Redis port for Kafka main storage metadata") - rootCmd.PersistentFlags().String("storage-main-kafka-redis-password", "", "Redis password for Kafka main storage metadata") - rootCmd.PersistentFlags().Int("storage-main-kafka-redis-db", 0, "Redis database number for Kafka main storage metadata") + rootCmd.PersistentFlags().String("storage-orchestrator-redis-host", "", "Redis host for orchestrator storage metadata") + rootCmd.PersistentFlags().Int("storage-orchestrator-redis-port", 6379, "Redis port for orchestrator storage metadata") + rootCmd.PersistentFlags().String("storage-orchestrator-redis-password", "", "Redis password for orchestator storage metadata") + rootCmd.PersistentFlags().Int("storage-orchestrator-redis-db", 0, "Redis database number for orchestrator storage metadata") rootCmd.PersistentFlags().String("storage-staging-type", "auto", "Storage type for staging (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-main-type", "auto", "Storage type for main (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-orchestrator-type", "auto", "Storage type for orchestrator (auto, clickhouse, postgres, badger)") @@ -325,10 +325,10 @@ func init() { viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) viper.BindPFlag("storage.main.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) - viper.BindPFlag("storage.main.kafka.redis.host", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-host")) - viper.BindPFlag("storage.main.kafka.redis.port", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-port")) - viper.BindPFlag("storage.main.kafka.redis.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-password")) - viper.BindPFlag("storage.main.kafka.redis.db", rootCmd.PersistentFlags().Lookup("storage-main-kafka-redis-db")) + viper.BindPFlag("storage.orchestrator.redis.host", rootCmd.PersistentFlags().Lookup("storage-main-redis-host")) + viper.BindPFlag("storage.orchestrator.redis.port", rootCmd.PersistentFlags().Lookup("storage-main-redis-port")) + viper.BindPFlag("storage.orchestrator.redis.password", rootCmd.PersistentFlags().Lookup("storage-main-redis-password")) + viper.BindPFlag("storage.orchestrator.redis.db", rootCmd.PersistentFlags().Lookup("storage-main-redis-db")) viper.BindPFlag("storage.staging.type", rootCmd.PersistentFlags().Lookup("storage-staging-type")) viper.BindPFlag("storage.main.type", rootCmd.PersistentFlags().Lookup("storage-main-type")) viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) diff --git a/configs/config.go b/configs/config.go index 7c2bfce..9c3e7c0 100644 --- a/configs/config.go +++ b/configs/config.go @@ -48,13 +48,28 @@ type FailureRecovererConfig struct { } type StorageConfig struct { - Staging StorageConnectionConfig `mapstructure:"staging"` - Main StorageConnectionConfig 
`mapstructure:"main"` - Orchestrator StorageConnectionConfig `mapstructure:"orchestrator"` + Orchestrator StorageOrchestratorConfig `mapstructure:"orchestrator"` + Staging StorageStagingConfig `mapstructure:"staging"` + Main StorageMainConfig `mapstructure:"main"` } -type StorageConnectionConfig struct { - Type string `mapstructure:"type"` // "auto", "clickhouse", "postgres", "kafka", "badger", "s3" +type StorageOrchestratorConfig struct { + Type string `mapstructure:"type"` + Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` + Postgres *PostgresConfig `mapstructure:"postgres"` + Redis *RedisConfig `mapstructure:"redis"` + Badger *BadgerConfig `mapstructure:"badger"` +} + +type StorageStagingConfig struct { + Type string `mapstructure:"type"` + Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` + Postgres *PostgresConfig `mapstructure:"postgres"` + Badger *BadgerConfig `mapstructure:"badger"` +} + +type StorageMainConfig struct { + Type string `mapstructure:"type"` Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` Postgres *PostgresConfig `mapstructure:"postgres"` Kafka *KafkaConfig `mapstructure:"kafka"` @@ -133,11 +148,10 @@ type RedisConfig struct { } type KafkaConfig struct { - Brokers string `mapstructure:"brokers"` - Username string `mapstructure:"username"` - Password string `mapstructure:"password"` - EnableTLS bool `mapstructure:"enableTLS"` - Redis *RedisConfig `mapstructure:"redis"` + Brokers string `mapstructure:"brokers"` + Username string `mapstructure:"username"` + Password string `mapstructure:"password"` + EnableTLS bool `mapstructure:"enableTLS"` } type RPCBatchRequestConfig struct { @@ -233,10 +247,10 @@ type ValidationConfig struct { } type MigratorConfig struct { - Destination StorageConnectionConfig `mapstructure:"destination"` - StartBlock uint `mapstructure:"startBlock"` - EndBlock uint `mapstructure:"endBlock"` - BatchSize uint `mapstructure:"batchSize"` + Destination StorageMainConfig `mapstructure:"destination"` + StartBlock uint `mapstructure:"startBlock"` + EndBlock uint `mapstructure:"endBlock"` + BatchSize uint `mapstructure:"batchSize"` } type Config struct { diff --git a/internal/handlers/logs_handlers.go b/internal/handlers/logs_handlers.go index 965aeae..89f9d2e 100644 --- a/internal/handlers/logs_handlers.go +++ b/internal/handlers/logs_handlers.go @@ -224,7 +224,7 @@ func decodeLogsIfNeeded(chainId string, logs []common.Log, eventABI *abi.Event, func getMainStorage() (storage.IMainStorage, error) { storageOnce.Do(func() { var err error - mainStorage, err = storage.NewConnector[storage.IMainStorage](&config.Cfg.Storage.Main) + mainStorage, err = storage.NewMainConnector(&config.Cfg.Storage.Main) if err != nil { storageErr = err log.Error().Err(err).Msg("Error creating storage connector") diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 60e5cfb..4767578 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -165,25 +165,25 @@ func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { var storage IStorage var err error - storage.OrchestratorStorage, err = NewConnector[IOrchestratorStorage](&cfg.Orchestrator) + storage.OrchestratorStorage, err = NewOrchestratorConnector(&cfg.Orchestrator) if err != nil { return IStorage{}, fmt.Errorf("failed to create orchestrator storage: %w", err) } - storage.MainStorage, err = NewConnector[IMainStorage](&cfg.Main) + storage.StagingStorage, err = NewStagingConnector(&cfg.Staging) if err != nil { - return IStorage{}, 
fmt.Errorf("failed to create main storage: %w", err) + return IStorage{}, fmt.Errorf("failed to create staging storage: %w", err) } - storage.StagingStorage, err = NewConnector[IStagingStorage](&cfg.Staging) + storage.MainStorage, err = NewMainConnector(&cfg.Main) if err != nil { - return IStorage{}, fmt.Errorf("failed to create staging storage: %w", err) + return IStorage{}, fmt.Errorf("failed to create main storage: %w", err) } return storage, nil } -func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { +func NewOrchestratorConnector(cfg *config.StorageOrchestratorConfig) (IOrchestratorStorage, error) { var conn interface{} var err error @@ -196,58 +196,177 @@ func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { // Handle explicit type selection if storageType != "auto" { switch storageType { - case "kafka": - if cfg.Kafka == nil { - return *new(T), fmt.Errorf("kafka storage type specified but kafka config is nil") + case "redis": + if cfg.Redis == nil { + return nil, fmt.Errorf("redis storage type specified but redis config is nil") + } + conn, err = NewRedisConnector(cfg.Redis) + case "postgres": + if cfg.Postgres == nil { + return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") } - conn, err = NewKafkaRedisConnector(cfg.Kafka) + conn, err = NewPostgresConnector(cfg.Postgres) + case "clickhouse": + if cfg.Clickhouse == nil { + return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") + } + conn, err = NewClickHouseConnector(cfg.Clickhouse) + case "badger": + if cfg.Badger == nil { + return nil, fmt.Errorf("badger storage type specified but badger config is nil") + } + conn, err = NewBadgerConnector(cfg.Badger) + default: + return nil, fmt.Errorf("unknown storage type: %s", storageType) + } + } else { + // Auto mode: use the first non-nil config (existing behavior) + if cfg.Redis != nil { + conn, err = NewRedisConnector(cfg.Redis) + } else if cfg.Postgres != nil { + conn, err = NewPostgresConnector(cfg.Postgres) + } else if cfg.Clickhouse != nil { + conn, err = NewClickHouseConnector(cfg.Clickhouse) + } else if cfg.Badger != nil { + conn, err = NewBadgerConnector(cfg.Badger) + } else { + return nil, fmt.Errorf("no storage driver configured") + } + } + + if err != nil { + return nil, err + } + + typedConn, ok := conn.(IOrchestratorStorage) + if !ok { + return nil, fmt.Errorf("connector does not implement the required interface") + } + + return typedConn, nil +} + +func NewStagingConnector(cfg *config.StorageStagingConfig) (IStagingStorage, error) { + var conn interface{} + var err error + + // Default to "auto" if Type is not specified + storageType := cfg.Type + if storageType == "" { + storageType = "auto" + } + + // Handle explicit type selection + if storageType != "auto" { + switch storageType { case "postgres": if cfg.Postgres == nil { - return *new(T), fmt.Errorf("postgres storage type specified but postgres config is nil") + return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") } conn, err = NewPostgresConnector(cfg.Postgres) case "clickhouse": if cfg.Clickhouse == nil { - return *new(T), fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") + return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") } conn, err = NewClickHouseConnector(cfg.Clickhouse) case "badger": if cfg.Badger == nil { - return *new(T), fmt.Errorf("badger storage type specified but badger config is nil") + return nil, 
fmt.Errorf("badger storage type specified but badger config is nil") } conn, err = NewBadgerConnector(cfg.Badger) + default: + return nil, fmt.Errorf("unknown storage type: %s", storageType) + } + } else { + // Auto mode: use the first non-nil config (existing behavior) + if cfg.Postgres != nil { + conn, err = NewPostgresConnector(cfg.Postgres) + } else if cfg.Clickhouse != nil { + conn, err = NewClickHouseConnector(cfg.Clickhouse) + } else if cfg.Badger != nil { + conn, err = NewBadgerConnector(cfg.Badger) + } else { + return nil, fmt.Errorf("no storage driver configured") + } + } + + if err != nil { + return nil, err + } + + typedConn, ok := conn.(IStagingStorage) + if !ok { + return nil, fmt.Errorf("connector does not implement the required interface") + } + + return typedConn, nil +} + +func NewMainConnector(cfg *config.StorageMainConfig) (IMainStorage, error) { + var conn interface{} + var err error + + // Default to "auto" if Type is not specified + storageType := cfg.Type + if storageType == "" { + storageType = "auto" + } + + // Handle explicit type selection + if storageType != "auto" { + switch storageType { + case "kafka": + if cfg.Kafka == nil { + return nil, fmt.Errorf("kafka storage type specified but kafka config is nil") + } + conn, err = NewKafkaConnector(cfg.Kafka) case "s3": if cfg.S3 == nil { - return *new(T), fmt.Errorf("s3 storage type specified but s3 config is nil") + return nil, fmt.Errorf("s3 storage type specified but s3 config is nil") } conn, err = NewS3Connector(cfg.S3) + case "postgres": + if cfg.Postgres == nil { + return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") + } + conn, err = NewPostgresConnector(cfg.Postgres) + case "clickhouse": + if cfg.Clickhouse == nil { + return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") + } + conn, err = NewClickHouseConnector(cfg.Clickhouse) + case "badger": + if cfg.Badger == nil { + return nil, fmt.Errorf("badger storage type specified but badger config is nil") + } + conn, err = NewBadgerConnector(cfg.Badger) default: - return *new(T), fmt.Errorf("unknown storage type: %s", storageType) + return nil, fmt.Errorf("unknown storage type: %s", storageType) } } else { // Auto mode: use the first non-nil config (existing behavior) if cfg.Kafka != nil { - conn, err = NewKafkaRedisConnector(cfg.Kafka) + conn, err = NewKafkaConnector(cfg.Kafka) + } else if cfg.S3 != nil { + conn, err = NewS3Connector(cfg.S3) } else if cfg.Postgres != nil { conn, err = NewPostgresConnector(cfg.Postgres) } else if cfg.Clickhouse != nil { conn, err = NewClickHouseConnector(cfg.Clickhouse) } else if cfg.Badger != nil { conn, err = NewBadgerConnector(cfg.Badger) - } else if cfg.S3 != nil { - conn, err = NewS3Connector(cfg.S3) } else { - return *new(T), fmt.Errorf("no storage driver configured") + return nil, fmt.Errorf("no storage driver configured") } } if err != nil { - return *new(T), err + return nil, err } - typedConn, ok := conn.(T) + typedConn, ok := conn.(IMainStorage) if !ok { - return *new(T), fmt.Errorf("connector does not implement the required interface") + return nil, fmt.Errorf("connector does not implement the required interface") } return typedConn, nil diff --git a/internal/storage/kafka.go b/internal/storage/kafka.go new file mode 100644 index 0000000..1de9014 --- /dev/null +++ b/internal/storage/kafka.go @@ -0,0 +1,125 @@ +package storage + +import ( + "fmt" + "math/big" + + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + 
"github.com/thirdweb-dev/indexer/internal/common" +) + +// KafkaConnector uses Redis for metadata storage and Kafka for block data delivery +type KafkaConnector struct { + cfg *config.KafkaConfig + kafkaPublisher *KafkaPublisher +} + +func NewKafkaConnector(cfg *config.KafkaConfig) (*KafkaConnector, error) { + // Initialize Kafka publisher + kafkaPublisher, err := NewKafkaPublisher(cfg) + if err != nil { + return nil, err + } + + return &KafkaConnector{ + cfg: cfg, + kafkaPublisher: kafkaPublisher, + }, nil +} + +// InsertBlockData publishes block data to Kafka instead of storing in database +func (kr *KafkaConnector) InsertBlockData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + // Publish to Kafka + if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { + return fmt.Errorf("failed to publish block data to kafka: %w", err) + } + log.Debug(). + Int("blocks", len(data)). + Msg("Published block data to Kafka") + + return nil +} + +// ReplaceBlockData handles reorg by publishing both old and new data to Kafka +func (kr *KafkaConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { + if len(data) == 0 { + return nil, nil + } + + oldBlocks := []common.BlockData{} + + // TODO: We need to fetch the old blocks from the primary data store + if err := kr.kafkaPublisher.PublishReorg(data, data); err != nil { + return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) + } + + // save cursor + return oldBlocks, nil +} + +func (kr *KafkaConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { + return QueryResult[common.TokenBalance]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { + return QueryResult[common.TokenTransfer]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { + return nil, fmt.Errorf("query 
operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +// Query methods return errors as this is a write-only connector for streaming +func (kr *KafkaConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { + return QueryResult[common.Block]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { + return QueryResult[common.Transaction]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { + return QueryResult[common.Log]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { + return QueryResult[common.Trace]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { + return QueryResult[interface{}]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +// Close closes the Redis connection +func (kr *KafkaConnector) Close() error { + return kr.kafkaPublisher.Close() +} diff --git a/internal/storage/kafka_redis.go b/internal/storage/kafka_redis.go deleted file mode 100644 index e76a8e9..0000000 --- a/internal/storage/kafka_redis.go +++ /dev/null @@ -1,294 +0,0 @@ -package storage - -import ( - "context" - "fmt" - "math/big" - "time" - - "github.com/redis/go-redis/v9" - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" -) - -// Redis key namespace constants for better organization and maintainability -const ( - // Cursor keys for tracking positions - KeyCursorReorg = "cursor:reorg" // String: cursor:reorg:{chainId} - KeyCursorPublish = "cursor:publish" // String: cursor:publish:{chainId} - KeyCursorCommit = "cursor:commit" // String: cursor:commit:{chainId} -) - -// KafkaRedisConnector uses Redis for metadata storage and Kafka for block data delivery -type KafkaRedisConnector struct { - redisClient *redis.Client - cfg *config.KafkaConfig - kafkaPublisher *KafkaPublisher -} - -func NewKafkaRedisConnector(cfg *config.KafkaConfig) (*KafkaRedisConnector, error) { - // Connect to Redis - redisClient := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%d", cfg.Redis.Host, cfg.Redis.Port), - Password: cfg.Redis.Password, - DB: cfg.Redis.DB, - }) - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - if err := redisClient.Ping(ctx).Err(); err != nil { - return nil, fmt.Errorf("failed to connect to redis: %w", err) - } - - // Initialize Kafka publisher - kafkaPublisher, err := NewKafkaPublisher(cfg) - if err != nil { - return nil, err - } - - return 
&KafkaRedisConnector{ - redisClient: redisClient, - cfg: cfg, - kafkaPublisher: kafkaPublisher, - }, nil -} - -// Orchestrator Storage Implementation - Block failures not supported - -func (kr *KafkaRedisConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) { - return nil, fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") -} - -func (kr *KafkaRedisConnector) StoreBlockFailures(failures []common.BlockFailure) error { - return fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") -} - -func (kr *KafkaRedisConnector) DeleteBlockFailures(failures []common.BlockFailure) error { - return fmt.Errorf("block failure tracking is not supported with KafkaRedis connector - use a different storage backend") -} - -func (kr *KafkaRedisConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) - - val, err := kr.redisClient.Get(ctx, key).Result() - if err == redis.Nil { - return big.NewInt(0), nil - } else if err != nil { - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(val, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", val) - } - - return blockNumber, nil -} - -func (kr *KafkaRedisConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) - return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() -} - -// Staging Storage Implementation - Not supported for KafkaRedis connector - -func (kr *KafkaRedisConnector) InsertStagingData(data []common.BlockData) error { - return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") -} - -func (kr *KafkaRedisConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { - return nil, fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") -} - -func (kr *KafkaRedisConnector) DeleteStagingData(data []common.BlockData) error { - return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") -} - -func (kr *KafkaRedisConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) - - val, err := kr.redisClient.Get(ctx, key).Result() - if err == redis.Nil { - return big.NewInt(0), nil - } else if err != nil { - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(val, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", val) - } - return blockNumber, nil -} - -func (kr *KafkaRedisConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) - return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() -} - -func (kr *KafkaRedisConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) - - val, err := kr.redisClient.Get(ctx, key).Result() - if err == redis.Nil { - return big.NewInt(0), nil - } else if err != nil { - return nil, err - } - - 
blockNumber, ok := new(big.Int).SetString(val, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", val) - } - return blockNumber, nil -} - -func (kr *KafkaRedisConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) - return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() -} - -func (kr *KafkaRedisConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { - return nil, fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") -} - -func (kr *KafkaRedisConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { - return fmt.Errorf("staging operations are not supported with KafkaRedis connector - use a different storage backend for staging") -} - -// InsertBlockData publishes block data to Kafka instead of storing in database -func (kr *KafkaRedisConnector) InsertBlockData(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - // Publish to Kafka - if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { - return fmt.Errorf("failed to publish block data to kafka: %w", err) - } - log.Debug(). - Int("blocks", len(data)). - Msg("Published block data to Kafka") - - // Update cursor to track the highest block number published - if len(data) > 0 { - // Find the highest block number in the batch - var maxBlock *big.Int - for _, blockData := range data { - if maxBlock == nil || blockData.Block.Number.Cmp(maxBlock) > 0 { - maxBlock = blockData.Block.Number - } - } - if maxBlock != nil { - ctx := context.Background() - chainId := data[0].Block.ChainId - key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) - if err := kr.redisClient.Set(ctx, key, maxBlock.String(), 0).Err(); err != nil { - return err - } - } - } - - return nil -} - -// ReplaceBlockData handles reorg by publishing both old and new data to Kafka -func (kr *KafkaRedisConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { - if len(data) == 0 { - return nil, nil - } - - oldBlocks := []common.BlockData{} - - // TODO: We need to fetch the old blocks from the primary data store - if err := kr.kafkaPublisher.PublishReorg(data, data); err != nil { - return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) - } - - // save cursor - return oldBlocks, nil -} - -func (kr *KafkaRedisConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) - - val, err := kr.redisClient.Get(ctx, key).Result() - if err == redis.Nil { - return big.NewInt(0), nil - } else if err != nil { - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(val, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", val) - } - return blockNumber, nil -} - -func (kr *KafkaRedisConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector 
for streaming") -} - -func (kr *KafkaRedisConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { - return QueryResult[common.TokenBalance]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { - return QueryResult[common.TokenTransfer]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { - return nil, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -// Query methods return errors as this is a write-only connector for streaming -func (kr *KafkaRedisConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { - return QueryResult[common.Block]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { - return QueryResult[common.Transaction]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { - return QueryResult[common.Log]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { - return QueryResult[common.Trace]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -func (kr *KafkaRedisConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { - return QueryResult[interface{}]{}, fmt.Errorf("query operations are not supported with KafkaRedis connector - this is a write-only connector for streaming") -} - -// Close closes the Redis connection -func (kr *KafkaRedisConnector) Close() error { - return kr.redisClient.Close() -} diff --git a/internal/storage/redis.go b/internal/storage/redis.go new file mode 100644 index 0000000..d48b17f --- /dev/null +++ b/internal/storage/redis.go @@ -0,0 +1,125 @@ +package storage + +import ( + 
"context" + "fmt" + "math/big" + "time" + + "github.com/redis/go-redis/v9" + config "github.com/thirdweb-dev/indexer/configs" +) + +// Redis key namespace constants for better organization and maintainability +const ( + // Cursor keys for tracking positions + KeyCursorReorg = "cursor:reorg" // String: cursor:reorg:{chainId} + KeyCursorPublish = "cursor:publish" // String: cursor:publish:{chainId} + KeyCursorCommit = "cursor:commit" // String: cursor:commit:{chainId} +) + +// RedisConnector uses Redis for metadata storage +type RedisConnector struct { + redisClient *redis.Client + cfg *config.RedisConfig +} + +func NewRedisConnector(cfg *config.RedisConfig) (*RedisConnector, error) { + // Connect to Redis + redisClient := redis.NewClient(&redis.Options{ + Addr: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port), + Password: cfg.Password, + DB: cfg.DB, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := redisClient.Ping(ctx).Err(); err != nil { + return nil, fmt.Errorf("failed to connect to redis: %w", err) + } + + return &RedisConnector{ + redisClient: redisClient, + cfg: cfg, + }, nil +} + +// Orchestrator Storage Implementation +func (kr *RedisConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + + return blockNumber, nil +} + +func (kr *RedisConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +func (kr *RedisConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *RedisConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +func (kr *RedisConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *RedisConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +// Close 
closes the Redis connection +func (kr *RedisConnector) Close() error { + return kr.redisClient.Close() +} From f920a71b653aa12f100be381910f8f76438879da Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 20:45:47 +0000 Subject: [PATCH 30/43] kafka requires orchestrator --- cmd/migrate_valid.go | 2 +- internal/handlers/logs_handlers.go | 3 ++- internal/orchestrator/validator.go | 1 + internal/storage/connector.go | 14 ++++++++++---- internal/storage/kafka.go | 24 ++++++++++++++++++------ 5 files changed, 32 insertions(+), 12 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index 04dcc60..cb384de 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -314,7 +314,7 @@ func NewMigrator() *Migrator { validator := orchestrator.NewValidator(rpcClient, sourceConnector) - destinationConnector, err := storage.NewMainConnector(&config.Cfg.Migrator.Destination) + destinationConnector, err := storage.NewMainConnector(&config.Cfg.Migrator.Destination, &sourceConnector.OrchestratorStorage) if err != nil { log.Fatal().Err(err).Msg("Failed to initialize storage") } diff --git a/internal/handlers/logs_handlers.go b/internal/handlers/logs_handlers.go index 89f9d2e..63ac197 100644 --- a/internal/handlers/logs_handlers.go +++ b/internal/handlers/logs_handlers.go @@ -224,7 +224,8 @@ func decodeLogsIfNeeded(chainId string, logs []common.Log, eventABI *abi.Event, func getMainStorage() (storage.IMainStorage, error) { storageOnce.Do(func() { var err error - mainStorage, err = storage.NewMainConnector(&config.Cfg.Storage.Main) + // TODO: move this to a QueryConnector later to decouple read/write connector + mainStorage, err = storage.NewMainConnector(&config.Cfg.Storage.Main, nil) if err != nil { storageErr = err log.Error().Err(err).Msg("Error creating storage connector") diff --git a/internal/orchestrator/validator.go b/internal/orchestrator/validator.go index b37b986..63a174f 100644 --- a/internal/orchestrator/validator.go +++ b/internal/orchestrator/validator.go @@ -186,5 +186,6 @@ func (v *Validator) FindAndFixGaps(startBlock *big.Int, endBlock *big.Int) error log.Error().Err(err).Msgf("Failed to insert missing blocks: %v", polledBlocks) return err } + return nil } diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 4767578..23fdb52 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -175,7 +175,7 @@ func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { return IStorage{}, fmt.Errorf("failed to create staging storage: %w", err) } - storage.MainStorage, err = NewMainConnector(&cfg.Main) + storage.MainStorage, err = NewMainConnector(&cfg.Main, &storage.OrchestratorStorage) if err != nil { return IStorage{}, fmt.Errorf("failed to create main storage: %w", err) } @@ -302,7 +302,7 @@ func NewStagingConnector(cfg *config.StorageStagingConfig) (IStagingStorage, err return typedConn, nil } -func NewMainConnector(cfg *config.StorageMainConfig) (IMainStorage, error) { +func NewMainConnector(cfg *config.StorageMainConfig, orchestratorStorage *IOrchestratorStorage) (IMainStorage, error) { var conn interface{} var err error @@ -319,7 +319,10 @@ func NewMainConnector(cfg *config.StorageMainConfig) (IMainStorage, error) { if cfg.Kafka == nil { return nil, fmt.Errorf("kafka storage type specified but kafka config is nil") } - conn, err = NewKafkaConnector(cfg.Kafka) + if orchestratorStorage == nil { + return nil, fmt.Errorf("orchestrator storage must be provided for 
kafka main storage") + } + conn, err = NewKafkaConnector(cfg.Kafka, orchestratorStorage) case "s3": if cfg.S3 == nil { return nil, fmt.Errorf("s3 storage type specified but s3 config is nil") @@ -346,7 +349,10 @@ func NewMainConnector(cfg *config.StorageMainConfig) (IMainStorage, error) { } else { // Auto mode: use the first non-nil config (existing behavior) if cfg.Kafka != nil { - conn, err = NewKafkaConnector(cfg.Kafka) + if orchestratorStorage == nil { + return nil, fmt.Errorf("orchestrator storage must be provided for kafka main storage") + } + conn, err = NewKafkaConnector(cfg.Kafka, orchestratorStorage) } else if cfg.S3 != nil { conn, err = NewS3Connector(cfg.S3) } else if cfg.Postgres != nil { diff --git a/internal/storage/kafka.go b/internal/storage/kafka.go index 1de9014..747d853 100644 --- a/internal/storage/kafka.go +++ b/internal/storage/kafka.go @@ -11,20 +11,26 @@ import ( // KafkaConnector uses Redis for metadata storage and Kafka for block data delivery type KafkaConnector struct { - cfg *config.KafkaConfig - kafkaPublisher *KafkaPublisher + cfg *config.KafkaConfig + kafkaPublisher *KafkaPublisher + orchestratorStorage IOrchestratorStorage } -func NewKafkaConnector(cfg *config.KafkaConfig) (*KafkaConnector, error) { +func NewKafkaConnector(cfg *config.KafkaConfig, orchestratorStorage *IOrchestratorStorage) (*KafkaConnector, error) { // Initialize Kafka publisher kafkaPublisher, err := NewKafkaPublisher(cfg) if err != nil { return nil, err } + if orchestratorStorage == nil { + return nil, fmt.Errorf("orchestrator storage must be provided for kafka connector") + } + return &KafkaConnector{ - cfg: cfg, - kafkaPublisher: kafkaPublisher, + cfg: cfg, + kafkaPublisher: kafkaPublisher, + orchestratorStorage: *orchestratorStorage, }, nil } @@ -42,6 +48,12 @@ func (kr *KafkaConnector) InsertBlockData(data []common.BlockData) error { Int("blocks", len(data)). 
Msg("Published block data to Kafka") + chainId := data[0].Block.ChainId + maxBlockNumber := data[len(data)-1].Block.Number + if err := kr.orchestratorStorage.SetLastCommittedBlockNumber(chainId, maxBlockNumber); err != nil { + return fmt.Errorf("failed to update last committed block number in orchestrator storage: %w", err) + } + return nil } @@ -63,7 +75,7 @@ func (kr *KafkaConnector) ReplaceBlockData(data []common.BlockData) ([]common.Bl } func (kr *KafkaConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { - return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") + return kr.orchestratorStorage.GetLastCommittedBlockNumber(chainId) } func (kr *KafkaConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { From 32eece50d4bc366f9e0eeea674f04e37a311a961 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 21:17:59 +0000 Subject: [PATCH 31/43] Fix orchestrator flag --- cmd/root.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 6aa04c7..1661638 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -311,6 +311,12 @@ func init() { viper.BindPFlag("storage.orchestrator.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-maxIdleConns")) viper.BindPFlag("storage.orchestrator.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-maxConnLifetime")) viper.BindPFlag("storage.orchestrator.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-connectTimeout")) + viper.BindPFlag("storage.orchestrator.redis.host", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-host")) + viper.BindPFlag("storage.orchestrator.redis.port", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-port")) + viper.BindPFlag("storage.orchestrator.redis.password", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-password")) + viper.BindPFlag("storage.orchestrator.redis.db", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-db")) + viper.BindPFlag("storage.orchestrator.badger.path", rootCmd.PersistentFlags().Lookup("storage-orchestrator-badger-path")) + viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) viper.BindPFlag("storage.staging.postgres.host", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-host")) viper.BindPFlag("storage.staging.postgres.port", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-port")) viper.BindPFlag("storage.staging.postgres.username", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-username")) @@ -321,19 +327,13 @@ func init() { viper.BindPFlag("storage.staging.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxIdleConns")) viper.BindPFlag("storage.staging.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxConnLifetime")) viper.BindPFlag("storage.staging.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-connectTimeout")) + viper.BindPFlag("storage.staging.badger.path", rootCmd.PersistentFlags().Lookup("storage-staging-badger-path")) + viper.BindPFlag("storage.staging.type", rootCmd.PersistentFlags().Lookup("storage-staging-type")) viper.BindPFlag("storage.main.kafka.brokers", 
rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) viper.BindPFlag("storage.main.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) - viper.BindPFlag("storage.orchestrator.redis.host", rootCmd.PersistentFlags().Lookup("storage-main-redis-host")) - viper.BindPFlag("storage.orchestrator.redis.port", rootCmd.PersistentFlags().Lookup("storage-main-redis-port")) - viper.BindPFlag("storage.orchestrator.redis.password", rootCmd.PersistentFlags().Lookup("storage-main-redis-password")) - viper.BindPFlag("storage.orchestrator.redis.db", rootCmd.PersistentFlags().Lookup("storage-main-redis-db")) - viper.BindPFlag("storage.staging.type", rootCmd.PersistentFlags().Lookup("storage-staging-type")) viper.BindPFlag("storage.main.type", rootCmd.PersistentFlags().Lookup("storage-main-type")) - viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) - viper.BindPFlag("storage.staging.badger.path", rootCmd.PersistentFlags().Lookup("storage-staging-badger-path")) - viper.BindPFlag("storage.orchestrator.badger.path", rootCmd.PersistentFlags().Lookup("storage-orchestrator-badger-path")) viper.BindPFlag("storage.main.s3.bucket", rootCmd.PersistentFlags().Lookup("storage-main-s3-bucket")) viper.BindPFlag("storage.main.s3.region", rootCmd.PersistentFlags().Lookup("storage-main-s3-region")) viper.BindPFlag("storage.main.s3.prefix", rootCmd.PersistentFlags().Lookup("storage-main-s3-prefix")) From e35ff76abb61dd9b87b57c4929c2ab749216b620 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 21:57:28 +0000 Subject: [PATCH 32/43] Fix badger keys --- internal/storage/badger.go | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/internal/storage/badger.go b/internal/storage/badger.go index e649040..991e479 100644 --- a/internal/storage/badger.go +++ b/internal/storage/badger.go @@ -91,8 +91,16 @@ func blockKey(chainId *big.Int, blockNumber *big.Int) []byte { return []byte(fmt.Sprintf("blockdata:%s:%s", chainId.String(), blockNumber.String())) } -func blockFailureKey(chainId *big.Int, blockNumber *big.Int, timestamp int64) []byte { - return []byte(fmt.Sprintf("blockfailure:%s:%s:%d", chainId.String(), blockNumber.String(), timestamp)) +func blockKeyRange(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("blockdata:%s:", chainId.String())) +} + +func blockFailureKey(chainId *big.Int, blockNumber *big.Int) []byte { + return []byte(fmt.Sprintf("blockfailure:%s:%s", chainId.String(), blockNumber.String())) +} + +func blockFailureKeyRange(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("blockfailure:%s:", chainId.String())) } func lastReorgKey(chainId *big.Int) []byte { @@ -113,7 +121,7 @@ func (bc *BadgerConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailu defer bc.mu.RUnlock() var failures []common.BlockFailure - prefix := fmt.Sprintf("f:%d:", qf.ChainId.Uint64()) + prefix := blockFailureKeyRange(qf.ChainId) err := bc.db.View(func(txn *badger.Txn) error { opts := badger.DefaultIteratorOptions @@ -160,7 +168,7 @@ func (bc *BadgerConnector) StoreBlockFailures(failures []common.BlockFailure) er return bc.db.Update(func(txn *badger.Txn) error { for _, failure := range failures { - key := 
blockFailureKey(failure.ChainId, failure.BlockNumber, time.Now().Unix()) + key := blockFailureKey(failure.ChainId, failure.BlockNumber) var buf bytes.Buffer if err := gob.NewEncoder(&buf).Encode(failure); err != nil { @@ -182,7 +190,7 @@ func (bc *BadgerConnector) DeleteBlockFailures(failures []common.BlockFailure) e return bc.db.Update(func(txn *badger.Txn) error { for _, failure := range failures { // Delete all failure entries for this block - prefix := fmt.Sprintf("f:%d:%s:", failure.ChainId.Uint64(), failure.BlockNumber.String()) + prefix := blockFailureKey(failure.ChainId, failure.BlockNumber) opts := badger.DefaultIteratorOptions opts.Prefix = []byte(prefix) @@ -293,7 +301,7 @@ func (bc *BadgerConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, e } // Range query - prefix := fmt.Sprintf("b:%d:", qf.ChainId.Uint64()) + prefix := blockKeyRange(qf.ChainId) err := bc.db.View(func(txn *badger.Txn) error { opts := badger.DefaultIteratorOptions @@ -366,7 +374,7 @@ func (bc *BadgerConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart defer bc.mu.RUnlock() var maxBlock *big.Int - prefix := fmt.Sprintf("b:%d:", chainId.Uint64()) + prefix := blockKeyRange(chainId) err := bc.db.View(func(txn *badger.Txn) error { opts := badger.DefaultIteratorOptions @@ -481,7 +489,7 @@ func (bc *BadgerConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNum bc.mu.Lock() defer bc.mu.Unlock() - prefix := fmt.Sprintf("b:%d:", chainId.Uint64()) + prefix := blockKeyRange(chainId) return bc.db.Update(func(txn *badger.Txn) error { opts := badger.DefaultIteratorOptions From 6233232efcb814bf803a00c40a3b75792443fce6 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 22:10:58 +0000 Subject: [PATCH 33/43] Fix backfill missing blocks in staging --- internal/orchestrator/committer.go | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index 1e00602..c557933 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -631,18 +631,7 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big } func (c *Committer) handleMissingStagingData(ctx context.Context, blocksToCommit []*big.Int) { - // Checks if there are any blocks in staging after the current range end - lastStagedBlockNumber, err := c.storage.StagingStorage.GetLastStagedBlockNumber(c.rpc.GetChainID(), blocksToCommit[len(blocksToCommit)-1], big.NewInt(0)) - if err != nil { - log.Error().Err(err).Msg("Error checking staged data for missing range") - return - } - if lastStagedBlockNumber == nil || lastStagedBlockNumber.Sign() <= 0 { - log.Debug().Msgf("Committer is caught up with staging. No need to poll for missing blocks.") - return - } log.Debug().Msgf("Detected missing blocks in staging data starting from %s.", blocksToCommit[0].String()) - blocksToPoll := blocksToCommit if len(blocksToCommit) > int(c.poller.blocksPerPoll) { blocksToPoll = blocksToCommit[:int(c.poller.blocksPerPoll)] From 31d923f1211ff1a0aeb14f7aa542e9e7739b6350 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Tue, 26 Aug 2025 22:19:48 +0000 Subject: [PATCH 34/43] Revert "Fix backfill missing blocks in staging" This reverts commit 6233232efcb814bf803a00c40a3b75792443fce6. 
--- internal/orchestrator/committer.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index c557933..1e00602 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -631,7 +631,18 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big } func (c *Committer) handleMissingStagingData(ctx context.Context, blocksToCommit []*big.Int) { + // Checks if there are any blocks in staging after the current range end + lastStagedBlockNumber, err := c.storage.StagingStorage.GetLastStagedBlockNumber(c.rpc.GetChainID(), blocksToCommit[len(blocksToCommit)-1], big.NewInt(0)) + if err != nil { + log.Error().Err(err).Msg("Error checking staged data for missing range") + return + } + if lastStagedBlockNumber == nil || lastStagedBlockNumber.Sign() <= 0 { + log.Debug().Msgf("Committer is caught up with staging. No need to poll for missing blocks.") + return + } log.Debug().Msgf("Detected missing blocks in staging data starting from %s.", blocksToCommit[0].String()) + blocksToPoll := blocksToCommit if len(blocksToCommit) > int(c.poller.blocksPerPoll) { blocksToPoll = blocksToCommit[:int(c.poller.blocksPerPoll)] From 7cb6ff1ee64dd8b6f02ea5621b7bd21614141e58 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 27 Aug 2025 03:36:13 +0000 Subject: [PATCH 35/43] block buffer --- internal/storage/block_buffer.go | 229 +++++++++++++++++++++++++++++++ internal/storage/s3.go | 221 ++++++++++++++--------------- 2 files changed, 334 insertions(+), 116 deletions(-) create mode 100644 internal/storage/block_buffer.go diff --git a/internal/storage/block_buffer.go b/internal/storage/block_buffer.go new file mode 100644 index 0000000..ddec1aa --- /dev/null +++ b/internal/storage/block_buffer.go @@ -0,0 +1,229 @@ +package storage + +import ( + "fmt" + "math/big" + "sync" + + "github.com/rs/zerolog/log" + "github.com/thirdweb-dev/indexer/internal/common" +) + +// BlockBuffer manages buffering of block data with size and count limits +type BlockBuffer struct { + mu sync.RWMutex + data []common.BlockData + sizeBytes int64 + maxSizeBytes int64 + maxBlocks int +} + +// NewBlockBuffer creates a new block buffer +func NewBlockBuffer(maxSizeMB int64, maxBlocks int) *BlockBuffer { + return &BlockBuffer{ + data: make([]common.BlockData, 0), + maxSizeBytes: maxSizeMB * 1024 * 1024, + maxBlocks: maxBlocks, + } +} + +// Add adds blocks to the buffer and returns true if flush is needed +func (b *BlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool { + if len(blocks) == 0 { + return false + } + + b.mu.Lock() + defer b.mu.Unlock() + + // Add to buffer + b.data = append(b.data, blocks...) + b.sizeBytes += actualSizeBytes + + log.Debug(). + Int("block_count", len(blocks)). + Int64("size_bytes", actualSizeBytes). + Int64("total_size_bytes", b.sizeBytes). + Int("total_blocks", len(b.data)). + Msg("Added blocks to buffer") + + // Check if flush is needed + return b.shouldFlushLocked() +} + +// Flush removes all data from the buffer and returns it +func (b *BlockBuffer) Flush() []common.BlockData { + b.mu.Lock() + defer b.mu.Unlock() + + if len(b.data) == 0 { + return nil + } + + // Take ownership of data + data := b.data + b.data = make([]common.BlockData, 0) + b.sizeBytes = 0 + + log.Info(). + Int("block_count", len(data)). 
+ Msg("Flushing buffer") + + return data +} + +// ShouldFlush checks if the buffer should be flushed based on configured thresholds +func (b *BlockBuffer) ShouldFlush() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return b.shouldFlushLocked() +} + +// Size returns the current buffer size in bytes and block count +func (b *BlockBuffer) Size() (int64, int) { + b.mu.RLock() + defer b.mu.RUnlock() + return b.sizeBytes, len(b.data) +} + +// IsEmpty returns true if the buffer is empty +func (b *BlockBuffer) IsEmpty() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return len(b.data) == 0 +} + +// GetData returns a copy of the current buffer data +func (b *BlockBuffer) GetData() []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + result := make([]common.BlockData, len(b.data)) + copy(result, b.data) + return result +} + +// GetBlocksInRange returns blocks from the buffer that fall within the given range +func (b *BlockBuffer) GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result []common.BlockData + for _, block := range b.data { + if block.Block.ChainId.Cmp(chainId) == 0 { + blockNum := block.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + result = append(result, block) + } + } + } + return result +} + +// GetBlockByNumber returns a specific block from the buffer if it exists +func (b *BlockBuffer) GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + for _, block := range b.data { + if block.Block.ChainId.Cmp(chainId) == 0 && block.Block.Number.Cmp(blockNumber) == 0 { + blockCopy := block + return &blockCopy + } + } + return nil +} + +// GetMaxBlockNumber returns the maximum block number for a chain in the buffer +func (b *BlockBuffer) GetMaxBlockNumber(chainId *big.Int) *big.Int { + b.mu.RLock() + defer b.mu.RUnlock() + + var maxBlock *big.Int + for _, block := range b.data { + if block.Block.ChainId.Cmp(chainId) == 0 { + if maxBlock == nil || block.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(block.Block.Number) + } + } + } + return maxBlock +} + +// Clear empties the buffer without returning data +func (b *BlockBuffer) Clear() { + b.mu.Lock() + defer b.mu.Unlock() + + b.data = make([]common.BlockData, 0) + b.sizeBytes = 0 +} + +// Stats returns statistics about the buffer +func (b *BlockBuffer) Stats() BufferStats { + b.mu.RLock() + defer b.mu.RUnlock() + + stats := BufferStats{ + BlockCount: len(b.data), + SizeBytes: b.sizeBytes, + ChainCount: 0, + ChainStats: make(map[uint64]ChainStats), + } + + // Calculate per-chain statistics + for _, block := range b.data { + chainId := block.Block.ChainId.Uint64() + chainStat := stats.ChainStats[chainId] + + if chainStat.MinBlock == nil || block.Block.Number.Cmp(chainStat.MinBlock) < 0 { + chainStat.MinBlock = new(big.Int).Set(block.Block.Number) + } + if chainStat.MaxBlock == nil || block.Block.Number.Cmp(chainStat.MaxBlock) > 0 { + chainStat.MaxBlock = new(big.Int).Set(block.Block.Number) + } + chainStat.BlockCount++ + + stats.ChainStats[chainId] = chainStat + } + + stats.ChainCount = len(stats.ChainStats) + return stats +} + +// Private methods + +func (b *BlockBuffer) shouldFlushLocked() bool { + // Check size limit + if b.maxSizeBytes > 0 && b.sizeBytes >= b.maxSizeBytes { + return true + } + + // Check block count limit + if b.maxBlocks > 0 && len(b.data) >= b.maxBlocks { + return true + } + + return false +} + +// BufferStats 
contains statistics about the buffer +type BufferStats struct { + BlockCount int + SizeBytes int64 + ChainCount int + ChainStats map[uint64]ChainStats +} + +// ChainStats contains per-chain statistics +type ChainStats struct { + BlockCount int + MinBlock *big.Int + MaxBlock *big.Int +} + +// String returns a string representation of buffer stats +func (s BufferStats) String() string { + return fmt.Sprintf("BufferStats{blocks=%d, size=%dMB, chains=%d}", + s.BlockCount, s.SizeBytes/(1024*1024), s.ChainCount) +} diff --git a/internal/storage/s3.go b/internal/storage/s3.go index 2ed0a74..4e1de31 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -27,15 +27,15 @@ type S3Connector struct { client *s3.Client config *config.S3Config formatter DataFormatter + buffer *BlockBuffer - // Buffering - buffer []common.BlockData - bufferMu sync.Mutex - bufferSize int64 // Current buffer size in bytes - bufferTimer *time.Timer + // Flush control stopCh chan struct{} flushCh chan struct{} flushDoneCh chan struct{} // Signals when flush is complete + flushTimer *time.Timer + timerMu sync.Mutex + lastAddTime time.Time wg sync.WaitGroup closeOnce sync.Once } @@ -114,11 +114,14 @@ func NewS3Connector(cfg *config.S3Config) (*S3Connector, error) { return nil, fmt.Errorf("unsupported format: %s", cfg.Format) } + // Create buffer with configured settings + buffer := NewBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) + s3c := &S3Connector{ client: s3Client, config: cfg, formatter: formatter, - buffer: make([]common.BlockData, 0), + buffer: buffer, stopCh: make(chan struct{}), flushCh: make(chan struct{}, 1), flushDoneCh: make(chan struct{}), @@ -136,9 +139,6 @@ func (s *S3Connector) InsertBlockData(data []common.BlockData) error { return nil } - s.bufferMu.Lock() - defer s.bufferMu.Unlock() - // Calculate actual serialized size for accurate memory tracking formattedData, err := s.formatter.FormatBlockData(data) if err != nil { @@ -147,42 +147,36 @@ func (s *S3Connector) InsertBlockData(data []common.BlockData) error { // Use actual serialized size for accurate memory tracking actualSize := int64(len(formattedData)) - s.bufferSize += actualSize log.Debug(). Int("block_count", len(data)). Int64("size_bytes", actualSize). Int64("avg_bytes_per_block", actualSize/int64(len(data))). Msg("Calculated actual block data size") - // Add to buffer - s.buffer = append(s.buffer, data...) 
- - // Reset timer if this is the first data in buffer - if len(s.buffer) == len(data) && s.bufferTimer == nil { - s.bufferTimer = time.AfterFunc(time.Duration(s.config.BufferTimeout)*time.Second, func() { + // Add to buffer and check if flush is needed + shouldFlush := s.buffer.Add(data, actualSize) + + // Start or reset timer when first data is added + s.timerMu.Lock() + sizeBytes, blockCount := s.buffer.Size() + if sizeBytes == actualSize && blockCount == len(data) && s.config.BufferTimeout > 0 { + // First data added to buffer, track time and start timer + s.lastAddTime = time.Now() + if s.flushTimer != nil { + s.flushTimer.Stop() + } + s.flushTimer = time.AfterFunc(time.Duration(s.config.BufferTimeout)*time.Second, func() { select { case s.flushCh <- struct{}{}: default: } }) } - - // Check if we should flush based on size or block count - shouldFlush := s.bufferSize >= s.config.BufferSize*1024*1024 // Convert MB to bytes - - // Only check block count if MaxBlocksPerFile is set (> 0) - if s.config.MaxBlocksPerFile > 0 && len(s.buffer) >= s.config.MaxBlocksPerFile { - shouldFlush = true - } + s.timerMu.Unlock() if shouldFlush { - // Stop timer if running - if s.bufferTimer != nil { - s.bufferTimer.Stop() - s.bufferTimer = nil - } - - // Trigger flush + // Stop timer and trigger flush + s.stopFlushTimer() select { case s.flushCh <- struct{}{}: default: @@ -196,6 +190,10 @@ func (s *S3Connector) InsertBlockData(data []common.BlockData) error { func (s *S3Connector) flushWorker() { defer s.wg.Done() + // Check periodically for expired buffers + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + for { select { case <-s.stopCh: @@ -209,30 +207,56 @@ func (s *S3Connector) flushWorker() { case s.flushDoneCh <- struct{}{}: default: } + case <-ticker.C: + // Check if buffer has expired based on our own tracking + if s.isBufferExpired() { + s.flushBuffer() + } } } } +// stopFlushTimer stops the flush timer if it's running +func (s *S3Connector) stopFlushTimer() { + s.timerMu.Lock() + defer s.timerMu.Unlock() + + if s.flushTimer != nil { + s.flushTimer.Stop() + s.flushTimer = nil + } +} + +// isBufferExpired checks if the buffer has exceeded the timeout duration +func (s *S3Connector) isBufferExpired() bool { + s.timerMu.Lock() + defer s.timerMu.Unlock() + + if s.config.BufferTimeout <= 0 || s.lastAddTime.IsZero() || s.buffer.IsEmpty() { + return false + } + + return time.Since(s.lastAddTime) > time.Duration(s.config.BufferTimeout)*time.Second +} + // flushBuffer writes buffered data to S3 func (s *S3Connector) flushBuffer() error { - s.bufferMu.Lock() - if len(s.buffer) == 0 { - s.bufferMu.Unlock() + data := s.buffer.Flush() + if len(data) == 0 { return nil } - // Take ownership of buffer - data := s.buffer - s.buffer = make([]common.BlockData, 0) - s.bufferSize = 0 + // Stop timer and reset last add time since we're flushing + s.stopFlushTimer() + s.timerMu.Lock() + s.lastAddTime = time.Time{} + s.timerMu.Unlock() - // Stop timer if running - if s.bufferTimer != nil { - s.bufferTimer.Stop() - s.bufferTimer = nil - } - s.bufferMu.Unlock() + return s.uploadBatchData(data) +} +// uploadBatchData handles uploading batched data to S3, grouped by chain +func (s *S3Connector) uploadBatchData(data []common.BlockData) error { // Group blocks by chain to generate appropriate keys chainGroups := make(map[uint64][]common.BlockData) for _, block := range data { @@ -276,11 +300,7 @@ func (s *S3Connector) flushBuffer() error { // Flush manually triggers a buffer flush and waits for completion 
func (s *S3Connector) Flush() error { // Check if buffer has data - s.bufferMu.Lock() - hasData := len(s.buffer) > 0 - s.bufferMu.Unlock() - - if !hasData { + if s.buffer.IsEmpty() { return nil } @@ -315,8 +335,11 @@ func (s *S3Connector) Flush() error { // Close closes the S3 connector and flushes any remaining data func (s *S3Connector) Close() error { var closeErr error - + s.closeOnce.Do(func() { + // Stop the flush timer + s.stopFlushTimer() + // First, ensure any pending data is flushed if err := s.Flush(); err != nil { log.Error().Err(err).Msg("Error flushing buffer during close") @@ -573,16 +596,11 @@ func (s *S3Connector) GetTokenTransfers(qf TransfersQueryFilter, fields ...strin } func (s *S3Connector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { - maxBlock := big.NewInt(0) - // First check the buffer for blocks from this chain - s.bufferMu.Lock() - for _, block := range s.buffer { - if block.Block.ChainId.Cmp(chainId) == 0 && block.Block.Number.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(block.Block.Number) - } + maxBlock := s.buffer.GetMaxBlockNumber(chainId) + if maxBlock == nil { + maxBlock = big.NewInt(0) } - s.bufferMu.Unlock() // Then check S3 for the maximum block number prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) @@ -622,19 +640,14 @@ func (s *S3Connector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big foundAny := false // First check the buffer for blocks in this range - s.bufferMu.Lock() - for _, block := range s.buffer { - if block.Block.ChainId.Cmp(chainId) == 0 { - blockNum := block.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { - if !foundAny || blockNum.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(blockNum) - foundAny = true - } - } + bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) + for _, block := range bufferBlocks { + blockNum := block.Block.Number + if !foundAny || blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) + foundAny = true } } - s.bufferMu.Unlock() // Then check S3 files prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) @@ -693,29 +706,24 @@ func (s *S3Connector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBl foundAny := false // First check the buffer for blocks in this range - s.bufferMu.Lock() - for _, block := range s.buffer { - if block.Block.ChainId.Cmp(chainId) == 0 { - blockNum := block.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { - count.Add(count, big.NewInt(1)) - - if !foundAny { - minBlock = new(big.Int).Set(blockNum) - maxBlock = new(big.Int).Set(blockNum) - foundAny = true - } else { - if blockNum.Cmp(minBlock) < 0 { - minBlock = new(big.Int).Set(blockNum) - } - if blockNum.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(blockNum) - } - } + bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) + for _, block := range bufferBlocks { + blockNum := block.Block.Number + count.Add(count, big.NewInt(1)) + + if !foundAny { + minBlock = new(big.Int).Set(blockNum) + maxBlock = new(big.Int).Set(blockNum) + foundAny = true + } else { + if blockNum.Cmp(minBlock) < 0 { + minBlock = new(big.Int).Set(blockNum) + } + if blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) } } } - s.bufferMu.Unlock() // Then check S3 files prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) @@ -786,8 +794,8 @@ func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, var headers []common.BlockHeader // First get headers from buffer - 
s.bufferMu.Lock() - for _, block := range s.buffer { + bufferData := s.buffer.GetData() + for _, block := range bufferData { if block.Block.ChainId.Cmp(chainId) == 0 { // Check if block is in range (if from is specified) if from != nil && block.Block.Number.Cmp(from) > 0 { @@ -804,7 +812,6 @@ func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, }) } } - s.bufferMu.Unlock() // If we need more headers, get from S3 if to == nil || len(headers) < int(to.Int64()) { @@ -846,19 +853,8 @@ func (s *S3Connector) GetValidationBlockData(chainId *big.Int, startBlock *big.I return nil, fmt.Errorf("start block must be less than or equal to end block") } - var blockData []common.BlockData - // First check buffer for blocks in range - s.bufferMu.Lock() - for _, block := range s.buffer { - if block.Block.ChainId.Cmp(chainId) == 0 { - blockNum := block.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { - blockData = append(blockData, block) - } - } - } - s.bufferMu.Unlock() + blockData := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) // Then find and download relevant files from S3 files, err := s.findFilesInRange(chainId, startBlock, endBlock) @@ -888,16 +884,10 @@ func (s *S3Connector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big. blockSet := make(map[string]bool) // First add blocks from buffer - s.bufferMu.Lock() - for _, block := range s.buffer { - if block.Block.ChainId.Cmp(chainId) == 0 { - blockNum := block.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { - blockSet[blockNum.String()] = true - } - } + bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) + for _, block := range bufferBlocks { + blockSet[block.Block.Number.String()] = true } - s.bufferMu.Unlock() // Then check S3 files in range files, err := s.findFilesInRange(chainId, startBlock, endBlock) @@ -944,8 +934,8 @@ func (s *S3Connector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int var result []common.BlockData // First check buffer for requested blocks - s.bufferMu.Lock() - for _, block := range s.buffer { + bufferData := s.buffer.GetData() + for _, block := range bufferData { if block.Block.ChainId.Cmp(chainId) == 0 { if blockNumMap[block.Block.Number.String()] { result = append(result, block) @@ -954,7 +944,6 @@ func (s *S3Connector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int } } } - s.bufferMu.Unlock() // If all blocks were in buffer, return early if len(blockNumMap) == 0 { From 59aad94a2e72c0482684e4a63f2bd231c2bfbd15 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 27 Aug 2025 08:35:19 +0000 Subject: [PATCH 36/43] Poller S3 support --- cmd/root.go | 25 + configs/config.go | 48 +- internal/orchestrator/orchestrator.go | 6 +- internal/orchestrator/poller.go | 27 +- internal/source/s3.go | 1119 +++++++++++++++++++++++++ internal/source/source.go | 14 + internal/storage/s3.go | 4 +- internal/worker/worker.go | 353 ++++++-- 8 files changed, 1496 insertions(+), 100 deletions(-) create mode 100644 internal/source/s3.go create mode 100644 internal/source/source.go diff --git a/cmd/root.go b/cmd/root.go index 1661638..fb3999f 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -56,6 +56,19 @@ func init() { rootCmd.PersistentFlags().Bool("poller-force-from-block", false, "Force the poller to start from the block specified in `poller-from-block`") rootCmd.PersistentFlags().Int("poller-until-block", 0, "Until which block to poll") 
rootCmd.PersistentFlags().Int("poller-parallel-pollers", 5, "Maximum number of parallel pollers") + rootCmd.PersistentFlags().String("poller-s3-bucket", "", "S3 bucket for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-region", "", "S3 region for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-prefix", "", "S3 prefix for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-accessKeyId", "", "S3 access key ID for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-secretAccessKey", "", "S3 secret access key for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-endpoint", "", "S3 endpoint for poller archive source (for S3-compatible services)") + rootCmd.PersistentFlags().String("poller-s3-format", "parquet", "S3 storage format for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-cacheDir", "/tmp/insight-archive", "Local cache directory for poller archive source") + rootCmd.PersistentFlags().Int("poller-s3-metadataTTL", 0, "Metadata cache TTL in seconds for poller archive source") + rootCmd.PersistentFlags().Int("poller-s3-fileCacheTTL", 0, "File cache TTL in seconds for poller archive source") + rootCmd.PersistentFlags().Int64("poller-s3-maxCacheSize", 0, "Max cache size in bytes for poller archive source (default 5GB)") + rootCmd.PersistentFlags().Int("poller-s3-cleanupInterval", 0, "Cache cleanup interval in seconds for poller archive source") + rootCmd.PersistentFlags().Int("poller-s3-maxConcurrentDownloads", 3, "Max concurrent downloads for poller archive source") rootCmd.PersistentFlags().Bool("committer-enabled", true, "Toggle committer") rootCmd.PersistentFlags().Int("committer-blocks-per-commit", 10, "How many blocks to commit each interval") rootCmd.PersistentFlags().Int("committer-interval", 1000, "How often to commit blocks in milliseconds") @@ -247,6 +260,18 @@ func init() { viper.BindPFlag("poller.forceFromBlock", rootCmd.PersistentFlags().Lookup("poller-force-from-block")) viper.BindPFlag("poller.untilBlock", rootCmd.PersistentFlags().Lookup("poller-until-block")) viper.BindPFlag("poller.parallelPollers", rootCmd.PersistentFlags().Lookup("poller-parallel-pollers")) + viper.BindPFlag("poller.s3.endpoint", rootCmd.PersistentFlags().Lookup("poller-s3-endpoint")) + viper.BindPFlag("poller.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("poller-s3-accessKeyId")) + viper.BindPFlag("poller.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("poller-s3-secretAccessKey")) + viper.BindPFlag("poller.s3.bucket", rootCmd.PersistentFlags().Lookup("poller-s3-bucket")) + viper.BindPFlag("poller.s3.region", rootCmd.PersistentFlags().Lookup("poller-s3-region")) + viper.BindPFlag("poller.s3.prefix", rootCmd.PersistentFlags().Lookup("poller-s3-prefix")) + viper.BindPFlag("poller.s3.cacheDir", rootCmd.PersistentFlags().Lookup("poller-s3-cacheDir")) + viper.BindPFlag("poller.s3.metadataTTL", rootCmd.PersistentFlags().Lookup("poller-s3-metadataTTL")) + viper.BindPFlag("poller.s3.fileCacheTTL", rootCmd.PersistentFlags().Lookup("poller-s3-fileCacheTTL")) + viper.BindPFlag("poller.s3.maxCacheSize", rootCmd.PersistentFlags().Lookup("poller-s3-maxCacheSize")) + viper.BindPFlag("poller.s3.cleanupInterval", rootCmd.PersistentFlags().Lookup("poller-s3-cleanupInterval")) + viper.BindPFlag("poller.s3.maxConcurrentDownloads", rootCmd.PersistentFlags().Lookup("poller-s3-maxConcurrentDownloads")) viper.BindPFlag("committer.enabled", rootCmd.PersistentFlags().Lookup("committer-enabled")) 
viper.BindPFlag("committer.blocksPerCommit", rootCmd.PersistentFlags().Lookup("committer-blocks-per-commit")) viper.BindPFlag("committer.interval", rootCmd.PersistentFlags().Lookup("committer-interval")) diff --git a/configs/config.go b/configs/config.go index 9c3e7c0..78daec6 100644 --- a/configs/config.go +++ b/configs/config.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "strings" + "time" "github.com/rs/zerolog/log" "github.com/spf13/viper" @@ -16,13 +17,14 @@ type LogConfig struct { } type PollerConfig struct { - Enabled bool `mapstructure:"enabled"` - Interval int `mapstructure:"interval"` - BlocksPerPoll int `mapstructure:"blocksPerPoll"` - FromBlock int `mapstructure:"fromBlock"` - ForceFromBlock bool `mapstructure:"forceFromBlock"` - UntilBlock int `mapstructure:"untilBlock"` - ParallelPollers int `mapstructure:"parallelPollers"` + Enabled bool `mapstructure:"enabled"` + Interval int `mapstructure:"interval"` + BlocksPerPoll int `mapstructure:"blocksPerPoll"` + FromBlock int `mapstructure:"fromBlock"` + ForceFromBlock bool `mapstructure:"forceFromBlock"` + UntilBlock int `mapstructure:"untilBlock"` + ParallelPollers int `mapstructure:"parallelPollers"` + S3 *S3SourceConfig `mapstructure:"s3"` } type CommitterConfig struct { @@ -74,7 +76,7 @@ type StorageMainConfig struct { Postgres *PostgresConfig `mapstructure:"postgres"` Kafka *KafkaConfig `mapstructure:"kafka"` Badger *BadgerConfig `mapstructure:"badger"` - S3 *S3Config `mapstructure:"s3"` + S3 *S3StorageConfig `mapstructure:"s3"` } type BadgerConfig struct { @@ -82,14 +84,18 @@ type BadgerConfig struct { } type S3Config struct { - Bucket string `mapstructure:"bucket"` - Region string `mapstructure:"region"` - Prefix string `mapstructure:"prefix"` - AccessKeyID string `mapstructure:"accessKeyId"` - SecretAccessKey string `mapstructure:"secretAccessKey"` - Endpoint string `mapstructure:"endpoint"` - Format string `mapstructure:"format"` - Parquet *ParquetConfig `mapstructure:"parquet"` + Bucket string `mapstructure:"bucket"` + Region string `mapstructure:"region"` + Prefix string `mapstructure:"prefix"` + AccessKeyID string `mapstructure:"accessKeyId"` + SecretAccessKey string `mapstructure:"secretAccessKey"` + Endpoint string `mapstructure:"endpoint"` +} + +type S3StorageConfig struct { + S3Config `mapstructure:",squash"` + Format string `mapstructure:"format"` + Parquet *ParquetConfig `mapstructure:"parquet"` // Buffering configuration BufferSize int64 `mapstructure:"bufferSizeMB"` // Target buffer size in MB before flush (default 512 MB) BufferTimeout int `mapstructure:"bufferTimeoutSeconds"` // Max time in seconds before flush (default 300 = 5 min) @@ -237,6 +243,16 @@ type PublisherConfig struct { Events EventPublisherConfig `mapstructure:"events"` } +type S3SourceConfig struct { + S3Config `mapstructure:",squash"` + CacheDir string `mapstructure:"cacheDir"` + MetadataTTL time.Duration `mapstructure:"metadataTTL"` + FileCacheTTL time.Duration `mapstructure:"fileCacheTTL"` + MaxCacheSize int64 `mapstructure:"maxCacheSize"` + CleanupInterval time.Duration `mapstructure:"cleanupInterval"` + MaxConcurrentDownloads int `mapstructure:"maxConcurrentDownloads"` +} + type WorkModeConfig struct { CheckIntervalMinutes int `mapstructure:"checkIntervalMinutes"` LiveModeThreshold int64 `mapstructure:"liveModeThreshold"` diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index f412dff..ab54eb5 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -22,7 +22,6 @@ 
type Orchestrator struct { reorgHandlerEnabled bool cancel context.CancelFunc wg sync.WaitGroup - shutdownOnce sync.Once } func NewOrchestrator(rpc rpc.IRPCClient) (*Orchestrator, error) { @@ -65,7 +64,10 @@ func (o *Orchestrator) Start() { workModeMonitor.RegisterChannel(pollerWorkModeChan) defer workModeMonitor.UnregisterChannel(pollerWorkModeChan) - poller := NewPoller(o.rpc, o.storage, WithPollerWorkModeChan(pollerWorkModeChan)) + poller := NewPoller(o.rpc, o.storage, + WithPollerWorkModeChan(pollerWorkModeChan), + WithPollerS3Source(config.Cfg.Poller.S3), + ) poller.Start(ctx) log.Info().Msg("Poller completed") diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index 331f00c..527bf8d 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -12,6 +12,7 @@ import ( "github.com/thirdweb-dev/indexer/internal/common" "github.com/thirdweb-dev/indexer/internal/metrics" "github.com/thirdweb-dev/indexer/internal/rpc" + "github.com/thirdweb-dev/indexer/internal/source" "github.com/thirdweb-dev/indexer/internal/storage" "github.com/thirdweb-dev/indexer/internal/worker" ) @@ -21,6 +22,7 @@ const DEFAULT_TRIGGER_INTERVAL = 1000 type Poller struct { rpc rpc.IRPCClient + worker *worker.Worker blocksPerPoll int64 triggerIntervalMs int64 storage storage.IStorage @@ -47,15 +49,33 @@ func WithPollerWorkModeChan(ch chan WorkMode) PollerOption { } } +func WithPollerS3Source(cfg *config.S3SourceConfig) PollerOption { + return func(p *Poller) { + if cfg == nil || cfg.Region == "" || cfg.Bucket == "" { + return + } + + source, err := source.NewS3Source(cfg, p.rpc.GetChainID()) + if err != nil { + log.Fatal().Err(err).Msg("Failed to create S3 source") + } + + log.Info().Msg("Poller S3 source configuration detected, setting up S3 source for poller") + p.worker = worker.NewWorkerWithArchive(p.rpc, source) + } +} + func NewBoundlessPoller(rpc rpc.IRPCClient, storage storage.IStorage, opts ...PollerOption) *Poller { blocksPerPoll := config.Cfg.Poller.BlocksPerPoll if blocksPerPoll == 0 { blocksPerPoll = DEFAULT_BLOCKS_PER_POLL } + triggerInterval := config.Cfg.Poller.Interval if triggerInterval == 0 { triggerInterval = DEFAULT_TRIGGER_INTERVAL } + poller := &Poller{ rpc: rpc, triggerIntervalMs: int64(triggerInterval), @@ -68,6 +88,10 @@ func NewBoundlessPoller(rpc rpc.IRPCClient, storage storage.IStorage, opts ...Po opt(poller) } + if poller.worker == nil { + poller.worker = worker.NewWorker(poller.rpc) + } + return poller } @@ -235,8 +259,7 @@ func (p *Poller) PollWithoutSaving(ctx context.Context, blockNumbers []*big.Int) endBlockNumberFloat, _ := endBlock.Float64() metrics.PollerLastTriggeredBlock.Set(endBlockNumberFloat) - worker := worker.NewWorker(p.rpc) - results := worker.Run(ctx, blockNumbers) + results := p.worker.Run(ctx, blockNumbers) blockData, failedResults := p.convertPollResultsToBlockData(results) return blockData, failedResults } diff --git a/internal/source/s3.go b/internal/source/s3.go new file mode 100644 index 0000000..676a9ad --- /dev/null +++ b/internal/source/s3.go @@ -0,0 +1,1119 @@ +package source + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "math/big" + "os" + "path/filepath" + "sort" + "strings" + "sync" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + awsconfig "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/parquet-go/parquet-go" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + 
"github.com/thirdweb-dev/indexer/internal/common" + "github.com/thirdweb-dev/indexer/internal/rpc" +) + +// FileMetadata represents cached information about S3 files +type FileMetadata struct { + Key string + MinBlock *big.Int + MaxBlock *big.Int + Size int64 + LastAccess time.Time +} + +// BlockIndex represents the index of blocks within a file +type BlockIndex struct { + BlockNumber uint64 + RowOffset int64 + RowSize int +} + +type S3Source struct { + client *s3.Client + config *config.S3SourceConfig + chainId *big.Int + cacheDir string + + // Configurable settings + metadataTTL time.Duration // How long to cache metadata + fileCacheTTL time.Duration // How long to keep files in cache + maxCacheSize int64 // Max cache size in bytes + cleanupInterval time.Duration // How often to run cleanup + maxConcurrentDownloads int // Max concurrent S3 downloads + + // Metadata cache + metaMu sync.RWMutex + fileMetadata map[string]*FileMetadata // S3 key -> metadata + minBlock *big.Int + maxBlock *big.Int + metaLoaded bool + metaLoadTime time.Time // When metadata was last loaded + + // Local file cache + cacheMu sync.RWMutex + cacheMap map[string]time.Time // Track cache file access times + blockIndex map[string][]BlockIndex // File -> block indices + downloadMu sync.Mutex // Prevent duplicate downloads + + // Download tracking + downloading map[string]*sync.WaitGroup // Files currently downloading + + // Active use tracking + activeUseMu sync.RWMutex + activeUse map[string]int // Files currently being read (reference count) +} + +// ParquetBlockData represents the block data structure in parquet files +type ParquetBlockData struct { + ChainId uint64 `parquet:"chain_id"` + BlockNumber uint64 `parquet:"block_number"` + BlockHash string `parquet:"block_hash"` + BlockTimestamp int64 `parquet:"block_timestamp"` + Block []byte `parquet:"block_json"` + Transactions []byte `parquet:"transactions_json"` + Logs []byte `parquet:"logs_json"` + Traces []byte `parquet:"traces_json"` +} + +func NewS3Source(cfg *config.S3SourceConfig, chainId *big.Int) (*S3Source, error) { + // Apply defaults + if cfg.MetadataTTL == 0 { + cfg.MetadataTTL = 10 * time.Minute + } + if cfg.FileCacheTTL == 0 { + cfg.FileCacheTTL = 15 * time.Minute // 15 minutes + } + if cfg.MaxCacheSize == 0 { + cfg.MaxCacheSize = 5 * 1024 * 1024 * 1024 // Increased from 5GB to 10GB + } + if cfg.CleanupInterval == 0 { + cfg.CleanupInterval = 5 * time.Minute // 5 minutes + } + if cfg.MaxConcurrentDownloads == 0 { + cfg.MaxConcurrentDownloads = 3 + } + + awsCfg, err := awsconfig.LoadDefaultConfig(context.Background(), + awsconfig.WithRegion(cfg.Region), + ) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + // Override with explicit credentials if provided + if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { + awsCfg.Credentials = aws.CredentialsProviderFunc(func(ctx context.Context) (aws.Credentials, error) { + return aws.Credentials{ + AccessKeyID: cfg.AccessKeyID, + SecretAccessKey: cfg.SecretAccessKey, + }, nil + }) + } + + s3Client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { + if cfg.Endpoint != "" { + o.BaseEndpoint = aws.String(cfg.Endpoint) + } + }) + + // Create cache directory + cacheDir := cfg.CacheDir + if cacheDir == "" { + cacheDir = filepath.Join(os.TempDir(), "s3-archive-cache", fmt.Sprintf("chain_%d", chainId.Uint64())) + } + if err := os.MkdirAll(cacheDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create cache directory: %w", err) + } + + archive := &S3Source{ + client: 
s3Client, + config: cfg, + chainId: chainId, + cacheDir: cacheDir, + metadataTTL: cfg.MetadataTTL, + fileCacheTTL: cfg.FileCacheTTL, + maxCacheSize: cfg.MaxCacheSize, + cleanupInterval: cfg.CleanupInterval, + maxConcurrentDownloads: cfg.MaxConcurrentDownloads, + fileMetadata: make(map[string]*FileMetadata), + cacheMap: make(map[string]time.Time), + blockIndex: make(map[string][]BlockIndex), + downloading: make(map[string]*sync.WaitGroup), + activeUse: make(map[string]int), + } + + // Start cache cleanup goroutine + go archive.cleanupCache() + + // Load metadata in background (optional) + if cfg.Bucket != "" { + go func() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + if err := archive.loadMetadata(ctx); err != nil { + log.Warn().Err(err).Msg("Failed to preload S3 metadata") + } + }() + } + + return archive, nil +} + +func (s *S3Source) GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { + if len(blockNumbers) == 0 { + return nil + } + + // Ensure metadata is loaded + if err := s.ensureMetadataLoaded(ctx); err != nil { + log.Error().Err(err).Msg("Failed to load metadata") + return s.makeErrorResults(blockNumbers, err) + } + + // Sort block numbers for efficient file access + sortedBlocks := make([]*big.Int, len(blockNumbers)) + copy(sortedBlocks, blockNumbers) + sort.Slice(sortedBlocks, func(i, j int) bool { + return sortedBlocks[i].Cmp(sortedBlocks[j]) < 0 + }) + + // Group blocks by files that contain them + fileGroups := s.groupBlocksByFiles(sortedBlocks) + + // Mark files as being actively used + s.activeUseMu.Lock() + for fileKey := range fileGroups { + s.activeUse[fileKey]++ + log.Trace(). + Str("file", fileKey). + Int("new_count", s.activeUse[fileKey]). + Msg("Incrementing file reference count") + } + s.activeUseMu.Unlock() + + // Ensure we release the hold on files when done + defer func() { + s.activeUseMu.Lock() + for fileKey := range fileGroups { + s.activeUse[fileKey]-- + log.Trace(). + Str("file", fileKey). + Int("new_count", s.activeUse[fileKey]). 
+ Msg("Decrementing file reference count") + if s.activeUse[fileKey] <= 0 { + delete(s.activeUse, fileKey) + } + } + s.activeUseMu.Unlock() + + // Update access times to keep files in cache + s.cacheMu.Lock() + now := time.Now() + for fileKey := range fileGroups { + s.cacheMap[fileKey] = now + } + s.cacheMu.Unlock() + }() + + // Download required files and wait for ALL to be ready + if err := s.ensureFilesAvailable(ctx, fileGroups); err != nil { + log.Error().Err(err).Msg("Failed to ensure files are available") + return s.makeErrorResults(blockNumbers, err) + } + + // Read blocks from local files - at this point all files should be available + results := make([]rpc.GetFullBlockResult, 0, len(blockNumbers)) + resultMap := make(map[uint64]rpc.GetFullBlockResult) + + for fileKey, blocks := range fileGroups { + localPath := s.getCacheFilePath(fileKey) + + // Double-check file still exists (defensive programming) + if !s.isFileCached(localPath) { + log.Error().Str("file", fileKey).Str("path", localPath).Msg("File disappeared after ensureFilesAvailable") + // Try to re-download the file synchronously as a last resort + if err := s.downloadFile(ctx, fileKey); err != nil { + log.Error().Err(err).Str("file", fileKey).Msg("Failed to re-download disappeared file") + for _, bn := range blocks { + resultMap[bn.Uint64()] = rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: fmt.Errorf("file disappeared and re-download failed: %w", err), + } + } + continue + } + } + + // Read blocks from local file efficiently + fileResults, err := s.readBlocksFromLocalFile(localPath, blocks) + if err != nil { + log.Error().Err(err).Str("file", fileKey).Msg("Failed to read blocks from local file") + // Even if one file fails, continue with others + for _, bn := range blocks { + resultMap[bn.Uint64()] = rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: fmt.Errorf("failed to read from file: %w", err), + } + } + continue + } + + for blockNum, result := range fileResults { + resultMap[blockNum] = result + } + } + + // Build ordered results + for _, bn := range blockNumbers { + if result, ok := resultMap[bn.Uint64()]; ok { + results = append(results, result) + } else { + results = append(results, rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: fmt.Errorf("block %s not found", bn.String()), + }) + } + } + + return results +} + +func (s *S3Source) GetSupportedBlockRange(ctx context.Context) (minBlockNumber *big.Int, maxBlockNumber *big.Int, err error) { + if err := s.ensureMetadataLoaded(ctx); err != nil { + return nil, nil, err + } + + s.metaMu.RLock() + defer s.metaMu.RUnlock() + + if s.minBlock == nil || s.maxBlock == nil { + return big.NewInt(0), big.NewInt(0), fmt.Errorf("no blocks found for chain %d", s.chainId.Uint64()) + } + + return new(big.Int).Set(s.minBlock), new(big.Int).Set(s.maxBlock), nil +} + +func (s *S3Source) Close() { + // Clean up cache directory + if s.cacheDir != "" { + os.RemoveAll(s.cacheDir) + } +} + +// Metadata management + +func (s *S3Source) loadMetadata(ctx context.Context) error { + s.metaMu.Lock() + defer s.metaMu.Unlock() + + // Check if metadata is still fresh + if s.metaLoaded && time.Since(s.metaLoadTime) < s.metadataTTL { + return nil + } + + prefix := fmt.Sprintf("chain_%d/", s.chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for paginator.HasMorePages() { + page, err 
:= paginator.NextPage(ctx) + if err != nil { + return fmt.Errorf("failed to list S3 objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil || obj.Size == nil { + continue + } + + startBlock, endBlock := s.extractBlockRangeFromKey(*obj.Key) + if startBlock == nil || endBlock == nil { + continue + } + + // Store metadata + s.fileMetadata[*obj.Key] = &FileMetadata{ + Key: *obj.Key, + MinBlock: startBlock, + MaxBlock: endBlock, + Size: *obj.Size, + } + + // Update global min/max + if s.minBlock == nil || startBlock.Cmp(s.minBlock) < 0 { + s.minBlock = new(big.Int).Set(startBlock) + } + if s.maxBlock == nil || endBlock.Cmp(s.maxBlock) > 0 { + s.maxBlock = new(big.Int).Set(endBlock) + } + } + } + + s.metaLoaded = true + s.metaLoadTime = time.Now() + log.Info(). + Int("files", len(s.fileMetadata)). + Str("min_block", s.minBlock.String()). + Str("max_block", s.maxBlock.String()). + Dur("ttl", s.metadataTTL). + Msg("Loaded S3 metadata cache") + + return nil +} + +func (s *S3Source) ensureMetadataLoaded(ctx context.Context) error { + s.metaMu.RLock() + // Check if metadata is loaded and still fresh + if s.metaLoaded && time.Since(s.metaLoadTime) < s.metadataTTL { + s.metaMu.RUnlock() + return nil + } + s.metaMu.RUnlock() + + return s.loadMetadata(ctx) +} + +// File grouping and downloading + +func (s *S3Source) ensureFilesAvailable(ctx context.Context, fileGroups map[string][]*big.Int) error { + var wg sync.WaitGroup + errChan := make(chan error, len(fileGroups)) + + // Limit concurrent downloads + sem := make(chan struct{}, s.maxConcurrentDownloads) + + for fileKey := range fileGroups { + wg.Add(1) + go func(key string) { + defer wg.Done() + + // First check if file is already being downloaded by another goroutine + s.downloadMu.Lock() + if downloadWg, downloading := s.downloading[key]; downloading { + s.downloadMu.Unlock() + // Wait for the existing download to complete + downloadWg.Wait() + + // Verify file exists after waiting + localPath := s.getCacheFilePath(key) + if !s.isFileCached(localPath) { + errChan <- fmt.Errorf("file %s not available after waiting for download", key) + } else { + // Ensure file is tracked in cache map + s.ensureFileInCacheMap(key) + // Update access time for this file since we'll be using it + s.cacheMu.Lock() + s.cacheMap[key] = time.Now() + s.cacheMu.Unlock() + } + return + } + s.downloadMu.Unlock() + + // Check if file is already cached + localPath := s.getCacheFilePath(key) + if s.isFileCached(localPath) { + // Ensure file is in cache map (in case it was on disk but not tracked) + s.ensureFileInCacheMap(key) + // Update access time + s.cacheMu.Lock() + s.cacheMap[key] = time.Now() + s.cacheMu.Unlock() + return + } + + // Need to download the file + sem <- struct{}{} + defer func() { <-sem }() + + if err := s.downloadFile(ctx, key); err != nil { + errChan <- fmt.Errorf("failed to download %s: %w", key, err) + return + } + + // Verify file exists after download + if !s.isFileCached(localPath) { + errChan <- fmt.Errorf("file %s not cached after download", key) + } + }(fileKey) + } + + // Wait for all files to be available + wg.Wait() + close(errChan) + + // Collect any errors + var errors []string + for err := range errChan { + if err != nil { + errors = append(errors, err.Error()) + } + } + + if len(errors) > 0 { + return fmt.Errorf("failed to ensure files available: %s", strings.Join(errors, "; ")) + } + + return nil +} + +func (s *S3Source) groupBlocksByFiles(blockNumbers []*big.Int) map[string][]*big.Int { + s.metaMu.RLock() + defer 
s.metaMu.RUnlock() + + fileGroups := make(map[string][]*big.Int) + + for _, blockNum := range blockNumbers { + // Find files that contain this block + for _, meta := range s.fileMetadata { + if blockNum.Cmp(meta.MinBlock) >= 0 && blockNum.Cmp(meta.MaxBlock) <= 0 { + fileGroups[meta.Key] = append(fileGroups[meta.Key], blockNum) + break // Each block should only be in one file + } + } + } + + return fileGroups +} + +func (s *S3Source) downloadFile(ctx context.Context, fileKey string) error { + // Prevent duplicate downloads + s.downloadMu.Lock() + if wg, downloading := s.downloading[fileKey]; downloading { + s.downloadMu.Unlock() + wg.Wait() + return nil + } + + wg := &sync.WaitGroup{} + wg.Add(1) + s.downloading[fileKey] = wg + s.downloadMu.Unlock() + + defer func() { + wg.Done() + s.downloadMu.Lock() + delete(s.downloading, fileKey) + s.downloadMu.Unlock() + }() + + localPath := s.getCacheFilePath(fileKey) + + // Create temp file for atomic write + tempPath := localPath + ".tmp" + + // Download from S3 + result, err := s.client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(s.config.Bucket), + Key: aws.String(fileKey), + }) + if err != nil { + return fmt.Errorf("failed to download file: %w", err) + } + defer result.Body.Close() + + // Create directory if needed + dir := filepath.Dir(tempPath) + if err := os.MkdirAll(dir, 0755); err != nil { + return err + } + + // Write to temp file + file, err := os.Create(tempPath) + if err != nil { + return err + } + + _, err = io.Copy(file, result.Body) + file.Close() + + if err != nil { + os.Remove(tempPath) + return err + } + + // Atomic rename + if err := os.Rename(tempPath, localPath); err != nil { + os.Remove(tempPath) + return err + } + + // Build block index for the file + go s.buildBlockIndex(localPath, fileKey) + + // Update cache map + s.cacheMu.Lock() + s.cacheMap[fileKey] = time.Now() + s.cacheMu.Unlock() + + log.Info().Str("file", fileKey).Str("path", localPath).Msg("Downloaded file from S3") + + return nil +} + +// Optimized parquet reading + +func (s *S3Source) buildBlockIndex(filePath, fileKey string) error { + file, err := os.Open(filePath) + if err != nil { + return err + } + defer file.Close() + + stat, err := file.Stat() + if err != nil { + return err + } + + pFile, err := parquet.OpenFile(file, stat.Size()) + if err != nil { + return err + } + + // Read only the block_number column to build index + blockNumCol := -1 + for i, field := range pFile.Schema().Fields() { + if field.Name() == "block_number" { + blockNumCol = i + break + } + } + + if blockNumCol < 0 { + return fmt.Errorf("block_number column not found") + } + + var index []BlockIndex + for _, rg := range pFile.RowGroups() { + chunk := rg.ColumnChunks()[blockNumCol] + pages := chunk.Pages() + offset := int64(0) + + for { + page, err := pages.ReadPage() + if err != nil { + break + } + + values := page.Values() + // Type assert to the specific reader type + switch reader := values.(type) { + case parquet.Int64Reader: + // Handle int64 block numbers + blockNums := make([]int64, page.NumValues()) + n, _ := reader.ReadInt64s(blockNums) + + for i := 0; i < n; i++ { + if blockNums[i] >= 0 { + index = append(index, BlockIndex{ + BlockNumber: uint64(blockNums[i]), + RowOffset: offset + int64(i), + RowSize: 1, + }) + } + } + default: + // Try to read as generic values + values := make([]parquet.Value, page.NumValues()) + n, _ := reader.ReadValues(values) + + for i := 0; i < n; i++ { + if !values[i].IsNull() { + blockNum := values[i].Uint64() + index = append(index, 
BlockIndex{ + BlockNumber: blockNum, + RowOffset: offset + int64(i), + RowSize: 1, + }) + } + } + } + offset += int64(page.NumValues()) + } + } + + // Store index + s.cacheMu.Lock() + s.blockIndex[fileKey] = index + s.cacheMu.Unlock() + + return nil +} + +func (s *S3Source) readBlocksFromLocalFile(filePath string, blockNumbers []*big.Int) (map[uint64]rpc.GetFullBlockResult, error) { + // Update access time for this file + fileKey := s.getFileKeyFromPath(filePath) + if fileKey != "" { + s.cacheMu.Lock() + s.cacheMap[fileKey] = time.Now() + s.cacheMu.Unlock() + } + + file, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer file.Close() + + stat, err := file.Stat() + if err != nil { + return nil, err + } + + // Create block map for quick lookup + blockMap := make(map[uint64]bool) + for _, bn := range blockNumbers { + blockMap[bn.Uint64()] = true + } + + // Use optimized parquet reading + pFile, err := parquet.OpenFile(file, stat.Size()) + if err != nil { + return nil, err + } + + results := make(map[uint64]rpc.GetFullBlockResult) + + // Read row groups + for _, rg := range pFile.RowGroups() { + // Check row group statistics to see if it contains our blocks + if !s.rowGroupContainsBlocks(rg, blockMap) { + continue + } + + // Read rows from this row group using generic reader + rows := make([]parquet.Row, rg.NumRows()) + reader := parquet.NewRowGroupReader(rg) + + n, err := reader.ReadRows(rows) + if err != nil && err != io.EOF { + log.Warn().Err(err).Msg("Error reading row group") + continue + } + + // Convert rows to our struct + for i := 0; i < n; i++ { + row := rows[i] + if len(row) < 8 { + continue // Not enough columns + } + + // Extract block number first to check if we need this row + blockNum := row[1].Uint64() // block_number is second column + + // Skip if not in requested blocks + if !blockMap[blockNum] { + continue + } + + // Build ParquetBlockData from row + pd := ParquetBlockData{ + ChainId: row[0].Uint64(), + BlockNumber: blockNum, + BlockHash: row[2].String(), + BlockTimestamp: row[3].Int64(), + Block: row[4].ByteArray(), + Transactions: row[5].ByteArray(), + Logs: row[6].ByteArray(), + Traces: row[7].ByteArray(), + } + + // Parse block data + result, err := s.parseBlockData(pd) + if err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to parse block data") + continue + } + + results[pd.BlockNumber] = result + } + } + + return results, nil +} + +func (s *S3Source) rowGroupContainsBlocks(rg parquet.RowGroup, blockMap map[uint64]bool) bool { + // Get the block_number column chunk + for i, col := range rg.Schema().Fields() { + if col.Name() == "block_number" { + chunk := rg.ColumnChunks()[i] + ci, _ := chunk.ColumnIndex() + if ci != nil { + // Check min/max values + for j := 0; j < ci.NumPages(); j++ { + minVal := ci.MinValue(j) + maxVal := ci.MaxValue(j) + + if minVal.IsNull() || maxVal.IsNull() { + continue + } + + minBlock := minVal.Uint64() + maxBlock := maxVal.Uint64() + + // Check if any requested blocks fall in this range + for blockNum := range blockMap { + if blockNum >= minBlock && blockNum <= maxBlock { + return true + } + } + } + } + break + } + } + + // If no statistics, assume it might contain blocks + return true +} + +func (s *S3Source) parseBlockData(pd ParquetBlockData) (rpc.GetFullBlockResult, error) { + var block common.Block + if err := json.Unmarshal(pd.Block, &block); err != nil { + return rpc.GetFullBlockResult{}, err + } + + var transactions []common.Transaction + if len(pd.Transactions) > 0 { + if err := 
json.Unmarshal(pd.Transactions, &transactions); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal transactions") + } + } + + var logs []common.Log + if len(pd.Logs) > 0 { + if err := json.Unmarshal(pd.Logs, &logs); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal logs") + } + } + + var traces []common.Trace + if len(pd.Traces) > 0 { + if err := json.Unmarshal(pd.Traces, &traces); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal traces") + } + } + + return rpc.GetFullBlockResult{ + BlockNumber: new(big.Int).SetUint64(pd.BlockNumber), + Data: common.BlockData{ + Block: block, + Transactions: transactions, + Logs: logs, + Traces: traces, + }, + Error: nil, + }, nil +} + +// RefreshMetadata forces a refresh of the metadata cache +func (s *S3Source) RefreshMetadata(ctx context.Context) error { + s.metaMu.Lock() + s.metaLoaded = false + s.metaLoadTime = time.Time{} + s.metaMu.Unlock() + + return s.loadMetadata(ctx) +} + +// GetCacheStats returns statistics about the cache +func (s *S3Source) GetCacheStats() (fileCount int, totalSize int64, oldestAccess time.Time) { + s.cacheMu.RLock() + defer s.cacheMu.RUnlock() + + fileCount = len(s.cacheMap) + now := time.Now() + + for key, accessTime := range s.cacheMap { + path := s.getCacheFilePath(key) + if info, err := os.Stat(path); err == nil { + totalSize += info.Size() + } + if oldestAccess.IsZero() || accessTime.Before(oldestAccess) { + oldestAccess = accessTime + } + } + + // Also check metadata freshness + s.metaMu.RLock() + metaAge := now.Sub(s.metaLoadTime) + s.metaMu.RUnlock() + + log.Debug(). + Int("file_count", fileCount). + Int64("total_size_mb", totalSize/(1024*1024)). + Dur("oldest_file_age", now.Sub(oldestAccess)). + Dur("metadata_age", metaAge). 
+ Msg("Cache statistics") + + return fileCount, totalSize, oldestAccess +} + +// Helper functions + +func (s *S3Source) extractBlockRangeFromKey(key string) (*big.Int, *big.Int) { + parts := strings.Split(key, "/") + if len(parts) == 0 { + return nil, nil + } + + filename := parts[len(parts)-1] + if !strings.HasPrefix(filename, "blocks_") || !strings.HasSuffix(filename, ".parquet") { + return nil, nil + } + + rangeStr := strings.TrimPrefix(filename, "blocks_") + rangeStr = strings.TrimSuffix(rangeStr, ".parquet") + + rangeParts := strings.Split(rangeStr, "_") + if len(rangeParts) != 2 { + return nil, nil + } + + startBlock, ok1 := new(big.Int).SetString(rangeParts[0], 10) + endBlock, ok2 := new(big.Int).SetString(rangeParts[1], 10) + if !ok1 || !ok2 { + return nil, nil + } + + return startBlock, endBlock +} + +func (s *S3Source) getCacheFilePath(fileKey string) string { + // Create a safe filename from the S3 key + hash := sha256.Sum256([]byte(fileKey)) + filename := hex.EncodeToString(hash[:])[:16] + ".parquet" + return filepath.Join(s.cacheDir, filename) +} + +func (s *S3Source) getFileKeyFromPath(filePath string) string { + // Reverse lookup - find the key for a given cache path + s.cacheMu.RLock() + defer s.cacheMu.RUnlock() + + for key := range s.cacheMap { + if s.getCacheFilePath(key) == filePath { + return key + } + } + return "" +} + +func (s *S3Source) isFileCached(filePath string) bool { + // First check if file exists at all + info, err := os.Stat(filePath) + if err != nil { + return false + } + + // Check if file has content + if info.Size() == 0 { + return false + } + + // Check if a temp file exists (indicating incomplete download) + tempPath := filePath + ".tmp" + if _, err := os.Stat(tempPath); err == nil { + // Temp file exists, download is incomplete + return false + } + + // File exists, has content, and no temp file - it's cached + return true +} + +// ensureFileInCacheMap ensures a file that exists on disk is tracked in the cache map +func (s *S3Source) ensureFileInCacheMap(fileKey string) { + s.cacheMu.Lock() + defer s.cacheMu.Unlock() + + // If not in cache map, add it with current time + if _, exists := s.cacheMap[fileKey]; !exists { + localPath := s.getCacheFilePath(fileKey) + if info, err := os.Stat(localPath); err == nil { + // Use file modification time if it's recent, otherwise use current time + modTime := info.ModTime() + if time.Since(modTime) < s.fileCacheTTL { + s.cacheMap[fileKey] = modTime + } else { + s.cacheMap[fileKey] = time.Now() + } + log.Trace(). + Str("file", fileKey). + Time("access_time", s.cacheMap[fileKey]). 
+ Msg("Added existing file to cache map") + } + } +} + +func (s *S3Source) makeErrorResults(blockNumbers []*big.Int, err error) []rpc.GetFullBlockResult { + results := make([]rpc.GetFullBlockResult, len(blockNumbers)) + for i, bn := range blockNumbers { + results[i] = rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: err, + } + } + return results +} + +func (s *S3Source) cleanupCache() { + ticker := time.NewTicker(s.cleanupInterval) + defer ticker.Stop() + + for range ticker.C { + s.cacheMu.Lock() + s.downloadMu.Lock() + s.activeUseMu.RLock() + + // Remove files not accessed within the TTL + cutoff := time.Now().Add(-s.fileCacheTTL) + protectedCount := 0 + expiredCount := 0 + + for fileKey, accessTime := range s.cacheMap { + // Skip files that are currently being downloaded + if _, downloading := s.downloading[fileKey]; downloading { + protectedCount++ + continue + } + + // Skip files that are actively being used + if count, active := s.activeUse[fileKey]; active && count > 0 { + protectedCount++ + // Only log at trace level to reduce noise + log.Trace(). + Str("file", fileKey). + Int("ref_count", count). + Msg("Skipping actively used file in cleanup") + continue + } + + if accessTime.Before(cutoff) { + expiredCount++ + cacheFile := s.getCacheFilePath(fileKey) + log.Debug(). + Str("file", fileKey). + Str("path", cacheFile). + Time("last_access", accessTime). + Time("cutoff", cutoff). + Msg("Removing expired file from cache") + os.Remove(cacheFile) + delete(s.cacheMap, fileKey) + delete(s.blockIndex, fileKey) + } + } + + s.activeUseMu.RUnlock() + s.downloadMu.Unlock() + s.cacheMu.Unlock() + + // Only log if something interesting happened (files were deleted) + if expiredCount > 0 { + log.Debug(). + Int("protected", protectedCount). + Int("expired", expiredCount). + Int("total_cached", len(s.cacheMap)). + Msg("Cache cleanup cycle completed - removed expired files") + } else if protectedCount > 0 { + // Use trace level for routine cleanup cycles with no deletions + log.Trace(). + Int("protected", protectedCount). + Int("total_cached", len(s.cacheMap)). + Msg("Cache cleanup cycle completed - no files expired") + } + + // Also check disk usage and remove oldest files if needed + s.enforceMaxCacheSize() + } +} + +func (s *S3Source) enforceMaxCacheSize() { + maxSize := s.maxCacheSize + + var totalSize int64 + var files []struct { + path string + key string + size int64 + access time.Time + } + + s.cacheMu.RLock() + for key, accessTime := range s.cacheMap { + path := s.getCacheFilePath(key) + if info, err := os.Stat(path); err == nil { + totalSize += info.Size() + files = append(files, struct { + path string + key string + size int64 + access time.Time + }{path, key, info.Size(), accessTime}) + } + } + s.cacheMu.RUnlock() + + if totalSize <= maxSize { + return + } + + log.Debug(). + Int64("total_size_mb", totalSize/(1024*1024)). + Int64("max_size_mb", maxSize/(1024*1024)). + Int("file_count", len(files)). 
+ Msg("Cache size exceeded, removing old files") + + // Sort by access time (oldest first) + sort.Slice(files, func(i, j int) bool { + return files[i].access.Before(files[j].access) + }) + + // Remove oldest files until under limit + s.cacheMu.Lock() + s.downloadMu.Lock() + s.activeUseMu.RLock() + defer s.activeUseMu.RUnlock() + defer s.downloadMu.Unlock() + defer s.cacheMu.Unlock() + + for _, f := range files { + if totalSize <= maxSize { + break + } + + // Skip files that are currently being downloaded + if _, downloading := s.downloading[f.key]; downloading { + continue + } + + // Skip files that are actively being used + if count, active := s.activeUse[f.key]; active && count > 0 { + continue + } + + os.Remove(f.path) + delete(s.cacheMap, f.key) + delete(s.blockIndex, f.key) + totalSize -= f.size + } +} diff --git a/internal/source/source.go b/internal/source/source.go new file mode 100644 index 0000000..2b9ef85 --- /dev/null +++ b/internal/source/source.go @@ -0,0 +1,14 @@ +package source + +import ( + "context" + "math/big" + + "github.com/thirdweb-dev/indexer/internal/rpc" +) + +type ISource interface { + GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult + GetSupportedBlockRange(ctx context.Context) (minBlockNumber *big.Int, maxBlockNumber *big.Int, err error) + Close() +} diff --git a/internal/storage/s3.go b/internal/storage/s3.go index 4e1de31..9cd5f24 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -25,7 +25,7 @@ import ( type S3Connector struct { client *s3.Client - config *config.S3Config + config *config.S3StorageConfig formatter DataFormatter buffer *BlockBuffer @@ -59,7 +59,7 @@ type ParquetBlockData struct { Traces []byte `parquet:"traces_json"` } -func NewS3Connector(cfg *config.S3Config) (*S3Connector, error) { +func NewS3Connector(cfg *config.S3StorageConfig) (*S3Connector, error) { awsCfg, err := awsconfig.LoadDefaultConfig(context.Background(), awsconfig.WithRegion(cfg.Region), ) diff --git a/internal/worker/worker.go b/internal/worker/worker.go index d25294f..fd94bab 100644 --- a/internal/worker/worker.go +++ b/internal/worker/worker.go @@ -2,6 +2,7 @@ package worker import ( "context" + "fmt" "math/big" "sort" "sync" @@ -12,138 +13,334 @@ import ( "github.com/thirdweb-dev/indexer/internal/common" "github.com/thirdweb-dev/indexer/internal/metrics" "github.com/thirdweb-dev/indexer/internal/rpc" + "github.com/thirdweb-dev/indexer/internal/source" ) +// SourceType represents the type of data source +type SourceType string + +const ( + // SourceTypeRPC represents RPC data source + SourceTypeRPC SourceType = "rpc" + // SourceTypeArchive represents archive data source (e.g., S3) + SourceTypeArchive SourceType = "archive" +) + +// String returns the string representation of the source type +func (s SourceType) String() string { + return string(s) +} + +// Worker handles block data fetching from RPC and optional archive type Worker struct { - rpc rpc.IRPCClient + rpc rpc.IRPCClient + archive source.ISource // Optional alternative source + rpcSemaphore chan struct{} // Limit concurrent RPC requests } func NewWorker(rpc rpc.IRPCClient) *Worker { return &Worker{ - rpc: rpc, + rpc: rpc, + rpcSemaphore: make(chan struct{}, 20), + } +} + +// NewWorkerWithArchive creates a new Worker with optional archive support +func NewWorkerWithArchive(rpc rpc.IRPCClient, source source.ISource) *Worker { + return &Worker{ + rpc: rpc, + archive: source, + rpcSemaphore: make(chan struct{}, 20), + } +} + +// fetchFromRPC fetches blocks 
directly from RPC +func (w *Worker) fetchFromRPC(ctx context.Context, blocks []*big.Int) []rpc.GetFullBlockResult { + // Acquire semaphore for rate limiting + select { + case w.rpcSemaphore <- struct{}{}: + defer func() { <-w.rpcSemaphore }() + case <-ctx.Done(): + return nil + } + + return w.rpc.GetFullBlocks(ctx, blocks) +} + +// fetchFromArchive fetches blocks from archive if available +func (w *Worker) fetchFromArchive(ctx context.Context, blocks []*big.Int) []rpc.GetFullBlockResult { + if w.archive == nil { + return nil } + return w.archive.GetFullBlocks(ctx, blocks) } -func (w *Worker) processChunkWithRetry(ctx context.Context, chunk []*big.Int, resultsCh chan<- []rpc.GetFullBlockResult, sem chan struct{}) { +// processChunkWithRetry processes a chunk with automatic retry on failure +func (w *Worker) processChunkWithRetry(ctx context.Context, chunk []*big.Int, fetchFunc func(context.Context, []*big.Int) []rpc.GetFullBlockResult) []rpc.GetFullBlockResult { select { case <-ctx.Done(): - return + // Return error results for all blocks if context cancelled + var results []rpc.GetFullBlockResult + for _, block := range chunk { + results = append(results, rpc.GetFullBlockResult{ + BlockNumber: block, + Error: fmt.Errorf("context cancelled"), + }) + } + return results default: } - // Acquire semaphore only for the RPC request - sem <- struct{}{} - results := w.rpc.GetFullBlocks(ctx, chunk) - <-sem // Release semaphore immediately after RPC request + // Fetch the chunk + results := fetchFunc(ctx, chunk) - if len(chunk) == 1 { - // chunk size 1 is the minimum, so we return whatever we get - resultsCh <- results - return + // If we got all results, return them + if len(results) == len(chunk) { + allSuccess := true + for _, r := range results { + if r.Error != nil { + allSuccess = false + break + } + } + if allSuccess { + return results + } } - // Check for failed blocks + // Separate successful and failed + successMap := make(map[string]rpc.GetFullBlockResult) var failedBlocks []*big.Int - var successfulResults []rpc.GetFullBlockResult for i, result := range results { - if result.Error != nil { - failedBlocks = append(failedBlocks, chunk[i]) - } else { - successfulResults = append(successfulResults, result) + if i < len(chunk) { + if result.Error == nil { + successMap[chunk[i].String()] = result + } else { + failedBlocks = append(failedBlocks, chunk[i]) + } } } - log.Debug().Msgf("Out of %d blocks, %d successful, %d failed", len(results), len(successfulResults), len(failedBlocks)) - // If we have successful results, send them - if len(successfulResults) > 0 { - resultsCh <- successfulResults - } + // If only one block failed, retry once more + if len(failedBlocks) == 1 { + retryResults := fetchFunc(ctx, failedBlocks) + if len(retryResults) > 0 { + if retryResults[0].Error == nil { + successMap[failedBlocks[0].String()] = retryResults[0] + } else { + // Keep the error result + successMap[failedBlocks[0].String()] = rpc.GetFullBlockResult{ + BlockNumber: failedBlocks[0], + Error: retryResults[0].Error, + } + } + } + } else if len(failedBlocks) > 1 { + // Split failed blocks and retry recursively + mid := len(failedBlocks) / 2 + leftChunk := failedBlocks[:mid] + rightChunk := failedBlocks[mid:] - // If no blocks failed, we're done - if len(failedBlocks) == 0 { - return - } + log.Debug(). + Int("failed_count", len(failedBlocks)). + Int("left_chunk", len(leftChunk)). + Int("right_chunk", len(rightChunk)). 
+ Msg("Splitting failed blocks for retry") - // can't split any further, so try one last time - if len(failedBlocks) == 1 { - w.processChunkWithRetry(ctx, failedBlocks, resultsCh, sem) - return - } + // Process both halves + leftResults := w.processChunkWithRetry(ctx, leftChunk, fetchFunc) + rightResults := w.processChunkWithRetry(ctx, rightChunk, fetchFunc) - // Split failed blocks in half and retry - mid := len(failedBlocks) / 2 - leftChunk := failedBlocks[:mid] - rightChunk := failedBlocks[mid:] + // Add results to map + for _, r := range leftResults { + if r.BlockNumber != nil { + successMap[r.BlockNumber.String()] = r + } + } + for _, r := range rightResults { + if r.BlockNumber != nil { + successMap[r.BlockNumber.String()] = r + } + } + } - log.Debug().Msgf("Splitting %d failed blocks into chunks of %d and %d", len(failedBlocks), len(leftChunk), len(rightChunk)) + // Build final results in original order + var finalResults []rpc.GetFullBlockResult + for _, block := range chunk { + if result, ok := successMap[block.String()]; ok { + finalResults = append(finalResults, result) + } else { + // Add error result for missing blocks + finalResults = append(finalResults, rpc.GetFullBlockResult{ + BlockNumber: block, + Error: fmt.Errorf("failed to fetch block"), + }) + } + } - var wg sync.WaitGroup - wg.Add(2) + return finalResults +} - go func() { - defer wg.Done() - w.processChunkWithRetry(ctx, leftChunk, resultsCh, sem) - }() +// processBatch processes a batch of blocks from a specific source +func (w *Worker) processBatch(ctx context.Context, blocks []*big.Int, sourceType SourceType, fetchFunc func(context.Context, []*big.Int) []rpc.GetFullBlockResult) []rpc.GetFullBlockResult { + if len(blocks) == 0 { + return nil + } - go func() { - defer wg.Done() - w.processChunkWithRetry(ctx, rightChunk, resultsCh, sem) - }() + // Determine chunk size based on source + chunkSize := w.rpc.GetBlocksPerRequest().Blocks + if sourceType == SourceTypeArchive && w.archive != nil { + chunkSize = len(blocks) // Fetch all at once from archive + } - wg.Wait() -} + chunks := common.SliceToChunks(blocks, chunkSize) -func (w *Worker) Run(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { - blockCount := len(blockNumbers) - chunks := common.SliceToChunks(blockNumbers, w.rpc.GetBlocksPerRequest().Blocks) + log.Debug(). + Str("source", sourceType.String()). + Int("total_blocks", len(blocks)). + Int("chunks", len(chunks)). + Int("chunk_size", chunkSize). 
+ Msg("Processing blocks") + var allResults []rpc.GetFullBlockResult + var mu sync.Mutex var wg sync.WaitGroup - resultsCh := make(chan []rpc.GetFullBlockResult, blockCount) - // Create a semaphore channel to limit concurrent goroutines - sem := make(chan struct{}, 20) - - log.Debug().Msgf("Worker Processing %d blocks in %d chunks of max %d blocks", blockCount, len(chunks), w.rpc.GetBlocksPerRequest().Blocks) + batchDelay := time.Duration(config.Cfg.RPC.Blocks.BatchDelay) * time.Millisecond for i, chunk := range chunks { - if i > 0 { - time.Sleep(time.Duration(config.Cfg.RPC.Blocks.BatchDelay) * time.Millisecond) + // Check context before starting new work + if ctx.Err() != nil { + log.Debug().Msg("Context canceled, skipping remaining chunks") + break // Don't start new chunks, but let existing ones finish } - select { - case <-ctx.Done(): - log.Debug().Msg("Context canceled, stopping Worker") - return nil - default: - // continue processing + + // Add delay between batches for RPC (except first batch) + if i > 0 && sourceType == SourceTypeRPC && batchDelay > 0 { + select { + case <-ctx.Done(): + log.Debug().Msg("Context canceled during batch delay") + break + case <-time.After(batchDelay): + // Continue after delay + } } wg.Add(1) go func(chunk []*big.Int) { defer wg.Done() - w.processChunkWithRetry(ctx, chunk, resultsCh, sem) + results := w.processChunkWithRetry(ctx, chunk, fetchFunc) + + mu.Lock() + allResults = append(allResults, results...) + mu.Unlock() }(chunk) } - go func() { - wg.Wait() - close(resultsCh) - }() + // Wait for all started goroutines to complete + wg.Wait() + + // Sort results by block number (only if we have results) + if len(allResults) > 0 { + sort.Slice(allResults, func(i, j int) bool { + return allResults[i].BlockNumber.Cmp(allResults[j].BlockNumber) < 0 + }) + } + + return allResults +} - results := make([]rpc.GetFullBlockResult, 0, blockCount) - for batchResults := range resultsCh { - results = append(results, batchResults...) +// shouldUseArchive determines if ALL requested blocks are within archive range +func (w *Worker) shouldUseArchive(ctx context.Context, blockNumbers []*big.Int) bool { + // Check if archive is configured and we have blocks to process + if w.archive == nil || len(blockNumbers) == 0 { + return false } - // Sort results by block number - sort.Slice(results, func(i, j int) bool { - return results[i].BlockNumber.Cmp(results[j].BlockNumber) < 0 - }) + // Get archive block range + minArchive, maxArchive, err := w.archive.GetSupportedBlockRange(ctx) + if err != nil { + log.Warn().Err(err).Msg("Failed to get archive block range") + return false + } - // track the last fetched block number + // Check if ALL blocks are within archive range + for _, block := range blockNumbers { + if block.Cmp(minArchive) < 0 || block.Cmp(maxArchive) > 0 { + // At least one block is outside archive range + return false + } + } + + // All blocks are within archive range + return true +} + +// Run processes blocks using either archive OR rpc +func (w *Worker) Run(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { + if len(blockNumbers) == 0 { + return nil + } + + var results []rpc.GetFullBlockResult + + // Determine which source to use + sourceType := SourceTypeRPC + fetchFunc := w.fetchFromRPC + + if w.shouldUseArchive(ctx, blockNumbers) { + sourceType = SourceTypeArchive + fetchFunc = w.fetchFromArchive + log.Debug(). + Int("count", len(blockNumbers)). + Str("source", sourceType.String()). 
+ Msg("Using archive for all blocks") + } else { + log.Debug(). + Int("count", len(blockNumbers)). + Str("source", sourceType.String()). + Msg("Using RPC for all blocks") + } + + // Process all blocks with the selected source + results = w.processBatch(ctx, blockNumbers, sourceType, fetchFunc) + + // Update metrics and log summary if len(results) > 0 { lastBlockNumberFloat, _ := results[len(results)-1].BlockNumber.Float64() metrics.LastFetchedBlock.Set(lastBlockNumberFloat) + + // Count successes and failures + successful := 0 + failed := 0 + for _, r := range results { + if r.Error == nil { + successful++ + } else { + failed++ + } + } + + log.Debug(). + Int("total", len(results)). + Int("successful", successful). + Int("failed", failed). + Str("source", sourceType.String()). + Msg("Block fetching complete") } + return results } + +// Close gracefully shuts down the worker and cleans up resources +func (w *Worker) Close() error { + // Close archive if it exists + if w.archive != nil { + log.Debug().Msg("Closing archive connection") + w.archive.Close() + } + + log.Debug().Msg("Worker closed successfully") + return nil +} From 884a3aa46b5aa145a04b96189068ea038687cb09 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 27 Aug 2025 17:50:56 +0000 Subject: [PATCH 37/43] Fix boundaries for migration --- cmd/migrate_valid.go | 11 +++++++++-- cmd/root.go | 3 +++ configs/config.go | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index cb384de..33b93ad 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -55,6 +55,9 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { // Calculate work distribution for workers numWorkers := DEFAULT_WORKERS + if config.Cfg.Migrator.WorkerCount > 0 { + numWorkers = int(config.Cfg.Migrator.WorkerCount) + } workRanges := divideBlockRange(rangeStartBlock, rangeEndBlock, numWorkers) log.Info().Msgf("Starting %d workers to process migration", len(workRanges)) @@ -376,7 +379,9 @@ func (m *Migrator) DetermineMigrationBoundaries(targetStartBlock, targetEndBlock } log.Info().Msgf("Block in the target storage for range %s to %s: count=%s, max=%s", startBlock.String(), endBlock.String(), blockCount.String(), maxStoredBlock.String()) - if maxStoredBlock != nil && maxStoredBlock.Cmp(startBlock) >= 0 { + // Only adjust start block if we actually have blocks stored (count > 0) + // When count is 0, maxStoredBlock might be 0 but that doesn't mean block 0 exists + if blockCount.Sign() > 0 && maxStoredBlock != nil && maxStoredBlock.Cmp(startBlock) >= 0 { startBlock = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) } @@ -411,7 +416,9 @@ func (m *Migrator) DetermineMigrationBoundariesForRange(rangeStart, rangeEnd *bi } actualStart := rangeStart - if maxStoredBlock != nil && maxStoredBlock.Cmp(rangeStart) >= 0 { + // Only adjust start block if we actually have blocks stored (blockCount > 0) + // When blockCount is 0, maxStoredBlock might be 0 but that doesn't mean block 0 exists + if blockCount.Sign() > 0 && maxStoredBlock != nil && maxStoredBlock.Cmp(rangeStart) >= 0 { // We have some blocks already, start from the next one actualStart = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) diff --git a/cmd/root.go b/cmd/root.go index fb3999f..efcd200 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -239,6 +239,7 @@ func init() { rootCmd.PersistentFlags().Uint("migrator-batchSize", 2000, "Batch size for storage operations in migrator") 
rootCmd.PersistentFlags().Uint("migrator-startBlock", 0, "Start block for migration") rootCmd.PersistentFlags().Uint("migrator-endBlock", 0, "End block for migration") + rootCmd.PersistentFlags().Uint("migrator-workerCount", 0, "Worker count for migration") viper.BindPFlag("rpc.url", rootCmd.PersistentFlags().Lookup("rpc-url")) viper.BindPFlag("rpc.blocks.blocksPerRequest", rootCmd.PersistentFlags().Lookup("rpc-blocks-blocksPerRequest")) @@ -443,6 +444,8 @@ func init() { viper.BindPFlag("migrator.startBlock", rootCmd.PersistentFlags().Lookup("migrator-startBlock")) viper.BindPFlag("migrator.endBlock", rootCmd.PersistentFlags().Lookup("migrator-endBlock")) viper.BindPFlag("migrator.batchSize", rootCmd.PersistentFlags().Lookup("migrator-batchSize")) + viper.BindPFlag("migrator.workerCount", rootCmd.PersistentFlags().Lookup("migrator-workerCount")) + rootCmd.AddCommand(orchestratorCmd) rootCmd.AddCommand(apiCmd) rootCmd.AddCommand(validateAndFixCmd) diff --git a/configs/config.go b/configs/config.go index 78daec6..c8c52e0 100644 --- a/configs/config.go +++ b/configs/config.go @@ -267,6 +267,7 @@ type MigratorConfig struct { StartBlock uint `mapstructure:"startBlock"` EndBlock uint `mapstructure:"endBlock"` BatchSize uint `mapstructure:"batchSize"` + WorkerCount uint `mapstructure:"workerCount"` } type Config struct { From 86f3d68bba3f64d25b70f30d2f277e2c90594293 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 27 Aug 2025 22:27:08 +0000 Subject: [PATCH 38/43] Badger for caching in s3 connector --- internal/storage/block_buffer.go | 23 +- internal/storage/block_buffer_badger.go | 476 +++++++++++++++++++ internal/storage/block_buffer_badger_test.go | 144 ++++++ internal/storage/s3.go | 20 +- 4 files changed, 659 insertions(+), 4 deletions(-) create mode 100644 internal/storage/block_buffer_badger.go create mode 100644 internal/storage/block_buffer_badger_test.go diff --git a/internal/storage/block_buffer.go b/internal/storage/block_buffer.go index ddec1aa..a2d9158 100644 --- a/internal/storage/block_buffer.go +++ b/internal/storage/block_buffer.go @@ -18,7 +18,22 @@ type BlockBuffer struct { maxBlocks int } -// NewBlockBuffer creates a new block buffer +// IBlockBuffer defines the interface for block buffer implementations +type IBlockBuffer interface { + Add(blocks []common.BlockData, actualSizeBytes int64) bool + Flush() []common.BlockData + ShouldFlush() bool + Size() (int64, int) + IsEmpty() bool + GetData() []common.BlockData + GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData + GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData + GetMaxBlockNumber(chainId *big.Int) *big.Int + Clear() + Stats() BufferStats +} + +// NewBlockBuffer creates a new in-memory block buffer func NewBlockBuffer(maxSizeMB int64, maxBlocks int) *BlockBuffer { return &BlockBuffer{ data: make([]common.BlockData, 0), @@ -27,6 +42,12 @@ func NewBlockBuffer(maxSizeMB int64, maxBlocks int) *BlockBuffer { } } +// NewBlockBufferWithBadger creates a new Badger-backed block buffer for better memory management +// This uses ephemeral storage with optimized settings for caching +func NewBlockBufferWithBadger(maxSizeMB int64, maxBlocks int) (IBlockBuffer, error) { + return NewBadgerBlockBuffer(maxSizeMB, maxBlocks) +} + // Add adds blocks to the buffer and returns true if flush is needed func (b *BlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool { if len(blocks) == 0 { diff --git 
a/internal/storage/block_buffer_badger.go b/internal/storage/block_buffer_badger.go new file mode 100644 index 0000000..39775a8 --- /dev/null +++ b/internal/storage/block_buffer_badger.go @@ -0,0 +1,476 @@ +package storage + +import ( + "bytes" + "encoding/gob" + "fmt" + "math/big" + "os" + "sync" + "time" + + "github.com/dgraph-io/badger/v4" + "github.com/dgraph-io/badger/v4/options" + "github.com/rs/zerolog/log" + "github.com/thirdweb-dev/indexer/internal/common" +) + +// BadgerBlockBuffer manages buffering of block data using Badger as an ephemeral cache +type BadgerBlockBuffer struct { + mu sync.RWMutex + db *badger.DB + tempDir string + sizeBytes int64 + maxSizeBytes int64 + maxBlocks int + blockCount int + gcTicker *time.Ticker + stopGC chan struct{} + + // Chain metadata cache for O(1) lookups + chainMetadata map[uint64]*ChainMetadata +} + +// ChainMetadata tracks per-chain statistics for fast lookups +type ChainMetadata struct { + MinBlock *big.Int + MaxBlock *big.Int + BlockCount int +} + +// NewBadgerBlockBuffer creates a new Badger-backed block buffer with ephemeral storage +func NewBadgerBlockBuffer(maxSizeMB int64, maxBlocks int) (*BadgerBlockBuffer, error) { + // Create temporary directory for ephemeral storage + tempDir, err := os.MkdirTemp("", "blockbuffer-*") + if err != nil { + return nil, fmt.Errorf("failed to create temp dir: %w", err) + } + + // Configure Badger with optimized settings for ephemeral cache + opts := badger.DefaultOptions(tempDir) + + // Memory optimization settings (similar to badger.go but tuned for ephemeral use) + opts.ValueLogFileSize = 256 * 1024 * 1024 // 256MB (smaller for cache) + opts.BaseTableSize = 64 * 1024 * 1024 // 64MB + opts.BaseLevelSize = 64 * 1024 * 1024 // 64MB + opts.LevelSizeMultiplier = 10 // Aggressive growth + opts.NumMemtables = 5 // ~320MB + opts.MemTableSize = opts.BaseTableSize // 64MB per memtable + opts.NumLevelZeroTables = 5 + opts.NumLevelZeroTablesStall = 10 + opts.SyncWrites = false // No durability needed for cache + opts.DetectConflicts = false // No ACID needed + opts.NumCompactors = 2 // Less compactors for cache + opts.CompactL0OnClose = false // Don't compact on close (ephemeral) + opts.ValueLogMaxEntries = 100000 // Smaller for cache + opts.ValueThreshold = 1024 // Store values > 512 bytes in value log + opts.IndexCacheSize = 128 * 1024 * 1024 // 128MB index cache + opts.BlockCacheSize = 64 * 1024 * 1024 // 64MB block cache + opts.Compression = options.Snappy + opts.Logger = nil // Disable badger's internal logging + + // Ephemeral-specific settings + opts.InMemory = false // Use disk but in temp directory + opts.ReadOnly = false + opts.MetricsEnabled = false + + db, err := badger.Open(opts) + if err != nil { + os.RemoveAll(tempDir) + return nil, fmt.Errorf("failed to open badger db: %w", err) + } + + b := &BadgerBlockBuffer{ + db: db, + tempDir: tempDir, + maxSizeBytes: maxSizeMB * 1024 * 1024, + maxBlocks: maxBlocks, + stopGC: make(chan struct{}), + chainMetadata: make(map[uint64]*ChainMetadata), + } + + // Start GC routine with faster interval for cache + b.gcTicker = time.NewTicker(30 * time.Second) + go b.runGC() + + return b, nil +} + +// Add adds blocks to the buffer and returns true if flush is needed +func (b *BadgerBlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool { + if len(blocks) == 0 { + return false + } + + b.mu.Lock() + defer b.mu.Unlock() + + err := b.db.Update(func(txn *badger.Txn) error { + for _, block := range blocks { + key := b.makeKey(block.Block.ChainId, 
block.Block.Number) + + var buf bytes.Buffer + if err := gob.NewEncoder(&buf).Encode(block); err != nil { + return err + } + + if err := txn.Set(key, buf.Bytes()); err != nil { + return err + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to add blocks to badger buffer") + return false + } + + // Update counters + b.blockCount += len(blocks) + b.sizeBytes += actualSizeBytes + + // Update chain metadata for O(1) lookups + for _, block := range blocks { + chainId := block.Block.ChainId.Uint64() + meta, exists := b.chainMetadata[chainId] + if !exists { + meta = &ChainMetadata{ + MinBlock: new(big.Int).Set(block.Block.Number), + MaxBlock: new(big.Int).Set(block.Block.Number), + BlockCount: 1, + } + b.chainMetadata[chainId] = meta + } else { + if block.Block.Number.Cmp(meta.MinBlock) < 0 { + meta.MinBlock = new(big.Int).Set(block.Block.Number) + } + if block.Block.Number.Cmp(meta.MaxBlock) > 0 { + meta.MaxBlock = new(big.Int).Set(block.Block.Number) + } + meta.BlockCount++ + } + } + + log.Debug(). + Int("block_count", len(blocks)). + Int64("size_bytes", actualSizeBytes). + Int64("total_size_bytes", b.sizeBytes). + Int("total_blocks", b.blockCount). + Msg("Added blocks to badger buffer") + + // Check if flush is needed + return b.shouldFlushLocked() +} + +// Flush removes all data from the buffer and returns it +func (b *BadgerBlockBuffer) Flush() []common.BlockData { + b.mu.Lock() + defer b.mu.Unlock() + + if b.blockCount == 0 { + return nil + } + + var result []common.BlockData + + // Read all data + err := b.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.PrefetchValues = true + opts.PrefetchSize = 100 + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + result = append(result, blockData) + return nil + }) + if err != nil { + log.Error().Err(err).Msg("Failed to decode block data during flush") + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to read blocks during flush") + } + + // Clear the database + err = b.db.DropAll() + if err != nil { + log.Error().Err(err).Msg("Failed to clear badger buffer") + } + + // Reset counters and metadata + oldCount := b.blockCount + b.blockCount = 0 + b.sizeBytes = 0 + b.chainMetadata = make(map[uint64]*ChainMetadata) + + log.Info(). + Int("block_count", oldCount). 
+ Msg("Flushing badger buffer") + + return result +} + +// ShouldFlush checks if the buffer should be flushed based on configured thresholds +func (b *BadgerBlockBuffer) ShouldFlush() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return b.shouldFlushLocked() +} + +// Size returns the current buffer size in bytes and block count +func (b *BadgerBlockBuffer) Size() (int64, int) { + b.mu.RLock() + defer b.mu.RUnlock() + return b.sizeBytes, b.blockCount +} + +// IsEmpty returns true if the buffer is empty +func (b *BadgerBlockBuffer) IsEmpty() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return b.blockCount == 0 +} + +// GetData returns a copy of the current buffer data +func (b *BadgerBlockBuffer) GetData() []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result []common.BlockData + + err := b.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.PrefetchValues = true + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + result = append(result, blockData) + return nil + }) + if err != nil { + log.Error().Err(err).Msg("Failed to decode block data") + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to get data from badger buffer") + } + + return result +} + +// GetBlocksInRange returns blocks from the buffer that fall within the given range +func (b *BadgerBlockBuffer) GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result []common.BlockData + prefix := b.makePrefix(chainId) + + err := b.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = prefix + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + + blockNum := blockData.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + result = append(result, blockData) + } + return nil + }) + if err != nil { + log.Error().Err(err).Msg("Failed to decode block data in range") + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to get blocks in range from badger buffer") + } + + return result +} + +// GetBlockByNumber returns a specific block from the buffer if it exists +func (b *BadgerBlockBuffer) GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result *common.BlockData + key := b.makeKey(chainId, blockNumber) + + err := b.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(key) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return err + } + + return item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + result = &blockData + return nil + }) + }) + + if err != nil && err != badger.ErrKeyNotFound { + log.Error().Err(err).Msg("Failed to get block by number from badger buffer") + } + + return result +} + +// GetMaxBlockNumber returns the maximum block number for a chain in the buffer +func (b 
*BadgerBlockBuffer) GetMaxBlockNumber(chainId *big.Int) *big.Int { + b.mu.RLock() + defer b.mu.RUnlock() + + // O(1) lookup using cached metadata + meta, exists := b.chainMetadata[chainId.Uint64()] + if !exists || meta.MaxBlock == nil { + return nil + } + + // Return a copy to prevent external modification + return new(big.Int).Set(meta.MaxBlock) +} + +// Clear empties the buffer without returning data +func (b *BadgerBlockBuffer) Clear() { + b.mu.Lock() + defer b.mu.Unlock() + + err := b.db.DropAll() + if err != nil { + log.Error().Err(err).Msg("Failed to clear badger buffer") + } + + b.blockCount = 0 + b.sizeBytes = 0 + b.chainMetadata = make(map[uint64]*ChainMetadata) +} + +// Stats returns statistics about the buffer +func (b *BadgerBlockBuffer) Stats() BufferStats { + b.mu.RLock() + defer b.mu.RUnlock() + + stats := BufferStats{ + BlockCount: b.blockCount, + SizeBytes: b.sizeBytes, + ChainCount: len(b.chainMetadata), + ChainStats: make(map[uint64]ChainStats), + } + + // Use cached metadata for O(1) stats generation + for chainId, meta := range b.chainMetadata { + if meta.MinBlock != nil && meta.MaxBlock != nil { + stats.ChainStats[chainId] = ChainStats{ + BlockCount: meta.BlockCount, + MinBlock: new(big.Int).Set(meta.MinBlock), + MaxBlock: new(big.Int).Set(meta.MaxBlock), + } + } + } + + return stats +} + +// Close closes the buffer and cleans up resources +func (b *BadgerBlockBuffer) Close() error { + b.mu.Lock() + defer b.mu.Unlock() + + // Stop GC routine + if b.gcTicker != nil { + b.gcTicker.Stop() + close(b.stopGC) + } + + // Close database + if err := b.db.Close(); err != nil { + log.Error().Err(err).Msg("Failed to close badger buffer database") + } + + // Clean up temporary directory + if err := os.RemoveAll(b.tempDir); err != nil { + log.Error().Err(err).Msg("Failed to remove temp directory") + } + + return nil +} + +// Private methods + +func (b *BadgerBlockBuffer) shouldFlushLocked() bool { + // Check size limit + if b.maxSizeBytes > 0 && b.sizeBytes >= b.maxSizeBytes { + return true + } + + // Check block count limit + if b.maxBlocks > 0 && b.blockCount >= b.maxBlocks { + return true + } + + return false +} + +func (b *BadgerBlockBuffer) makeKey(chainId *big.Int, blockNumber *big.Int) []byte { + // Key format: block:<chainId>:<blockNumber>; block numbers are not zero-padded, so lexicographic key order does not match numeric block order + return fmt.Appendf(nil, "block:%s:%s", chainId.String(), blockNumber.String()) +} + +func (b *BadgerBlockBuffer) makePrefix(chainId *big.Int) []byte { + return fmt.Appendf(nil, "block:%s:", chainId.String()) +} + +func (b *BadgerBlockBuffer) runGC() { + for { + select { + case <-b.gcTicker.C: + err := b.db.RunValueLogGC(0.7) // More aggressive GC for cache + if err != nil && err != badger.ErrNoRewrite { + log.Debug().Err(err).Msg("BadgerBlockBuffer GC error") + } + case <-b.stopGC: + return + } + } +} diff --git a/internal/storage/block_buffer_badger_test.go b/internal/storage/block_buffer_badger_test.go new file mode 100644 index 0000000..7901a67 --- /dev/null +++ b/internal/storage/block_buffer_badger_test.go @@ -0,0 +1,144 @@ +package storage + +import ( + "math/big" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/thirdweb-dev/indexer/internal/common" +) + +func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { + // Create a new Badger buffer + buffer, err := NewBadgerBlockBuffer(10, 1000) // 10MB, 1000 blocks max + require.NoError(t, err) + defer buffer.Close() + + chainId := big.NewInt(1) + + // Add blocks + blocks :=
[]common.BlockData{ + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(100), + Hash: "0x1234", + }, + }, + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(101), + Hash: "0x5678", + }, + }, + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(99), + Hash: "0xabcd", + }, + }, + } + + buffer.Add(blocks, 1024) + + // Test O(1) GetMaxBlockNumber + start := time.Now() + maxBlock := buffer.GetMaxBlockNumber(chainId) + elapsed := time.Since(start) + + assert.NotNil(t, maxBlock) + assert.Equal(t, big.NewInt(101), maxBlock) + assert.Less(t, elapsed, time.Millisecond, "GetMaxBlockNumber should be O(1) and very fast") + + // Test O(1) Stats + start = time.Now() + stats := buffer.Stats() + elapsed = time.Since(start) + + assert.Equal(t, 3, stats.BlockCount) + assert.Equal(t, 1, stats.ChainCount) + chainStats := stats.ChainStats[1] + assert.Equal(t, 3, chainStats.BlockCount) + assert.Equal(t, big.NewInt(99), chainStats.MinBlock) + assert.Equal(t, big.NewInt(101), chainStats.MaxBlock) + assert.Less(t, elapsed, time.Millisecond, "Stats should be O(1) and very fast") + + // Test metadata is updated after flush + buffer.Flush() + maxBlock = buffer.GetMaxBlockNumber(chainId) + assert.Nil(t, maxBlock) + + // Add new blocks and verify metadata is rebuilt + newBlocks := []common.BlockData{ + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(200), + Hash: "0xffff", + }, + }, + } + buffer.Add(newBlocks, 512) + + maxBlock = buffer.GetMaxBlockNumber(chainId) + assert.NotNil(t, maxBlock) + assert.Equal(t, big.NewInt(200), maxBlock) +} + +func BenchmarkBadgerBlockBufferGetMaxBlockNumber(b *testing.B) { + buffer, err := NewBadgerBlockBuffer(100, 10000) + require.NoError(b, err) + defer buffer.Close() + + chainId := big.NewInt(1) + + // Add many blocks + for i := 0; i < 1000; i++ { + blocks := []common.BlockData{ + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(int64(i)), + Hash: "0x1234", + }, + }, + } + buffer.Add(blocks, 1024) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = buffer.GetMaxBlockNumber(chainId) + } +} + +func BenchmarkBadgerBlockBufferStats(b *testing.B) { + buffer, err := NewBadgerBlockBuffer(100, 10000) + require.NoError(b, err) + defer buffer.Close() + + // Add blocks for multiple chains + for chainId := 1; chainId <= 5; chainId++ { + for i := 0; i < 100; i++ { + blocks := []common.BlockData{ + { + Block: common.Block{ + ChainId: big.NewInt(int64(chainId)), + Number: big.NewInt(int64(i)), + Hash: "0x1234", + }, + }, + } + buffer.Add(blocks, 1024) + } + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = buffer.Stats() + } +} \ No newline at end of file diff --git a/internal/storage/s3.go b/internal/storage/s3.go index 9cd5f24..d50328b 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -27,7 +27,7 @@ type S3Connector struct { client *s3.Client config *config.S3StorageConfig formatter DataFormatter - buffer *BlockBuffer + buffer IBlockBuffer // Flush control stopCh chan struct{} @@ -115,7 +115,12 @@ func NewS3Connector(cfg *config.S3StorageConfig) (*S3Connector, error) { } // Create buffer with configured settings - buffer := NewBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) + buffer, err := NewBlockBufferWithBadger(cfg.BufferSize, cfg.MaxBlocksPerFile) + if err != nil { + // Fall back to in-memory buffer if Badger fails + log.Warn().Err(err).Msg("Failed to create Badger buffer, falling back to in-memory buffer") + buffer = NewBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) + } s3c 
:= &S3Connector{ client: s3Client, @@ -351,6 +356,16 @@ func (s *S3Connector) Close() error { // Wait for worker to finish s.wg.Wait() + + // Clean up buffer resources (especially important for BadgerBlockBuffer) + if badgerBuffer, ok := s.buffer.(*BadgerBlockBuffer); ok { + if err := badgerBuffer.Close(); err != nil { + log.Error().Err(err).Msg("Error closing badger buffer") + if closeErr == nil { + closeErr = err + } + } + } }) return closeErr @@ -481,7 +496,6 @@ func (f *ParquetFormatter) FormatBlockData(data []common.BlockData) ([]byte, err } // Convert block number to uint64 for efficient queries - // If block number is too large for uint64, use MaxUint64 blockNum := d.Block.Number.Uint64() if d.Block.Number.BitLen() > 64 { return nil, fmt.Errorf("block number exceeds uint64 is not supported") From 4595fa61dd08304155ca8ea406c70e74ac2f36d3 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 27 Aug 2025 23:54:57 +0000 Subject: [PATCH 39/43] optimize s3 insertion --- internal/storage/block_buffer.go | 40 ++++++++++++++++++-- internal/storage/block_buffer_badger.go | 30 +++++++++------ internal/storage/block_buffer_badger_test.go | 8 ++-- internal/storage/s3.go | 40 +++++++------------- 4 files changed, 71 insertions(+), 47 deletions(-) diff --git a/internal/storage/block_buffer.go b/internal/storage/block_buffer.go index a2d9158..90c6ed8 100644 --- a/internal/storage/block_buffer.go +++ b/internal/storage/block_buffer.go @@ -1,6 +1,8 @@ package storage import ( + "bytes" + "encoding/gob" "fmt" "math/big" "sync" @@ -20,7 +22,7 @@ type BlockBuffer struct { // IBlockBuffer defines the interface for block buffer implementations type IBlockBuffer interface { - Add(blocks []common.BlockData, actualSizeBytes int64) bool + Add(blocks []common.BlockData) bool Flush() []common.BlockData ShouldFlush() bool Size() (int64, int) @@ -31,6 +33,7 @@ type IBlockBuffer interface { GetMaxBlockNumber(chainId *big.Int) *big.Int Clear() Stats() BufferStats + Close() error } // NewBlockBuffer creates a new in-memory block buffer @@ -49,7 +52,7 @@ func NewBlockBufferWithBadger(maxSizeMB int64, maxBlocks int) (IBlockBuffer, err } // Add adds blocks to the buffer and returns true if flush is needed -func (b *BlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool { +func (b *BlockBuffer) Add(blocks []common.BlockData) bool { if len(blocks) == 0 { return false } @@ -57,13 +60,27 @@ func (b *BlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool b.mu.Lock() defer b.mu.Unlock() + // Calculate actual size by marshaling the entire batch once + // This gives us accurate size with minimal overhead since we marshal once per Add call + var actualSize int64 + var buf bytes.Buffer + enc := gob.NewEncoder(&buf) + + // Marshal all blocks to get actual serialized size + if err := enc.Encode(blocks); err != nil { + // If encoding fails, use estimation as fallback + log.Warn().Err(err).Msg("Failed to marshal blocks for size calculation, buffer size is not reported correctly") + } else { + actualSize = int64(buf.Len()) + } + // Add to buffer b.data = append(b.data, blocks...) - b.sizeBytes += actualSizeBytes + b.sizeBytes += actualSize log.Debug(). Int("block_count", len(blocks)). - Int64("size_bytes", actualSizeBytes). + Int64("actual_size_bytes", actualSize). Int64("total_size_bytes", b.sizeBytes). Int("total_blocks", len(b.data)). 
Msg("Added blocks to buffer") @@ -248,3 +265,18 @@ func (s BufferStats) String() string { return fmt.Sprintf("BufferStats{blocks=%d, size=%dMB, chains=%d}", s.BlockCount, s.SizeBytes/(1024*1024), s.ChainCount) } + +// Close closes the buffer (no-op for in-memory buffer) +func (b *BlockBuffer) Close() error { + b.mu.Lock() + defer b.mu.Unlock() + + // Clear the buffer to free memory + b.data = nil + b.sizeBytes = 0 + + return nil +} + +// Ensure BlockBuffer implements IBlockBuffer interface +var _ IBlockBuffer = (*BlockBuffer)(nil) diff --git a/internal/storage/block_buffer_badger.go b/internal/storage/block_buffer_badger.go index 39775a8..9d28cc5 100644 --- a/internal/storage/block_buffer_badger.go +++ b/internal/storage/block_buffer_badger.go @@ -20,7 +20,6 @@ type BadgerBlockBuffer struct { mu sync.RWMutex db *badger.DB tempDir string - sizeBytes int64 maxSizeBytes int64 maxBlocks int blockCount int @@ -97,7 +96,7 @@ func NewBadgerBlockBuffer(maxSizeMB int64, maxBlocks int) (*BadgerBlockBuffer, e } // Add adds blocks to the buffer and returns true if flush is needed -func (b *BadgerBlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64) bool { +func (b *BadgerBlockBuffer) Add(blocks []common.BlockData) bool { if len(blocks) == 0 { return false } @@ -128,7 +127,6 @@ func (b *BadgerBlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64 // Update counters b.blockCount += len(blocks) - b.sizeBytes += actualSizeBytes // Update chain metadata for O(1) lookups for _, block := range blocks { @@ -154,8 +152,6 @@ func (b *BadgerBlockBuffer) Add(blocks []common.BlockData, actualSizeBytes int64 log.Debug(). Int("block_count", len(blocks)). - Int64("size_bytes", actualSizeBytes). - Int64("total_size_bytes", b.sizeBytes). Int("total_blocks", b.blockCount). Msg("Added blocks to badger buffer") @@ -212,7 +208,6 @@ func (b *BadgerBlockBuffer) Flush() []common.BlockData { // Reset counters and metadata oldCount := b.blockCount b.blockCount = 0 - b.sizeBytes = 0 b.chainMetadata = make(map[uint64]*ChainMetadata) log.Info(). 
@@ -233,7 +228,10 @@ func (b *BadgerBlockBuffer) ShouldFlush() bool { func (b *BadgerBlockBuffer) Size() (int64, int) { b.mu.RLock() defer b.mu.RUnlock() - return b.sizeBytes, b.blockCount + + // Get actual size from Badger's LSM tree + lsm, _ := b.db.Size() + return lsm, b.blockCount } // IsEmpty returns true if the buffer is empty @@ -382,7 +380,6 @@ func (b *BadgerBlockBuffer) Clear() { } b.blockCount = 0 - b.sizeBytes = 0 b.chainMetadata = make(map[uint64]*ChainMetadata) } @@ -391,9 +388,12 @@ func (b *BadgerBlockBuffer) Stats() BufferStats { b.mu.RLock() defer b.mu.RUnlock() + // Get actual size from Badger + lsm, _ := b.db.Size() + stats := BufferStats{ BlockCount: b.blockCount, - SizeBytes: b.sizeBytes, + SizeBytes: lsm, ChainCount: len(b.chainMetadata), ChainStats: make(map[uint64]ChainStats), } @@ -439,9 +439,12 @@ func (b *BadgerBlockBuffer) Close() error { // Private methods func (b *BadgerBlockBuffer) shouldFlushLocked() bool { - // Check size limit - if b.maxSizeBytes > 0 && b.sizeBytes >= b.maxSizeBytes { - return true + // Check size limit using Badger's actual size + if b.maxSizeBytes > 0 { + lsm, _ := b.db.Size() + if lsm >= b.maxSizeBytes { + return true + } } // Check block count limit @@ -474,3 +477,6 @@ func (b *BadgerBlockBuffer) runGC() { } } } + +// Ensure BadgerBlockBuffer implements IBlockBuffer interface +var _ IBlockBuffer = (*BadgerBlockBuffer)(nil) diff --git a/internal/storage/block_buffer_badger_test.go b/internal/storage/block_buffer_badger_test.go index 7901a67..b1c4c83 100644 --- a/internal/storage/block_buffer_badger_test.go +++ b/internal/storage/block_buffer_badger_test.go @@ -43,7 +43,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { }, } - buffer.Add(blocks, 1024) + buffer.Add(blocks) // Test O(1) GetMaxBlockNumber start := time.Now() @@ -82,7 +82,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { }, }, } - buffer.Add(newBlocks, 512) + buffer.Add(newBlocks) maxBlock = buffer.GetMaxBlockNumber(chainId) assert.NotNil(t, maxBlock) @@ -107,7 +107,7 @@ func BenchmarkBadgerBlockBufferGetMaxBlockNumber(b *testing.B) { }, }, } - buffer.Add(blocks, 1024) + buffer.Add(blocks) } b.ResetTimer() @@ -133,7 +133,7 @@ func BenchmarkBadgerBlockBufferStats(b *testing.B) { }, }, } - buffer.Add(blocks, 1024) + buffer.Add(blocks) } } diff --git a/internal/storage/s3.go b/internal/storage/s3.go index d50328b..2e37aa6 100644 --- a/internal/storage/s3.go +++ b/internal/storage/s3.go @@ -115,10 +115,11 @@ func NewS3Connector(cfg *config.S3StorageConfig) (*S3Connector, error) { } // Create buffer with configured settings - buffer, err := NewBlockBufferWithBadger(cfg.BufferSize, cfg.MaxBlocksPerFile) + var buffer IBlockBuffer + buffer, err = NewBadgerBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) if err != nil { - // Fall back to in-memory buffer if Badger fails - log.Warn().Err(err).Msg("Failed to create Badger buffer, falling back to in-memory buffer") + // fallback + log.Error().Err(err).Msg("Failed to create Badger buffer, falling back to in-memory buffer") buffer = NewBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) } @@ -144,27 +145,14 @@ func (s *S3Connector) InsertBlockData(data []common.BlockData) error { return nil } - // Calculate actual serialized size for accurate memory tracking - formattedData, err := s.formatter.FormatBlockData(data) - if err != nil { - return fmt.Errorf("failed to format block data for size calculation: %w", err) - } - - // Use actual serialized size for accurate memory tracking - actualSize := 
int64(len(formattedData)) - log.Debug(). - Int("block_count", len(data)). - Int64("size_bytes", actualSize). - Int64("avg_bytes_per_block", actualSize/int64(len(data))). - Msg("Calculated actual block data size") - // Add to buffer and check if flush is needed - shouldFlush := s.buffer.Add(data, actualSize) + shouldFlush := s.buffer.Add(data) // Start or reset timer when first data is added s.timerMu.Lock() - sizeBytes, blockCount := s.buffer.Size() - if sizeBytes == actualSize && blockCount == len(data) && s.config.BufferTimeout > 0 { + _, blockCount := s.buffer.Size() + // Check if this is the first batch added (buffer was empty before) + if blockCount == len(data) && s.config.BufferTimeout > 0 { // First data added to buffer, track time and start timer s.lastAddTime = time.Now() if s.flushTimer != nil { @@ -357,13 +345,11 @@ func (s *S3Connector) Close() error { // Wait for worker to finish s.wg.Wait() - // Clean up buffer resources (especially important for BadgerBlockBuffer) - if badgerBuffer, ok := s.buffer.(*BadgerBlockBuffer); ok { - if err := badgerBuffer.Close(); err != nil { - log.Error().Err(err).Msg("Error closing badger buffer") - if closeErr == nil { - closeErr = err - } + // Clean up buffer resources + if err := s.buffer.Close(); err != nil { + log.Error().Err(err).Msg("Error closing buffer") + if closeErr == nil { + closeErr = err } } }) From 136a346c3892ac07fc242e6ba36f5d5159d7b28b Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 28 Aug 2025 01:05:56 +0000 Subject: [PATCH 40/43] redis tls. erc1155 batch mv --- cmd/root.go | 2 + configs/config.go | 9 +- internal/storage/redis.go | 15 +++- ...7_clickhouse_create_token_transfers_mv.sql | 86 ++++++++++--------- 4 files changed, 64 insertions(+), 48 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index efcd200..391ad78 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -151,6 +151,7 @@ func init() { rootCmd.PersistentFlags().Int("storage-orchestrator-redis-port", 6379, "Redis port for orchestrator storage metadata") rootCmd.PersistentFlags().String("storage-orchestrator-redis-password", "", "Redis password for orchestator storage metadata") rootCmd.PersistentFlags().Int("storage-orchestrator-redis-db", 0, "Redis database number for orchestrator storage metadata") + rootCmd.PersistentFlags().Bool("storage-orchestrator-redis-enableTLS", true, "Enable TLS for Redis connection in orchestrator storage metadata") rootCmd.PersistentFlags().String("storage-staging-type", "auto", "Storage type for staging (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-main-type", "auto", "Storage type for main (auto, clickhouse, postgres, kafka, badger, s3)") rootCmd.PersistentFlags().String("storage-orchestrator-type", "auto", "Storage type for orchestrator (auto, clickhouse, postgres, badger)") @@ -341,6 +342,7 @@ func init() { viper.BindPFlag("storage.orchestrator.redis.port", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-port")) viper.BindPFlag("storage.orchestrator.redis.password", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-password")) viper.BindPFlag("storage.orchestrator.redis.db", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-db")) + viper.BindPFlag("storage.orchestrator.redis.enableTLS", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-enableTLS")) viper.BindPFlag("storage.orchestrator.badger.path", rootCmd.PersistentFlags().Lookup("storage-orchestrator-badger-path")) 
viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) viper.BindPFlag("storage.staging.postgres.host", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-host")) diff --git a/configs/config.go b/configs/config.go index c8c52e0..395d2f1 100644 --- a/configs/config.go +++ b/configs/config.go @@ -147,10 +147,11 @@ type PostgresConfig struct { } type RedisConfig struct { - Host string `mapstructure:"host"` - Port int `mapstructure:"port"` - Password string `mapstructure:"password"` - DB int `mapstructure:"db"` + Host string `mapstructure:"host"` + Port int `mapstructure:"port"` + Password string `mapstructure:"password"` + DB int `mapstructure:"db"` + EnableTLS bool `mapstructure:"enableTLS"` } type KafkaConfig struct { diff --git a/internal/storage/redis.go b/internal/storage/redis.go index d48b17f..bb71810 100644 --- a/internal/storage/redis.go +++ b/internal/storage/redis.go @@ -2,6 +2,7 @@ package storage import ( "context" + "crypto/tls" "fmt" "math/big" "time" @@ -26,10 +27,18 @@ type RedisConnector struct { func NewRedisConnector(cfg *config.RedisConfig) (*RedisConnector, error) { // Connect to Redis + var tlsConfig *tls.Config + if cfg.EnableTLS { + tlsConfig = &tls.Config{ + MinVersion: tls.VersionTLS12, // Ensure a secure TLS version + } + } + redisClient := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port), - Password: cfg.Password, - DB: cfg.DB, + Addr: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port), + Password: cfg.Password, + DB: cfg.DB, + TLSConfig: tlsConfig, }) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) diff --git a/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql index 7c09aea..30d01a5 100644 --- a/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql +++ b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql @@ -80,52 +80,56 @@ WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0 -- ERC1155 (batch) CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc1155_batch_mv TO token_transfers -AS -SELECT - chain_id, - address AS token_address, - 'erc1155' AS token_type, - reinterpretAsUInt256(reverse(unhex(id_hex))) AS token_id, - concat('0x', substring(topic_2, 27, 40)) AS from_address, - concat('0x', substring(topic_3, 27, 40)) AS to_address, +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(substring(bin, (ids_base + ((i - 1) * 32)) + 1, 32))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(substring(bin, (am_base + ((i - 1) * 32)) + 1, 32))) AS amount, + log_index, + toNullable(toUInt16(i - 1)) AS batch_index, + insert_timestamp, + is_deleted +FROM ( + SELECT + chain_id, + address, + topic_2, + topic_3, block_number, block_timestamp, transaction_hash, transaction_index, - reinterpretAsUInt256(reverse(unhex(amount_hex))) AS amount, log_index, - toNullable(toUInt16(array_index - 1)) AS batch_index, - insert_timestamp, - is_deleted -FROM ( - SELECT - chain_id, - address, - topic_2, - topic_3, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - log_index, - is_deleted, - insert_timestamp, - 
toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64))))) AS ids_offset, - toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64))))) AS amounts_offset, - toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + ids_offset * 2, 64))))) AS ids_length, - toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + amounts_offset * 2, 64))))) AS amounts_length, - arrayMap(i -> substring(data, 3 + ids_offset * 2 + 64 + (i-1)*64, 64), range(1, least(ids_length, 10000) + 1)) AS ids_array, - arrayMap(i -> substring(data, 3 + amounts_offset * 2 + 64 + (i-1)*64, 64), range(1, least(amounts_length, 10000) + 1)) AS amounts_array - FROM logs - WHERE topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb' - AND length(topic_2) = 66 - AND length(topic_3) = 66 - AND ids_length = amounts_length -) -ARRAY JOIN - ids_array AS id_hex, - amounts_array AS amount_hex, - arrayEnumerate(ids_array) AS array_index; + is_deleted, + insert_timestamp, + unhex(substring(data, 3)) AS bin, + length(unhex(substring(data, 3))) AS bin_len, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), 1, 32)))) AS ids_off, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), 33, 32)))) AS am_off, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), ids_off + 1, 32)))) AS ids_len, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), am_off + 1, 32)))) AS am_len, + ids_off + 32 AS ids_base, + am_off + 32 AS am_base +FROM default.logs +WHERE (topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') + AND (length(topic_2) = 66) + AND (length(topic_3) = 66) + AND (ids_len = am_len) + AND (ids_len > 0) + AND ((ids_off + 32) <= bin_len) + AND ((am_off + 32) <= bin_len) + AND ((ids_base + (ids_len * 32)) <= bin_len) + AND ((am_base + (am_len * 32)) <= bin_len) +) ARRAY JOIN range(1, ids_len + 1) AS i; + -- ERC6909 CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc6909_mv From b9828af1d479d1ec9f4ec4640f421ccc515bff61 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 28 Aug 2025 05:08:09 +0000 Subject: [PATCH 41/43] fix projections, use _part_offset projections --- .../0000_clickhouse_create_blocks_table.sql | 16 +++++++ ...1_clickhouse_create_transactions_table.sql | 28 ++++++------ .../0002_clickhouse_create_logs_table.sql | 16 +++---- .../0003_clickhouse_create_traces_table.sql | 6 +-- ...0006_clickhouse_create_token_transfers.sql | 44 +++++++++---------- .../0008_clickhouse_create_token_balances.sql | 9 +++- ...clickhouse_create_address_transactions.sql | 11 ++--- ...12_clickhouse_create_address_transfers.sql | 24 +++++----- 8 files changed, 90 insertions(+), 64 deletions(-) diff --git a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index a1d1979..1bab7b8 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -27,6 +27,22 @@ CREATE TABLE IF NOT EXISTS blocks ( INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, + + PROJECTION chain_state_projection + ( + SELECT + chain_id, + count() AS count, + uniqExact(block_number) AS unique_block_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS 
max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY + chain_id + ) + + ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) diff --git a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql index 11dff13..562f339 100644 --- a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql +++ b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql @@ -45,7 +45,7 @@ CREATE TABLE IF NOT EXISTS transactions ( PROJECTION from_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, from_address, @@ -55,7 +55,7 @@ CREATE TABLE IF NOT EXISTS transactions ( PROJECTION to_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, to_address, @@ -67,11 +67,12 @@ CREATE TABLE IF NOT EXISTS transactions ( SELECT chain_id, from_address, - countState() AS tx_count_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS tx_count, + uniqExact(hash) AS unique_tx_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, from_address @@ -81,11 +82,12 @@ CREATE TABLE IF NOT EXISTS transactions ( SELECT chain_id, to_address, - countState() AS tx_count_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS tx_count, + uniqExact(hash) AS unique_tx_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, to_address @@ -93,4 +95,4 @@ CREATE TABLE IF NOT EXISTS transactions ( ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number, hash) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql index 89f6e1c..d4e202c 100644 --- a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -28,7 +28,7 @@ CREATE TABLE IF NOT EXISTS logs ( PROJECTION chain_address_topic0_projection ( SELECT - * + _part_offset ORDER BY chain_id, address, @@ -40,7 +40,7 @@ CREATE TABLE IF NOT EXISTS logs ( PROJECTION chain_topic0_projection ( SELECT - * + _part_offset ORDER BY chain_id, topic_0, @@ -55,11 +55,11 @@ CREATE TABLE IF NOT EXISTS logs ( chain_id, address, topic_0, - countState() AS log_count_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - 
maxState(block_timestamp) AS max_block_timestamp_state + count() AS log_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, address, @@ -68,4 +68,4 @@ CREATE TABLE IF NOT EXISTS logs ( ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number, transaction_hash, log_index) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; diff --git a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql index 8f69a1f..6b65467 100644 --- a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql +++ b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql @@ -32,7 +32,7 @@ CREATE TABLE IF NOT EXISTS traces ( PROJECTION from_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, from_address, @@ -43,7 +43,7 @@ CREATE TABLE IF NOT EXISTS traces ( PROJECTION to_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, to_address, @@ -55,4 +55,4 @@ CREATE TABLE IF NOT EXISTS traces ( ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, transaction_hash, trace_address) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; diff --git a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql index 9007649..edb92cb 100644 --- a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql +++ b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql @@ -24,7 +24,7 @@ CREATE TABLE IF NOT EXISTS token_transfers PROJECTION from_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, from_address, @@ -34,7 +34,7 @@ CREATE TABLE IF NOT EXISTS token_transfers ), PROJECTION to_address_projection ( SELECT - * + _part_offset ORDER BY chain_id, to_address, @@ -44,7 +44,7 @@ CREATE TABLE IF NOT EXISTS token_transfers ), PROJECTION token_id_projection ( SELECT - * + _part_offset ORDER BY chain_id, token_address, @@ -59,12 +59,12 @@ CREATE TABLE IF NOT EXISTS token_transfers from_address, token_address, token_type, - countState() AS transfer_count_state, - sumState(toInt256(amount)) AS total_amount_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, from_address, @@ -77,12 +77,12 @@ CREATE TABLE IF NOT EXISTS token_transfers to_address, token_address, token_type, - countState() AS transfer_count_state, - sumState(toInt256(amount)) AS 
total_amount_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, to_address, @@ -95,12 +95,12 @@ CREATE TABLE IF NOT EXISTS token_transfers token_address, token_id, token_type, - countState() AS transfer_count_state, - sumState(toInt256(amount)) AS total_volume_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS transfer_count, + sum(toInt256(amount)) AS total_volume, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, token_address, @@ -111,4 +111,4 @@ CREATE TABLE IF NOT EXISTS token_transfers ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) ORDER BY (chain_id, token_address, block_number, transaction_index, log_index) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql index 11e0c6a..49444f1 100644 --- a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql @@ -52,9 +52,16 @@ CREATE TABLE IF NOT EXISTS token_balances maxState(block_number) AS max_block_number_state, maxState(block_timestamp) AS max_block_timestamp_state GROUP BY chain_id, token_address, token_id, owner_address + ), + + PROJECTION token_projection + ( + SELECT + _part_offset + ORDER BY chain_id, token_address, token_id, owner_address ) ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) PARTITION BY chain_id ORDER BY (chain_id, owner_address, token_address, token_id, block_number, transaction_index, log_index, direction) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; \ No newline at end of file +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql index fa9f55a..f546f40 100644 --- a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql +++ b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql @@ -47,11 +47,12 @@ CREATE TABLE IF NOT EXISTS address_transactions ( SELECT chain_id, address, - countState() AS tx_count_state, 
- minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS tx_count, + uniqExact(hash) AS unique_tx_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, address diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql index 3803323..c130e70 100644 --- a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql +++ b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql @@ -30,12 +30,12 @@ CREATE TABLE IF NOT EXISTS address_transfers ( address_type, token_address, token_type, - countState() AS transfer_count_state, - sumState(toInt256(amount)) AS total_amount_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, address, @@ -49,12 +49,12 @@ CREATE TABLE IF NOT EXISTS address_transfers ( address, token_address, token_type, - countState() AS transfer_count_state, - sumState(toInt256(amount)) AS total_amount_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp GROUP BY chain_id, address, From 69f5f78656253b07702a61cde422a51e967910e4 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 28 Aug 2025 05:19:16 +0000 Subject: [PATCH 42/43] gofmt --- internal/storage/block_buffer_badger.go | 4 ++-- internal/storage/block_buffer_badger_test.go | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/internal/storage/block_buffer_badger.go b/internal/storage/block_buffer_badger.go index 9d28cc5..09469c4 100644 --- a/internal/storage/block_buffer_badger.go +++ b/internal/storage/block_buffer_badger.go @@ -228,7 +228,7 @@ func (b *BadgerBlockBuffer) ShouldFlush() bool { func (b *BadgerBlockBuffer) Size() (int64, int) { b.mu.RLock() defer b.mu.RUnlock() - + // Get actual size from Badger's LSM tree lsm, _ := b.db.Size() return lsm, b.blockCount @@ -390,7 +390,7 @@ func (b *BadgerBlockBuffer) Stats() BufferStats { // Get actual size from Badger lsm, _ := b.db.Size() - + stats := BufferStats{ BlockCount: b.blockCount, SizeBytes: lsm, diff --git a/internal/storage/block_buffer_badger_test.go b/internal/storage/block_buffer_badger_test.go index b1c4c83..b10e8d8 100644 --- a/internal/storage/block_buffer_badger_test.go +++ b/internal/storage/block_buffer_badger_test.go @@ -17,7 +17,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { defer 
buffer.Close() chainId := big.NewInt(1) - + // Add blocks blocks := []common.BlockData{ { @@ -49,7 +49,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { start := time.Now() maxBlock := buffer.GetMaxBlockNumber(chainId) elapsed := time.Since(start) - + assert.NotNil(t, maxBlock) assert.Equal(t, big.NewInt(101), maxBlock) assert.Less(t, elapsed, time.Millisecond, "GetMaxBlockNumber should be O(1) and very fast") @@ -58,7 +58,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { start = time.Now() stats := buffer.Stats() elapsed = time.Since(start) - + assert.Equal(t, 3, stats.BlockCount) assert.Equal(t, 1, stats.ChainCount) chainStats := stats.ChainStats[1] @@ -71,7 +71,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { buffer.Flush() maxBlock = buffer.GetMaxBlockNumber(chainId) assert.Nil(t, maxBlock) - + // Add new blocks and verify metadata is rebuilt newBlocks := []common.BlockData{ { @@ -83,7 +83,7 @@ func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { }, } buffer.Add(newBlocks) - + maxBlock = buffer.GetMaxBlockNumber(chainId) assert.NotNil(t, maxBlock) assert.Equal(t, big.NewInt(200), maxBlock) @@ -95,7 +95,7 @@ func BenchmarkBadgerBlockBufferGetMaxBlockNumber(b *testing.B) { defer buffer.Close() chainId := big.NewInt(1) - + // Add many blocks for i := 0; i < 1000; i++ { blocks := []common.BlockData{ @@ -141,4 +141,4 @@ func BenchmarkBadgerBlockBufferStats(b *testing.B) { for i := 0; i < b.N; i++ { _ = buffer.Stats() } -} \ No newline at end of file +} From e551c17c2cef9618f1480ecf2c9b63dc76cb1371 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Thu, 28 Aug 2025 05:31:39 +0000 Subject: [PATCH 43/43] Fix test --- internal/orchestrator/committer_test.go | 2 +- test/mocks/MockIMainStorage.go | 167 +++++++++++++++- test/mocks/MockIOrchestratorStorage.go | 242 +++++++++++++++++------- test/mocks/MockIRPCClient.go | 2 +- test/mocks/MockIStagingStorage.go | 230 +++++++++++++++------- 5 files changed, 500 insertions(+), 143 deletions(-) diff --git a/internal/orchestrator/committer_test.go b/internal/orchestrator/committer_test.go index 8e2cb90..160a748 100644 --- a/internal/orchestrator/committer_test.go +++ b/internal/orchestrator/committer_test.go @@ -426,7 +426,7 @@ func TestHandleGap(t *testing.T) { mockRPC.EXPECT().GetBlocksPerRequest().Return(rpc.BlocksPerRequestConfig{ Blocks: 5, }) - mockRPC.EXPECT().GetChainID().Return(big.NewInt(1)) + // GetChainID is not called in this flow since there are no block failures mockRPC.EXPECT().GetFullBlocks(context.Background(), []*big.Int{big.NewInt(100), big.NewInt(101), big.NewInt(102), big.NewInt(103), big.NewInt(104)}).Return([]rpc.GetFullBlockResult{ {BlockNumber: big.NewInt(100), Data: common.BlockData{Block: common.Block{Number: big.NewInt(100)}}}, {BlockNumber: big.NewInt(101), Data: common.BlockData{Block: common.Block{Number: big.NewInt(101)}}}, diff --git a/test/mocks/MockIMainStorage.go b/test/mocks/MockIMainStorage.go index a77c398..e13e4ee 100644 --- a/test/mocks/MockIMainStorage.go +++ b/test/mocks/MockIMainStorage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. 
//go:build !production @@ -26,6 +26,51 @@ func (_m *MockIMainStorage) EXPECT() *MockIMainStorage_Expecter { return &MockIMainStorage_Expecter{mock: &_m.Mock} } +// Close provides a mock function with no fields +func (_m *MockIMainStorage) Close() error { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for Close") + } + + var r0 error + if rf, ok := ret.Get(0).(func() error); ok { + r0 = rf() + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIMainStorage_Close_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Close' +type MockIMainStorage_Close_Call struct { + *mock.Call +} + +// Close is a helper method to define mock.On call +func (_e *MockIMainStorage_Expecter) Close() *MockIMainStorage_Close_Call { + return &MockIMainStorage_Close_Call{Call: _e.mock.On("Close")} +} + +func (_c *MockIMainStorage_Close_Call) Run(run func()) *MockIMainStorage_Close_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockIMainStorage_Close_Call) Return(_a0 error) *MockIMainStorage_Close_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIMainStorage_Close_Call) RunAndReturn(run func() error) *MockIMainStorage_Close_Call { + _c.Call.Return(run) + return _c +} + // FindMissingBlockNumbers provides a mock function with given fields: chainId, startBlock, endBlock func (_m *MockIMainStorage) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { ret := _m.Called(chainId, startBlock, endBlock) @@ -143,6 +188,66 @@ func (_c *MockIMainStorage_GetAggregations_Call) RunAndReturn(run func(string, s return _c } +// GetBlockCount provides a mock function with given fields: chainId, startBlock, endBlock +func (_m *MockIMainStorage) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + ret := _m.Called(chainId, startBlock, endBlock) + + if len(ret) == 0 { + panic("no return value specified for GetBlockCount") + } + + var r0 *big.Int + var r1 error + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) (*big.Int, error)); ok { + return rf(chainId, startBlock, endBlock) + } + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) *big.Int); ok { + r0 = rf(chainId, startBlock, endBlock) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*big.Int) + } + } + + if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { + r1 = rf(chainId, startBlock, endBlock) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockIMainStorage_GetBlockCount_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlockCount' +type MockIMainStorage_GetBlockCount_Call struct { + *mock.Call +} + +// GetBlockCount is a helper method to define mock.On call +// - chainId *big.Int +// - startBlock *big.Int +// - endBlock *big.Int +func (_e *MockIMainStorage_Expecter) GetBlockCount(chainId interface{}, startBlock interface{}, endBlock interface{}) *MockIMainStorage_GetBlockCount_Call { + return &MockIMainStorage_GetBlockCount_Call{Call: _e.mock.On("GetBlockCount", chainId, startBlock, endBlock)} +} + +func (_c *MockIMainStorage_GetBlockCount_Call) Run(run func(chainId *big.Int, startBlock *big.Int, endBlock *big.Int)) *MockIMainStorage_GetBlockCount_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) + }) + return _c +} + +func (_c *MockIMainStorage_GetBlockCount_Call) Return(blockCount *big.Int, 
err error) *MockIMainStorage_GetBlockCount_Call { + _c.Call.Return(blockCount, err) + return _c +} + +func (_c *MockIMainStorage_GetBlockCount_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) (*big.Int, error)) *MockIMainStorage_GetBlockCount_Call { + _c.Call.Return(run) + return _c +} + // GetBlockHeadersDescending provides a mock function with given fields: chainId, from, to func (_m *MockIMainStorage) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { ret := _m.Called(chainId, from, to) @@ -462,6 +567,66 @@ func (_c *MockIMainStorage_GetMaxBlockNumber_Call) RunAndReturn(run func(*big.In return _c } +// GetMaxBlockNumberInRange provides a mock function with given fields: chainId, startBlock, endBlock +func (_m *MockIMainStorage) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + ret := _m.Called(chainId, startBlock, endBlock) + + if len(ret) == 0 { + panic("no return value specified for GetMaxBlockNumberInRange") + } + + var r0 *big.Int + var r1 error + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) (*big.Int, error)); ok { + return rf(chainId, startBlock, endBlock) + } + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) *big.Int); ok { + r0 = rf(chainId, startBlock, endBlock) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*big.Int) + } + } + + if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { + r1 = rf(chainId, startBlock, endBlock) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockIMainStorage_GetMaxBlockNumberInRange_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetMaxBlockNumberInRange' +type MockIMainStorage_GetMaxBlockNumberInRange_Call struct { + *mock.Call +} + +// GetMaxBlockNumberInRange is a helper method to define mock.On call +// - chainId *big.Int +// - startBlock *big.Int +// - endBlock *big.Int +func (_e *MockIMainStorage_Expecter) GetMaxBlockNumberInRange(chainId interface{}, startBlock interface{}, endBlock interface{}) *MockIMainStorage_GetMaxBlockNumberInRange_Call { + return &MockIMainStorage_GetMaxBlockNumberInRange_Call{Call: _e.mock.On("GetMaxBlockNumberInRange", chainId, startBlock, endBlock)} +} + +func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) Run(run func(chainId *big.Int, startBlock *big.Int, endBlock *big.Int)) *MockIMainStorage_GetMaxBlockNumberInRange_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) + }) + return _c +} + +func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) Return(maxBlockNumber *big.Int, err error) *MockIMainStorage_GetMaxBlockNumberInRange_Call { + _c.Call.Return(maxBlockNumber, err) + return _c +} + +func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) (*big.Int, error)) *MockIMainStorage_GetMaxBlockNumberInRange_Call { + _c.Call.Return(run) + return _c +} + // GetTokenBalances provides a mock function with given fields: qf, fields func (_m *MockIMainStorage) GetTokenBalances(qf storage.BalancesQueryFilter, fields ...string) (storage.QueryResult[common.TokenBalance], error) { _va := make([]interface{}, len(fields)) diff --git a/test/mocks/MockIOrchestratorStorage.go b/test/mocks/MockIOrchestratorStorage.go index fe382f0..c8d0932 100644 --- a/test/mocks/MockIOrchestratorStorage.go +++ b/test/mocks/MockIOrchestratorStorage.go @@ -1,4 +1,4 @@ -// Code generated by mockery 
v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. //go:build !production @@ -8,9 +8,6 @@ import ( big "math/big" mock "github.com/stretchr/testify/mock" - common "github.com/thirdweb-dev/indexer/internal/common" - - storage "github.com/thirdweb-dev/indexer/internal/storage" ) // MockIOrchestratorStorage is an autogenerated mock type for the IOrchestratorStorage type @@ -26,17 +23,17 @@ func (_m *MockIOrchestratorStorage) EXPECT() *MockIOrchestratorStorage_Expecter return &MockIOrchestratorStorage_Expecter{mock: &_m.Mock} } -// DeleteBlockFailures provides a mock function with given fields: failures -func (_m *MockIOrchestratorStorage) DeleteBlockFailures(failures []common.BlockFailure) error { - ret := _m.Called(failures) +// Close provides a mock function with no fields +func (_m *MockIOrchestratorStorage) Close() error { + ret := _m.Called() if len(ret) == 0 { - panic("no return value specified for DeleteBlockFailures") + panic("no return value specified for Close") } var r0 error - if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { - r0 = rf(failures) + if rf, ok := ret.Get(0).(func() error); ok { + r0 = rf() } else { r0 = ret.Error(0) } @@ -44,57 +41,56 @@ func (_m *MockIOrchestratorStorage) DeleteBlockFailures(failures []common.BlockF return r0 } -// MockIOrchestratorStorage_DeleteBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteBlockFailures' -type MockIOrchestratorStorage_DeleteBlockFailures_Call struct { +// MockIOrchestratorStorage_Close_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Close' +type MockIOrchestratorStorage_Close_Call struct { *mock.Call } -// DeleteBlockFailures is a helper method to define mock.On call -// - failures []common.BlockFailure -func (_e *MockIOrchestratorStorage_Expecter) DeleteBlockFailures(failures interface{}) *MockIOrchestratorStorage_DeleteBlockFailures_Call { - return &MockIOrchestratorStorage_DeleteBlockFailures_Call{Call: _e.mock.On("DeleteBlockFailures", failures)} +// Close is a helper method to define mock.On call +func (_e *MockIOrchestratorStorage_Expecter) Close() *MockIOrchestratorStorage_Close_Call { + return &MockIOrchestratorStorage_Close_Call{Call: _e.mock.On("Close")} } -func (_c *MockIOrchestratorStorage_DeleteBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIOrchestratorStorage_DeleteBlockFailures_Call { +func (_c *MockIOrchestratorStorage_Close_Call) Run(run func()) *MockIOrchestratorStorage_Close_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].([]common.BlockFailure)) + run() }) return _c } -func (_c *MockIOrchestratorStorage_DeleteBlockFailures_Call) Return(_a0 error) *MockIOrchestratorStorage_DeleteBlockFailures_Call { +func (_c *MockIOrchestratorStorage_Close_Call) Return(_a0 error) *MockIOrchestratorStorage_Close_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIOrchestratorStorage_DeleteBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIOrchestratorStorage_DeleteBlockFailures_Call { +func (_c *MockIOrchestratorStorage_Close_Call) RunAndReturn(run func() error) *MockIOrchestratorStorage_Close_Call { _c.Call.Return(run) return _c } -// GetBlockFailures provides a mock function with given fields: qf -func (_m *MockIOrchestratorStorage) GetBlockFailures(qf storage.QueryFilter) ([]common.BlockFailure, error) { - ret := _m.Called(qf) +// GetLastCommittedBlockNumber provides a mock function with given fields: chainId 
+func (_m *MockIOrchestratorStorage) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + ret := _m.Called(chainId) if len(ret) == 0 { - panic("no return value specified for GetBlockFailures") + panic("no return value specified for GetLastCommittedBlockNumber") } - var r0 []common.BlockFailure + var r0 *big.Int var r1 error - if rf, ok := ret.Get(0).(func(storage.QueryFilter) ([]common.BlockFailure, error)); ok { - return rf(qf) + if rf, ok := ret.Get(0).(func(*big.Int) (*big.Int, error)); ok { + return rf(chainId) } - if rf, ok := ret.Get(0).(func(storage.QueryFilter) []common.BlockFailure); ok { - r0 = rf(qf) + if rf, ok := ret.Get(0).(func(*big.Int) *big.Int); ok { + r0 = rf(chainId) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).([]common.BlockFailure) + r0 = ret.Get(0).(*big.Int) } } - if rf, ok := ret.Get(1).(func(storage.QueryFilter) error); ok { - r1 = rf(qf) + if rf, ok := ret.Get(1).(func(*big.Int) error); ok { + r1 = rf(chainId) } else { r1 = ret.Error(1) } @@ -102,30 +98,88 @@ func (_m *MockIOrchestratorStorage) GetBlockFailures(qf storage.QueryFilter) ([] return r0, r1 } -// MockIOrchestratorStorage_GetBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlockFailures' -type MockIOrchestratorStorage_GetBlockFailures_Call struct { +// MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetLastCommittedBlockNumber' +type MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call struct { *mock.Call } -// GetBlockFailures is a helper method to define mock.On call -// - qf storage.QueryFilter -func (_e *MockIOrchestratorStorage_Expecter) GetBlockFailures(qf interface{}) *MockIOrchestratorStorage_GetBlockFailures_Call { - return &MockIOrchestratorStorage_GetBlockFailures_Call{Call: _e.mock.On("GetBlockFailures", qf)} +// GetLastCommittedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +func (_e *MockIOrchestratorStorage_Expecter) GetLastCommittedBlockNumber(chainId interface{}) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { + return &MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call{Call: _e.mock.On("GetLastCommittedBlockNumber", chainId)} } -func (_c *MockIOrchestratorStorage_GetBlockFailures_Call) Run(run func(qf storage.QueryFilter)) *MockIOrchestratorStorage_GetBlockFailures_Call { +func (_c *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call) Run(run func(chainId *big.Int)) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(storage.QueryFilter)) + run(args[0].(*big.Int)) }) return _c } -func (_c *MockIOrchestratorStorage_GetBlockFailures_Call) Return(_a0 []common.BlockFailure, _a1 error) *MockIOrchestratorStorage_GetBlockFailures_Call { - _c.Call.Return(_a0, _a1) +func (_c *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call) Return(blockNumber *big.Int, err error) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { + _c.Call.Return(blockNumber, err) + return _c +} + +func (_c *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call) RunAndReturn(run func(*big.Int) (*big.Int, error)) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { + _c.Call.Return(run) + return _c +} + +// GetLastPublishedBlockNumber provides a mock function with given fields: chainId +func (_m *MockIOrchestratorStorage) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + ret := 
_m.Called(chainId) + + if len(ret) == 0 { + panic("no return value specified for GetLastPublishedBlockNumber") + } + + var r0 *big.Int + var r1 error + if rf, ok := ret.Get(0).(func(*big.Int) (*big.Int, error)); ok { + return rf(chainId) + } + if rf, ok := ret.Get(0).(func(*big.Int) *big.Int); ok { + r0 = rf(chainId) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*big.Int) + } + } + + if rf, ok := ret.Get(1).(func(*big.Int) error); ok { + r1 = rf(chainId) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetLastPublishedBlockNumber' +type MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call struct { + *mock.Call +} + +// GetLastPublishedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +func (_e *MockIOrchestratorStorage_Expecter) GetLastPublishedBlockNumber(chainId interface{}) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { + return &MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call{Call: _e.mock.On("GetLastPublishedBlockNumber", chainId)} +} + +func (_c *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int)) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*big.Int)) + }) + return _c +} + +func (_c *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call) Return(blockNumber *big.Int, err error) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { + _c.Call.Return(blockNumber, err) return _c } -func (_c *MockIOrchestratorStorage_GetBlockFailures_Call) RunAndReturn(run func(storage.QueryFilter) ([]common.BlockFailure, error)) *MockIOrchestratorStorage_GetBlockFailures_Call { +func (_c *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int) (*big.Int, error)) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { _c.Call.Return(run) return _c } @@ -188,12 +242,12 @@ func (_c *MockIOrchestratorStorage_GetLastReorgCheckedBlockNumber_Call) RunAndRe return _c } -// SetLastReorgCheckedBlockNumber provides a mock function with given fields: chainId, blockNumber -func (_m *MockIOrchestratorStorage) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { +// SetLastCommittedBlockNumber provides a mock function with given fields: chainId, blockNumber +func (_m *MockIOrchestratorStorage) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { ret := _m.Called(chainId, blockNumber) if len(ret) == 0 { - panic("no return value specified for SetLastReorgCheckedBlockNumber") + panic("no return value specified for SetLastCommittedBlockNumber") } var r0 error @@ -206,46 +260,46 @@ func (_m *MockIOrchestratorStorage) SetLastReorgCheckedBlockNumber(chainId *big. 
return r0 } -// MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastReorgCheckedBlockNumber' -type MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call struct { +// MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastCommittedBlockNumber' +type MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call struct { *mock.Call } -// SetLastReorgCheckedBlockNumber is a helper method to define mock.On call +// SetLastCommittedBlockNumber is a helper method to define mock.On call // - chainId *big.Int // - blockNumber *big.Int -func (_e *MockIOrchestratorStorage_Expecter) SetLastReorgCheckedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { - return &MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call{Call: _e.mock.On("SetLastReorgCheckedBlockNumber", chainId, blockNumber)} +func (_e *MockIOrchestratorStorage_Expecter) SetLastCommittedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { + return &MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call{Call: _e.mock.On("SetLastCommittedBlockNumber", chainId, blockNumber)} } -func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { +func (_c *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { _c.Call.Run(func(args mock.Arguments) { run(args[0].(*big.Int), args[1].(*big.Int)) }) return _c } -func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { +func (_c *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { +func (_c *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { _c.Call.Return(run) return _c } -// StoreBlockFailures provides a mock function with given fields: failures -func (_m *MockIOrchestratorStorage) StoreBlockFailures(failures []common.BlockFailure) error { - ret := _m.Called(failures) +// SetLastPublishedBlockNumber provides a mock function with given fields: chainId, blockNumber +func (_m *MockIOrchestratorStorage) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ret := _m.Called(chainId, blockNumber) if len(ret) == 0 { - panic("no return value specified for StoreBlockFailures") + panic("no return value specified for SetLastPublishedBlockNumber") } var r0 error - if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { - r0 = rf(failures) + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { + r0 = rf(chainId, blockNumber) } else { r0 = ret.Error(0) } @@ -253,30 +307,78 @@ func (_m *MockIOrchestratorStorage) StoreBlockFailures(failures []common.BlockFa return r0 } -// 
MockIOrchestratorStorage_StoreBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'StoreBlockFailures' -type MockIOrchestratorStorage_StoreBlockFailures_Call struct { +// MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastPublishedBlockNumber' +type MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call struct { *mock.Call } -// StoreBlockFailures is a helper method to define mock.On call -// - failures []common.BlockFailure -func (_e *MockIOrchestratorStorage_Expecter) StoreBlockFailures(failures interface{}) *MockIOrchestratorStorage_StoreBlockFailures_Call { - return &MockIOrchestratorStorage_StoreBlockFailures_Call{Call: _e.mock.On("StoreBlockFailures", failures)} +// SetLastPublishedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +// - blockNumber *big.Int +func (_e *MockIOrchestratorStorage_Expecter) SetLastPublishedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { + return &MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call{Call: _e.mock.On("SetLastPublishedBlockNumber", chainId, blockNumber)} } -func (_c *MockIOrchestratorStorage_StoreBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIOrchestratorStorage_StoreBlockFailures_Call { +func (_c *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].([]common.BlockFailure)) + run(args[0].(*big.Int), args[1].(*big.Int)) }) return _c } -func (_c *MockIOrchestratorStorage_StoreBlockFailures_Call) Return(_a0 error) *MockIOrchestratorStorage_StoreBlockFailures_Call { +func (_c *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIOrchestratorStorage_StoreBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIOrchestratorStorage_StoreBlockFailures_Call { +func (_c *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { + _c.Call.Return(run) + return _c +} + +// SetLastReorgCheckedBlockNumber provides a mock function with given fields: chainId, blockNumber +func (_m *MockIOrchestratorStorage) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ret := _m.Called(chainId, blockNumber) + + if len(ret) == 0 { + panic("no return value specified for SetLastReorgCheckedBlockNumber") + } + + var r0 error + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { + r0 = rf(chainId, blockNumber) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastReorgCheckedBlockNumber' +type MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call struct { + *mock.Call +} + +// SetLastReorgCheckedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +// - blockNumber *big.Int +func (_e *MockIOrchestratorStorage_Expecter) SetLastReorgCheckedBlockNumber(chainId interface{}, blockNumber interface{}) 
*MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { + return &MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call{Call: _e.mock.On("SetLastReorgCheckedBlockNumber", chainId, blockNumber)} +} + +func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*big.Int), args[1].(*big.Int)) + }) + return _c +} + +func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { _c.Call.Return(run) return _c } diff --git a/test/mocks/MockIRPCClient.go b/test/mocks/MockIRPCClient.go index 42f37ef..f7045c4 100644 --- a/test/mocks/MockIRPCClient.go +++ b/test/mocks/MockIRPCClient.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. //go:build !production diff --git a/test/mocks/MockIStagingStorage.go b/test/mocks/MockIStagingStorage.go index bd73136..53964d3 100644 --- a/test/mocks/MockIStagingStorage.go +++ b/test/mocks/MockIStagingStorage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. //go:build !production @@ -26,6 +26,97 @@ func (_m *MockIStagingStorage) EXPECT() *MockIStagingStorage_Expecter { return &MockIStagingStorage_Expecter{mock: &_m.Mock} } +// Close provides a mock function with no fields +func (_m *MockIStagingStorage) Close() error { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for Close") + } + + var r0 error + if rf, ok := ret.Get(0).(func() error); ok { + r0 = rf() + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIStagingStorage_Close_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Close' +type MockIStagingStorage_Close_Call struct { + *mock.Call +} + +// Close is a helper method to define mock.On call +func (_e *MockIStagingStorage_Expecter) Close() *MockIStagingStorage_Close_Call { + return &MockIStagingStorage_Close_Call{Call: _e.mock.On("Close")} +} + +func (_c *MockIStagingStorage_Close_Call) Run(run func()) *MockIStagingStorage_Close_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockIStagingStorage_Close_Call) Return(_a0 error) *MockIStagingStorage_Close_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIStagingStorage_Close_Call) RunAndReturn(run func() error) *MockIStagingStorage_Close_Call { + _c.Call.Return(run) + return _c +} + +// DeleteBlockFailures provides a mock function with given fields: failures +func (_m *MockIStagingStorage) DeleteBlockFailures(failures []common.BlockFailure) error { + ret := _m.Called(failures) + + if len(ret) == 0 { + panic("no return value specified for DeleteBlockFailures") + } + + var r0 error + if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { + r0 = rf(failures) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIStagingStorage_DeleteBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteBlockFailures' +type MockIStagingStorage_DeleteBlockFailures_Call struct 
{ + *mock.Call +} + +// DeleteBlockFailures is a helper method to define mock.On call +// - failures []common.BlockFailure +func (_e *MockIStagingStorage_Expecter) DeleteBlockFailures(failures interface{}) *MockIStagingStorage_DeleteBlockFailures_Call { + return &MockIStagingStorage_DeleteBlockFailures_Call{Call: _e.mock.On("DeleteBlockFailures", failures)} +} + +func (_c *MockIStagingStorage_DeleteBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIStagingStorage_DeleteBlockFailures_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].([]common.BlockFailure)) + }) + return _c +} + +func (_c *MockIStagingStorage_DeleteBlockFailures_Call) Return(_a0 error) *MockIStagingStorage_DeleteBlockFailures_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIStagingStorage_DeleteBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIStagingStorage_DeleteBlockFailures_Call { + _c.Call.Return(run) + return _c +} + // DeleteStagingData provides a mock function with given fields: data func (_m *MockIStagingStorage) DeleteStagingData(data []common.BlockData) error { ret := _m.Called(data) @@ -72,107 +163,107 @@ func (_c *MockIStagingStorage_DeleteStagingData_Call) RunAndReturn(run func([]co return _c } -// GetLastPublishedBlockNumber provides a mock function with given fields: chainId -func (_m *MockIStagingStorage) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - ret := _m.Called(chainId) +// DeleteStagingDataOlderThan provides a mock function with given fields: chainId, blockNumber +func (_m *MockIStagingStorage) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { + ret := _m.Called(chainId, blockNumber) if len(ret) == 0 { - panic("no return value specified for GetLastPublishedBlockNumber") - } - - var r0 *big.Int - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int) (*big.Int, error)); ok { - return rf(chainId) - } - if rf, ok := ret.Get(0).(func(*big.Int) *big.Int); ok { - r0 = rf(chainId) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*big.Int) - } + panic("no return value specified for DeleteStagingDataOlderThan") } - if rf, ok := ret.Get(1).(func(*big.Int) error); ok { - r1 = rf(chainId) + var r0 error + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { + r0 = rf(chainId, blockNumber) } else { - r1 = ret.Error(1) + r0 = ret.Error(0) } - return r0, r1 + return r0 } -// MockIStagingStorage_GetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetLastPublishedBlockNumber' -type MockIStagingStorage_GetLastPublishedBlockNumber_Call struct { +// MockIStagingStorage_DeleteStagingDataOlderThan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteStagingDataOlderThan' +type MockIStagingStorage_DeleteStagingDataOlderThan_Call struct { *mock.Call } -// GetLastPublishedBlockNumber is a helper method to define mock.On call +// DeleteStagingDataOlderThan is a helper method to define mock.On call // - chainId *big.Int -func (_e *MockIStagingStorage_Expecter) GetLastPublishedBlockNumber(chainId interface{}) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { - return &MockIStagingStorage_GetLastPublishedBlockNumber_Call{Call: _e.mock.On("GetLastPublishedBlockNumber", chainId)} +// - blockNumber *big.Int +func (_e *MockIStagingStorage_Expecter) DeleteStagingDataOlderThan(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { + 
return &MockIStagingStorage_DeleteStagingDataOlderThan_Call{Call: _e.mock.On("DeleteStagingDataOlderThan", chainId, blockNumber)} } -func (_c *MockIStagingStorage_GetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int)) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int)) + run(args[0].(*big.Int), args[1].(*big.Int)) }) return _c } -func (_c *MockIStagingStorage_GetLastPublishedBlockNumber_Call) Return(maxBlockNumber *big.Int, err error) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { - _c.Call.Return(maxBlockNumber, err) +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Return(_a0 error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { + _c.Call.Return(_a0) return _c } -func (_c *MockIStagingStorage_GetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int) (*big.Int, error)) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Return(run) return _c } -// SetLastPublishedBlockNumber provides a mock function with given fields: chainId, blockNumber -func (_m *MockIStagingStorage) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ret := _m.Called(chainId, blockNumber) +// GetBlockFailures provides a mock function with given fields: qf +func (_m *MockIStagingStorage) GetBlockFailures(qf storage.QueryFilter) ([]common.BlockFailure, error) { + ret := _m.Called(qf) if len(ret) == 0 { - panic("no return value specified for SetLastPublishedBlockNumber") + panic("no return value specified for GetBlockFailures") } - var r0 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { - r0 = rf(chainId, blockNumber) + var r0 []common.BlockFailure + var r1 error + if rf, ok := ret.Get(0).(func(storage.QueryFilter) ([]common.BlockFailure, error)); ok { + return rf(qf) + } + if rf, ok := ret.Get(0).(func(storage.QueryFilter) []common.BlockFailure); ok { + r0 = rf(qf) } else { - r0 = ret.Error(0) + if ret.Get(0) != nil { + r0 = ret.Get(0).([]common.BlockFailure) + } } - return r0 + if rf, ok := ret.Get(1).(func(storage.QueryFilter) error); ok { + r1 = rf(qf) + } else { + r1 = ret.Error(1) + } + + return r0, r1 } -// MockIStagingStorage_SetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastPublishedBlockNumber' -type MockIStagingStorage_SetLastPublishedBlockNumber_Call struct { +// MockIStagingStorage_GetBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlockFailures' +type MockIStagingStorage_GetBlockFailures_Call struct { *mock.Call } -// SetLastPublishedBlockNumber is a helper method to define mock.On call -// - chainId *big.Int -// - blockNumber *big.Int -func (_e *MockIStagingStorage_Expecter) SetLastPublishedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { - return &MockIStagingStorage_SetLastPublishedBlockNumber_Call{Call: _e.mock.On("SetLastPublishedBlockNumber", chainId, blockNumber)} +// GetBlockFailures is a helper method to define mock.On call +// - qf storage.QueryFilter +func (_e 
*MockIStagingStorage_Expecter) GetBlockFailures(qf interface{}) *MockIStagingStorage_GetBlockFailures_Call { + return &MockIStagingStorage_GetBlockFailures_Call{Call: _e.mock.On("GetBlockFailures", qf)} } -func (_c *MockIStagingStorage_SetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_GetBlockFailures_Call) Run(run func(qf storage.QueryFilter)) *MockIStagingStorage_GetBlockFailures_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int)) + run(args[0].(storage.QueryFilter)) }) return _c } -func (_c *MockIStagingStorage_SetLastPublishedBlockNumber_Call) Return(_a0 error) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { - _c.Call.Return(_a0) +func (_c *MockIStagingStorage_GetBlockFailures_Call) Return(_a0 []common.BlockFailure, _a1 error) *MockIStagingStorage_GetBlockFailures_Call { + _c.Call.Return(_a0, _a1) return _c } -func (_c *MockIStagingStorage_SetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_GetBlockFailures_Call) RunAndReturn(run func(storage.QueryFilter) ([]common.BlockFailure, error)) *MockIStagingStorage_GetBlockFailures_Call { _c.Call.Return(run) return _c } @@ -341,17 +432,17 @@ func (_c *MockIStagingStorage_InsertStagingData_Call) RunAndReturn(run func([]co return _c } -// DeleteStagingDataOlderThan provides a mock function with given fields: chainId, blockNumber -func (_m *MockIStagingStorage) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { - ret := _m.Called(chainId, blockNumber) +// StoreBlockFailures provides a mock function with given fields: failures +func (_m *MockIStagingStorage) StoreBlockFailures(failures []common.BlockFailure) error { + ret := _m.Called(failures) if len(ret) == 0 { - panic("no return value specified for DeleteStagingDataOlderThan") + panic("no return value specified for StoreBlockFailures") } var r0 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { - r0 = rf(chainId, blockNumber) + if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { + r0 = rf(failures) } else { r0 = ret.Error(0) } @@ -359,31 +450,30 @@ func (_m *MockIStagingStorage) DeleteStagingDataOlderThan(chainId *big.Int, bloc return r0 } -// MockIStagingStorage_DeleteStagingDataOlderThan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteStagingDataOlderThan' -type MockIStagingStorage_DeleteStagingDataOlderThan_Call struct { +// MockIStagingStorage_StoreBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'StoreBlockFailures' +type MockIStagingStorage_StoreBlockFailures_Call struct { *mock.Call } -// DeleteStagingDataOlderThan is a helper method to define mock.On call -// - chainId *big.Int -// - blockNumber *big.Int -func (_e *MockIStagingStorage_Expecter) DeleteStagingDataOlderThan(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { - return &MockIStagingStorage_DeleteStagingDataOlderThan_Call{Call: _e.mock.On("DeleteStagingDataOlderThan", chainId, blockNumber)} +// StoreBlockFailures is a helper method to define mock.On call +// - failures []common.BlockFailure +func (_e *MockIStagingStorage_Expecter) StoreBlockFailures(failures interface{}) *MockIStagingStorage_StoreBlockFailures_Call { + return 
&MockIStagingStorage_StoreBlockFailures_Call{Call: _e.mock.On("StoreBlockFailures", failures)} } -func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { +func (_c *MockIStagingStorage_StoreBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIStagingStorage_StoreBlockFailures_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int)) + run(args[0].([]common.BlockFailure)) }) return _c } -func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Return(_a0 error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { +func (_c *MockIStagingStorage_StoreBlockFailures_Call) Return(_a0 error) *MockIStagingStorage_StoreBlockFailures_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { +func (_c *MockIStagingStorage_StoreBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIStagingStorage_StoreBlockFailures_Call { _c.Call.Return(run) return _c }
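
For context on how the regenerated mocks above are exercised in tests, here is a minimal sketch using the mockery expecter API that this patch adds (the new Close, GetLastCommittedBlockNumber, SetLastCommittedBlockNumber, and GetLastPublishedBlockNumber methods). This is illustrative only and not part of the patch: the `test/mocks` package name, the import path `github.com/thirdweb-dev/indexer/test/mocks`, and the expected block numbers are assumptions.

    // Illustrative sketch (not part of this patch): exercising the regenerated
    // MockIOrchestratorStorage cursor methods with testify/mockery expectations.
    package orchestrator_test

    import (
    	"math/big"
    	"testing"

    	"github.com/stretchr/testify/mock"
    	"github.com/stretchr/testify/require"

    	mocks "github.com/thirdweb-dev/indexer/test/mocks" // assumed import path
    )

    func TestOrchestratorStorageCursorMocks(t *testing.T) {
    	orchStorage := &mocks.MockIOrchestratorStorage{}

    	// Expect the committer to read the last committed cursor, advance it,
    	// and close the storage when it shuts down (values are illustrative).
    	orchStorage.EXPECT().GetLastCommittedBlockNumber(mock.Anything).Return(big.NewInt(100), nil)
    	orchStorage.EXPECT().SetLastCommittedBlockNumber(mock.Anything, big.NewInt(105)).Return(nil)
    	orchStorage.EXPECT().Close().Return(nil)

    	// In a real test the component under test would make these calls.
    	last, err := orchStorage.GetLastCommittedBlockNumber(big.NewInt(1))
    	require.NoError(t, err)
    	require.Equal(t, int64(100), last.Int64())
    	require.NoError(t, orchStorage.SetLastCommittedBlockNumber(big.NewInt(1), big.NewInt(105)))
    	require.NoError(t, orchStorage.Close())

    	orchStorage.AssertExpectations(t)
    }

The same pattern applies to the new MockIMainStorage methods (Close, GetBlockCount, GetMaxBlockNumberInRange) and to the block-failure methods that moved onto MockIStagingStorage.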