diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go index cc3e912..33b93ad 100644 --- a/cmd/migrate_valid.go +++ b/cmd/migrate_valid.go @@ -2,9 +2,13 @@ package cmd import ( "context" + "fmt" "math/big" "os" - "strconv" + "os/signal" + "sync" + "syscall" + "time" "github.com/rs/zerolog/log" "github.com/spf13/cobra" @@ -13,6 +17,7 @@ import ( "github.com/thirdweb-dev/indexer/internal/orchestrator" "github.com/thirdweb-dev/indexer/internal/rpc" "github.com/thirdweb-dev/indexer/internal/storage" + "github.com/thirdweb-dev/indexer/internal/worker" ) var ( @@ -27,31 +32,192 @@ var ( ) const ( - TARGET_STORAGE_DATABASE = "temp" - DEFAULT_RPC_BATCH_SIZE = 200 - DEFAULT_BATCH_SIZE = 1000 + DEFAULT_BATCH_SIZE = 2000 + DEFAULT_WORKERS = 1 ) func RunValidationMigration(cmd *cobra.Command, args []string) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Set up signal handling for graceful shutdown + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + migrator := NewMigrator() defer migrator.Close() - rangeStartBlock, rangeEndBlock := migrator.DetermineMigrationBoundaries() + targetEndBlock := big.NewInt(int64(config.Cfg.Migrator.EndBlock)) + targetStartBlock := big.NewInt(int64(config.Cfg.Migrator.StartBlock)) + rangeStartBlock, rangeEndBlock := migrator.DetermineMigrationBoundaries(targetStartBlock, targetEndBlock) log.Info().Msgf("Migrating blocks from %s to %s (both ends inclusive)", rangeStartBlock.String(), rangeEndBlock.String()) - // 2. Start going in loops - for currentBlock := rangeStartBlock; currentBlock.Cmp(rangeEndBlock) <= 0; { - endBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.migrationBatchSize-1))) - if endBlock.Cmp(rangeEndBlock) > 0 { - endBlock = rangeEndBlock + // Calculate work distribution for workers + numWorkers := DEFAULT_WORKERS + if config.Cfg.Migrator.WorkerCount > 0 { + numWorkers = int(config.Cfg.Migrator.WorkerCount) + } + workRanges := divideBlockRange(rangeStartBlock, rangeEndBlock, numWorkers) + log.Info().Msgf("Starting %d workers to process migration", len(workRanges)) + + // Create error channel and wait group + errChan := make(chan error, numWorkers) + var wg sync.WaitGroup + + // Start workers + for workerID, workRange := range workRanges { + wg.Add(1) + go func(id int, startBlock, endBlock *big.Int) { + defer wg.Done() + + // Only check boundaries per-worker if we have multiple workers + // For single worker, we already determined boundaries globally + var actualStart, actualEnd *big.Int + if numWorkers > 1 { + // Multiple workers: each needs to check their specific range + actualStart, actualEnd = migrator.DetermineMigrationBoundariesForRange(startBlock, endBlock) + if actualStart == nil || actualEnd == nil { + log.Info().Msgf("Worker %d: Range %s to %s already fully migrated", id, startBlock.String(), endBlock.String()) + return + } + log.Info().Msgf("Worker %d starting: blocks %s to %s (adjusted from %s to %s)", + id, actualStart.String(), actualEnd.String(), startBlock.String(), endBlock.String()) + } else { + // Single worker: use the already-determined boundaries + actualStart, actualEnd = startBlock, endBlock + log.Info().Msgf("Worker %d starting: blocks %s to %s", id, actualStart.String(), actualEnd.String()) + } + + if err := processBlockRange(ctx, migrator, id, actualStart, actualEnd); err != nil { + errChan <- err + log.Error().Err(err).Msgf("Worker %d failed", id) + return + } + + log.Info().Msgf("Worker %d completed successfully", id) + }(workerID, 
workRange.start, workRange.end) + } + + // Monitor for completion or interruption + done := make(chan struct{}) + go func() { + wg.Wait() + close(done) + }() + + // Wait for either completion, error, or interrupt signal + select { + case <-done: + log.Info().Msg("All workers completed successfully") + // 3. then finally copy partitions from target table to main tables + log.Info().Msg("Migration completed successfully") + case err := <-errChan: + log.Error().Err(err).Msg("Migration failed due to worker error") + cancel() + wg.Wait() + log.Fatal().Msg("Migration stopped due to error") + case sig := <-sigChan: + log.Info().Msgf("Received signal: %s, initiating graceful shutdown...", sig) + cancel() + wg.Wait() + log.Info().Msg("Migration stopped gracefully") + } +} + +type blockRange struct { + start *big.Int + end *big.Int +} + +func divideBlockRange(startBlock, endBlock *big.Int, numWorkers int) []blockRange { + ranges := make([]blockRange, 0, numWorkers) + + // Calculate total blocks + totalBlocks := new(big.Int).Sub(endBlock, startBlock) + totalBlocks.Add(totalBlocks, big.NewInt(1)) // inclusive range + + // Calculate blocks per worker + blocksPerWorker := new(big.Int).Div(totalBlocks, big.NewInt(int64(numWorkers))) + remainder := new(big.Int).Mod(totalBlocks, big.NewInt(int64(numWorkers))) + + currentStart := new(big.Int).Set(startBlock) + + for i := 0; i < numWorkers; i++ { + // Calculate end block for this worker + workerBlockCount := new(big.Int).Set(blocksPerWorker) + + // Distribute remainder blocks to first workers + if big.NewInt(int64(i)).Cmp(remainder) < 0 { + workerBlockCount.Add(workerBlockCount, big.NewInt(1)) } - blockNumbers := generateBlockNumbersForRange(currentBlock, endBlock) - log.Info().Msgf("Processing blocks %s to %s", blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) + // Skip if no blocks for this worker + if workerBlockCount.Sign() == 0 { + continue + } - validBlocksForRange := migrator.GetValidBlocksForRange(blockNumbers) + currentEnd := new(big.Int).Add(currentStart, workerBlockCount) + currentEnd.Sub(currentEnd, big.NewInt(1)) // inclusive range + // Ensure we don't exceed the end block + if currentEnd.Cmp(endBlock) > 0 { + currentEnd = new(big.Int).Set(endBlock) + } + + ranges = append(ranges, blockRange{ + start: new(big.Int).Set(currentStart), + end: new(big.Int).Set(currentEnd), + }) + + // Move to next range + currentStart = new(big.Int).Add(currentEnd, big.NewInt(1)) + + // Stop if we've covered all blocks + if currentStart.Cmp(endBlock) > 0 { + break + } + } + + return ranges +} + +func processBlockRange(ctx context.Context, migrator *Migrator, workerID int, startBlock, endBlock *big.Int) error { + currentBlock := new(big.Int).Set(startBlock) + + for currentBlock.Cmp(endBlock) <= 0 { + batchStartTime := time.Now() + + // Check for cancellation + select { + case <-ctx.Done(): + log.Info().Msgf("Worker %d: Migration interrupted at block %s", workerID, currentBlock.String()) + return nil + default: + } + + batchEndBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.batchSize-1))) + if batchEndBlock.Cmp(endBlock) > 0 { + batchEndBlock = endBlock + } + + blockNumbers := generateBlockNumbersForRange(currentBlock, batchEndBlock) + log.Info().Msgf("Worker %d: Processing blocks %s to %s", workerID, blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) + + // Fetch valid blocks from source + fetchStartTime := time.Now() + validBlocksForRange, err := migrator.GetValidBlocksForRange(blockNumbers) + fetchDuration := 
time.Since(fetchStartTime) + if err != nil { + // If we got an error fetching valid blocks, we'll continue + log.Error().Err(err).Msgf("Worker %d: Failed to get valid blocks for range", workerID) + time.Sleep(3 * time.Second) + continue + } + log.Debug().Dur("duration", fetchDuration).Int("blocks_fetched", len(validBlocksForRange)).Msgf("Worker %d: Fetched valid blocks from source", workerID) + + // Build map of fetched blocks + mapBuildStartTime := time.Now() blocksToInsertMap := make(map[string]common.BlockData) for _, blockData := range validBlocksForRange { blocksToInsertMap[blockData.Block.Number.String()] = blockData @@ -64,64 +230,84 @@ func RunValidationMigration(cmd *cobra.Command, args []string) { missingBlocks = append(missingBlocks, blockNum) } } - - validMissingBlocks := migrator.GetValidBlocksFromRPC(missingBlocks) - for _, blockData := range validMissingBlocks { - blocksToInsertMap[blockData.Block.Number.String()] = blockData + mapBuildDuration := time.Since(mapBuildStartTime) + log.Debug().Dur("duration", mapBuildDuration).Int("missing_blocks", len(missingBlocks)).Msgf("Worker %d: Identified missing blocks", workerID) + + // Fetch missing blocks from RPC + if len(missingBlocks) > 0 { + rpcFetchStartTime := time.Now() + validMissingBlocks := migrator.GetValidBlocksFromRPC(missingBlocks) + rpcFetchDuration := time.Since(rpcFetchStartTime) + log.Debug().Dur("duration", rpcFetchDuration).Int("blocks_fetched", len(validMissingBlocks)).Msgf("Worker %d: Fetched missing blocks from RPC", workerID) + + for _, blockData := range validMissingBlocks { + if blockData.Block.ChainId.Sign() == 0 { + return fmt.Errorf("worker %d: block %s has chain ID 0", workerID, blockData.Block.Number.String()) + } + blocksToInsertMap[blockData.Block.Number.String()] = blockData + } } - blocksToInsert := make([]common.BlockData, 0) + // Prepare blocks for insertion + prepStartTime := time.Now() + blocksToInsert := make([]common.BlockData, 0, len(blocksToInsertMap)) for _, blockData := range blocksToInsertMap { blocksToInsert = append(blocksToInsert, blockData) } + prepDuration := time.Since(prepStartTime) + log.Debug().Dur("duration", prepDuration).Int("blocks_to_insert", len(blocksToInsert)).Msgf("Worker %d: Prepared blocks for insertion", workerID) - err := migrator.targetConn.InsertBlockData(blocksToInsert) + // Insert blocks to destination + insertStartTime := time.Now() + err = migrator.destination.InsertBlockData(blocksToInsert) + insertDuration := time.Since(insertStartTime) if err != nil { - log.Fatal().Err(err).Msg("Failed to insert blocks to target storage") + log.Error().Err(err).Dur("duration", insertDuration).Msgf("Worker %d: Failed to insert blocks to target storage", workerID) + time.Sleep(3 * time.Second) + continue } - currentBlock = new(big.Int).Add(endBlock, big.NewInt(1)) + batchDuration := time.Since(batchStartTime) + log.Info(). + Dur("total_duration", batchDuration). + Dur("fetch_duration", fetchDuration). + Dur("insert_duration", insertDuration). + Int("blocks_processed", len(blocksToInsert)). + Msgf("Worker %d: Batch processed successfully", workerID) + + currentBlock = new(big.Int).Add(batchEndBlock, big.NewInt(1)) } - // 3. 
then finally copy partitions from target table to main tables - log.Info().Msg("Done") + return nil } type Migrator struct { - rpcClient rpc.IRPCClient - storage storage.IStorage - validator *orchestrator.Validator - targetConn *storage.ClickHouseConnector - migrationBatchSize int - rpcBatchSize int + rpcClient rpc.IRPCClient + worker *worker.Worker + source storage.IStorage + destination storage.IMainStorage + validator *orchestrator.Validator + batchSize int } func NewMigrator() *Migrator { - targetDBName := os.Getenv("TARGET_STORAGE_DATABASE") - if targetDBName == "" { - targetDBName = TARGET_STORAGE_DATABASE - } batchSize := DEFAULT_BATCH_SIZE - batchSizeEnvInt, err := strconv.Atoi(os.Getenv("MIGRATION_BATCH_SIZE")) - if err == nil && batchSizeEnvInt > 0 { - batchSize = batchSizeEnvInt - } - rpcBatchSize := DEFAULT_RPC_BATCH_SIZE - rpcBatchSizeEnvInt, err := strconv.Atoi(os.Getenv("MIGRATION_RPC_BATCH_SIZE")) - if err == nil && rpcBatchSizeEnvInt > 0 { - rpcBatchSize = rpcBatchSizeEnvInt + if config.Cfg.Migrator.BatchSize > 0 { + batchSize = int(config.Cfg.Migrator.BatchSize) } + rpcClient, err := rpc.Initialize() if err != nil { log.Fatal().Err(err).Msg("Failed to initialize RPC") } - s, err := storage.NewStorageConnector(&config.Cfg.Storage) + + sourceConnector, err := storage.NewStorageConnector(&config.Cfg.Storage) if err != nil { log.Fatal().Err(err).Msg("Failed to initialize storage") } // check if chain was indexed with block receipts. If it was, then the current RPC must support block receipts - validRpc, err := validateRPC(rpcClient, s) + validRpc, err := validateRPC(rpcClient, sourceConnector) if err != nil { log.Fatal().Err(err).Msg("Failed to validate RPC") } @@ -129,114 +315,159 @@ func NewMigrator() *Migrator { log.Fatal().Msg("RPC does not support block receipts, but transactions were indexed with receipts") } - validator := orchestrator.NewValidator(rpcClient, s) + validator := orchestrator.NewValidator(rpcClient, sourceConnector) - targetStorageConfig := *config.Cfg.Storage.Main.Clickhouse - targetStorageConfig.Database = targetDBName - targetConn, err := storage.NewClickHouseConnector(&targetStorageConfig) + destinationConnector, err := storage.NewMainConnector(&config.Cfg.Migrator.Destination, &sourceConnector.OrchestratorStorage) if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize target storage") + log.Fatal().Err(err).Msg("Failed to initialize storage") } return &Migrator{ - migrationBatchSize: batchSize, - rpcBatchSize: rpcBatchSize, - rpcClient: rpcClient, - storage: s, - validator: validator, - targetConn: targetConn, + batchSize: batchSize, + rpcClient: rpcClient, + source: sourceConnector, + destination: destinationConnector, + validator: validator, + worker: worker.NewWorker(rpcClient), } } func (m *Migrator) Close() { m.rpcClient.Close() + + if err := m.source.Close(); err != nil { + log.Fatal().Err(err).Msg("Failed to close source storage") + } + + if err := m.destination.Close(); err != nil { + log.Fatal().Err(err).Msg("Failed to close destination storage") + } } -func (m *Migrator) DetermineMigrationBoundaries() (*big.Int, *big.Int) { +func (m *Migrator) DetermineMigrationBoundaries(targetStartBlock, targetEndBlock *big.Int) (*big.Int, *big.Int) { // get latest block from main storage - latestBlockStored, err := m.storage.MainStorage.GetMaxBlockNumber(m.rpcClient.GetChainID()) + latestBlockStored, err := m.source.MainStorage.GetMaxBlockNumber(m.rpcClient.GetChainID()) if err != nil { log.Fatal().Err(err).Msg("Failed to get latest block from 
main storage") } log.Info().Msgf("Latest block in main storage: %d", latestBlockStored) endBlock := latestBlockStored - // set range end from env instead if configured - endBlockEnv := os.Getenv("END_BLOCK") - if endBlockEnv != "" { - configuredEndBlock, ok := new(big.Int).SetString(endBlockEnv, 10) - if !ok { - log.Fatal().Msgf("Failed to parse end block %s", endBlockEnv) - } - log.Info().Msgf("Configured end block: %s", configuredEndBlock.String()) - // set configured end block only if it's greater than 0 and less than latest block in main storage - if configuredEndBlock.Sign() > 0 && configuredEndBlock.Cmp(latestBlockStored) < 0 { - endBlock = configuredEndBlock - } + if targetEndBlock.Sign() > 0 && targetEndBlock.Cmp(latestBlockStored) < 0 { + endBlock = targetEndBlock } - startBlock := big.NewInt(0) // default start block is 0 - // if start block is configured, use it - startBlockEnv := os.Getenv("START_BLOCK") - if startBlockEnv != "" { - configuredStartBlock, ok := new(big.Int).SetString(startBlockEnv, 10) - if !ok { - log.Fatal().Msgf("Failed to parse start block %s", startBlockEnv) - } - log.Info().Msgf("Configured start block: %s", configuredStartBlock.String()) - startBlock = configuredStartBlock - } + startBlock := targetStartBlock - latestMigratedBlock, err := m.targetConn.GetMaxBlockNumberInRange(m.rpcClient.GetChainID(), startBlock, endBlock) + blockCount, err := m.destination.GetBlockCount(m.rpcClient.GetChainID(), startBlock, endBlock) if err != nil { log.Fatal().Err(err).Msg("Failed to get latest block from target storage") } - log.Info().Msgf("Latest block in target storage: %d", latestMigratedBlock) + log.Info().Msgf("Block count in the target storage for range %s to %s: count=%s", startBlock.String(), endBlock.String(), blockCount.String()) - if latestMigratedBlock.Cmp(endBlock) >= 0 { + expectedCount := new(big.Int).Sub(endBlock, startBlock) + expectedCount = expectedCount.Add(expectedCount, big.NewInt(1)) + if expectedCount.Cmp(blockCount) == 0 { log.Fatal().Msgf("Full range is already migrated") + return nil, nil } - // if configured start block is less than or equal to already migrated and migrated block is not 0, start from last migrated + 1 - if startBlock.Cmp(latestMigratedBlock) <= 0 && latestMigratedBlock.Sign() > 0 { - startBlock = new(big.Int).Add(latestMigratedBlock, big.NewInt(1)) + maxStoredBlock, err := m.destination.GetMaxBlockNumberInRange(m.rpcClient.GetChainID(), startBlock, endBlock) + if err != nil { + log.Fatal().Err(err).Msg("Failed to get max block from destination storage") + return nil, nil + } + + log.Info().Msgf("Block in the target storage for range %s to %s: count=%s, max=%s", startBlock.String(), endBlock.String(), blockCount.String(), maxStoredBlock.String()) + // Only adjust start block if we actually have blocks stored (count > 0) + // When count is 0, maxStoredBlock might be 0 but that doesn't mean block 0 exists + if blockCount.Sign() > 0 && maxStoredBlock != nil && maxStoredBlock.Cmp(startBlock) >= 0 { + startBlock = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) } return startBlock, endBlock } -func (m *Migrator) FetchBlocksFromRPC(blockNumbers []*big.Int) ([]common.BlockData, error) { - allBlockData := make([]common.BlockData, 0) - for i := 0; i < len(blockNumbers); i += m.rpcBatchSize { - end := i + m.rpcBatchSize - if end > len(blockNumbers) { - end = len(blockNumbers) +// DetermineMigrationBoundariesForRange determines the actual migration boundaries for a worker's specific range +// Returns nil, nil if the range is already 
fully migrated +// Fails fatally if it cannot determine boundaries (to ensure data correctness) +func (m *Migrator) DetermineMigrationBoundariesForRange(rangeStart, rangeEnd *big.Int) (*big.Int, *big.Int) { + // Check how many blocks we have in this specific range + blockCount, err := m.destination.GetBlockCount(m.rpcClient.GetChainID(), rangeStart, rangeEnd) + if err != nil { + log.Fatal().Err(err).Msgf("Worker failed to get block count for range %s to %s", rangeStart.String(), rangeEnd.String()) + return nil, nil + } + + expectedCount := new(big.Int).Sub(rangeEnd, rangeStart) + expectedCount = expectedCount.Add(expectedCount, big.NewInt(1)) + + // If all blocks are already migrated, return nil + if expectedCount.Cmp(blockCount) == 0 { + log.Debug().Msgf("Range %s to %s already fully migrated (%s blocks)", rangeStart.String(), rangeEnd.String(), blockCount.String()) + return nil, nil + } + + // Find the actual starting point by checking what blocks we already have + maxStoredBlock, err := m.destination.GetMaxBlockNumberInRange(m.rpcClient.GetChainID(), rangeStart, rangeEnd) + if err != nil { + log.Fatal().Err(err).Msgf("Worker failed to get max block in range %s to %s", rangeStart.String(), rangeEnd.String()) + return nil, nil + } + + actualStart := rangeStart + // Only adjust start block if we actually have blocks stored (blockCount > 0) + // When blockCount is 0, maxStoredBlock might be 0 but that doesn't mean block 0 exists + if blockCount.Sign() > 0 && maxStoredBlock != nil && maxStoredBlock.Cmp(rangeStart) >= 0 { + // We have some blocks already, start from the next one + actualStart = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) + + // If the new start is beyond our range end, the range is fully migrated + if actualStart.Cmp(rangeEnd) > 0 { + log.Debug().Msgf("Range %s to %s already fully migrated (max block: %s)", rangeStart.String(), rangeEnd.String(), maxStoredBlock.String()) + return nil, nil } - batch := blockNumbers[i:end] - blockData := m.rpcClient.GetFullBlocks(context.Background(), batch) + } - for _, block := range blockData { - if block.Error != nil { - log.Warn().Err(block.Error).Msgf("Failed to fetch block %s from RPC", block.BlockNumber.String()) - continue - } - allBlockData = append(allBlockData, block.Data) + log.Debug().Msgf("Range %s-%s: found %s blocks, max stored: %v, will migrate from %s", + rangeStart.String(), rangeEnd.String(), blockCount.String(), maxStoredBlock, actualStart.String()) + + return actualStart, rangeEnd +} + +func (m *Migrator) FetchBlocksFromRPC(blockNumbers []*big.Int) ([]common.BlockData, error) { + allBlockData := make([]common.BlockData, 0, len(blockNumbers)) + + blockData := m.worker.Run(context.Background(), blockNumbers) + for _, block := range blockData { + if block.Error != nil { + log.Warn().Err(block.Error).Msgf("Failed to fetch block %s from RPC", block.BlockNumber.String()) + continue } + allBlockData = append(allBlockData, block.Data) } return allBlockData, nil } -func (m *Migrator) GetValidBlocksForRange(blockNumbers []*big.Int) []common.BlockData { - blockData, err := m.storage.MainStorage.GetFullBlockData(m.rpcClient.GetChainID(), blockNumbers) +func (m *Migrator) GetValidBlocksForRange(blockNumbers []*big.Int) ([]common.BlockData, error) { + getFullBlockTime := time.Now() + blockData, err := m.source.MainStorage.GetFullBlockData(m.rpcClient.GetChainID(), blockNumbers) + getFullBlockDuration := time.Since(getFullBlockTime) if err != nil { - log.Fatal().Err(err).Msg("Failed to get full block data") + 
log.Error().Err(err).Msg("Failed to get full block data") + return nil, err } + validateBlockTime := time.Now() validBlocks, _, err := m.validator.ValidateBlocks(blockData) + validateBlockDuration := time.Since(validateBlockTime) if err != nil { - log.Fatal().Err(err).Msg("Failed to validate blocks") + log.Error().Err(err).Msg("Failed to validate blocks") + return nil, err } - return validBlocks + + log.Debug().Dur("get_full_block", getFullBlockDuration).Dur("validate_block", validateBlockDuration).Int("count", len(blockNumbers)).Msg("Get valid blocks for range") + return validBlocks, nil } func (m *Migrator) GetValidBlocksFromRPC(blockNumbers []*big.Int) []common.BlockData { @@ -282,7 +513,15 @@ func validateRPC(rpcClient rpc.IRPCClient, s storage.IStorage) (bool, error) { } func generateBlockNumbersForRange(startBlock, endBlock *big.Int) []*big.Int { - blockNumbers := make([]*big.Int, 0) + if startBlock.Cmp(endBlock) > 0 { + return []*big.Int{} + } + + // Pre-calculate capacity to avoid slice growth + length := new(big.Int).Sub(endBlock, startBlock) + length.Add(length, big.NewInt(1)) + + blockNumbers := make([]*big.Int, 0, length.Int64()) for i := new(big.Int).Set(startBlock); i.Cmp(endBlock) <= 0; i.Add(i, big.NewInt(1)) { blockNumbers = append(blockNumbers, new(big.Int).Set(i)) } diff --git a/cmd/orchestrator.go b/cmd/orchestrator.go index 84665df..6d8a357 100644 --- a/cmd/orchestrator.go +++ b/cmd/orchestrator.go @@ -32,12 +32,19 @@ func RunOrchestrator(cmd *cobra.Command, args []string) { if err != nil { log.Fatal().Err(err).Msg("Failed to create orchestrator") } + // Start Prometheus metrics server log.Info().Msg("Starting Metrics Server on port 2112") go func() { http.Handle("/metrics", promhttp.Handler()) - http.ListenAndServe(":2112", nil) + if err := http.ListenAndServe(":2112", nil); err != nil { + log.Error().Err(err).Msg("Metrics server error") + } }() + // Start orchestrator (blocks until shutdown) + // The orchestrator handles signals internally and coordinates shutdown orchestrator.Start() + + log.Info().Msg("Shutdown complete") } diff --git a/cmd/root.go b/cmd/root.go index 3c58dcb..391ad78 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -56,6 +56,19 @@ func init() { rootCmd.PersistentFlags().Bool("poller-force-from-block", false, "Force the poller to start from the block specified in `poller-from-block`") rootCmd.PersistentFlags().Int("poller-until-block", 0, "Until which block to poll") rootCmd.PersistentFlags().Int("poller-parallel-pollers", 5, "Maximum number of parallel pollers") + rootCmd.PersistentFlags().String("poller-s3-bucket", "", "S3 bucket for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-region", "", "S3 region for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-prefix", "", "S3 prefix for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-accessKeyId", "", "S3 access key ID for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-secretAccessKey", "", "S3 secret access key for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-endpoint", "", "S3 endpoint for poller archive source (for S3-compatible services)") + rootCmd.PersistentFlags().String("poller-s3-format", "parquet", "S3 storage format for poller archive source") + rootCmd.PersistentFlags().String("poller-s3-cacheDir", "/tmp/insight-archive", "Local cache directory for poller archive source") + rootCmd.PersistentFlags().Int("poller-s3-metadataTTL", 0, "Metadata cache TTL in seconds for poller archive 
source") + rootCmd.PersistentFlags().Int("poller-s3-fileCacheTTL", 0, "File cache TTL in seconds for poller archive source") + rootCmd.PersistentFlags().Int64("poller-s3-maxCacheSize", 0, "Max cache size in bytes for poller archive source (default 5GB)") + rootCmd.PersistentFlags().Int("poller-s3-cleanupInterval", 0, "Cache cleanup interval in seconds for poller archive source") + rootCmd.PersistentFlags().Int("poller-s3-maxConcurrentDownloads", 3, "Max concurrent downloads for poller archive source") rootCmd.PersistentFlags().Bool("committer-enabled", true, "Toggle committer") rootCmd.PersistentFlags().Int("committer-blocks-per-commit", 10, "How many blocks to commit each interval") rootCmd.PersistentFlags().Int("committer-interval", 1000, "How often to commit blocks in milliseconds") @@ -83,6 +96,7 @@ func init() { rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for orchestrator storage") rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-disableTLS", false, "Clickhouse disableTLS for orchestrator storage") rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for orchestrator storage") + rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-enableCompression", false, "Clickhouse enableCompression for orchestrator storage") rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxQueryTime", 60, "Clickhouse max query time for orchestrator storage") rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxMemoryUsage", 1000000000, "Clickhouse max memory usage in bytes for orchestrator storage") rootCmd.PersistentFlags().String("storage-orchestrator-postgres-host", "", "PostgreSQL host for orchestrator storage") @@ -105,6 +119,7 @@ func init() { rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for main storage") rootCmd.PersistentFlags().Bool("storage-main-clickhouse-disableTLS", false, "Clickhouse disableTLS for main storage") rootCmd.PersistentFlags().Bool("storage-main-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for main storage") + rootCmd.PersistentFlags().Bool("storage-main-clickhouse-enableCompression", false, "Clickhouse enableCompression for main storage") rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxQueryTime", 60, "Clickhouse max query time for main storage") rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxMemoryUsage", 1000000000, "Clickhouse max memory usage in bytes for main storage") rootCmd.PersistentFlags().String("storage-staging-clickhouse-username", "", "Clickhouse username for staging storage") @@ -115,6 +130,7 @@ func init() { rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for staging storage") rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-disableTLS", false, "Clickhouse disableTLS for staging storage") rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for staging storage") + rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-enableCompression", false, "Clickhouse enableCompression for staging storage") rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxQueryTime", 60, "Clickhouse max query time for staging storage") 
rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxMemoryUsage", 1000000000, "Clickhouse max memory usage in bytes for staging storage") rootCmd.PersistentFlags().String("storage-staging-postgres-host", "", "PostgreSQL host for staging storage") @@ -127,6 +143,33 @@ func init() { rootCmd.PersistentFlags().Int("storage-staging-postgres-maxIdleConns", 25, "PostgreSQL max idle connections for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for staging storage") rootCmd.PersistentFlags().Int("storage-staging-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for staging storage") + rootCmd.PersistentFlags().String("storage-main-kafka-brokers", "", "Kafka brokers for main storage") + rootCmd.PersistentFlags().String("storage-main-kafka-username", "", "Kafka username for main storage") + rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") + rootCmd.PersistentFlags().Bool("storage-main-kafka-enable-tls", true, "Enable TLS for Kafka connection in main storage") + rootCmd.PersistentFlags().String("storage-orchestrator-redis-host", "", "Redis host for orchestrator storage metadata") + rootCmd.PersistentFlags().Int("storage-orchestrator-redis-port", 6379, "Redis port for orchestrator storage metadata") + rootCmd.PersistentFlags().String("storage-orchestrator-redis-password", "", "Redis password for orchestator storage metadata") + rootCmd.PersistentFlags().Int("storage-orchestrator-redis-db", 0, "Redis database number for orchestrator storage metadata") + rootCmd.PersistentFlags().Bool("storage-orchestrator-redis-enableTLS", true, "Enable TLS for Redis connection in orchestrator storage metadata") + rootCmd.PersistentFlags().String("storage-staging-type", "auto", "Storage type for staging (auto, clickhouse, postgres, kafka, badger, s3)") + rootCmd.PersistentFlags().String("storage-main-type", "auto", "Storage type for main (auto, clickhouse, postgres, kafka, badger, s3)") + rootCmd.PersistentFlags().String("storage-orchestrator-type", "auto", "Storage type for orchestrator (auto, clickhouse, postgres, badger)") + rootCmd.PersistentFlags().String("storage-staging-badger-path", "", "BadgerDB path for staging storage") + rootCmd.PersistentFlags().String("storage-orchestrator-badger-path", "", "BadgerDB path for orchestrator storage") + rootCmd.PersistentFlags().String("storage-main-s3-bucket", "", "S3 bucket for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-region", "", "S3 region for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-prefix", "", "S3 key prefix for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-accessKeyId", "", "S3 access key ID for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-secretAccessKey", "", "S3 secret access key for main storage") + rootCmd.PersistentFlags().String("storage-main-s3-endpoint", "", "S3 endpoint URL for main storage (for S3-compatible services)") + rootCmd.PersistentFlags().String("storage-main-s3-format", "parquet", "S3 storage format for main storage (parquet or json)") + rootCmd.PersistentFlags().Int64("storage-main-s3-bufferSizeMB", 512, "S3 buffer size in MB before flush for main storage") + rootCmd.PersistentFlags().Int("storage-main-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for main storage") + rootCmd.PersistentFlags().Int("storage-main-s3-maxBlocksPerFile", 0, "S3 max 
blocks per file for main storage (0 = no limit)") + rootCmd.PersistentFlags().String("storage-main-s3-parquet-compression", "snappy", "Parquet compression type for S3 main storage") + rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-rowGroupSize", 256, "Parquet row group size in MB for S3 main storage") + rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-pageSize", 8192, "Parquet page size in KB for S3 main storage") rootCmd.PersistentFlags().String("api-host", "localhost:3000", "API host") rootCmd.PersistentFlags().String("api-basicAuth-username", "", "API basic auth username") rootCmd.PersistentFlags().String("api-basicAuth-password", "", "API basic auth password") @@ -140,6 +183,9 @@ func init() { rootCmd.PersistentFlags().Bool("publisher-enabled", false, "Toggle publisher") rootCmd.PersistentFlags().String("publisher-mode", "default", "Publisher mode: default or parallel") rootCmd.PersistentFlags().String("publisher-brokers", "", "Kafka brokers") + rootCmd.PersistentFlags().String("publisher-username", "", "Kafka username for publisher") + rootCmd.PersistentFlags().String("publisher-password", "", "Kafka password for publisher") + rootCmd.PersistentFlags().Bool("publisher-enable-tls", true, "Enable TLS for Kafka connection in publisher") rootCmd.PersistentFlags().Bool("publisher-blocks-enabled", false, "Toggle block publisher") rootCmd.PersistentFlags().String("publisher-blocks-topicName", "", "Kafka topic name for blocks") rootCmd.PersistentFlags().Bool("publisher-transactions-enabled", false, "Toggle transaction publisher") @@ -155,6 +201,47 @@ func init() { rootCmd.PersistentFlags().Int("workMode-checkIntervalMinutes", 10, "How often to check work mode in minutes") rootCmd.PersistentFlags().Int64("workMode-liveModeThreshold", 500, "How many blocks the indexer can be behind before switching to live mode") rootCmd.PersistentFlags().String("validation-mode", "strict", "Validation mode. Strict will validate logsBloom and transactionsRoot. 
Minimal will validate transaction count and logs existence.") + rootCmd.PersistentFlags().String("migrator-destination-type", "auto", "Storage type for migrator destination (auto, clickhouse, postgres, kafka, badger, s3)") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-host", "", "Clickhouse host for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-port", 0, "Clickhouse port for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-username", "", "Clickhouse username for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-password", "", "Clickhouse password for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-clickhouse-database", "", "Clickhouse database for migrator destination") + rootCmd.PersistentFlags().Bool("migrator-destination-clickhouse-disableTLS", false, "Clickhouse disableTLS for migrator destination") + rootCmd.PersistentFlags().Bool("migrator-destination-clickhouse-asyncInsert", false, "Clickhouse async insert for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxRowsPerInsert", 100000, "Clickhouse max rows per insert for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxOpenConns", 30, "Clickhouse max open connections for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-host", "", "PostgreSQL host for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-port", 5432, "PostgreSQL port for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-username", "", "PostgreSQL username for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-password", "", "PostgreSQL password for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-database", "", "PostgreSQL database for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-postgres-sslMode", "require", "PostgreSQL SSL mode for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxOpenConns", 50, "PostgreSQL max open connections for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxIdleConns", 25, "PostgreSQL max idle connections for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-kafka-brokers", "", "Kafka brokers for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-kafka-username", "", "Kafka username for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-kafka-password", "", "Kafka password for migrator destination") + rootCmd.PersistentFlags().Bool("migrator-destination-kafka-enableTLS", true, "Enable TLS for Kafka connection in migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-badger-path", "", "BadgerDB path for migrator destination") + 
rootCmd.PersistentFlags().String("migrator-destination-s3-bucket", "", "S3 bucket for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-region", "", "S3 region for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-prefix", "", "S3 key prefix for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-accessKeyId", "", "S3 access key ID for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-secretAccessKey", "", "S3 secret access key for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-endpoint", "", "S3 endpoint URL for migrator destination") + rootCmd.PersistentFlags().String("migrator-destination-s3-format", "parquet", "S3 storage format for migrator destination") + rootCmd.PersistentFlags().Int64("migrator-destination-s3-bufferSizeMB", 512, "S3 buffer size in MB before flush for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for migrator destination") + rootCmd.PersistentFlags().Int("migrator-destination-s3-maxBlocksPerFile", 0, "S3 max blocks per file for migrator destination") + rootCmd.PersistentFlags().Uint("migrator-batchSize", 2000, "Batch size for storage operations in migrator") + rootCmd.PersistentFlags().Uint("migrator-startBlock", 0, "Start block for migration") + rootCmd.PersistentFlags().Uint("migrator-endBlock", 0, "End block for migration") + rootCmd.PersistentFlags().Uint("migrator-workerCount", 0, "Worker count for migration") + viper.BindPFlag("rpc.url", rootCmd.PersistentFlags().Lookup("rpc-url")) viper.BindPFlag("rpc.blocks.blocksPerRequest", rootCmd.PersistentFlags().Lookup("rpc-blocks-blocksPerRequest")) viper.BindPFlag("rpc.blocks.batchDelay", rootCmd.PersistentFlags().Lookup("rpc-blocks-batchDelay")) @@ -175,6 +262,18 @@ func init() { viper.BindPFlag("poller.forceFromBlock", rootCmd.PersistentFlags().Lookup("poller-force-from-block")) viper.BindPFlag("poller.untilBlock", rootCmd.PersistentFlags().Lookup("poller-until-block")) viper.BindPFlag("poller.parallelPollers", rootCmd.PersistentFlags().Lookup("poller-parallel-pollers")) + viper.BindPFlag("poller.s3.endpoint", rootCmd.PersistentFlags().Lookup("poller-s3-endpoint")) + viper.BindPFlag("poller.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("poller-s3-accessKeyId")) + viper.BindPFlag("poller.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("poller-s3-secretAccessKey")) + viper.BindPFlag("poller.s3.bucket", rootCmd.PersistentFlags().Lookup("poller-s3-bucket")) + viper.BindPFlag("poller.s3.region", rootCmd.PersistentFlags().Lookup("poller-s3-region")) + viper.BindPFlag("poller.s3.prefix", rootCmd.PersistentFlags().Lookup("poller-s3-prefix")) + viper.BindPFlag("poller.s3.cacheDir", rootCmd.PersistentFlags().Lookup("poller-s3-cacheDir")) + viper.BindPFlag("poller.s3.metadataTTL", rootCmd.PersistentFlags().Lookup("poller-s3-metadataTTL")) + viper.BindPFlag("poller.s3.fileCacheTTL", rootCmd.PersistentFlags().Lookup("poller-s3-fileCacheTTL")) + viper.BindPFlag("poller.s3.maxCacheSize", rootCmd.PersistentFlags().Lookup("poller-s3-maxCacheSize")) + viper.BindPFlag("poller.s3.cleanupInterval", rootCmd.PersistentFlags().Lookup("poller-s3-cleanupInterval")) + viper.BindPFlag("poller.s3.maxConcurrentDownloads", rootCmd.PersistentFlags().Lookup("poller-s3-maxConcurrentDownloads")) viper.BindPFlag("committer.enabled", 
rootCmd.PersistentFlags().Lookup("committer-enabled")) viper.BindPFlag("committer.blocksPerCommit", rootCmd.PersistentFlags().Lookup("committer-blocks-per-commit")) viper.BindPFlag("committer.interval", rootCmd.PersistentFlags().Lookup("committer-interval")) @@ -198,6 +297,7 @@ func init() { viper.BindPFlag("storage.staging.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxIdleConns")) viper.BindPFlag("storage.staging.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-disableTLS")) viper.BindPFlag("storage.staging.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-enableParallelViewProcessing")) + viper.BindPFlag("storage.staging.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-enableCompression")) viper.BindPFlag("storage.staging.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxQueryTime")) viper.BindPFlag("storage.staging.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxMemoryUsage")) viper.BindPFlag("storage.main.clickhouse.database", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-database")) @@ -211,6 +311,7 @@ func init() { viper.BindPFlag("storage.main.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxIdleConns")) viper.BindPFlag("storage.main.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-disableTLS")) viper.BindPFlag("storage.main.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-enableParallelViewProcessing")) + viper.BindPFlag("storage.main.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-enableCompression")) viper.BindPFlag("storage.main.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxQueryTime")) viper.BindPFlag("storage.main.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxMemoryUsage")) viper.BindPFlag("storage.orchestrator.clickhouse.database", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-database")) @@ -224,6 +325,7 @@ func init() { viper.BindPFlag("storage.orchestrator.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxIdleConns")) viper.BindPFlag("storage.orchestrator.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-disableTLS")) viper.BindPFlag("storage.orchestrator.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-enableParallelViewProcessing")) + viper.BindPFlag("storage.orchestrator.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-enableCompression")) viper.BindPFlag("storage.orchestrator.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxQueryTime")) viper.BindPFlag("storage.orchestrator.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxMemoryUsage")) viper.BindPFlag("storage.orchestrator.postgres.host", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-host")) @@ -236,6 +338,13 @@ func init() { viper.BindPFlag("storage.orchestrator.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-maxIdleConns")) 
viper.BindPFlag("storage.orchestrator.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-maxConnLifetime")) viper.BindPFlag("storage.orchestrator.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-connectTimeout")) + viper.BindPFlag("storage.orchestrator.redis.host", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-host")) + viper.BindPFlag("storage.orchestrator.redis.port", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-port")) + viper.BindPFlag("storage.orchestrator.redis.password", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-password")) + viper.BindPFlag("storage.orchestrator.redis.db", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-db")) + viper.BindPFlag("storage.orchestrator.redis.enableTLS", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-enableTLS")) + viper.BindPFlag("storage.orchestrator.badger.path", rootCmd.PersistentFlags().Lookup("storage-orchestrator-badger-path")) + viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) viper.BindPFlag("storage.staging.postgres.host", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-host")) viper.BindPFlag("storage.staging.postgres.port", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-port")) viper.BindPFlag("storage.staging.postgres.username", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-username")) @@ -246,6 +355,26 @@ func init() { viper.BindPFlag("storage.staging.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxIdleConns")) viper.BindPFlag("storage.staging.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxConnLifetime")) viper.BindPFlag("storage.staging.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-connectTimeout")) + viper.BindPFlag("storage.staging.badger.path", rootCmd.PersistentFlags().Lookup("storage-staging-badger-path")) + viper.BindPFlag("storage.staging.type", rootCmd.PersistentFlags().Lookup("storage-staging-type")) + viper.BindPFlag("storage.main.kafka.brokers", rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) + viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) + viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) + viper.BindPFlag("storage.main.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) + viper.BindPFlag("storage.main.type", rootCmd.PersistentFlags().Lookup("storage-main-type")) + viper.BindPFlag("storage.main.s3.bucket", rootCmd.PersistentFlags().Lookup("storage-main-s3-bucket")) + viper.BindPFlag("storage.main.s3.region", rootCmd.PersistentFlags().Lookup("storage-main-s3-region")) + viper.BindPFlag("storage.main.s3.prefix", rootCmd.PersistentFlags().Lookup("storage-main-s3-prefix")) + viper.BindPFlag("storage.main.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("storage-main-s3-accessKeyId")) + viper.BindPFlag("storage.main.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("storage-main-s3-secretAccessKey")) + viper.BindPFlag("storage.main.s3.endpoint", rootCmd.PersistentFlags().Lookup("storage-main-s3-endpoint")) + viper.BindPFlag("storage.main.s3.format", rootCmd.PersistentFlags().Lookup("storage-main-s3-format")) + viper.BindPFlag("storage.main.s3.bufferSizeMB", 
rootCmd.PersistentFlags().Lookup("storage-main-s3-bufferSizeMB")) + viper.BindPFlag("storage.main.s3.bufferTimeoutSeconds", rootCmd.PersistentFlags().Lookup("storage-main-s3-bufferTimeoutSeconds")) + viper.BindPFlag("storage.main.s3.maxBlocksPerFile", rootCmd.PersistentFlags().Lookup("storage-main-s3-maxBlocksPerFile")) + viper.BindPFlag("storage.main.s3.parquet.compression", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-compression")) + viper.BindPFlag("storage.main.s3.parquet.rowGroupSize", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-rowGroupSize")) + viper.BindPFlag("storage.main.s3.parquet.pageSize", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-pageSize")) viper.BindPFlag("api.host", rootCmd.PersistentFlags().Lookup("api-host")) viper.BindPFlag("api.basicAuth.username", rootCmd.PersistentFlags().Lookup("api-basicAuth-username")) viper.BindPFlag("api.basicAuth.password", rootCmd.PersistentFlags().Lookup("api-basicAuth-password")) @@ -259,6 +388,9 @@ func init() { viper.BindPFlag("publisher.enabled", rootCmd.PersistentFlags().Lookup("publisher-enabled")) viper.BindPFlag("publisher.mode", rootCmd.PersistentFlags().Lookup("publisher-mode")) viper.BindPFlag("publisher.brokers", rootCmd.PersistentFlags().Lookup("publisher-brokers")) + viper.BindPFlag("publisher.username", rootCmd.PersistentFlags().Lookup("publisher-username")) + viper.BindPFlag("publisher.password", rootCmd.PersistentFlags().Lookup("publisher-password")) + viper.BindPFlag("publisher.enableTLS", rootCmd.PersistentFlags().Lookup("publisher-enable-tls")) viper.BindPFlag("publisher.blocks.enabled", rootCmd.PersistentFlags().Lookup("publisher-blocks-enabled")) viper.BindPFlag("publisher.blocks.topicName", rootCmd.PersistentFlags().Lookup("publisher-blocks-topicName")) viper.BindPFlag("publisher.transactions.enabled", rootCmd.PersistentFlags().Lookup("publisher-transactions-enabled")) @@ -274,6 +406,48 @@ func init() { viper.BindPFlag("workMode.checkIntervalMinutes", rootCmd.PersistentFlags().Lookup("workMode-checkIntervalMinutes")) viper.BindPFlag("workMode.liveModeThreshold", rootCmd.PersistentFlags().Lookup("workMode-liveModeThreshold")) viper.BindPFlag("validation.mode", rootCmd.PersistentFlags().Lookup("validation-mode")) + // Migrator viper bindings + viper.BindPFlag("migrator.destination.type", rootCmd.PersistentFlags().Lookup("migrator-destination-type")) + viper.BindPFlag("migrator.destination.clickhouse.host", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-host")) + viper.BindPFlag("migrator.destination.clickhouse.port", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-port")) + viper.BindPFlag("migrator.destination.clickhouse.username", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-username")) + viper.BindPFlag("migrator.destination.clickhouse.password", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-password")) + viper.BindPFlag("migrator.destination.clickhouse.database", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-database")) + viper.BindPFlag("migrator.destination.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-disableTLS")) + viper.BindPFlag("migrator.destination.clickhouse.asyncInsert", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-asyncInsert")) + viper.BindPFlag("migrator.destination.clickhouse.maxRowsPerInsert", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxRowsPerInsert")) + 
viper.BindPFlag("migrator.destination.clickhouse.maxOpenConns", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxOpenConns")) + viper.BindPFlag("migrator.destination.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxIdleConns")) + viper.BindPFlag("migrator.destination.postgres.host", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-host")) + viper.BindPFlag("migrator.destination.postgres.port", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-port")) + viper.BindPFlag("migrator.destination.postgres.username", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-username")) + viper.BindPFlag("migrator.destination.postgres.password", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-password")) + viper.BindPFlag("migrator.destination.postgres.database", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-database")) + viper.BindPFlag("migrator.destination.postgres.sslMode", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-sslMode")) + viper.BindPFlag("migrator.destination.postgres.maxOpenConns", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxOpenConns")) + viper.BindPFlag("migrator.destination.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxIdleConns")) + viper.BindPFlag("migrator.destination.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxConnLifetime")) + viper.BindPFlag("migrator.destination.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-connectTimeout")) + viper.BindPFlag("migrator.destination.kafka.brokers", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-brokers")) + viper.BindPFlag("migrator.destination.kafka.username", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-username")) + viper.BindPFlag("migrator.destination.kafka.password", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-password")) + viper.BindPFlag("migrator.destination.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-enableTLS")) + viper.BindPFlag("migrator.destination.badger.path", rootCmd.PersistentFlags().Lookup("migrator-destination-badger-path")) + viper.BindPFlag("migrator.destination.s3.bucket", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bucket")) + viper.BindPFlag("migrator.destination.s3.region", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-region")) + viper.BindPFlag("migrator.destination.s3.prefix", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-prefix")) + viper.BindPFlag("migrator.destination.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-accessKeyId")) + viper.BindPFlag("migrator.destination.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-secretAccessKey")) + viper.BindPFlag("migrator.destination.s3.endpoint", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-endpoint")) + viper.BindPFlag("migrator.destination.s3.format", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-format")) + viper.BindPFlag("migrator.destination.s3.bufferSizeMB", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bufferSizeMB")) + viper.BindPFlag("migrator.destination.s3.bufferTimeoutSeconds", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bufferTimeoutSeconds")) + viper.BindPFlag("migrator.destination.s3.maxBlocksPerFile", 
rootCmd.PersistentFlags().Lookup("migrator-destination-s3-maxBlocksPerFile")) + viper.BindPFlag("migrator.startBlock", rootCmd.PersistentFlags().Lookup("migrator-startBlock")) + viper.BindPFlag("migrator.endBlock", rootCmd.PersistentFlags().Lookup("migrator-endBlock")) + viper.BindPFlag("migrator.batchSize", rootCmd.PersistentFlags().Lookup("migrator-batchSize")) + viper.BindPFlag("migrator.workerCount", rootCmd.PersistentFlags().Lookup("migrator-workerCount")) + rootCmd.AddCommand(orchestratorCmd) rootCmd.AddCommand(apiCmd) rootCmd.AddCommand(validateAndFixCmd) diff --git a/configs/config.go b/configs/config.go index ec3c042..395d2f1 100644 --- a/configs/config.go +++ b/configs/config.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "strings" + "time" "github.com/rs/zerolog/log" "github.com/spf13/viper" @@ -16,13 +17,14 @@ type LogConfig struct { } type PollerConfig struct { - Enabled bool `mapstructure:"enabled"` - Interval int `mapstructure:"interval"` - BlocksPerPoll int `mapstructure:"blocksPerPoll"` - FromBlock int `mapstructure:"fromBlock"` - ForceFromBlock bool `mapstructure:"forceFromBlock"` - UntilBlock int `mapstructure:"untilBlock"` - ParallelPollers int `mapstructure:"parallelPollers"` + Enabled bool `mapstructure:"enabled"` + Interval int `mapstructure:"interval"` + BlocksPerPoll int `mapstructure:"blocksPerPoll"` + FromBlock int `mapstructure:"fromBlock"` + ForceFromBlock bool `mapstructure:"forceFromBlock"` + UntilBlock int `mapstructure:"untilBlock"` + ParallelPollers int `mapstructure:"parallelPollers"` + S3 *S3SourceConfig `mapstructure:"s3"` } type CommitterConfig struct { @@ -30,6 +32,7 @@ type CommitterConfig struct { Interval int `mapstructure:"interval"` BlocksPerCommit int `mapstructure:"blocksPerCommit"` FromBlock int `mapstructure:"fromBlock"` + UntilBlock int `mapstructure:"untilBlock"` } type ReorgHandlerConfig struct { @@ -47,21 +50,62 @@ type FailureRecovererConfig struct { } type StorageConfig struct { - Staging StorageConnectionConfig `mapstructure:"staging"` - Main StorageConnectionConfig `mapstructure:"main"` - Orchestrator StorageConnectionConfig `mapstructure:"orchestrator"` + Orchestrator StorageOrchestratorConfig `mapstructure:"orchestrator"` + Staging StorageStagingConfig `mapstructure:"staging"` + Main StorageMainConfig `mapstructure:"main"` } -type StorageType string -const ( - StorageTypeMain StorageType = "main" - StorageTypeStaging StorageType = "staging" - StorageTypeOrchestrator StorageType = "orchestrator" -) +type StorageOrchestratorConfig struct { + Type string `mapstructure:"type"` + Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` + Postgres *PostgresConfig `mapstructure:"postgres"` + Redis *RedisConfig `mapstructure:"redis"` + Badger *BadgerConfig `mapstructure:"badger"` +} -type StorageConnectionConfig struct { +type StorageStagingConfig struct { + Type string `mapstructure:"type"` Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` Postgres *PostgresConfig `mapstructure:"postgres"` + Badger *BadgerConfig `mapstructure:"badger"` +} + +type StorageMainConfig struct { + Type string `mapstructure:"type"` + Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` + Postgres *PostgresConfig `mapstructure:"postgres"` + Kafka *KafkaConfig `mapstructure:"kafka"` + Badger *BadgerConfig `mapstructure:"badger"` + S3 *S3StorageConfig `mapstructure:"s3"` +} + +type BadgerConfig struct { + Path string `mapstructure:"path"` +} + +type S3Config struct { + Bucket string `mapstructure:"bucket"` + Region string `mapstructure:"region"` + Prefix string 
`mapstructure:"prefix"` + AccessKeyID string `mapstructure:"accessKeyId"` + SecretAccessKey string `mapstructure:"secretAccessKey"` + Endpoint string `mapstructure:"endpoint"` +} + +type S3StorageConfig struct { + S3Config `mapstructure:",squash"` + Format string `mapstructure:"format"` + Parquet *ParquetConfig `mapstructure:"parquet"` + // Buffering configuration + BufferSize int64 `mapstructure:"bufferSizeMB"` // Target buffer size in MB before flush (default 512 MB) + BufferTimeout int `mapstructure:"bufferTimeoutSeconds"` // Max time in seconds before flush (default 300 = 5 min) + MaxBlocksPerFile int `mapstructure:"maxBlocksPerFile"` // Max blocks per parquet file (0 = no limit, only size/timeout triggers) +} + +type ParquetConfig struct { + Compression string `mapstructure:"compression"` + RowGroupSize int64 `mapstructure:"rowGroupSize"` + PageSize int64 `mapstructure:"pageSize"` } type TableConfig struct { @@ -86,6 +130,7 @@ type ClickhouseConfig struct { EnableParallelViewProcessing bool `mapstructure:"enableParallelViewProcessing"` MaxQueryTime int `mapstructure:"maxQueryTime"` MaxMemoryUsage int `mapstructure:"maxMemoryUsage"` + EnableCompression bool `mapstructure:"enableCompression"` } type PostgresConfig struct { @@ -101,6 +146,21 @@ type PostgresConfig struct { ConnectTimeout int `mapstructure:"connectTimeout"` } +type RedisConfig struct { + Host string `mapstructure:"host"` + Port int `mapstructure:"port"` + Password string `mapstructure:"password"` + DB int `mapstructure:"db"` + EnableTLS bool `mapstructure:"enableTLS"` +} + +type KafkaConfig struct { + Brokers string `mapstructure:"brokers"` + Username string `mapstructure:"username"` + Password string `mapstructure:"password"` + EnableTLS bool `mapstructure:"enableTLS"` +} + type RPCBatchRequestConfig struct { BlocksPerRequest int `mapstructure:"blocksPerRequest"` BatchDelay int `mapstructure:"batchDelay"` @@ -177,12 +237,23 @@ type PublisherConfig struct { Brokers string `mapstructure:"brokers"` Username string `mapstructure:"username"` Password string `mapstructure:"password"` + EnableTLS bool `mapstructure:"enableTLS"` Blocks BlockPublisherConfig `mapstructure:"blocks"` Transactions TransactionPublisherConfig `mapstructure:"transactions"` Traces TracePublisherConfig `mapstructure:"traces"` Events EventPublisherConfig `mapstructure:"events"` } +type S3SourceConfig struct { + S3Config `mapstructure:",squash"` + CacheDir string `mapstructure:"cacheDir"` + MetadataTTL time.Duration `mapstructure:"metadataTTL"` + FileCacheTTL time.Duration `mapstructure:"fileCacheTTL"` + MaxCacheSize int64 `mapstructure:"maxCacheSize"` + CleanupInterval time.Duration `mapstructure:"cleanupInterval"` + MaxConcurrentDownloads int `mapstructure:"maxConcurrentDownloads"` +} + type WorkModeConfig struct { CheckIntervalMinutes int `mapstructure:"checkIntervalMinutes"` LiveModeThreshold int64 `mapstructure:"liveModeThreshold"` @@ -192,6 +263,14 @@ type ValidationConfig struct { Mode string `mapstructure:"mode"` // "disabled", "minimal", "strict" } +type MigratorConfig struct { + Destination StorageMainConfig `mapstructure:"destination"` + StartBlock uint `mapstructure:"startBlock"` + EndBlock uint `mapstructure:"endBlock"` + BatchSize uint `mapstructure:"batchSize"` + WorkerCount uint `mapstructure:"workerCount"` +} + type Config struct { RPC RPCConfig `mapstructure:"rpc"` Log LogConfig `mapstructure:"log"` @@ -204,6 +283,7 @@ type Config struct { Publisher PublisherConfig `mapstructure:"publisher"` WorkMode WorkModeConfig 
`mapstructure:"workMode"` Validation ValidationConfig `mapstructure:"validation"` + Migrator MigratorConfig `mapstructure:"migrator"` } var Cfg Config diff --git a/go.mod b/go.mod index f5e6788..68052a9 100644 --- a/go.mod +++ b/go.mod @@ -4,14 +4,20 @@ go 1.23.0 require ( github.com/ClickHouse/clickhouse-go/v2 v2.36.0 + github.com/aws/aws-sdk-go-v2 v1.38.0 + github.com/aws/aws-sdk-go-v2/config v1.31.0 + github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0 + github.com/dgraph-io/badger/v4 v4.8.0 github.com/ethereum/go-ethereum v1.15.11 github.com/gin-gonic/gin v1.10.0 github.com/gorilla/schema v1.4.1 github.com/holiman/uint256 v1.3.2 github.com/lib/pq v1.10.9 + github.com/parquet-go/parquet-go v0.25.1 github.com/prometheus/client_golang v1.20.4 + github.com/redis/go-redis/v9 v9.12.1 github.com/rs/zerolog v1.33.0 - github.com/spf13/cobra v1.8.1 + github.com/spf13/cobra v1.9.1 github.com/spf13/viper v1.18.0 github.com/stretchr/testify v1.10.0 github.com/swaggo/files v1.0.1 @@ -25,6 +31,21 @@ require ( github.com/KyleBanks/depth v1.2.1 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/andybalholm/brotli v1.1.1 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.18.4 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.3 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.28.0 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.0 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.37.0 // indirect + github.com/aws/smithy-go v1.22.5 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bits-and-blooms/bitset v1.20.0 // indirect github.com/bytedance/sonic v1.12.6 // indirect @@ -39,6 +60,9 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/deckarep/golang-set/v2 v2.6.0 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 // indirect + github.com/dgraph-io/ristretto/v2 v2.2.0 // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/ethereum/c-kzg-4844/v2 v2.1.0 // indirect github.com/ethereum/go-verkle v0.2.2 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect @@ -46,6 +70,8 @@ require ( github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-faster/city v1.0.1 // indirect github.com/go-faster/errors v0.7.1 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect @@ -57,6 +83,7 @@ require ( github.com/goccy/go-json v0.10.4 // indirect github.com/gofrs/flock v0.8.1 // indirect github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect + github.com/google/flatbuffers v25.2.10+incompatible // indirect 
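// Illustrative sketch, not part of the patch: the flag bindings earlier in this diff map
// the new CLI flags onto the MigratorConfig section defined above, so once viper has read
// its sources the values are reachable via config.Cfg.Migrator. A minimal, hedged fragment
// (package and import scaffolding omitted) showing that mapping:
if err := viper.Unmarshal(&config.Cfg); err != nil {
	log.Fatal().Err(err).Msg("failed to unmarshal config")
}
log.Info().
	Uint("startBlock", config.Cfg.Migrator.StartBlock).
	Uint("endBlock", config.Cfg.Migrator.EndBlock).
	Uint("batchSize", config.Cfg.Migrator.BatchSize).
	Uint("workerCount", config.Cfg.Migrator.WorkerCount).
	Msg("migrator settings loaded")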
github.com/google/uuid v1.6.0 // indirect github.com/gorilla/websocket v1.4.2 // indirect github.com/hashicorp/hcl v1.0.0 // indirect @@ -94,7 +121,7 @@ require ( github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.6.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect + github.com/spf13/pflag v1.0.6 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/supranational/blst v0.3.14 // indirect @@ -104,18 +131,20 @@ require ( github.com/twmb/franz-go/pkg/kmsg v1.9.0 // indirect github.com/ugorji/go/codec v1.2.12 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect - go.opentelemetry.io/otel v1.36.0 // indirect - go.opentelemetry.io/otel/trace v1.36.0 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel v1.37.0 // indirect + go.opentelemetry.io/otel/metric v1.37.0 // indirect + go.opentelemetry.io/otel/trace v1.37.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/arch v0.12.0 // indirect - golang.org/x/crypto v0.38.0 // indirect + golang.org/x/crypto v0.39.0 // indirect golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 // indirect - golang.org/x/net v0.40.0 // indirect - golang.org/x/sync v0.14.0 // indirect - golang.org/x/sys v0.33.0 // indirect - golang.org/x/text v0.25.0 // indirect - golang.org/x/tools v0.30.0 // indirect - google.golang.org/protobuf v1.36.1 // indirect + golang.org/x/net v0.41.0 // indirect + golang.org/x/sync v0.15.0 // indirect + golang.org/x/sys v0.34.0 // indirect + golang.org/x/text v0.26.0 // indirect + golang.org/x/tools v0.33.0 // indirect + google.golang.org/protobuf v1.36.6 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect rsc.io/tmplfunc v0.0.3 // indirect diff --git a/go.sum b/go.sum index 2d7c778..92d6bd5 100644 --- a/go.sum +++ b/go.sum @@ -12,10 +12,50 @@ github.com/VictoriaMetrics/fastcache v1.12.2 h1:N0y9ASrJ0F6h0QaC3o6uJb3NIZ9VKLjC github.com/VictoriaMetrics/fastcache v1.12.2/go.mod h1:AmC+Nzz1+3G2eCPapF6UcsnkThDcMsQicp4xDukwJYI= github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= +github.com/aws/aws-sdk-go-v2 v1.38.0 h1:UCRQ5mlqcFk9HJDIqENSLR3wiG1VTWlyUfLDEvY7RxU= +github.com/aws/aws-sdk-go-v2 v1.38.0/go.mod h1:9Q0OoGQoboYIAJyslFyF1f5K1Ryddop8gqMhWx/n4Wg= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0 h1:6GMWV6CNpA/6fbFHnoAjrv4+LGfyTqZz2LtCHnspgDg= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0/go.mod h1:/mXlTIVG9jbxkqDnr5UQNQxW1HRYxeGklkM9vAFeabg= +github.com/aws/aws-sdk-go-v2/config v1.31.0 h1:9yH0xiY5fUnVNLRWO0AtayqwU1ndriZdN78LlhruJR4= +github.com/aws/aws-sdk-go-v2/config v1.31.0/go.mod h1:VeV3K72nXnhbe4EuxxhzsDc/ByrCSlZwUnWH52Nde/I= +github.com/aws/aws-sdk-go-v2/credentials v1.18.4 h1:IPd0Algf1b+Qy9BcDp0sCUcIWdCQPSzDoMK3a8pcbUM= +github.com/aws/aws-sdk-go-v2/credentials v1.18.4/go.mod h1:nwg78FjH2qvsRM1EVZlX9WuGUJOL5od+0qvm0adEzHk= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.3 h1:GicIdnekoJsjq9wqnvyi2elW6CGMSYKhdozE7/Svh78= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.3/go.mod h1:R7BIi6WNC5mc1kfRM7XM/VHC3uRWkjc396sfabq4iOo= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.3 h1:o9RnO+YZ4X+kt5Z7Nvcishlz0nksIt2PIzDglLMP0vA= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.3/go.mod h1:+6aLJzOG1fvMOyzIySYjOFjcguGvVRL68R+uoRencN4= 
+github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.3 h1:joyyUFhiTQQmVK6ImzNU9TQSNRNeD9kOklqTzyk5v6s= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.3/go.mod h1:+vNIyZQP3b3B1tSLI0lxvrU9cfM7gpdRXMFfm67ZcPc= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.3 h1:ZV2XK2L3HBq9sCKQiQ/MdhZJppH/rH0vddEAamsHUIs= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.3/go.mod h1:b9F9tk2HdHpbf3xbN7rUZcfmJI26N6NcJu/8OsBFI/0= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 h1:6+lZi2JeGKtCraAj1rpoZfKqnQ9SptseRZioejfUOLM= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0/go.mod h1:eb3gfbVIxIoGgJsi9pGne19dhCBpK6opTYpQqAmdy44= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.3 h1:3ZKmesYBaFX33czDl6mbrcHb6jeheg6LqjJhQdefhsY= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.3/go.mod h1:7ryVb78GLCnjq7cw45N6oUb9REl7/vNUwjvIqC5UgdY= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.3 h1:ieRzyHXypu5ByllM7Sp4hC5f/1Fy5wqxqY0yB85hC7s= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.3/go.mod h1:O5ROz8jHiOAKAwx179v+7sHMhfobFVi6nZt8DEyiYoM= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.3 h1:SE/e52dq9a05RuxzLcjT+S5ZpQobj3ie3UTaSf2NnZc= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.3/go.mod h1:zkpvBTsR020VVr8TOrwK2TrUW9pOir28sH5ECHpnAfo= +github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0 h1:egoDf+Geuuntmw79Mz6mk9gGmELCPzg5PFEABOHB+6Y= +github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0/go.mod h1:t9MDi29H+HDbkolTSQtbI0HP9DemAWQzUjmWC7LGMnE= +github.com/aws/aws-sdk-go-v2/service/sso v1.28.0 h1:Mc/MKBf2m4VynyJkABoVEN+QzkfLqGj0aiJuEe7cMeM= +github.com/aws/aws-sdk-go-v2/service/sso v1.28.0/go.mod h1:iS5OmxEcN4QIPXARGhavH7S8kETNL11kym6jhoS7IUQ= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.0 h1:6csaS/aJmqZQbKhi1EyEMM7yBW653Wy/B9hnBofW+sw= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.0/go.mod h1:59qHWaY5B+Rs7HGTuVGaC32m0rdpQ68N8QCN3khYiqs= +github.com/aws/aws-sdk-go-v2/service/sts v1.37.0 h1:MG9VFW43M4A8BYeAfaJJZWrroinxeTi2r3+SnmLQfSA= +github.com/aws/aws-sdk-go-v2/service/sts v1.37.0/go.mod h1:JdeBDPgpJfuS6rU/hNglmOigKhyEZtBmbraLE4GK1J8= +github.com/aws/smithy-go v1.22.5 h1:P9ATCXPMb2mPjYBgueqJNCA5S9UfktsW0tTxi+a7eqw= +github.com/aws/smithy-go v1.22.5/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bits-and-blooms/bitset v1.20.0 h1:2F+rfL86jE2d/bmw7OhqUg2Sj/1rURkBn3MdfoPyRVU= github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= github.com/bytedance/sonic v1.12.6 h1:/isNmCUF2x3Sh8RAp/4mh4ZGkcFAX/hLrzrK3AvpRzk= github.com/bytedance/sonic v1.12.6/go.mod h1:B8Gt/XvtZ3Fqj+iSKMypzymZxw/FVwgIGKzMzT9r/rk= github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= @@ -44,9 +84,8 @@ 
github.com/consensys/bavard v0.1.27/go.mod h1:k/zVjHHC4B+PQy1Pg7fgvG3ALicQw540Cr github.com/consensys/gnark-crypto v0.16.0 h1:8Dl4eYmUWK9WmlP1Bj6je688gBRJCJbT8Mw4KoTAawo= github.com/consensys/gnark-crypto v0.16.0/go.mod h1:Ke3j06ndtPTVvo++PhGNgvm+lgpLvzbcE2MqljY7diU= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= -github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/crate-crypto/go-eth-kzg v1.3.0 h1:05GrhASN9kDAidaFJOda6A4BEvgvuXbazXg/0E3OOdI= github.com/crate-crypto/go-eth-kzg v1.3.0/go.mod h1:J9/u5sWfznSObptgfa92Jq8rTswn6ahQWEuiLHOjCUI= github.com/crate-crypto/go-ipa v0.0.0-20240724233137-53bbb0ceb27a h1:W8mUrRp6NOVl3J+MYp5kPMoUZPp7aOYHtaua31lwRHg= @@ -63,6 +102,16 @@ github.com/decred/dcrd/crypto/blake256 v1.0.0 h1:/8DMNYp9SGi5f0w7uCm6d6M4OU2rGFK github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs= +github.com/dgraph-io/badger/v4 v4.8.0 h1:JYph1ChBijCw8SLeybvPINizbDKWZ5n/GYbz2yhN/bs= +github.com/dgraph-io/badger/v4 v4.8.0/go.mod h1:U6on6e8k/RTbUWxqKR0MvugJuVmkxSNc79ap4917h4w= +github.com/dgraph-io/ristretto/v2 v2.2.0 h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM= +github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/ethereum/c-kzg-4844/v2 v2.1.0 h1:gQropX9YFBhl3g4HYhwE70zq3IHFRgbbNPw0Shwzf5w= github.com/ethereum/c-kzg-4844/v2 v2.1.0/go.mod h1:TC48kOKjJKPbN7C++qIgt0TJzZ70QznYR7Ob+WXl57E= github.com/ethereum/go-ethereum v1.15.11 h1:JK73WKeu0WC0O1eyX+mdQAVHUV+UR1a9VB/domDngBU= @@ -87,6 +136,11 @@ github.com/go-faster/city v1.0.1 h1:4WAxSZ3V2Ws4QRDrscLEDcibJY8uf41H6AhXDrNDcGw= github.com/go-faster/city v1.0.1/go.mod h1:jKcUJId49qdW3L1qKHH/3wPeUstCVpVSXTM6vO3VcTw= github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AYg= github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr 
v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= @@ -119,6 +173,8 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb h1:PBC98N2aIaM3XXiurYmW7fx4GZkL8feAMVq7nEjURHk= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= +github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= @@ -137,6 +193,8 @@ github.com/hashicorp/go-bexpr v0.1.10 h1:9kuI5PFotCboP3dkDYFr/wi0gg0QVbSNz5oFRpx github.com/hashicorp/go-bexpr v0.1.10/go.mod h1:oxlubA2vC/gFVfX1A6JGp7ls7uCDlfJn732ehYYg+g0= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/holiman/billy v0.0.0-20240216141850-2abb0c79d3c4 h1:X4egAf/gcS1zATw6wn4Ej8vjuVGxeHdan+bRb2ebyv4= github.com/holiman/billy v0.0.0-20240216141850-2abb0c79d3c4/go.mod h1:5GuXa7vkL8u9FkFuWdVvfR5ix8hRB7DbOAaYULamFpc= github.com/holiman/bloomfilter/v2 v2.0.3 h1:73e0e/V0tCydx14a0SCYS/EWCxgwLZ18CZcZKVu0fao= @@ -207,6 +265,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= +github.com/parquet-go/parquet-go v0.25.1 h1:l7jJwNM0xrk0cnIIptWMtnSnuxRkwq53S+Po3KG8Xgo= +github.com/parquet-go/parquet-go v0.25.1/go.mod h1:AXBuotO1XiBtcqJb/FKFyjBG4aqa3aQAAWF3ZPzCanY= github.com/paulmach/orb v0.11.1 h1:3koVegMC4X/WeiXYz9iswopaTwMem53NzTJuTF20JzU= github.com/paulmach/orb v0.11.1/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/EnuLaLU= github.com/paulmach/protoscan v0.2.1/go.mod h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY= @@ -237,10 +297,12 @@ github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/redis/go-redis/v9 v9.12.1 h1:k5iquqv27aBtnTm2tIkROUDp8JBXhXZIVu1InSgvovg= +github.com/redis/go-redis/v9 v9.12.1/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg 
v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= @@ -264,10 +326,10 @@ github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= -github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= -github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.18.0 h1:pN6W1ub/G4OfnM+NR9p7xP9R6TltLUzp5JG9yZD3Qg0= github.com/spf13/viper v1.18.0/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -324,10 +386,14 @@ github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5t github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g= -go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= -go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= -go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= -go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= +go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= +go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= +go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= +go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= golang.org/x/arch v0.12.0 
h1:UsYJhbzPYGsT0HbEdmYcqtCv8UNGvnaL561NnIUvaKg= @@ -337,15 +403,15 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= -golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= +golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= +golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 h1:aAcj0Da7eBAtrTp03QXWvm88pSyOt+UgdZw2BFZ+lEw= golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8/go.mod h1:CQ1k9gNrJ50XIzaKCRR2hssIjF07kZFEiieALBM/ARQ= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM= -golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w= +golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -354,15 +420,15 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= -golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= +golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= +golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= -golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= +golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -379,8 +445,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -389,8 +455,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= -golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= +golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= +golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -398,16 +464,16 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY= -golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY= +golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= +golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.36.1 
h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk= -google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/internal/common/block.go b/internal/common/block.go index 4c9e8dc..f0cd019 100644 --- a/internal/common/block.go +++ b/internal/common/block.go @@ -59,10 +59,10 @@ type BlockModel struct { } type BlockData struct { - Block Block - Transactions []Transaction - Logs []Log - Traces []Trace + Block Block `json:"block"` + Transactions []Transaction `json:"transactions"` + Logs []Log `json:"logs"` + Traces []Trace `json:"traces"` } type BlockHeader struct { @@ -99,3 +99,22 @@ func (b *Block) Serialize() BlockModel { BaseFeePerGas: b.BaseFeePerGas, } } + +func (b *BlockData) Serialize() BlockData { + data := BlockData{ + Block: b.Block, + Transactions: b.Transactions, + Logs: b.Logs, + Traces: b.Traces, + } + if data.Transactions == nil { + data.Transactions = []Transaction{} + } + if data.Logs == nil { + data.Logs = []Log{} + } + if data.Traces == nil { + data.Traces = []Trace{} + } + return data +} diff --git a/internal/handlers/logs_handlers.go b/internal/handlers/logs_handlers.go index 965aeae..63ac197 100644 --- a/internal/handlers/logs_handlers.go +++ b/internal/handlers/logs_handlers.go @@ -224,7 +224,8 @@ func decodeLogsIfNeeded(chainId string, logs []common.Log, eventABI *abi.Event, func getMainStorage() (storage.IMainStorage, error) { storageOnce.Do(func() { var err error - mainStorage, err = storage.NewConnector[storage.IMainStorage](&config.Cfg.Storage.Main) + // TODO: move this to a QueryConnector later to decouple read/write connector + mainStorage, err = storage.NewMainConnector(&config.Cfg.Storage.Main, nil) if err != nil { storageErr = err log.Error().Err(err).Msg("Error creating storage connector") diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go index e88a0cc..1e00602 100644 --- a/internal/orchestrator/committer.go +++ b/internal/orchestrator/committer.go @@ -26,10 +26,12 @@ type Committer struct { blocksPerCommit int storage storage.IStorage commitFromBlock *big.Int + commitUntilBlock *big.Int rpc rpc.IRPCClient lastCommittedBlock atomic.Uint64 lastPublishedBlock atomic.Uint64 publisher *publisher.Publisher + poller *Poller workMode WorkMode workModeMutex sync.RWMutex workModeChan chan WorkMode @@ -60,14 +62,26 @@ func NewCommitter(rpc rpc.IRPCClient, storage storage.IStorage, opts ...Committe blocksPerCommit = DEFAULT_BLOCKS_PER_COMMIT } + commitUntilBlock := config.Cfg.Committer.UntilBlock + if commitUntilBlock == 0 { + // default to match the poller.untilBlock + if config.Cfg.Poller.UntilBlock != 0 { + commitUntilBlock = config.Cfg.Poller.UntilBlock + } else { + commitUntilBlock = -1 + } + } + commitFromBlock := big.NewInt(int64(config.Cfg.Committer.FromBlock)) committer := &Committer{ triggerIntervalMs: triggerInterval, blocksPerCommit: blocksPerCommit, storage: storage, commitFromBlock: commitFromBlock, + commitUntilBlock: big.NewInt(int64(commitUntilBlock)), rpc: rpc, publisher: publisher.GetInstance(), 
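// Illustrative sketch, not part of the patch: BlockData.Serialize, added above in
// internal/common/block.go, replaces nil slices with empty ones so downstream JSON
// consumers see [] rather than null. A minimal, hedged fragment (error handling and
// imports omitted) demonstrating the effect:
bd := common.BlockData{Block: common.Block{Number: big.NewInt(1)}}
out, _ := json.Marshal(bd.Serialize())
// out now encodes "transactions", "logs" and "traces" as empty arrays, not null.
_ = out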
+ poller: NewBoundlessPoller(rpc, storage), workMode: "", } cfb := commitFromBlock.Uint64() @@ -97,11 +111,36 @@ func (c *Committer) Start(ctx context.Context) { } // Initialize publisher position - always use max(lastPublished, lastCommitted) to prevent double publishing - lastPublished, err := c.storage.StagingStorage.GetLastPublishedBlockNumber(chainID) + lastPublished, err := c.storage.OrchestratorStorage.GetLastPublishedBlockNumber(chainID) if err != nil { - log.Error().Err(err).Msg("Failed to get last published block number from storage") - // If we can't read, assume we need to start from the beginning - lastPublished = nil + // It's okay to fail silently here; it's only used for staging cleanup and will be + // corrected by the worker loop. + log.Error().Err(err).Msg("failed to get last published block number") + } else if lastPublished != nil && lastPublished.Sign() > 0 { + // Always ensure publisher starts from at least the committed value + if latestCommittedBlockNumber != nil && latestCommittedBlockNumber.Sign() > 0 { + if lastPublished.Cmp(latestCommittedBlockNumber) < 0 { + gap := new(big.Int).Sub(latestCommittedBlockNumber, lastPublished) + log.Warn(). + Str("last_published", lastPublished.String()). + Str("latest_committed", latestCommittedBlockNumber.String()). + Str("gap", gap.String()). + Msg("Publisher is behind committed position, seeking forward to committed value") + + c.lastPublishedBlock.Store(latestCommittedBlockNumber.Uint64()) + if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, latestCommittedBlockNumber); err != nil { + log.Error().Err(err).Msg("Failed to update last published block number after seeking forward") + // Fall back to the stored value on error + c.lastPublishedBlock.Store(lastPublished.Uint64()) + } + } else { + c.lastPublishedBlock.Store(lastPublished.Uint64()) + } + } else { + c.lastPublishedBlock.Store(lastPublished.Uint64()) + } + } else { + c.lastPublishedBlock.Store(c.lastCommittedBlock.Load()) } // Determine the correct publish position - always take the maximum to avoid going backwards @@ -130,7 +169,7 @@ func (c *Committer) Start(ctx context.Context) { // Only update storage if we're changing the position if lastPublished == nil || targetPublishBlock.Cmp(lastPublished) != 0 { - if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, targetPublishBlock); err != nil { + if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, targetPublishBlock); err != nil { log.Error().Err(err).Msg("Failed to update published block number in storage") // If we can't update storage, use what was there originally to avoid issues if lastPublished != nil { @@ -179,6 +218,7 @@ func (c *Committer) Start(ctx context.Context) { } c.runCommitLoop(ctx, interval) + log.Info().Msg("Committer shutting down") c.publisher.Close() } @@ -207,6 +247,11 @@ func (c *Committer) runCommitLoop(ctx context.Context, interval time.Duration) { log.Debug().Msg("Committer work mode not set, skipping commit") continue } + if c.commitUntilBlock.Sign() > 0 && c.lastCommittedBlock.Load() >= c.commitUntilBlock.Uint64() { + // Completing the commit loop if we've committed more than commit until block + log.Info().Msgf("Committer reached configured untilBlock %s, the last commit block is %d, stopping commits", c.commitUntilBlock.String(), c.lastCommittedBlock.Load()) + return + } blockDataToCommit, err := c.getSequentialBlockDataToCommit(ctx) if err != nil { log.Error().Err(err).Msg("Error getting block data to commit") @@ 
-260,11 +305,11 @@ func (c *Committer) cleanupProcessedStagingBlocks() { chainID := c.rpc.GetChainID() blockNumber := new(big.Int).SetUint64(limit) stagingDeleteStart := time.Now() - if err := c.storage.StagingStorage.DeleteOlderThan(chainID, blockNumber); err != nil { + if err := c.storage.StagingStorage.DeleteStagingDataOlderThan(chainID, blockNumber); err != nil { log.Error().Err(err).Msg("Failed to delete staging data") return } - log.Debug().Str("metric", "staging_delete_duration").Msgf("StagingStorage.DeleteOlderThan duration: %f", time.Since(stagingDeleteStart).Seconds()) + log.Debug().Str("metric", "staging_delete_duration").Msgf("StagingStorage.DeleteStagingDataOlderThan duration: %f", time.Since(stagingDeleteStart).Seconds()) metrics.StagingDeleteDuration.Observe(time.Since(stagingDeleteStart).Seconds()) } @@ -315,7 +360,7 @@ func (c *Committer) getBlockNumbersToCommit(ctx context.Context) ([]*big.Int, er func (c *Committer) getBlockNumbersToPublish(ctx context.Context) ([]*big.Int, error) { // Get the last published block from storage (which was already corrected in Start) - latestPublishedBlockNumber, err := c.storage.StagingStorage.GetLastPublishedBlockNumber(c.rpc.GetChainID()) + latestPublishedBlockNumber, err := c.storage.OrchestratorStorage.GetLastPublishedBlockNumber(c.rpc.GetChainID()) if err != nil { return nil, fmt.Errorf("failed to get last published block number: %v", err) } @@ -356,9 +401,16 @@ func (c *Committer) getBlockNumbersToPublish(ctx context.Context) ([]*big.Int, e func (c *Committer) getBlockToCommitUntil(ctx context.Context, latestCommittedBlockNumber *big.Int) (*big.Int, error) { untilBlock := new(big.Int).Add(latestCommittedBlockNumber, big.NewInt(int64(c.blocksPerCommit))) + + // If a commit until block is set, then set a limit on the commit until block + if c.commitUntilBlock.Sign() > 0 && untilBlock.Cmp(c.commitUntilBlock) > 0 { + return new(big.Int).Set(c.commitUntilBlock), nil + } + c.workModeMutex.RLock() currentMode := c.workMode c.workModeMutex.RUnlock() + if currentMode == WorkModeBackfill { return untilBlock, nil } else { @@ -395,8 +447,7 @@ func (c *Committer) fetchBlockData(ctx context.Context, blockNumbers []*big.Int) } return blocksData, nil } else { - poller := NewBoundlessPoller(c.rpc, c.storage) - blocksData, err := poller.PollWithoutSaving(ctx, blockNumbers) + blocksData, err := c.poller.PollWithoutSaving(ctx, blockNumbers) if err != nil { return nil, fmt.Errorf("poller error: %v", err) } @@ -500,7 +551,7 @@ func (c *Committer) publish(ctx context.Context) error { chainID := c.rpc.GetChainID() highest := blockData[len(blockData)-1].Block.Number - if err := c.storage.StagingStorage.SetLastPublishedBlockNumber(chainID, highest); err != nil { + if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, highest); err != nil { return err } c.lastPublishedBlock.Store(highest.Uint64()) @@ -562,13 +613,11 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big return nil } - poller := NewBoundlessPoller(c.rpc, c.storage) - missingBlockCount := new(big.Int).Sub(actualFirstBlock.Number, expectedStartBlockNumber).Int64() log.Debug().Msgf("Detected %d missing blocks between blocks %s and %s", missingBlockCount, expectedStartBlockNumber.String(), actualFirstBlock.Number.String()) - if missingBlockCount > poller.blocksPerPoll { - log.Debug().Msgf("Limiting polling missing blocks to %d blocks due to config", poller.blocksPerPoll) - missingBlockCount = poller.blocksPerPoll + if missingBlockCount > 
c.poller.blocksPerPoll { + log.Debug().Msgf("Limiting polling missing blocks to %d blocks due to config", c.poller.blocksPerPoll) + missingBlockCount = c.poller.blocksPerPoll } missingBlockNumbers := make([]*big.Int, missingBlockCount) for i := int64(0); i < missingBlockCount; i++ { @@ -577,7 +626,7 @@ func (c *Committer) handleGap(ctx context.Context, expectedStartBlockNumber *big } log.Debug().Msgf("Polling %d blocks while handling gap: %v", len(missingBlockNumbers), missingBlockNumbers) - poller.Poll(ctx, missingBlockNumbers) + c.poller.Poll(ctx, missingBlockNumbers) return fmt.Errorf("first block number (%s) in commit batch does not match expected (%s)", actualFirstBlock.Number.String(), expectedStartBlockNumber.String()) } @@ -594,11 +643,10 @@ func (c *Committer) handleMissingStagingData(ctx context.Context, blocksToCommit } log.Debug().Msgf("Detected missing blocks in staging data starting from %s.", blocksToCommit[0].String()) - poller := NewBoundlessPoller(c.rpc, c.storage) blocksToPoll := blocksToCommit - if len(blocksToCommit) > int(poller.blocksPerPoll) { - blocksToPoll = blocksToCommit[:int(poller.blocksPerPoll)] + if len(blocksToCommit) > int(c.poller.blocksPerPoll) { + blocksToPoll = blocksToCommit[:int(c.poller.blocksPerPoll)] } - poller.Poll(ctx, blocksToPoll) + c.poller.Poll(ctx, blocksToPoll) log.Debug().Msgf("Polled %d blocks due to committer detecting them as missing. Range: %s - %s", len(blocksToPoll), blocksToPoll[0].String(), blocksToPoll[len(blocksToPoll)-1].String()) } diff --git a/internal/orchestrator/committer_test.go b/internal/orchestrator/committer_test.go index 0c39ba4..160a748 100644 --- a/internal/orchestrator/committer_test.go +++ b/internal/orchestrator/committer_test.go @@ -336,7 +336,7 @@ func TestCommitDeletesAfterPublish(t *testing.T) { mockRPC.EXPECT().GetChainID().Return(chainID) mockMainStorage.EXPECT().InsertBlockData(blockData).Return(nil) - mockStagingStorage.EXPECT().DeleteOlderThan(chainID, big.NewInt(102)).RunAndReturn(func(*big.Int, *big.Int) error { + mockStagingStorage.EXPECT().DeleteStagingDataOlderThan(chainID, big.NewInt(102)).RunAndReturn(func(*big.Int, *big.Int) error { close(deleteDone) return nil }) @@ -347,7 +347,7 @@ func TestCommitDeletesAfterPublish(t *testing.T) { select { case <-deleteDone: case <-time.After(2 * time.Second): - t.Fatal("DeleteOlderThan was not called within timeout period") + t.Fatal("DeleteStagingDataOlderThan was not called within timeout period") } } @@ -380,7 +380,7 @@ func TestCommitParallelPublisherMode(t *testing.T) { mockStagingStorage.AssertNotCalled(t, "GetLastPublishedBlockNumber", mock.Anything) mockStagingStorage.AssertNotCalled(t, "SetLastPublishedBlockNumber", mock.Anything, mock.Anything) - mockStagingStorage.AssertNotCalled(t, "DeleteOlderThan", mock.Anything, mock.Anything) + mockStagingStorage.AssertNotCalled(t, "DeleteStagingDataOlderThan", mock.Anything, mock.Anything) } func TestCleanupProcessedStagingBlocks(t *testing.T) { @@ -400,11 +400,11 @@ func TestCleanupProcessedStagingBlocks(t *testing.T) { committer.lastPublishedBlock.Store(0) committer.cleanupProcessedStagingBlocks() - mockStagingStorage.AssertNotCalled(t, "DeleteOlderThan", mock.Anything, mock.Anything) + mockStagingStorage.AssertNotCalled(t, "DeleteStagingDataOlderThan", mock.Anything, mock.Anything) committer.lastPublishedBlock.Store(90) mockRPC.EXPECT().GetChainID().Return(chainID) - mockStagingStorage.EXPECT().DeleteOlderThan(chainID, big.NewInt(90)).Return(nil) + 
mockStagingStorage.EXPECT().DeleteStagingDataOlderThan(chainID, big.NewInt(90)).Return(nil) committer.cleanupProcessedStagingBlocks() } func TestHandleGap(t *testing.T) { @@ -426,6 +426,7 @@ func TestHandleGap(t *testing.T) { mockRPC.EXPECT().GetBlocksPerRequest().Return(rpc.BlocksPerRequestConfig{ Blocks: 5, }) + // GetChainID is not called in this flow since there are no block failures mockRPC.EXPECT().GetFullBlocks(context.Background(), []*big.Int{big.NewInt(100), big.NewInt(101), big.NewInt(102), big.NewInt(103), big.NewInt(104)}).Return([]rpc.GetFullBlockResult{ {BlockNumber: big.NewInt(100), Data: common.BlockData{Block: common.Block{Number: big.NewInt(100)}}}, {BlockNumber: big.NewInt(101), Data: common.BlockData{Block: common.Block{Number: big.NewInt(101)}}}, diff --git a/internal/orchestrator/failure_recoverer.go b/internal/orchestrator/failure_recoverer.go index da1ae91..8ca110f 100644 --- a/internal/orchestrator/failure_recoverer.go +++ b/internal/orchestrator/failure_recoverer.go @@ -55,7 +55,7 @@ func (fr *FailureRecoverer) Start(ctx context.Context) { log.Info().Msg("Failure recoverer shutting down") return case <-ticker.C: - blockFailures, err := fr.storage.OrchestratorStorage.GetBlockFailures(storage.QueryFilter{ + blockFailures, err := fr.storage.StagingStorage.GetBlockFailures(storage.QueryFilter{ ChainId: fr.rpc.GetChainID(), Limit: fr.failuresPerPoll, }) @@ -122,11 +122,11 @@ func (fr *FailureRecoverer) handleWorkerResults(blockFailures []common.BlockFail log.Error().Err(fmt.Errorf("error inserting block data in failure recoverer: %v", err)) return } - if err := fr.storage.OrchestratorStorage.StoreBlockFailures(newBlockFailures); err != nil { + if err := fr.storage.StagingStorage.StoreBlockFailures(newBlockFailures); err != nil { log.Error().Err(err).Msg("Error storing block failures") return } - if err := fr.storage.OrchestratorStorage.DeleteBlockFailures(failuresToDelete); err != nil { + if err := fr.storage.StagingStorage.DeleteBlockFailures(failuresToDelete); err != nil { log.Error().Err(err).Msg("Error deleting block failures") return } diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index 154dc89..ab54eb5 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -21,6 +21,7 @@ type Orchestrator struct { committerEnabled bool reorgHandlerEnabled bool cancel context.CancelFunc + wg sync.WaitGroup } func NewOrchestrator(rpc rpc.IRPCClient) (*Orchestrator, error) { @@ -43,8 +44,6 @@ func (o *Orchestrator) Start() { ctx, cancel := context.WithCancel(context.Background()) o.cancel = cancel - var wg sync.WaitGroup - sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT) @@ -58,67 +57,88 @@ func (o *Orchestrator) Start() { workModeMonitor := NewWorkModeMonitor(o.rpc, o.storage) if o.pollerEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() pollerWorkModeChan := make(chan WorkMode, 1) workModeMonitor.RegisterChannel(pollerWorkModeChan) defer workModeMonitor.UnregisterChannel(pollerWorkModeChan) - poller := NewPoller(o.rpc, o.storage, WithPollerWorkModeChan(pollerWorkModeChan)) + + poller := NewPoller(o.rpc, o.storage, + WithPollerWorkModeChan(pollerWorkModeChan), + WithPollerS3Source(config.Cfg.Poller.S3), + ) poller.Start(ctx) + + log.Info().Msg("Poller completed") + // If the poller is terminated, cancel the orchestrator + o.cancel() }() } if o.failureRecovererEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer 
wg.Done() + defer o.wg.Done() failureRecoverer := NewFailureRecoverer(o.rpc, o.storage) failureRecoverer.Start(ctx) + + log.Info().Msg("Failure recoverer completed") }() } if o.committerEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() committerWorkModeChan := make(chan WorkMode, 1) workModeMonitor.RegisterChannel(committerWorkModeChan) defer workModeMonitor.UnregisterChannel(committerWorkModeChan) validator := NewValidator(o.rpc, o.storage) committer := NewCommitter(o.rpc, o.storage, WithCommitterWorkModeChan(committerWorkModeChan), WithValidator(validator)) committer.Start(ctx) + + // If the committer is terminated, cancel the orchestrator + log.Info().Msg("Committer completed") + o.cancel() }() } if o.reorgHandlerEnabled { - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() reorgHandler := NewReorgHandler(o.rpc, o.storage) reorgHandler.Start(ctx) + + log.Info().Msg("Reorg handler completed") }() } - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() workModeMonitor.Start(ctx) + + log.Info().Msg("Work mode monitor completed") }() // The chain tracker is always running - wg.Add(1) + o.wg.Add(1) go func() { - defer wg.Done() + defer o.wg.Done() chainTracker := NewChainTracker(o.rpc) chainTracker.Start(ctx) + + log.Info().Msg("Chain tracker completed") }() - wg.Wait() -} + // Waiting for all goroutines to complete + o.wg.Wait() -func (o *Orchestrator) Shutdown() { - if o.cancel != nil { - o.cancel() + if err := o.storage.Close(); err != nil { + log.Error().Err(err).Msg("Error closing storage connections") } + + log.Info().Msg("Orchestrator shutdown complete") } diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go index a1cca21..527bf8d 100644 --- a/internal/orchestrator/poller.go +++ b/internal/orchestrator/poller.go @@ -12,6 +12,7 @@ import ( "github.com/thirdweb-dev/indexer/internal/common" "github.com/thirdweb-dev/indexer/internal/metrics" "github.com/thirdweb-dev/indexer/internal/rpc" + "github.com/thirdweb-dev/indexer/internal/source" "github.com/thirdweb-dev/indexer/internal/storage" "github.com/thirdweb-dev/indexer/internal/worker" ) @@ -21,6 +22,7 @@ const DEFAULT_TRIGGER_INTERVAL = 1000 type Poller struct { rpc rpc.IRPCClient + worker *worker.Worker blocksPerPoll int64 triggerIntervalMs int64 storage storage.IStorage @@ -47,15 +49,33 @@ func WithPollerWorkModeChan(ch chan WorkMode) PollerOption { } } +func WithPollerS3Source(cfg *config.S3SourceConfig) PollerOption { + return func(p *Poller) { + if cfg == nil || cfg.Region == "" || cfg.Bucket == "" { + return + } + + source, err := source.NewS3Source(cfg, p.rpc.GetChainID()) + if err != nil { + log.Fatal().Err(err).Msg("Failed to create S3 source") + } + + log.Info().Msg("Poller S3 source configuration detected, setting up S3 source for poller") + p.worker = worker.NewWorkerWithArchive(p.rpc, source) + } +} + func NewBoundlessPoller(rpc rpc.IRPCClient, storage storage.IStorage, opts ...PollerOption) *Poller { blocksPerPoll := config.Cfg.Poller.BlocksPerPoll if blocksPerPoll == 0 { blocksPerPoll = DEFAULT_BLOCKS_PER_POLL } + triggerInterval := config.Cfg.Poller.Interval if triggerInterval == 0 { triggerInterval = DEFAULT_TRIGGER_INTERVAL } + poller := &Poller{ rpc: rpc, triggerIntervalMs: int64(triggerInterval), @@ -68,6 +88,10 @@ func NewBoundlessPoller(rpc rpc.IRPCClient, storage storage.IStorage, opts ...Po opt(poller) } + if poller.worker == nil { + poller.worker = worker.NewWorker(poller.rpc) + } + return poller } @@ 
-158,8 +182,7 @@ func (p *Poller) Start(ctx context.Context) { lastPolledBlock := p.Poll(pollCtx, blockNumbers) if p.reachedPollLimit(lastPolledBlock) { - log.Debug().Msg("Reached poll limit, exiting poller") - cancel() + log.Info().Msgf("Reached poll limit at block %s, completing poller", lastPolledBlock.String()) return } } @@ -236,8 +259,7 @@ func (p *Poller) PollWithoutSaving(ctx context.Context, blockNumbers []*big.Int) endBlockNumberFloat, _ := endBlock.Float64() metrics.PollerLastTriggeredBlock.Set(endBlockNumberFloat) - worker := worker.NewWorker(p.rpc) - results := worker.Run(ctx, blockNumbers) + results := p.worker.Run(ctx, blockNumbers) blockData, failedResults := p.convertPollResultsToBlockData(results) return blockData, failedResults } @@ -353,7 +375,7 @@ func (p *Poller) handleBlockFailures(results []rpc.GetFullBlockResult) { }) } } - err := p.storage.OrchestratorStorage.StoreBlockFailures(blockFailures) + err := p.storage.StagingStorage.StoreBlockFailures(blockFailures) if err != nil { // TODO: exiting if this fails, but should handle this better log.Error().Err(err).Msg("Error saving block failures") diff --git a/internal/orchestrator/validator.go b/internal/orchestrator/validator.go index db03cbe..63a174f 100644 --- a/internal/orchestrator/validator.go +++ b/internal/orchestrator/validator.go @@ -98,8 +98,11 @@ func (v *Validator) ValidateBlock(blockData common.BlockData) (valid bool, err e return true, nil } - // TODO: remove this once we know how to validate all tx types for _, tx := range blockData.Transactions { + if tx.TransactionType == 0x7E { + // TODO: Need to properly validate op-stack deposit transaction + return true, nil + } if tx.TransactionType > 4 { // Currently supported types are 0-4 log.Warn().Msgf("Skipping transaction root validation for block %s due to unsupported transaction type %d", blockData.Block.Number, tx.TransactionType) return true, nil @@ -183,5 +186,6 @@ func (v *Validator) FindAndFixGaps(startBlock *big.Int, endBlock *big.Int) error log.Error().Err(err).Msgf("Failed to insert missing blocks: %v", polledBlocks) return err } + return nil } diff --git a/internal/publisher/publisher.go b/internal/publisher/publisher.go index 984115a..0f8a761 100644 --- a/internal/publisher/publisher.go +++ b/internal/publisher/publisher.go @@ -76,6 +76,9 @@ func (p *Publisher) initialize() error { User: config.Cfg.Publisher.Username, Pass: config.Cfg.Publisher.Password, }.AsMechanism())) + } + + if config.Cfg.Publisher.EnableTLS { tlsDialer := &tls.Dialer{NetDialer: &net.Dialer{Timeout: 10 * time.Second}} opts = append(opts, kgo.Dialer(tlsDialer.DialContext)) } diff --git a/internal/rpc/batcher.go b/internal/rpc/batcher.go index c34fd13..2589e0d 100644 --- a/internal/rpc/batcher.go +++ b/internal/rpc/batcher.go @@ -2,6 +2,7 @@ package rpc import ( "context" + "strings" "sync" "time" @@ -50,6 +51,121 @@ func RPCFetchInBatches[K any, T any](rpc *Client, ctx context.Context, keys []K, return results } +func RPCFetchInBatchesWithRetry[K any, T any](rpc *Client, ctx context.Context, keys []K, batchSize int, batchDelay int, method string, argsFunc func(K) []interface{}) []RPCFetchBatchResult[K, T] { + if len(keys) <= batchSize { + return RPCFetchSingleBatchWithRetry[K, T](rpc, ctx, keys, method, argsFunc) + } + chunks := common.SliceToChunks[K](keys, batchSize) + + log.Debug().Msgf("Fetching %s for %d blocks in %d chunks of max %d requests", method, len(keys), len(chunks), batchSize) + + var wg sync.WaitGroup + resultsCh := make(chan []RPCFetchBatchResult[K, T], 
len(chunks)) + + for _, chunk := range chunks { + wg.Add(1) + go func(chunk []K) { + defer wg.Done() + resultsCh <- RPCFetchSingleBatchWithRetry[K, T](rpc, ctx, chunk, method, argsFunc) + if batchDelay > 0 { + time.Sleep(time.Duration(batchDelay) * time.Millisecond) + } + }(chunk) + } + go func() { + wg.Wait() + close(resultsCh) + }() + + results := make([]RPCFetchBatchResult[K, T], 0, len(keys)) + for batchResults := range resultsCh { + results = append(results, batchResults...) + } + + return results +} + +func RPCFetchSingleBatchWithRetry[K any, T any](rpc *Client, ctx context.Context, keys []K, method string, argsFunc func(K) []interface{}) []RPCFetchBatchResult[K, T] { + currentBatchSize := len(keys) + minBatchSize := 1 + + // First try with the full batch + results := RPCFetchSingleBatch[K, T](rpc, ctx, keys, method, argsFunc) + if !hasBatchError(results) { + return results + } + + // If we got 413, start retrying with smaller batches + newBatchSize := len(keys) / 2 + if newBatchSize < minBatchSize { + newBatchSize = minBatchSize + } + log.Debug().Msgf("Got error for batch size %d, retrying with batch size %d", currentBatchSize, newBatchSize) + + // Start with half the size + currentBatchSize = newBatchSize + + // Keep retrying with smaller batch sizes + for currentBatchSize >= minBatchSize { + chunks := common.SliceToChunks[K](keys, currentBatchSize) + allResults := make([]RPCFetchBatchResult[K, T], 0, len(keys)) + hasError := false + + // Process chunks sequentially to maintain order + for _, chunk := range chunks { + chunkResults := RPCFetchSingleBatch[K, T](rpc, ctx, chunk, method, argsFunc) + + if hasBatchError(chunkResults) { + hasError = true + break + } + allResults = append(allResults, chunkResults...) + } + + if !hasError { + // Successfully processed all chunks, return results in original order + return allResults + } + + // Still getting error, reduce batch size further + newBatchSize := currentBatchSize / 2 + if newBatchSize < minBatchSize { + newBatchSize = minBatchSize + } + log.Debug().Msgf("Got error for batch size %d, retrying with batch size %d", currentBatchSize, newBatchSize) + currentBatchSize = newBatchSize + + // If we're already at minimum batch size and still failing, try one more time + if currentBatchSize == minBatchSize && hasError { + // Process items one by one as last resort + finalResults := make([]RPCFetchBatchResult[K, T], 0, len(keys)) + for _, key := range keys { + singleResult := RPCFetchSingleBatch[K, T](rpc, ctx, []K{key}, method, argsFunc) + finalResults = append(finalResults, singleResult...) 
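// Illustrative sketch, not part of the patch: on an HTTP 413 response the retry helper
// above halves the chunk size and replays the keys sequentially, falling back to one key
// per request as a last resort. With integer halving, a 200-key batch is retried at
// 100, 50, 25, 12, 6, 3 and finally 1, so only about log2(n) extra passes are needed.
// A hedged sketch of that schedule:
for size := 200 / 2; size >= 1; size /= 2 {
	fmt.Printf("retrying with batch size %d\n", size)
	if size == 1 {
		break // last resort: one key per request
	}
}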
+ } + return finalResults + } + } + + // Should not reach here, but return error results as fallback + log.Error().Msgf("Unable to process batch even with size 1, returning error results") + return results +} + +func hasBatchError[K any, T any](results []RPCFetchBatchResult[K, T]) bool { + for _, result := range results { + if result.Error != nil { + if httpErr, ok := result.Error.(gethRpc.HTTPError); ok && httpErr.StatusCode == 413 { + return true + } + if strings.Contains(result.Error.Error(), "413") { + return true + } + } + } + return false +} + func RPCFetchSingleBatch[K any, T any](rpc *Client, ctx context.Context, keys []K, method string, argsFunc func(K) []interface{}) []RPCFetchBatchResult[K, T] { batch := make([]gethRpc.BatchElem, len(keys)) results := make([]RPCFetchBatchResult[K, T], len(keys)) diff --git a/internal/rpc/rpc.go b/internal/rpc/rpc.go index d148418..67295df 100644 --- a/internal/rpc/rpc.go +++ b/internal/rpc/rpc.go @@ -238,20 +238,20 @@ func (rpc *Client) GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) [ go func() { defer wg.Done() - result := RPCFetchSingleBatch[*big.Int, common.RawBlock](rpc, ctx, blockNumbers, "eth_getBlockByNumber", GetBlockWithTransactionsParams) + result := RPCFetchSingleBatchWithRetry[*big.Int, common.RawBlock](rpc, ctx, blockNumbers, "eth_getBlockByNumber", GetBlockWithTransactionsParams) blocks = result }() if rpc.supportsBlockReceipts { go func() { defer wg.Done() - result := RPCFetchInBatches[*big.Int, common.RawReceipts](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Receipts, config.Cfg.RPC.BlockReceipts.BatchDelay, "eth_getBlockReceipts", GetBlockReceiptsParams) + result := RPCFetchInBatchesWithRetry[*big.Int, common.RawReceipts](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Receipts, config.Cfg.RPC.BlockReceipts.BatchDelay, "eth_getBlockReceipts", GetBlockReceiptsParams) receipts = result }() } else { go func() { defer wg.Done() - result := RPCFetchInBatches[*big.Int, common.RawLogs](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Logs, config.Cfg.RPC.Logs.BatchDelay, "eth_getLogs", GetLogsParams) + result := RPCFetchInBatchesWithRetry[*big.Int, common.RawLogs](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Logs, config.Cfg.RPC.Logs.BatchDelay, "eth_getLogs", GetLogsParams) logs = result }() } @@ -260,7 +260,7 @@ func (rpc *Client) GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) [ wg.Add(1) go func() { defer wg.Done() - result := RPCFetchInBatches[*big.Int, common.RawTraces](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Traces, config.Cfg.RPC.Traces.BatchDelay, "trace_block", TraceBlockParams) + result := RPCFetchInBatchesWithRetry[*big.Int, common.RawTraces](rpc, ctx, blockNumbers, rpc.blocksPerRequest.Traces, config.Cfg.RPC.Traces.BatchDelay, "trace_block", TraceBlockParams) traces = result }() } diff --git a/internal/source/s3.go b/internal/source/s3.go new file mode 100644 index 0000000..676a9ad --- /dev/null +++ b/internal/source/s3.go @@ -0,0 +1,1119 @@ +package source + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "math/big" + "os" + "path/filepath" + "sort" + "strings" + "sync" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + awsconfig "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/parquet-go/parquet-go" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" + "github.com/thirdweb-dev/indexer/internal/rpc" +) + +// FileMetadata represents cached 
information about S3 files +type FileMetadata struct { + Key string + MinBlock *big.Int + MaxBlock *big.Int + Size int64 + LastAccess time.Time +} + +// BlockIndex represents the index of blocks within a file +type BlockIndex struct { + BlockNumber uint64 + RowOffset int64 + RowSize int +} + +type S3Source struct { + client *s3.Client + config *config.S3SourceConfig + chainId *big.Int + cacheDir string + + // Configurable settings + metadataTTL time.Duration // How long to cache metadata + fileCacheTTL time.Duration // How long to keep files in cache + maxCacheSize int64 // Max cache size in bytes + cleanupInterval time.Duration // How often to run cleanup + maxConcurrentDownloads int // Max concurrent S3 downloads + + // Metadata cache + metaMu sync.RWMutex + fileMetadata map[string]*FileMetadata // S3 key -> metadata + minBlock *big.Int + maxBlock *big.Int + metaLoaded bool + metaLoadTime time.Time // When metadata was last loaded + + // Local file cache + cacheMu sync.RWMutex + cacheMap map[string]time.Time // Track cache file access times + blockIndex map[string][]BlockIndex // File -> block indices + downloadMu sync.Mutex // Prevent duplicate downloads + + // Download tracking + downloading map[string]*sync.WaitGroup // Files currently downloading + + // Active use tracking + activeUseMu sync.RWMutex + activeUse map[string]int // Files currently being read (reference count) +} + +// ParquetBlockData represents the block data structure in parquet files +type ParquetBlockData struct { + ChainId uint64 `parquet:"chain_id"` + BlockNumber uint64 `parquet:"block_number"` + BlockHash string `parquet:"block_hash"` + BlockTimestamp int64 `parquet:"block_timestamp"` + Block []byte `parquet:"block_json"` + Transactions []byte `parquet:"transactions_json"` + Logs []byte `parquet:"logs_json"` + Traces []byte `parquet:"traces_json"` +} + +func NewS3Source(cfg *config.S3SourceConfig, chainId *big.Int) (*S3Source, error) { + // Apply defaults + if cfg.MetadataTTL == 0 { + cfg.MetadataTTL = 10 * time.Minute + } + if cfg.FileCacheTTL == 0 { + cfg.FileCacheTTL = 15 * time.Minute // 15 minutes + } + if cfg.MaxCacheSize == 0 { + cfg.MaxCacheSize = 5 * 1024 * 1024 * 1024 // 5GB + } + if cfg.CleanupInterval == 0 { + cfg.CleanupInterval = 5 * time.Minute // 5 minutes + } + if cfg.MaxConcurrentDownloads == 0 { + cfg.MaxConcurrentDownloads = 3 + } + + awsCfg, err := awsconfig.LoadDefaultConfig(context.Background(), + awsconfig.WithRegion(cfg.Region), + ) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + // Override with explicit credentials if provided + if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { + awsCfg.Credentials = aws.CredentialsProviderFunc(func(ctx context.Context) (aws.Credentials, error) { + return aws.Credentials{ + AccessKeyID: cfg.AccessKeyID, + SecretAccessKey: cfg.SecretAccessKey, + }, nil + }) + } + + s3Client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { + if cfg.Endpoint != "" { + o.BaseEndpoint = aws.String(cfg.Endpoint) + } + }) + + // Create cache directory + cacheDir := cfg.CacheDir + if cacheDir == "" { + cacheDir = filepath.Join(os.TempDir(), "s3-archive-cache", fmt.Sprintf("chain_%d", chainId.Uint64())) + } + if err := os.MkdirAll(cacheDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create cache directory: %w", err) + } + + archive := &S3Source{ + client: s3Client, + config: cfg, + chainId: chainId, + cacheDir: cacheDir, + metadataTTL: cfg.MetadataTTL, + fileCacheTTL: cfg.FileCacheTTL, 
maxCacheSize: cfg.MaxCacheSize, + cleanupInterval: cfg.CleanupInterval, + maxConcurrentDownloads: cfg.MaxConcurrentDownloads, + fileMetadata: make(map[string]*FileMetadata), + cacheMap: make(map[string]time.Time), + blockIndex: make(map[string][]BlockIndex), + downloading: make(map[string]*sync.WaitGroup), + activeUse: make(map[string]int), + } + + // Start cache cleanup goroutine + go archive.cleanupCache() + + // Load metadata in background (optional) + if cfg.Bucket != "" { + go func() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + if err := archive.loadMetadata(ctx); err != nil { + log.Warn().Err(err).Msg("Failed to preload S3 metadata") + } + }() + } + + return archive, nil +} + +func (s *S3Source) GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { + if len(blockNumbers) == 0 { + return nil + } + + // Ensure metadata is loaded + if err := s.ensureMetadataLoaded(ctx); err != nil { + log.Error().Err(err).Msg("Failed to load metadata") + return s.makeErrorResults(blockNumbers, err) + } + + // Sort block numbers for efficient file access + sortedBlocks := make([]*big.Int, len(blockNumbers)) + copy(sortedBlocks, blockNumbers) + sort.Slice(sortedBlocks, func(i, j int) bool { + return sortedBlocks[i].Cmp(sortedBlocks[j]) < 0 + }) + + // Group blocks by files that contain them + fileGroups := s.groupBlocksByFiles(sortedBlocks) + + // Mark files as being actively used + s.activeUseMu.Lock() + for fileKey := range fileGroups { + s.activeUse[fileKey]++ + log.Trace(). + Str("file", fileKey). + Int("new_count", s.activeUse[fileKey]). + Msg("Incrementing file reference count") + } + s.activeUseMu.Unlock() + + // Ensure we release the hold on files when done + defer func() { + s.activeUseMu.Lock() + for fileKey := range fileGroups { + s.activeUse[fileKey]-- + log.Trace(). + Str("file", fileKey). + Int("new_count", s.activeUse[fileKey]). 
+ Msg("Decrementing file reference count") + if s.activeUse[fileKey] <= 0 { + delete(s.activeUse, fileKey) + } + } + s.activeUseMu.Unlock() + + // Update access times to keep files in cache + s.cacheMu.Lock() + now := time.Now() + for fileKey := range fileGroups { + s.cacheMap[fileKey] = now + } + s.cacheMu.Unlock() + }() + + // Download required files and wait for ALL to be ready + if err := s.ensureFilesAvailable(ctx, fileGroups); err != nil { + log.Error().Err(err).Msg("Failed to ensure files are available") + return s.makeErrorResults(blockNumbers, err) + } + + // Read blocks from local files - at this point all files should be available + results := make([]rpc.GetFullBlockResult, 0, len(blockNumbers)) + resultMap := make(map[uint64]rpc.GetFullBlockResult) + + for fileKey, blocks := range fileGroups { + localPath := s.getCacheFilePath(fileKey) + + // Double-check file still exists (defensive programming) + if !s.isFileCached(localPath) { + log.Error().Str("file", fileKey).Str("path", localPath).Msg("File disappeared after ensureFilesAvailable") + // Try to re-download the file synchronously as a last resort + if err := s.downloadFile(ctx, fileKey); err != nil { + log.Error().Err(err).Str("file", fileKey).Msg("Failed to re-download disappeared file") + for _, bn := range blocks { + resultMap[bn.Uint64()] = rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: fmt.Errorf("file disappeared and re-download failed: %w", err), + } + } + continue + } + } + + // Read blocks from local file efficiently + fileResults, err := s.readBlocksFromLocalFile(localPath, blocks) + if err != nil { + log.Error().Err(err).Str("file", fileKey).Msg("Failed to read blocks from local file") + // Even if one file fails, continue with others + for _, bn := range blocks { + resultMap[bn.Uint64()] = rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: fmt.Errorf("failed to read from file: %w", err), + } + } + continue + } + + for blockNum, result := range fileResults { + resultMap[blockNum] = result + } + } + + // Build ordered results + for _, bn := range blockNumbers { + if result, ok := resultMap[bn.Uint64()]; ok { + results = append(results, result) + } else { + results = append(results, rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: fmt.Errorf("block %s not found", bn.String()), + }) + } + } + + return results +} + +func (s *S3Source) GetSupportedBlockRange(ctx context.Context) (minBlockNumber *big.Int, maxBlockNumber *big.Int, err error) { + if err := s.ensureMetadataLoaded(ctx); err != nil { + return nil, nil, err + } + + s.metaMu.RLock() + defer s.metaMu.RUnlock() + + if s.minBlock == nil || s.maxBlock == nil { + return big.NewInt(0), big.NewInt(0), fmt.Errorf("no blocks found for chain %d", s.chainId.Uint64()) + } + + return new(big.Int).Set(s.minBlock), new(big.Int).Set(s.maxBlock), nil +} + +func (s *S3Source) Close() { + // Clean up cache directory + if s.cacheDir != "" { + os.RemoveAll(s.cacheDir) + } +} + +// Metadata management + +func (s *S3Source) loadMetadata(ctx context.Context) error { + s.metaMu.Lock() + defer s.metaMu.Unlock() + + // Check if metadata is still fresh + if s.metaLoaded && time.Since(s.metaLoadTime) < s.metadataTTL { + return nil + } + + prefix := fmt.Sprintf("chain_%d/", s.chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for paginator.HasMorePages() { + page, err 
:= paginator.NextPage(ctx) + if err != nil { + return fmt.Errorf("failed to list S3 objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil || obj.Size == nil { + continue + } + + startBlock, endBlock := s.extractBlockRangeFromKey(*obj.Key) + if startBlock == nil || endBlock == nil { + continue + } + + // Store metadata + s.fileMetadata[*obj.Key] = &FileMetadata{ + Key: *obj.Key, + MinBlock: startBlock, + MaxBlock: endBlock, + Size: *obj.Size, + } + + // Update global min/max + if s.minBlock == nil || startBlock.Cmp(s.minBlock) < 0 { + s.minBlock = new(big.Int).Set(startBlock) + } + if s.maxBlock == nil || endBlock.Cmp(s.maxBlock) > 0 { + s.maxBlock = new(big.Int).Set(endBlock) + } + } + } + + s.metaLoaded = true + s.metaLoadTime = time.Now() + log.Info(). + Int("files", len(s.fileMetadata)). + Str("min_block", s.minBlock.String()). + Str("max_block", s.maxBlock.String()). + Dur("ttl", s.metadataTTL). + Msg("Loaded S3 metadata cache") + + return nil +} + +func (s *S3Source) ensureMetadataLoaded(ctx context.Context) error { + s.metaMu.RLock() + // Check if metadata is loaded and still fresh + if s.metaLoaded && time.Since(s.metaLoadTime) < s.metadataTTL { + s.metaMu.RUnlock() + return nil + } + s.metaMu.RUnlock() + + return s.loadMetadata(ctx) +} + +// File grouping and downloading + +func (s *S3Source) ensureFilesAvailable(ctx context.Context, fileGroups map[string][]*big.Int) error { + var wg sync.WaitGroup + errChan := make(chan error, len(fileGroups)) + + // Limit concurrent downloads + sem := make(chan struct{}, s.maxConcurrentDownloads) + + for fileKey := range fileGroups { + wg.Add(1) + go func(key string) { + defer wg.Done() + + // First check if file is already being downloaded by another goroutine + s.downloadMu.Lock() + if downloadWg, downloading := s.downloading[key]; downloading { + s.downloadMu.Unlock() + // Wait for the existing download to complete + downloadWg.Wait() + + // Verify file exists after waiting + localPath := s.getCacheFilePath(key) + if !s.isFileCached(localPath) { + errChan <- fmt.Errorf("file %s not available after waiting for download", key) + } else { + // Ensure file is tracked in cache map + s.ensureFileInCacheMap(key) + // Update access time for this file since we'll be using it + s.cacheMu.Lock() + s.cacheMap[key] = time.Now() + s.cacheMu.Unlock() + } + return + } + s.downloadMu.Unlock() + + // Check if file is already cached + localPath := s.getCacheFilePath(key) + if s.isFileCached(localPath) { + // Ensure file is in cache map (in case it was on disk but not tracked) + s.ensureFileInCacheMap(key) + // Update access time + s.cacheMu.Lock() + s.cacheMap[key] = time.Now() + s.cacheMu.Unlock() + return + } + + // Need to download the file + sem <- struct{}{} + defer func() { <-sem }() + + if err := s.downloadFile(ctx, key); err != nil { + errChan <- fmt.Errorf("failed to download %s: %w", key, err) + return + } + + // Verify file exists after download + if !s.isFileCached(localPath) { + errChan <- fmt.Errorf("file %s not cached after download", key) + } + }(fileKey) + } + + // Wait for all files to be available + wg.Wait() + close(errChan) + + // Collect any errors + var errors []string + for err := range errChan { + if err != nil { + errors = append(errors, err.Error()) + } + } + + if len(errors) > 0 { + return fmt.Errorf("failed to ensure files available: %s", strings.Join(errors, "; ")) + } + + return nil +} + +func (s *S3Source) groupBlocksByFiles(blockNumbers []*big.Int) map[string][]*big.Int { + s.metaMu.RLock() + defer 
s.metaMu.RUnlock() + + fileGroups := make(map[string][]*big.Int) + + for _, blockNum := range blockNumbers { + // Find files that contain this block + for _, meta := range s.fileMetadata { + if blockNum.Cmp(meta.MinBlock) >= 0 && blockNum.Cmp(meta.MaxBlock) <= 0 { + fileGroups[meta.Key] = append(fileGroups[meta.Key], blockNum) + break // Each block should only be in one file + } + } + } + + return fileGroups +} + +func (s *S3Source) downloadFile(ctx context.Context, fileKey string) error { + // Prevent duplicate downloads + s.downloadMu.Lock() + if wg, downloading := s.downloading[fileKey]; downloading { + s.downloadMu.Unlock() + wg.Wait() + return nil + } + + wg := &sync.WaitGroup{} + wg.Add(1) + s.downloading[fileKey] = wg + s.downloadMu.Unlock() + + defer func() { + wg.Done() + s.downloadMu.Lock() + delete(s.downloading, fileKey) + s.downloadMu.Unlock() + }() + + localPath := s.getCacheFilePath(fileKey) + + // Create temp file for atomic write + tempPath := localPath + ".tmp" + + // Download from S3 + result, err := s.client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(s.config.Bucket), + Key: aws.String(fileKey), + }) + if err != nil { + return fmt.Errorf("failed to download file: %w", err) + } + defer result.Body.Close() + + // Create directory if needed + dir := filepath.Dir(tempPath) + if err := os.MkdirAll(dir, 0755); err != nil { + return err + } + + // Write to temp file + file, err := os.Create(tempPath) + if err != nil { + return err + } + + _, err = io.Copy(file, result.Body) + file.Close() + + if err != nil { + os.Remove(tempPath) + return err + } + + // Atomic rename + if err := os.Rename(tempPath, localPath); err != nil { + os.Remove(tempPath) + return err + } + + // Build block index for the file + go s.buildBlockIndex(localPath, fileKey) + + // Update cache map + s.cacheMu.Lock() + s.cacheMap[fileKey] = time.Now() + s.cacheMu.Unlock() + + log.Info().Str("file", fileKey).Str("path", localPath).Msg("Downloaded file from S3") + + return nil +} + +// Optimized parquet reading + +func (s *S3Source) buildBlockIndex(filePath, fileKey string) error { + file, err := os.Open(filePath) + if err != nil { + return err + } + defer file.Close() + + stat, err := file.Stat() + if err != nil { + return err + } + + pFile, err := parquet.OpenFile(file, stat.Size()) + if err != nil { + return err + } + + // Read only the block_number column to build index + blockNumCol := -1 + for i, field := range pFile.Schema().Fields() { + if field.Name() == "block_number" { + blockNumCol = i + break + } + } + + if blockNumCol < 0 { + return fmt.Errorf("block_number column not found") + } + + var index []BlockIndex + for _, rg := range pFile.RowGroups() { + chunk := rg.ColumnChunks()[blockNumCol] + pages := chunk.Pages() + offset := int64(0) + + for { + page, err := pages.ReadPage() + if err != nil { + break + } + + values := page.Values() + // Type assert to the specific reader type + switch reader := values.(type) { + case parquet.Int64Reader: + // Handle int64 block numbers + blockNums := make([]int64, page.NumValues()) + n, _ := reader.ReadInt64s(blockNums) + + for i := 0; i < n; i++ { + if blockNums[i] >= 0 { + index = append(index, BlockIndex{ + BlockNumber: uint64(blockNums[i]), + RowOffset: offset + int64(i), + RowSize: 1, + }) + } + } + default: + // Try to read as generic values + values := make([]parquet.Value, page.NumValues()) + n, _ := reader.ReadValues(values) + + for i := 0; i < n; i++ { + if !values[i].IsNull() { + blockNum := values[i].Uint64() + index = append(index, 
BlockIndex{ + BlockNumber: blockNum, + RowOffset: offset + int64(i), + RowSize: 1, + }) + } + } + } + offset += int64(page.NumValues()) + } + } + + // Store index + s.cacheMu.Lock() + s.blockIndex[fileKey] = index + s.cacheMu.Unlock() + + return nil +} + +func (s *S3Source) readBlocksFromLocalFile(filePath string, blockNumbers []*big.Int) (map[uint64]rpc.GetFullBlockResult, error) { + // Update access time for this file + fileKey := s.getFileKeyFromPath(filePath) + if fileKey != "" { + s.cacheMu.Lock() + s.cacheMap[fileKey] = time.Now() + s.cacheMu.Unlock() + } + + file, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer file.Close() + + stat, err := file.Stat() + if err != nil { + return nil, err + } + + // Create block map for quick lookup + blockMap := make(map[uint64]bool) + for _, bn := range blockNumbers { + blockMap[bn.Uint64()] = true + } + + // Use optimized parquet reading + pFile, err := parquet.OpenFile(file, stat.Size()) + if err != nil { + return nil, err + } + + results := make(map[uint64]rpc.GetFullBlockResult) + + // Read row groups + for _, rg := range pFile.RowGroups() { + // Check row group statistics to see if it contains our blocks + if !s.rowGroupContainsBlocks(rg, blockMap) { + continue + } + + // Read rows from this row group using generic reader + rows := make([]parquet.Row, rg.NumRows()) + reader := parquet.NewRowGroupReader(rg) + + n, err := reader.ReadRows(rows) + if err != nil && err != io.EOF { + log.Warn().Err(err).Msg("Error reading row group") + continue + } + + // Convert rows to our struct + for i := 0; i < n; i++ { + row := rows[i] + if len(row) < 8 { + continue // Not enough columns + } + + // Extract block number first to check if we need this row + blockNum := row[1].Uint64() // block_number is second column + + // Skip if not in requested blocks + if !blockMap[blockNum] { + continue + } + + // Build ParquetBlockData from row + pd := ParquetBlockData{ + ChainId: row[0].Uint64(), + BlockNumber: blockNum, + BlockHash: row[2].String(), + BlockTimestamp: row[3].Int64(), + Block: row[4].ByteArray(), + Transactions: row[5].ByteArray(), + Logs: row[6].ByteArray(), + Traces: row[7].ByteArray(), + } + + // Parse block data + result, err := s.parseBlockData(pd) + if err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to parse block data") + continue + } + + results[pd.BlockNumber] = result + } + } + + return results, nil +} + +func (s *S3Source) rowGroupContainsBlocks(rg parquet.RowGroup, blockMap map[uint64]bool) bool { + // Get the block_number column chunk + for i, col := range rg.Schema().Fields() { + if col.Name() == "block_number" { + chunk := rg.ColumnChunks()[i] + ci, _ := chunk.ColumnIndex() + if ci != nil { + // Check min/max values + for j := 0; j < ci.NumPages(); j++ { + minVal := ci.MinValue(j) + maxVal := ci.MaxValue(j) + + if minVal.IsNull() || maxVal.IsNull() { + continue + } + + minBlock := minVal.Uint64() + maxBlock := maxVal.Uint64() + + // Check if any requested blocks fall in this range + for blockNum := range blockMap { + if blockNum >= minBlock && blockNum <= maxBlock { + return true + } + } + } + } + break + } + } + + // If no statistics, assume it might contain blocks + return true +} + +func (s *S3Source) parseBlockData(pd ParquetBlockData) (rpc.GetFullBlockResult, error) { + var block common.Block + if err := json.Unmarshal(pd.Block, &block); err != nil { + return rpc.GetFullBlockResult{}, err + } + + var transactions []common.Transaction + if len(pd.Transactions) > 0 { + if err := 
json.Unmarshal(pd.Transactions, &transactions); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal transactions") + } + } + + var logs []common.Log + if len(pd.Logs) > 0 { + if err := json.Unmarshal(pd.Logs, &logs); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal logs") + } + } + + var traces []common.Trace + if len(pd.Traces) > 0 { + if err := json.Unmarshal(pd.Traces, &traces); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal traces") + } + } + + return rpc.GetFullBlockResult{ + BlockNumber: new(big.Int).SetUint64(pd.BlockNumber), + Data: common.BlockData{ + Block: block, + Transactions: transactions, + Logs: logs, + Traces: traces, + }, + Error: nil, + }, nil +} + +// RefreshMetadata forces a refresh of the metadata cache +func (s *S3Source) RefreshMetadata(ctx context.Context) error { + s.metaMu.Lock() + s.metaLoaded = false + s.metaLoadTime = time.Time{} + s.metaMu.Unlock() + + return s.loadMetadata(ctx) +} + +// GetCacheStats returns statistics about the cache +func (s *S3Source) GetCacheStats() (fileCount int, totalSize int64, oldestAccess time.Time) { + s.cacheMu.RLock() + defer s.cacheMu.RUnlock() + + fileCount = len(s.cacheMap) + now := time.Now() + + for key, accessTime := range s.cacheMap { + path := s.getCacheFilePath(key) + if info, err := os.Stat(path); err == nil { + totalSize += info.Size() + } + if oldestAccess.IsZero() || accessTime.Before(oldestAccess) { + oldestAccess = accessTime + } + } + + // Also check metadata freshness + s.metaMu.RLock() + metaAge := now.Sub(s.metaLoadTime) + s.metaMu.RUnlock() + + log.Debug(). + Int("file_count", fileCount). + Int64("total_size_mb", totalSize/(1024*1024)). + Dur("oldest_file_age", now.Sub(oldestAccess)). + Dur("metadata_age", metaAge). 
+ Msg("Cache statistics") + + return fileCount, totalSize, oldestAccess +} + +// Helper functions + +func (s *S3Source) extractBlockRangeFromKey(key string) (*big.Int, *big.Int) { + parts := strings.Split(key, "/") + if len(parts) == 0 { + return nil, nil + } + + filename := parts[len(parts)-1] + if !strings.HasPrefix(filename, "blocks_") || !strings.HasSuffix(filename, ".parquet") { + return nil, nil + } + + rangeStr := strings.TrimPrefix(filename, "blocks_") + rangeStr = strings.TrimSuffix(rangeStr, ".parquet") + + rangeParts := strings.Split(rangeStr, "_") + if len(rangeParts) != 2 { + return nil, nil + } + + startBlock, ok1 := new(big.Int).SetString(rangeParts[0], 10) + endBlock, ok2 := new(big.Int).SetString(rangeParts[1], 10) + if !ok1 || !ok2 { + return nil, nil + } + + return startBlock, endBlock +} + +func (s *S3Source) getCacheFilePath(fileKey string) string { + // Create a safe filename from the S3 key + hash := sha256.Sum256([]byte(fileKey)) + filename := hex.EncodeToString(hash[:])[:16] + ".parquet" + return filepath.Join(s.cacheDir, filename) +} + +func (s *S3Source) getFileKeyFromPath(filePath string) string { + // Reverse lookup - find the key for a given cache path + s.cacheMu.RLock() + defer s.cacheMu.RUnlock() + + for key := range s.cacheMap { + if s.getCacheFilePath(key) == filePath { + return key + } + } + return "" +} + +func (s *S3Source) isFileCached(filePath string) bool { + // First check if file exists at all + info, err := os.Stat(filePath) + if err != nil { + return false + } + + // Check if file has content + if info.Size() == 0 { + return false + } + + // Check if a temp file exists (indicating incomplete download) + tempPath := filePath + ".tmp" + if _, err := os.Stat(tempPath); err == nil { + // Temp file exists, download is incomplete + return false + } + + // File exists, has content, and no temp file - it's cached + return true +} + +// ensureFileInCacheMap ensures a file that exists on disk is tracked in the cache map +func (s *S3Source) ensureFileInCacheMap(fileKey string) { + s.cacheMu.Lock() + defer s.cacheMu.Unlock() + + // If not in cache map, add it with current time + if _, exists := s.cacheMap[fileKey]; !exists { + localPath := s.getCacheFilePath(fileKey) + if info, err := os.Stat(localPath); err == nil { + // Use file modification time if it's recent, otherwise use current time + modTime := info.ModTime() + if time.Since(modTime) < s.fileCacheTTL { + s.cacheMap[fileKey] = modTime + } else { + s.cacheMap[fileKey] = time.Now() + } + log.Trace(). + Str("file", fileKey). + Time("access_time", s.cacheMap[fileKey]). 
+ Msg("Added existing file to cache map") + } + } +} + +func (s *S3Source) makeErrorResults(blockNumbers []*big.Int, err error) []rpc.GetFullBlockResult { + results := make([]rpc.GetFullBlockResult, len(blockNumbers)) + for i, bn := range blockNumbers { + results[i] = rpc.GetFullBlockResult{ + BlockNumber: bn, + Error: err, + } + } + return results +} + +func (s *S3Source) cleanupCache() { + ticker := time.NewTicker(s.cleanupInterval) + defer ticker.Stop() + + for range ticker.C { + s.cacheMu.Lock() + s.downloadMu.Lock() + s.activeUseMu.RLock() + + // Remove files not accessed within the TTL + cutoff := time.Now().Add(-s.fileCacheTTL) + protectedCount := 0 + expiredCount := 0 + + for fileKey, accessTime := range s.cacheMap { + // Skip files that are currently being downloaded + if _, downloading := s.downloading[fileKey]; downloading { + protectedCount++ + continue + } + + // Skip files that are actively being used + if count, active := s.activeUse[fileKey]; active && count > 0 { + protectedCount++ + // Only log at trace level to reduce noise + log.Trace(). + Str("file", fileKey). + Int("ref_count", count). + Msg("Skipping actively used file in cleanup") + continue + } + + if accessTime.Before(cutoff) { + expiredCount++ + cacheFile := s.getCacheFilePath(fileKey) + log.Debug(). + Str("file", fileKey). + Str("path", cacheFile). + Time("last_access", accessTime). + Time("cutoff", cutoff). + Msg("Removing expired file from cache") + os.Remove(cacheFile) + delete(s.cacheMap, fileKey) + delete(s.blockIndex, fileKey) + } + } + + s.activeUseMu.RUnlock() + s.downloadMu.Unlock() + s.cacheMu.Unlock() + + // Only log if something interesting happened (files were deleted) + if expiredCount > 0 { + log.Debug(). + Int("protected", protectedCount). + Int("expired", expiredCount). + Int("total_cached", len(s.cacheMap)). + Msg("Cache cleanup cycle completed - removed expired files") + } else if protectedCount > 0 { + // Use trace level for routine cleanup cycles with no deletions + log.Trace(). + Int("protected", protectedCount). + Int("total_cached", len(s.cacheMap)). + Msg("Cache cleanup cycle completed - no files expired") + } + + // Also check disk usage and remove oldest files if needed + s.enforceMaxCacheSize() + } +} + +func (s *S3Source) enforceMaxCacheSize() { + maxSize := s.maxCacheSize + + var totalSize int64 + var files []struct { + path string + key string + size int64 + access time.Time + } + + s.cacheMu.RLock() + for key, accessTime := range s.cacheMap { + path := s.getCacheFilePath(key) + if info, err := os.Stat(path); err == nil { + totalSize += info.Size() + files = append(files, struct { + path string + key string + size int64 + access time.Time + }{path, key, info.Size(), accessTime}) + } + } + s.cacheMu.RUnlock() + + if totalSize <= maxSize { + return + } + + log.Debug(). + Int64("total_size_mb", totalSize/(1024*1024)). + Int64("max_size_mb", maxSize/(1024*1024)). + Int("file_count", len(files)). 
+ Msg("Cache size exceeded, removing old files") + + // Sort by access time (oldest first) + sort.Slice(files, func(i, j int) bool { + return files[i].access.Before(files[j].access) + }) + + // Remove oldest files until under limit + s.cacheMu.Lock() + s.downloadMu.Lock() + s.activeUseMu.RLock() + defer s.activeUseMu.RUnlock() + defer s.downloadMu.Unlock() + defer s.cacheMu.Unlock() + + for _, f := range files { + if totalSize <= maxSize { + break + } + + // Skip files that are currently being downloaded + if _, downloading := s.downloading[f.key]; downloading { + continue + } + + // Skip files that are actively being used + if count, active := s.activeUse[f.key]; active && count > 0 { + continue + } + + os.Remove(f.path) + delete(s.cacheMap, f.key) + delete(s.blockIndex, f.key) + totalSize -= f.size + } +} diff --git a/internal/source/source.go b/internal/source/source.go new file mode 100644 index 0000000..2b9ef85 --- /dev/null +++ b/internal/source/source.go @@ -0,0 +1,14 @@ +package source + +import ( + "context" + "math/big" + + "github.com/thirdweb-dev/indexer/internal/rpc" +) + +type ISource interface { + GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult + GetSupportedBlockRange(ctx context.Context) (minBlockNumber *big.Int, maxBlockNumber *big.Int, err error) + Close() +} diff --git a/internal/storage/badger.go b/internal/storage/badger.go new file mode 100644 index 0000000..991e479 --- /dev/null +++ b/internal/storage/badger.go @@ -0,0 +1,526 @@ +package storage + +import ( + "bytes" + "encoding/gob" + "fmt" + "math/big" + "sort" + "strings" + "sync" + "time" + + "github.com/dgraph-io/badger/v4" + "github.com/dgraph-io/badger/v4/options" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" +) + +type BadgerConnector struct { + db *badger.DB + mu sync.RWMutex + gcTicker *time.Ticker + stopGC chan struct{} +} + +func NewBadgerConnector(cfg *config.BadgerConfig) (*BadgerConnector, error) { + opts := badger.DefaultOptions(cfg.Path) + + opts.ValueLogFileSize = 1024 * 1024 * 1024 // 1GB + opts.BaseTableSize = 128 * 1024 * 1024 // 128MB + opts.BaseLevelSize = 128 * 1024 * 1024 // 128MB + opts.LevelSizeMultiplier = 10 // Aggressive growth + opts.NumMemtables = 10 // ~1.28GB + opts.MemTableSize = opts.BaseTableSize // 128MB per memtable + opts.NumLevelZeroTables = 10 + opts.NumLevelZeroTablesStall = 30 + opts.SyncWrites = false // Faster but less durable + opts.DetectConflicts = false // No need for ACID in staging + opts.NumCompactors = 4 // More compactors for parallel compaction + opts.CompactL0OnClose = true // Compact L0 tables on close + opts.ValueLogMaxEntries = 1000000 // More entries per value log + opts.ValueThreshold = 1024 // Store values > 1024 bytes in value log + opts.IndexCacheSize = 512 * 1024 * 1024 // 512MB index cache + opts.BlockCacheSize = 256 * 1024 * 1024 // 256MB block cache + opts.Compression = options.Snappy + + opts.Logger = nil // Disable badger's internal logging + + db, err := badger.Open(opts) + if err != nil { + return nil, fmt.Errorf("failed to open badger db: %w", err) + } + + bc := &BadgerConnector{ + db: db, + stopGC: make(chan struct{}), + } + + // Start GC routine + bc.gcTicker = time.NewTicker(time.Duration(60) * time.Second) + go bc.runGC() + + return bc, nil +} + +func (bc *BadgerConnector) runGC() { + for { + select { + case <-bc.gcTicker.C: + err := bc.db.RunValueLogGC(0.5) + if err != nil && err != badger.ErrNoRewrite { + 
log.Debug().Err(err).Msg("BadgerDB GC error") + } + case <-bc.stopGC: + return + } + } +} + +func (bc *BadgerConnector) Close() error { + if bc.gcTicker != nil { + bc.gcTicker.Stop() + close(bc.stopGC) + } + return bc.db.Close() +} + +// Key construction helpers +func blockKey(chainId *big.Int, blockNumber *big.Int) []byte { + return []byte(fmt.Sprintf("blockdata:%s:%s", chainId.String(), blockNumber.String())) +} + +func blockKeyRange(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("blockdata:%s:", chainId.String())) +} + +func blockFailureKey(chainId *big.Int, blockNumber *big.Int) []byte { + return []byte(fmt.Sprintf("blockfailure:%s:%s", chainId.String(), blockNumber.String())) +} + +func blockFailureKeyRange(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("blockfailure:%s:", chainId.String())) +} + +func lastReorgKey(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("reorg:%s", chainId.String())) +} + +func lastPublishedKey(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("publish:%s", chainId.String())) +} + +func lastCommittedKey(chainId *big.Int) []byte { + return []byte(fmt.Sprintf("commit:%s", chainId.String())) +} + +// IOrchestratorStorage implementation +func (bc *BadgerConnector) GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var failures []common.BlockFailure + prefix := blockFailureKeyRange(qf.ChainId) + + err := bc.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var failure common.BlockFailure + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&failure); err != nil { + return err + } + + // Apply filters + if qf.StartBlock != nil && failure.BlockNumber.Cmp(qf.StartBlock) < 0 { + return nil + } + if qf.EndBlock != nil && failure.BlockNumber.Cmp(qf.EndBlock) > 0 { + return nil + } + + failures = append(failures, failure) + return nil + }) + if err != nil { + return err + } + + if qf.Limit > 0 && len(failures) >= qf.Limit { + break + } + } + return nil + }) + + return failures, err +} + +func (bc *BadgerConnector) StoreBlockFailures(failures []common.BlockFailure) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, failure := range failures { + key := blockFailureKey(failure.ChainId, failure.BlockNumber) + + var buf bytes.Buffer + if err := gob.NewEncoder(&buf).Encode(failure); err != nil { + return err + } + + if err := txn.Set(key, buf.Bytes()); err != nil { + return err + } + } + return nil + }) +} + +func (bc *BadgerConnector) DeleteBlockFailures(failures []common.BlockFailure) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, failure := range failures { + // Delete all failure entries for this block + prefix := blockFailureKey(failure.ChainId, failure.BlockNumber) + + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + if err := txn.Delete(it.Item().Key()); err != nil { + return err + } + } + } + return nil + }) +} + +func (bc *BadgerConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var blockNumber *big.Int + err := bc.db.View(func(txn *badger.Txn) error { + item, 
err := txn.Get(lastReorgKey(chainId)) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return err + } + + return item.Value(func(val []byte) error { + blockNumber = new(big.Int).SetBytes(val) + return nil + }) + }) + + if blockNumber == nil { + return big.NewInt(0), nil + } + return blockNumber, err +} + +func (bc *BadgerConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + return txn.Set(lastReorgKey(chainId), blockNumber.Bytes()) + }) +} + +// IStagingStorage implementation +func (bc *BadgerConnector) InsertStagingData(data []common.BlockData) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, blockData := range data { + key := blockKey(blockData.Block.ChainId, blockData.Block.Number) + + var buf bytes.Buffer + if err := gob.NewEncoder(&buf).Encode(blockData); err != nil { + return err + } + + if err := txn.Set(key, buf.Bytes()); err != nil { + return err + } + } + return nil + }) +} + +func (bc *BadgerConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var results []common.BlockData + + if len(qf.BlockNumbers) > 0 { + // Fetch specific blocks + err := bc.db.View(func(txn *badger.Txn) error { + for _, blockNum := range qf.BlockNumbers { + key := blockKey(qf.ChainId, blockNum) + item, err := txn.Get(key) + if err == badger.ErrKeyNotFound { + continue + } + if err != nil { + return err + } + + err = item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + results = append(results, blockData) + return nil + }) + if err != nil { + return err + } + } + return nil + }) + return results, err + } + + // Range query + prefix := blockKeyRange(qf.ChainId) + + err := bc.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + count := 0 + for it.Rewind(); it.Valid(); it.Next() { + if qf.Offset > 0 && count < qf.Offset { + count++ + continue + } + + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + + // Apply filters + if qf.StartBlock != nil && blockData.Block.Number.Cmp(qf.StartBlock) < 0 { + return nil + } + if qf.EndBlock != nil && blockData.Block.Number.Cmp(qf.EndBlock) > 0 { + return nil + } + + results = append(results, blockData) + return nil + }) + if err != nil { + return err + } + + count++ + if qf.Limit > 0 && len(results) >= qf.Limit { + break + } + } + return nil + }) + + // Sort by block number + sort.Slice(results, func(i, j int) bool { + return results[i].Block.Number.Cmp(results[j].Block.Number) < 0 + }) + + return results, err +} + +func (bc *BadgerConnector) DeleteStagingData(data []common.BlockData) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + for _, blockData := range data { + key := blockKey(blockData.Block.ChainId, blockData.Block.Number) + if err := txn.Delete(key); err != nil && err != badger.ErrKeyNotFound { + return err + } + } + return nil + }) +} + +func (bc *BadgerConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { + 
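+ // Scans staged keys in reverse to find the highest staged block number. Note: block numbers are encoded as decimal strings in the key, so reverse key order matches numeric order only while block numbers have the same number of digits.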
bc.mu.RLock() + defer bc.mu.RUnlock() + + var maxBlock *big.Int + prefix := blockKeyRange(chainId) + + err := bc.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + opts.Reverse = true // Iterate in reverse to find max quickly + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + key := string(it.Item().Key()) + parts := strings.Split(key, ":") + if len(parts) != 3 { + continue + } + + blockNum, ok := new(big.Int).SetString(parts[2], 10) + if !ok { + continue + } + + // Apply range filters if provided + if rangeStart != nil && rangeStart.Sign() > 0 && blockNum.Cmp(rangeStart) < 0 { + continue + } + if rangeEnd != nil && rangeEnd.Sign() > 0 && blockNum.Cmp(rangeEnd) > 0 { + continue + } + + maxBlock = blockNum + break // Found the maximum since we're iterating in reverse + } + return nil + }) + + if maxBlock == nil { + return big.NewInt(0), nil + } + return maxBlock, err +} + +func (bc *BadgerConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var blockNumber *big.Int + err := bc.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(lastPublishedKey(chainId)) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return err + } + + return item.Value(func(val []byte) error { + blockNumber = new(big.Int).SetBytes(val) + return nil + }) + }) + + if blockNumber == nil { + return big.NewInt(0), nil + } + return blockNumber, err +} + +func (bc *BadgerConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + return txn.Set(lastPublishedKey(chainId), blockNumber.Bytes()) + }) +} + +func (bc *BadgerConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + bc.mu.RLock() + defer bc.mu.RUnlock() + + var blockNumber *big.Int + err := bc.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(lastCommittedKey(chainId)) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return err + } + + return item.Value(func(val []byte) error { + blockNumber = new(big.Int).SetBytes(val) + return nil + }) + }) + + if blockNumber == nil { + return big.NewInt(0), nil + } + return blockNumber, err +} + +func (bc *BadgerConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + return bc.db.Update(func(txn *badger.Txn) error { + return txn.Set(lastCommittedKey(chainId), blockNumber.Bytes()) + }) +} + +func (bc *BadgerConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { + bc.mu.Lock() + defer bc.mu.Unlock() + + prefix := blockKeyRange(chainId) + + return bc.db.Update(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = []byte(prefix) + it := txn.NewIterator(opts) + defer it.Close() + + var keysToDelete [][]byte + for it.Rewind(); it.Valid(); it.Next() { + key := string(it.Item().Key()) + parts := strings.Split(key, ":") + if len(parts) != 3 { + continue + } + + blockNum, ok := new(big.Int).SetString(parts[2], 10) + if !ok { + continue + } + + if blockNum.Cmp(blockNumber) <= 0 { + keysToDelete = append(keysToDelete, it.Item().KeyCopy(nil)) + } + } + + for _, key := range keysToDelete { + if err := txn.Delete(key); err != nil { + return err + } + } + + return nil + }) +} diff --git a/internal/storage/block_buffer.go 
b/internal/storage/block_buffer.go new file mode 100644 index 0000000..90c6ed8 --- /dev/null +++ b/internal/storage/block_buffer.go @@ -0,0 +1,282 @@ +package storage + +import ( + "bytes" + "encoding/gob" + "fmt" + "math/big" + "sync" + + "github.com/rs/zerolog/log" + "github.com/thirdweb-dev/indexer/internal/common" +) + +// BlockBuffer manages buffering of block data with size and count limits +type BlockBuffer struct { + mu sync.RWMutex + data []common.BlockData + sizeBytes int64 + maxSizeBytes int64 + maxBlocks int +} + +// IBlockBuffer defines the interface for block buffer implementations +type IBlockBuffer interface { + Add(blocks []common.BlockData) bool + Flush() []common.BlockData + ShouldFlush() bool + Size() (int64, int) + IsEmpty() bool + GetData() []common.BlockData + GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData + GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData + GetMaxBlockNumber(chainId *big.Int) *big.Int + Clear() + Stats() BufferStats + Close() error +} + +// NewBlockBuffer creates a new in-memory block buffer +func NewBlockBuffer(maxSizeMB int64, maxBlocks int) *BlockBuffer { + return &BlockBuffer{ + data: make([]common.BlockData, 0), + maxSizeBytes: maxSizeMB * 1024 * 1024, + maxBlocks: maxBlocks, + } +} + +// NewBlockBufferWithBadger creates a new Badger-backed block buffer for better memory management +// This uses ephemeral storage with optimized settings for caching +func NewBlockBufferWithBadger(maxSizeMB int64, maxBlocks int) (IBlockBuffer, error) { + return NewBadgerBlockBuffer(maxSizeMB, maxBlocks) +} + +// Add adds blocks to the buffer and returns true if flush is needed +func (b *BlockBuffer) Add(blocks []common.BlockData) bool { + if len(blocks) == 0 { + return false + } + + b.mu.Lock() + defer b.mu.Unlock() + + // Calculate actual size by marshaling the entire batch once + // This gives us accurate size with minimal overhead since we marshal once per Add call + var actualSize int64 + var buf bytes.Buffer + enc := gob.NewEncoder(&buf) + + // Marshal all blocks to get actual serialized size + if err := enc.Encode(blocks); err != nil { + // If encoding fails, use estimation as fallback + log.Warn().Err(err).Msg("Failed to marshal blocks for size calculation, buffer size is not reported correctly") + } else { + actualSize = int64(buf.Len()) + } + + // Add to buffer + b.data = append(b.data, blocks...) + b.sizeBytes += actualSize + + log.Debug(). + Int("block_count", len(blocks)). + Int64("actual_size_bytes", actualSize). + Int64("total_size_bytes", b.sizeBytes). + Int("total_blocks", len(b.data)). + Msg("Added blocks to buffer") + + // Check if flush is needed + return b.shouldFlushLocked() +} + +// Flush removes all data from the buffer and returns it +func (b *BlockBuffer) Flush() []common.BlockData { + b.mu.Lock() + defer b.mu.Unlock() + + if len(b.data) == 0 { + return nil + } + + // Take ownership of data + data := b.data + b.data = make([]common.BlockData, 0) + b.sizeBytes = 0 + + log.Info(). + Int("block_count", len(data)). 
+ Msg("Flushing buffer") + + return data +} + +// ShouldFlush checks if the buffer should be flushed based on configured thresholds +func (b *BlockBuffer) ShouldFlush() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return b.shouldFlushLocked() +} + +// Size returns the current buffer size in bytes and block count +func (b *BlockBuffer) Size() (int64, int) { + b.mu.RLock() + defer b.mu.RUnlock() + return b.sizeBytes, len(b.data) +} + +// IsEmpty returns true if the buffer is empty +func (b *BlockBuffer) IsEmpty() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return len(b.data) == 0 +} + +// GetData returns a copy of the current buffer data +func (b *BlockBuffer) GetData() []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + result := make([]common.BlockData, len(b.data)) + copy(result, b.data) + return result +} + +// GetBlocksInRange returns blocks from the buffer that fall within the given range +func (b *BlockBuffer) GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result []common.BlockData + for _, block := range b.data { + if block.Block.ChainId.Cmp(chainId) == 0 { + blockNum := block.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + result = append(result, block) + } + } + } + return result +} + +// GetBlockByNumber returns a specific block from the buffer if it exists +func (b *BlockBuffer) GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + for _, block := range b.data { + if block.Block.ChainId.Cmp(chainId) == 0 && block.Block.Number.Cmp(blockNumber) == 0 { + blockCopy := block + return &blockCopy + } + } + return nil +} + +// GetMaxBlockNumber returns the maximum block number for a chain in the buffer +func (b *BlockBuffer) GetMaxBlockNumber(chainId *big.Int) *big.Int { + b.mu.RLock() + defer b.mu.RUnlock() + + var maxBlock *big.Int + for _, block := range b.data { + if block.Block.ChainId.Cmp(chainId) == 0 { + if maxBlock == nil || block.Block.Number.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(block.Block.Number) + } + } + } + return maxBlock +} + +// Clear empties the buffer without returning data +func (b *BlockBuffer) Clear() { + b.mu.Lock() + defer b.mu.Unlock() + + b.data = make([]common.BlockData, 0) + b.sizeBytes = 0 +} + +// Stats returns statistics about the buffer +func (b *BlockBuffer) Stats() BufferStats { + b.mu.RLock() + defer b.mu.RUnlock() + + stats := BufferStats{ + BlockCount: len(b.data), + SizeBytes: b.sizeBytes, + ChainCount: 0, + ChainStats: make(map[uint64]ChainStats), + } + + // Calculate per-chain statistics + for _, block := range b.data { + chainId := block.Block.ChainId.Uint64() + chainStat := stats.ChainStats[chainId] + + if chainStat.MinBlock == nil || block.Block.Number.Cmp(chainStat.MinBlock) < 0 { + chainStat.MinBlock = new(big.Int).Set(block.Block.Number) + } + if chainStat.MaxBlock == nil || block.Block.Number.Cmp(chainStat.MaxBlock) > 0 { + chainStat.MaxBlock = new(big.Int).Set(block.Block.Number) + } + chainStat.BlockCount++ + + stats.ChainStats[chainId] = chainStat + } + + stats.ChainCount = len(stats.ChainStats) + return stats +} + +// Private methods + +func (b *BlockBuffer) shouldFlushLocked() bool { + // Check size limit + if b.maxSizeBytes > 0 && b.sizeBytes >= b.maxSizeBytes { + return true + } + + // Check block count limit + if b.maxBlocks > 0 && len(b.data) >= b.maxBlocks { + return true + } + + return false +} + +// BufferStats 
contains statistics about the buffer +type BufferStats struct { + BlockCount int + SizeBytes int64 + ChainCount int + ChainStats map[uint64]ChainStats +} + +// ChainStats contains per-chain statistics +type ChainStats struct { + BlockCount int + MinBlock *big.Int + MaxBlock *big.Int +} + +// String returns a string representation of buffer stats +func (s BufferStats) String() string { + return fmt.Sprintf("BufferStats{blocks=%d, size=%dMB, chains=%d}", + s.BlockCount, s.SizeBytes/(1024*1024), s.ChainCount) +} + +// Close clears the in-memory buffer; it holds no other resources +func (b *BlockBuffer) Close() error { + b.mu.Lock() + defer b.mu.Unlock() + + // Clear the buffer to free memory + b.data = nil + b.sizeBytes = 0 + + return nil +} + +// Ensure BlockBuffer implements IBlockBuffer interface +var _ IBlockBuffer = (*BlockBuffer)(nil) diff --git a/internal/storage/block_buffer_badger.go b/internal/storage/block_buffer_badger.go new file mode 100644 index 0000000..09469c4 --- /dev/null +++ b/internal/storage/block_buffer_badger.go @@ -0,0 +1,482 @@ +package storage + +import ( + "bytes" + "encoding/gob" + "fmt" + "math/big" + "os" + "sync" + "time" + + "github.com/dgraph-io/badger/v4" + "github.com/dgraph-io/badger/v4/options" + "github.com/rs/zerolog/log" + "github.com/thirdweb-dev/indexer/internal/common" +) + +// BadgerBlockBuffer manages buffering of block data using Badger as an ephemeral cache +type BadgerBlockBuffer struct { + mu sync.RWMutex + db *badger.DB + tempDir string + maxSizeBytes int64 + maxBlocks int + blockCount int + gcTicker *time.Ticker + stopGC chan struct{} + + // Chain metadata cache for O(1) lookups + chainMetadata map[uint64]*ChainMetadata +} + +// ChainMetadata tracks per-chain statistics for fast lookups +type ChainMetadata struct { + MinBlock *big.Int + MaxBlock *big.Int + BlockCount int +} + +// NewBadgerBlockBuffer creates a new Badger-backed block buffer with ephemeral storage +func NewBadgerBlockBuffer(maxSizeMB int64, maxBlocks int) (*BadgerBlockBuffer, error) { + // Create temporary directory for ephemeral storage + tempDir, err := os.MkdirTemp("", "blockbuffer-*") + if err != nil { + return nil, fmt.Errorf("failed to create temp dir: %w", err) + } + + // Configure Badger with optimized settings for ephemeral cache + opts := badger.DefaultOptions(tempDir) + + // Memory optimization settings (similar to badger.go but tuned for ephemeral use) + opts.ValueLogFileSize = 256 * 1024 * 1024 // 256MB (smaller for cache) + opts.BaseTableSize = 64 * 1024 * 1024 // 64MB + opts.BaseLevelSize = 64 * 1024 * 1024 // 64MB + opts.LevelSizeMultiplier = 10 // Aggressive growth + opts.NumMemtables = 5 // ~320MB + opts.MemTableSize = opts.BaseTableSize // 64MB per memtable + opts.NumLevelZeroTables = 5 + opts.NumLevelZeroTablesStall = 10 + opts.SyncWrites = false // No durability needed for cache + opts.DetectConflicts = false // No ACID needed + opts.NumCompactors = 2 // Fewer compactors for cache + opts.CompactL0OnClose = false // Don't compact on close (ephemeral) + opts.ValueLogMaxEntries = 100000 // Smaller for cache + opts.ValueThreshold = 1024 // Store values > 1024 bytes in value log + opts.IndexCacheSize = 128 * 1024 * 1024 // 128MB index cache + opts.BlockCacheSize = 64 * 1024 * 1024 // 64MB block cache + opts.Compression = options.Snappy + opts.Logger = nil // Disable badger's internal logging + + // Ephemeral-specific settings + opts.InMemory = false // Use disk but in temp directory + opts.ReadOnly = false + opts.MetricsEnabled = false + + db, err := badger.Open(opts) +
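+ // If the ephemeral store cannot be opened, remove the temp directory again so it does not leak on disk.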
if err != nil { + os.RemoveAll(tempDir) + return nil, fmt.Errorf("failed to open badger db: %w", err) + } + + b := &BadgerBlockBuffer{ + db: db, + tempDir: tempDir, + maxSizeBytes: maxSizeMB * 1024 * 1024, + maxBlocks: maxBlocks, + stopGC: make(chan struct{}), + chainMetadata: make(map[uint64]*ChainMetadata), + } + + // Start GC routine with faster interval for cache + b.gcTicker = time.NewTicker(30 * time.Second) + go b.runGC() + + return b, nil +} + +// Add adds blocks to the buffer and returns true if flush is needed +func (b *BadgerBlockBuffer) Add(blocks []common.BlockData) bool { + if len(blocks) == 0 { + return false + } + + b.mu.Lock() + defer b.mu.Unlock() + + err := b.db.Update(func(txn *badger.Txn) error { + for _, block := range blocks { + key := b.makeKey(block.Block.ChainId, block.Block.Number) + + var buf bytes.Buffer + if err := gob.NewEncoder(&buf).Encode(block); err != nil { + return err + } + + if err := txn.Set(key, buf.Bytes()); err != nil { + return err + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to add blocks to badger buffer") + return false + } + + // Update counters + b.blockCount += len(blocks) + + // Update chain metadata for O(1) lookups + for _, block := range blocks { + chainId := block.Block.ChainId.Uint64() + meta, exists := b.chainMetadata[chainId] + if !exists { + meta = &ChainMetadata{ + MinBlock: new(big.Int).Set(block.Block.Number), + MaxBlock: new(big.Int).Set(block.Block.Number), + BlockCount: 1, + } + b.chainMetadata[chainId] = meta + } else { + if block.Block.Number.Cmp(meta.MinBlock) < 0 { + meta.MinBlock = new(big.Int).Set(block.Block.Number) + } + if block.Block.Number.Cmp(meta.MaxBlock) > 0 { + meta.MaxBlock = new(big.Int).Set(block.Block.Number) + } + meta.BlockCount++ + } + } + + log.Debug(). + Int("block_count", len(blocks)). + Int("total_blocks", b.blockCount). + Msg("Added blocks to badger buffer") + + // Check if flush is needed + return b.shouldFlushLocked() +} + +// Flush removes all data from the buffer and returns it +func (b *BadgerBlockBuffer) Flush() []common.BlockData { + b.mu.Lock() + defer b.mu.Unlock() + + if b.blockCount == 0 { + return nil + } + + var result []common.BlockData + + // Read all data + err := b.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.PrefetchValues = true + opts.PrefetchSize = 100 + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + result = append(result, blockData) + return nil + }) + if err != nil { + log.Error().Err(err).Msg("Failed to decode block data during flush") + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to read blocks during flush") + } + + // Clear the database + err = b.db.DropAll() + if err != nil { + log.Error().Err(err).Msg("Failed to clear badger buffer") + } + + // Reset counters and metadata + oldCount := b.blockCount + b.blockCount = 0 + b.chainMetadata = make(map[uint64]*ChainMetadata) + + log.Info(). + Int("block_count", oldCount). 
+ Msg("Flushing badger buffer") + + return result +} + +// ShouldFlush checks if the buffer should be flushed based on configured thresholds +func (b *BadgerBlockBuffer) ShouldFlush() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return b.shouldFlushLocked() +} + +// Size returns the current buffer size in bytes and block count +func (b *BadgerBlockBuffer) Size() (int64, int) { + b.mu.RLock() + defer b.mu.RUnlock() + + // Get actual size from Badger's LSM tree + lsm, _ := b.db.Size() + return lsm, b.blockCount +} + +// IsEmpty returns true if the buffer is empty +func (b *BadgerBlockBuffer) IsEmpty() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return b.blockCount == 0 +} + +// GetData returns a copy of the current buffer data +func (b *BadgerBlockBuffer) GetData() []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result []common.BlockData + + err := b.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.PrefetchValues = true + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + result = append(result, blockData) + return nil + }) + if err != nil { + log.Error().Err(err).Msg("Failed to decode block data") + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to get data from badger buffer") + } + + return result +} + +// GetBlocksInRange returns blocks from the buffer that fall within the given range +func (b *BadgerBlockBuffer) GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result []common.BlockData + prefix := b.makePrefix(chainId) + + err := b.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.Prefix = prefix + it := txn.NewIterator(opts) + defer it.Close() + + for it.Rewind(); it.Valid(); it.Next() { + item := it.Item() + err := item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + + blockNum := blockData.Block.Number + if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { + result = append(result, blockData) + } + return nil + }) + if err != nil { + log.Error().Err(err).Msg("Failed to decode block data in range") + } + } + return nil + }) + + if err != nil { + log.Error().Err(err).Msg("Failed to get blocks in range from badger buffer") + } + + return result +} + +// GetBlockByNumber returns a specific block from the buffer if it exists +func (b *BadgerBlockBuffer) GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData { + b.mu.RLock() + defer b.mu.RUnlock() + + var result *common.BlockData + key := b.makeKey(chainId, blockNumber) + + err := b.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(key) + if err == badger.ErrKeyNotFound { + return nil + } + if err != nil { + return err + } + + return item.Value(func(val []byte) error { + var blockData common.BlockData + if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { + return err + } + result = &blockData + return nil + }) + }) + + if err != nil && err != badger.ErrKeyNotFound { + log.Error().Err(err).Msg("Failed to get block by number from badger buffer") + } + + return result +} + +// GetMaxBlockNumber returns the maximum 
block number for a chain in the buffer +func (b *BadgerBlockBuffer) GetMaxBlockNumber(chainId *big.Int) *big.Int { + b.mu.RLock() + defer b.mu.RUnlock() + + // O(1) lookup using cached metadata + meta, exists := b.chainMetadata[chainId.Uint64()] + if !exists || meta.MaxBlock == nil { + return nil + } + + // Return a copy to prevent external modification + return new(big.Int).Set(meta.MaxBlock) +} + +// Clear empties the buffer without returning data +func (b *BadgerBlockBuffer) Clear() { + b.mu.Lock() + defer b.mu.Unlock() + + err := b.db.DropAll() + if err != nil { + log.Error().Err(err).Msg("Failed to clear badger buffer") + } + + b.blockCount = 0 + b.chainMetadata = make(map[uint64]*ChainMetadata) +} + +// Stats returns statistics about the buffer +func (b *BadgerBlockBuffer) Stats() BufferStats { + b.mu.RLock() + defer b.mu.RUnlock() + + // Get actual size from Badger + lsm, _ := b.db.Size() + + stats := BufferStats{ + BlockCount: b.blockCount, + SizeBytes: lsm, + ChainCount: len(b.chainMetadata), + ChainStats: make(map[uint64]ChainStats), + } + + // Use cached metadata for O(1) stats generation + for chainId, meta := range b.chainMetadata { + if meta.MinBlock != nil && meta.MaxBlock != nil { + stats.ChainStats[chainId] = ChainStats{ + BlockCount: meta.BlockCount, + MinBlock: new(big.Int).Set(meta.MinBlock), + MaxBlock: new(big.Int).Set(meta.MaxBlock), + } + } + } + + return stats +} + +// Close closes the buffer and cleans up resources +func (b *BadgerBlockBuffer) Close() error { + b.mu.Lock() + defer b.mu.Unlock() + + // Stop GC routine + if b.gcTicker != nil { + b.gcTicker.Stop() + close(b.stopGC) + } + + // Close database + if err := b.db.Close(); err != nil { + log.Error().Err(err).Msg("Failed to close badger buffer database") + } + + // Clean up temporary directory + if err := os.RemoveAll(b.tempDir); err != nil { + log.Error().Err(err).Msg("Failed to remove temp directory") + } + + return nil +} + +// Private methods + +func (b *BadgerBlockBuffer) shouldFlushLocked() bool { + // Check size limit using Badger's actual size + if b.maxSizeBytes > 0 { + lsm, _ := b.db.Size() + if lsm >= b.maxSizeBytes { + return true + } + } + + // Check block count limit + if b.maxBlocks > 0 && b.blockCount >= b.maxBlocks { + return true + } + + return false +} + +func (b *BadgerBlockBuffer) makeKey(chainId *big.Int, blockNumber *big.Int) []byte { + // Note: keys use unpadded decimal block numbers, so lexicographic key order does not + // match numeric block order; callers sort or compare decoded block numbers instead + return fmt.Appendf(nil, "block:%s:%s", chainId.String(), blockNumber.String()) +} + +func (b *BadgerBlockBuffer) makePrefix(chainId *big.Int) []byte { + return fmt.Appendf(nil, "block:%s:", chainId.String()) +} + +func (b *BadgerBlockBuffer) runGC() { + for { + select { + case <-b.gcTicker.C: + err := b.db.RunValueLogGC(0.7) // More aggressive GC for cache + if err != nil && err != badger.ErrNoRewrite { + log.Debug().Err(err).Msg("BadgerBlockBuffer GC error") + } + case <-b.stopGC: + return + } + } +} + +// Ensure BadgerBlockBuffer implements IBlockBuffer interface +var _ IBlockBuffer = (*BadgerBlockBuffer)(nil) diff --git a/internal/storage/block_buffer_badger_test.go b/internal/storage/block_buffer_badger_test.go new file mode 100644 index 0000000..b10e8d8 --- /dev/null +++ b/internal/storage/block_buffer_badger_test.go @@ -0,0 +1,144 @@ +package storage + +import ( + "math/big" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/thirdweb-dev/indexer/internal/common" +) + +func
TestBadgerBlockBufferMetadataOptimization(t *testing.T) { + // Create a new Badger buffer + buffer, err := NewBadgerBlockBuffer(10, 1000) // 10MB, 1000 blocks max + require.NoError(t, err) + defer buffer.Close() + + chainId := big.NewInt(1) + + // Add blocks + blocks := []common.BlockData{ + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(100), + Hash: "0x1234", + }, + }, + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(101), + Hash: "0x5678", + }, + }, + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(99), + Hash: "0xabcd", + }, + }, + } + + buffer.Add(blocks) + + // Test O(1) GetMaxBlockNumber + start := time.Now() + maxBlock := buffer.GetMaxBlockNumber(chainId) + elapsed := time.Since(start) + + assert.NotNil(t, maxBlock) + assert.Equal(t, big.NewInt(101), maxBlock) + assert.Less(t, elapsed, time.Millisecond, "GetMaxBlockNumber should be O(1) and very fast") + + // Test O(1) Stats + start = time.Now() + stats := buffer.Stats() + elapsed = time.Since(start) + + assert.Equal(t, 3, stats.BlockCount) + assert.Equal(t, 1, stats.ChainCount) + chainStats := stats.ChainStats[1] + assert.Equal(t, 3, chainStats.BlockCount) + assert.Equal(t, big.NewInt(99), chainStats.MinBlock) + assert.Equal(t, big.NewInt(101), chainStats.MaxBlock) + assert.Less(t, elapsed, time.Millisecond, "Stats should be O(1) and very fast") + + // Test metadata is updated after flush + buffer.Flush() + maxBlock = buffer.GetMaxBlockNumber(chainId) + assert.Nil(t, maxBlock) + + // Add new blocks and verify metadata is rebuilt + newBlocks := []common.BlockData{ + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(200), + Hash: "0xffff", + }, + }, + } + buffer.Add(newBlocks) + + maxBlock = buffer.GetMaxBlockNumber(chainId) + assert.NotNil(t, maxBlock) + assert.Equal(t, big.NewInt(200), maxBlock) +} + +func BenchmarkBadgerBlockBufferGetMaxBlockNumber(b *testing.B) { + buffer, err := NewBadgerBlockBuffer(100, 10000) + require.NoError(b, err) + defer buffer.Close() + + chainId := big.NewInt(1) + + // Add many blocks + for i := 0; i < 1000; i++ { + blocks := []common.BlockData{ + { + Block: common.Block{ + ChainId: chainId, + Number: big.NewInt(int64(i)), + Hash: "0x1234", + }, + }, + } + buffer.Add(blocks) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = buffer.GetMaxBlockNumber(chainId) + } +} + +func BenchmarkBadgerBlockBufferStats(b *testing.B) { + buffer, err := NewBadgerBlockBuffer(100, 10000) + require.NoError(b, err) + defer buffer.Close() + + // Add blocks for multiple chains + for chainId := 1; chainId <= 5; chainId++ { + for i := 0; i < 100; i++ { + blocks := []common.BlockData{ + { + Block: common.Block{ + ChainId: big.NewInt(int64(chainId)), + Number: big.NewInt(int64(i)), + Hash: "0x1234", + }, + }, + } + buffer.Add(blocks) + } + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = buffer.Stats() + } +} diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go index 2dba0d5..013e917 100644 --- a/internal/storage/clickhouse.go +++ b/internal/storage/clickhouse.go @@ -78,6 +78,14 @@ func NewClickHouseConnector(cfg *config.ClickhouseConfig) (*ClickHouseConnector, }, nil } +// Close closes the ClickHouse connection +func (c *ClickHouseConnector) Close() error { + if c.conn != nil { + return c.conn.Close() + } + return nil +} + func connectDB(cfg *config.ClickhouseConfig) (clickhouse.Conn, error) { port := cfg.Port if port == 0 { @@ -99,6 +107,14 @@ func connectDB(cfg *config.ClickhouseConfig) (clickhouse.Conn, error) { }, 
MaxOpenConns: cfg.MaxOpenConns, MaxIdleConns: cfg.MaxIdleConns, + Compression: func() *clickhouse.Compression { + c := &clickhouse.Compression{} + if cfg.EnableCompression { + zLog.Debug().Msg("ClickHouse LZ4 compression is enabled") + c.Method = clickhouse.CompressionLZ4 + } + return c + }(), Settings: func() clickhouse.Settings { settings := clickhouse.Settings{ "do_not_merge_across_partitions_select_final": "1", @@ -893,6 +909,19 @@ func (c *ClickHouseConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBl return maxBlockNumber, nil } +func (c *ClickHouseConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blockCount *big.Int, err error) { + tableName := c.getTableName(chainId, "blocks") + query := fmt.Sprintf("SELECT COUNT(DISTINCT block_number) FROM %s.%s WHERE chain_id = ? AND block_number >= ? AND block_number <= ?", c.cfg.Database, tableName) + err = c.conn.QueryRow(context.Background(), query, chainId, startBlock, endBlock).Scan(&blockCount) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + return blockCount, nil +} + func (c *ClickHouseConnector) getMaxBlockNumberConsistent(chainId *big.Int) (maxBlockNumber *big.Int, err error) { tableName := c.getTableName(chainId, "blocks") query := fmt.Sprintf("SELECT block_number FROM %s.%s WHERE chain_id = ? ORDER BY block_number DESC LIMIT 1 SETTINGS select_sequential_consistency = 1", c.cfg.Database, tableName) @@ -1118,6 +1147,31 @@ func (c *ClickHouseConnector) SetLastPublishedBlockNumber(chainId *big.Int, bloc return c.conn.Exec(context.Background(), query) } +func (c *ClickHouseConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + query := fmt.Sprintf("SELECT cursor_value FROM %s.cursors FINAL WHERE cursor_type = 'commit'", c.cfg.Database) + if chainId.Sign() > 0 { + query += fmt.Sprintf(" AND chain_id = %s", chainId.String()) + } + var blockNumberString string + err := c.conn.QueryRow(context.Background(), query).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + return blockNumber, nil +} + +func (c *ClickHouseConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + query := fmt.Sprintf("INSERT INTO %s.cursors (chain_id, cursor_type, cursor_value) VALUES (%s, 'commit', '%s')", c.cfg.Database, chainId, blockNumber.String()) + return c.conn.Exec(context.Background(), query) +} + func (c *ClickHouseConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { query := fmt.Sprintf("SELECT cursor_value FROM %s.cursors FINAL WHERE cursor_type = 'reorg'", c.cfg.Database) if chainId.Sign() > 0 { @@ -2157,7 +2211,7 @@ func (c *ClickHouseConnector) GetFullBlockData(chainId *big.Int, blockNumbers [] return blockData, nil } -func (c *ClickHouseConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +func (c *ClickHouseConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { query := fmt.Sprintf(` INSERT INTO %s.block_data (chain_id, block_number, is_deleted) SELECT chain_id, block_number, 1 diff --git a/internal/storage/connector.go b/internal/storage/connector.go index 1253213..23fdb52 100644 --- a/internal/storage/connector.go +++ b/internal/storage/connector.go @@ -72,22 +72,57 @@ type IStorage struct 
{ StagingStorage IStagingStorage } +// Close closes all storage connections +func (s *IStorage) Close() error { + var errs []error + + // Close each storage that implements Closer interface + if err := s.OrchestratorStorage.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close orchestrator storage: %w", err)) + } + + if err := s.MainStorage.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close main storage: %w", err)) + } + + if err := s.StagingStorage.Close(); err != nil { + errs = append(errs, fmt.Errorf("failed to close staging storage: %w", err)) + } + + if len(errs) > 0 { + return fmt.Errorf("errors closing storage: %v", errs) + } + + return nil +} + +// The orchestrator storage is a persisted key/value store type IOrchestratorStorage interface { - GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) - StoreBlockFailures(failures []common.BlockFailure) error - DeleteBlockFailures(failures []common.BlockFailure) error GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error + GetLastPublishedBlockNumber(chainId *big.Int) (blockNumber *big.Int, err error) + SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error + GetLastCommittedBlockNumber(chainId *big.Int) (blockNumber *big.Int, err error) + SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error + + Close() error } +// The staging storage is an ephemeral block data store type IStagingStorage interface { + // Staging block data InsertStagingData(data []common.BlockData) error GetStagingData(qf QueryFilter) (data []common.BlockData, err error) - DeleteStagingData(data []common.BlockData) error GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (maxBlockNumber *big.Int, err error) - GetLastPublishedBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) - SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error - DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error + DeleteStagingData(data []common.BlockData) error + DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error + + // Block failures + GetBlockFailures(qf QueryFilter) ([]common.BlockFailure, error) + StoreBlockFailures(failures []common.BlockFailure) error + DeleteBlockFailures(failures []common.BlockFailure) error + + Close() error } type IMainStorage interface { @@ -99,16 +134,17 @@ type IMainStorage interface { GetLogs(qf QueryFilter, fields ...string) (logs QueryResult[common.Log], err error) GetTraces(qf QueryFilter, fields ...string) (traces QueryResult[common.Trace], err error) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) + GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) + GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) + + GetMaxBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (maxBlockNumber *big.Int, err error) + GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blockCount *big.Int, err error) + + /** * Get block headers ordered from latest to oldest.
*/ GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) (blockHeaders []common.BlockHeader, err error) - - GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) - GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) - /** * Gets only the data required for validation. */ @@ -121,48 +157,222 @@ type IMainStorage interface { * Gets full block data with transactions, logs and traces. */ GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) (blocks []common.BlockData, err error) + + Close() error } func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { var storage IStorage var err error - storage.OrchestratorStorage, err = NewConnector[IOrchestratorStorage](&cfg.Orchestrator) + storage.OrchestratorStorage, err = NewOrchestratorConnector(&cfg.Orchestrator) if err != nil { return IStorage{}, fmt.Errorf("failed to create orchestrator storage: %w", err) } - storage.MainStorage, err = NewConnector[IMainStorage](&cfg.Main) + storage.StagingStorage, err = NewStagingConnector(&cfg.Staging) if err != nil { - return IStorage{}, fmt.Errorf("failed to create main storage: %w", err) + return IStorage{}, fmt.Errorf("failed to create staging storage: %w", err) } - storage.StagingStorage, err = NewConnector[IStagingStorage](&cfg.Staging) + storage.MainStorage, err = NewMainConnector(&cfg.Main, &storage.OrchestratorStorage) if err != nil { - return IStorage{}, fmt.Errorf("failed to create staging storage: %w", err) + return IStorage{}, fmt.Errorf("failed to create main storage: %w", err) } return storage, nil } -func NewConnector[T any](cfg *config.StorageConnectionConfig) (T, error) { +func NewOrchestratorConnector(cfg *config.StorageOrchestratorConfig) (IOrchestratorStorage, error) { var conn interface{} var err error - if cfg.Postgres != nil { - conn, err = NewPostgresConnector(cfg.Postgres) - } else if cfg.Clickhouse != nil { - conn, err = NewClickHouseConnector(cfg.Clickhouse) + + // Default to "auto" if Type is not specified + storageType := cfg.Type + if storageType == "" { + storageType = "auto" + } + + // Handle explicit type selection + if storageType != "auto" { + switch storageType { + case "redis": + if cfg.Redis == nil { + return nil, fmt.Errorf("redis storage type specified but redis config is nil") + } + conn, err = NewRedisConnector(cfg.Redis) + case "postgres": + if cfg.Postgres == nil { + return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") + } + conn, err = NewPostgresConnector(cfg.Postgres) + case "clickhouse": + if cfg.Clickhouse == nil { + return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") + } + conn, err = NewClickHouseConnector(cfg.Clickhouse) + case "badger": + if cfg.Badger == nil { + return nil, fmt.Errorf("badger storage type specified but badger config is nil") + } + conn, err = NewBadgerConnector(cfg.Badger) + default: + return nil, fmt.Errorf("unknown storage type: %s", storageType) + } + } else { + // Auto mode: use the first non-nil config (existing behavior) + if cfg.Redis != nil { + conn, err = NewRedisConnector(cfg.Redis) + } else if cfg.Postgres != nil { + conn, err = NewPostgresConnector(cfg.Postgres) + } else if cfg.Clickhouse != nil { + conn, err = NewClickHouseConnector(cfg.Clickhouse) + } else if cfg.Badger != nil { + conn, err = NewBadgerConnector(cfg.Badger) + } else { + return nil, fmt.Errorf("no storage driver configured") + } + } + + if err != nil { + return 
nil, err + } + + typedConn, ok := conn.(IOrchestratorStorage) + if !ok { + return nil, fmt.Errorf("connector does not implement the required interface") + } + + return typedConn, nil +} + +func NewStagingConnector(cfg *config.StorageStagingConfig) (IStagingStorage, error) { + var conn interface{} + var err error + + // Default to "auto" if Type is not specified + storageType := cfg.Type + if storageType == "" { + storageType = "auto" + } + + // Handle explicit type selection + if storageType != "auto" { + switch storageType { + case "postgres": + if cfg.Postgres == nil { + return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") + } + conn, err = NewPostgresConnector(cfg.Postgres) + case "clickhouse": + if cfg.Clickhouse == nil { + return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") + } + conn, err = NewClickHouseConnector(cfg.Clickhouse) + case "badger": + if cfg.Badger == nil { + return nil, fmt.Errorf("badger storage type specified but badger config is nil") + } + conn, err = NewBadgerConnector(cfg.Badger) + default: + return nil, fmt.Errorf("unknown storage type: %s", storageType) + } + } else { + // Auto mode: use the first non-nil config (existing behavior) + if cfg.Postgres != nil { + conn, err = NewPostgresConnector(cfg.Postgres) + } else if cfg.Clickhouse != nil { + conn, err = NewClickHouseConnector(cfg.Clickhouse) + } else if cfg.Badger != nil { + conn, err = NewBadgerConnector(cfg.Badger) + } else { + return nil, fmt.Errorf("no storage driver configured") + } + } + + if err != nil { + return nil, err + } + + typedConn, ok := conn.(IStagingStorage) + if !ok { + return nil, fmt.Errorf("connector does not implement the required interface") + } + + return typedConn, nil +} + +func NewMainConnector(cfg *config.StorageMainConfig, orchestratorStorage *IOrchestratorStorage) (IMainStorage, error) { + var conn interface{} + var err error + + // Default to "auto" if Type is not specified + storageType := cfg.Type + if storageType == "" { + storageType = "auto" + } + + // Handle explicit type selection + if storageType != "auto" { + switch storageType { + case "kafka": + if cfg.Kafka == nil { + return nil, fmt.Errorf("kafka storage type specified but kafka config is nil") + } + if orchestratorStorage == nil { + return nil, fmt.Errorf("orchestrator storage must be provided for kafka main storage") + } + conn, err = NewKafkaConnector(cfg.Kafka, orchestratorStorage) + case "s3": + if cfg.S3 == nil { + return nil, fmt.Errorf("s3 storage type specified but s3 config is nil") + } + conn, err = NewS3Connector(cfg.S3) + case "postgres": + if cfg.Postgres == nil { + return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") + } + conn, err = NewPostgresConnector(cfg.Postgres) + case "clickhouse": + if cfg.Clickhouse == nil { + return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") + } + conn, err = NewClickHouseConnector(cfg.Clickhouse) + case "badger": + if cfg.Badger == nil { + return nil, fmt.Errorf("badger storage type specified but badger config is nil") + } + conn, err = NewBadgerConnector(cfg.Badger) + default: + return nil, fmt.Errorf("unknown storage type: %s", storageType) + } } else { - return *new(T), fmt.Errorf("no storage driver configured") + // Auto mode: use the first non-nil config (existing behavior) + if cfg.Kafka != nil { + if orchestratorStorage == nil { + return nil, fmt.Errorf("orchestrator storage must be provided for kafka main storage") + } + conn, 
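// Editorial note (wiring sketch, not part of the original diff): with the explicit Type
// field, a Kafka main store backed by a Redis cursor store could be wired roughly like
// this (struct and field names are taken from the cfg accesses above; the concrete
// values are illustrative only):
//
//	orchCfg := &config.StorageOrchestratorConfig{Type: "redis", Redis: &config.RedisConfig{Host: "localhost", Port: 6379}}
//	orch, err := NewOrchestratorConnector(orchCfg)
//	if err != nil {
//	    // handle error
//	}
//	mainCfg := &config.StorageMainConfig{Type: "kafka", Kafka: &config.KafkaConfig{Brokers: "localhost:9092"}}
//	mainStore, err := NewMainConnector(mainCfg, &orch)
//
// Leaving Type empty (or setting it to "auto") falls back to the first non-nil driver
// config, preserving the previous behavior.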
err = NewKafkaConnector(cfg.Kafka, orchestratorStorage) + } else if cfg.S3 != nil { + conn, err = NewS3Connector(cfg.S3) + } else if cfg.Postgres != nil { + conn, err = NewPostgresConnector(cfg.Postgres) + } else if cfg.Clickhouse != nil { + conn, err = NewClickHouseConnector(cfg.Clickhouse) + } else if cfg.Badger != nil { + conn, err = NewBadgerConnector(cfg.Badger) + } else { + return nil, fmt.Errorf("no storage driver configured") + } } if err != nil { - return *new(T), err + return nil, err } - typedConn, ok := conn.(T) + typedConn, ok := conn.(IMainStorage) if !ok { - return *new(T), fmt.Errorf("connector does not implement the required interface") + return nil, fmt.Errorf("connector does not implement the required interface") } return typedConn, nil diff --git a/internal/storage/kafka.go b/internal/storage/kafka.go new file mode 100644 index 0000000..747d853 --- /dev/null +++ b/internal/storage/kafka.go @@ -0,0 +1,137 @@ +package storage + +import ( + "fmt" + "math/big" + + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" +) + +// KafkaConnector uses Redis for metadata storage and Kafka for block data delivery +type KafkaConnector struct { + cfg *config.KafkaConfig + kafkaPublisher *KafkaPublisher + orchestratorStorage IOrchestratorStorage +} + +func NewKafkaConnector(cfg *config.KafkaConfig, orchestratorStorage *IOrchestratorStorage) (*KafkaConnector, error) { + // Initialize Kafka publisher + kafkaPublisher, err := NewKafkaPublisher(cfg) + if err != nil { + return nil, err + } + + if orchestratorStorage == nil { + return nil, fmt.Errorf("orchestrator storage must be provided for kafka connector") + } + + return &KafkaConnector{ + cfg: cfg, + kafkaPublisher: kafkaPublisher, + orchestratorStorage: *orchestratorStorage, + }, nil +} + +// InsertBlockData publishes block data to Kafka instead of storing in database +func (kr *KafkaConnector) InsertBlockData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + // Publish to Kafka + if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { + return fmt.Errorf("failed to publish block data to kafka: %w", err) + } + log.Debug(). + Int("blocks", len(data)). 
+ Msg("Published block data to Kafka") + + chainId := data[0].Block.ChainId + maxBlockNumber := data[len(data)-1].Block.Number + if err := kr.orchestratorStorage.SetLastCommittedBlockNumber(chainId, maxBlockNumber); err != nil { + return fmt.Errorf("failed to update last committed block number in orchestrator storage: %w", err) + } + + return nil +} + +// ReplaceBlockData handles reorg by publishing both old and new data to Kafka +func (kr *KafkaConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { + if len(data) == 0 { + return nil, nil + } + + oldBlocks := []common.BlockData{} + + // TODO: We need to fetch the old blocks from the primary data store + if err := kr.kafkaPublisher.PublishReorg(data, data); err != nil { + return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) + } + + // save cursor + return oldBlocks, nil +} + +func (kr *KafkaConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { + return kr.orchestratorStorage.GetLastCommittedBlockNumber(chainId) +} + +func (kr *KafkaConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { + return QueryResult[common.TokenBalance]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { + return QueryResult[common.TokenTransfer]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { + return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +// Query methods return errors as this is a write-only connector for streaming +func (kr *KafkaConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { + return QueryResult[common.Block]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for 
streaming") +} + +func (kr *KafkaConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { + return QueryResult[common.Transaction]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { + return QueryResult[common.Log]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { + return QueryResult[common.Trace]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +func (kr *KafkaConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { + return QueryResult[interface{}]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") +} + +// Close closes the underlying Kafka publisher +func (kr *KafkaConnector) Close() error { + return kr.kafkaPublisher.Close() +} diff --git a/internal/storage/kafka_publisher.go b/internal/storage/kafka_publisher.go new file mode 100644 index 0000000..72dc96f --- /dev/null +++ b/internal/storage/kafka_publisher.go @@ -0,0 +1,299 @@ +package storage + +import ( + "context" + "crypto/tls" + "encoding/json" + "fmt" + "net" + "strings" + "sync" + "time" + + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" + "github.com/twmb/franz-go/pkg/kgo" + "github.com/twmb/franz-go/pkg/sasl/plain" +) + +type KafkaPublisher struct { + client *kgo.Client + mu sync.RWMutex +} + +type MessageType string + +type PublishableData interface { + GetType() MessageType +} + +type PublishableMessagePayload struct { + Data PublishableData `json:"data"` + Type MessageType `json:"type"` + Timestamp time.Time `json:"timestamp"` +} + +type PublishableMessageBlockData struct { + common.BlockData + ChainId uint64 `json:"chain_id"` + IsDeleted int8 `json:"is_deleted"` + InsertTimestamp time.Time `json:"insert_timestamp"` +} + +type PublishableMessageRevert struct { + ChainId uint64 `json:"chain_id"` + BlockNumber uint64 `json:"block_number"` + IsDeleted int8 `json:"is_deleted"` + InsertTimestamp time.Time `json:"insert_timestamp"` +} + +func (b PublishableMessageBlockData) GetType() MessageType { + return "block_data" +} + +func (b PublishableMessageRevert) GetType() MessageType { + return "revert" +} + +// NewKafkaPublisher creates the Kafka publisher used by the storage connector +func NewKafkaPublisher(cfg *config.KafkaConfig) (*KafkaPublisher, error) { + brokers := strings.Split(cfg.Brokers, ",") + chainID := config.Cfg.RPC.ChainID + + opts := []kgo.Opt{ + kgo.SeedBrokers(brokers...), + kgo.AllowAutoTopicCreation(), + kgo.ProducerBatchCompression(kgo.ZstdCompression()), + kgo.ClientID(fmt.Sprintf("insight-indexer-kafka-storage-%s", chainID)), + kgo.TransactionalID(fmt.Sprintf("insight-producer-%s", chainID)), + kgo.MaxBufferedBytes(2 * 1024 * 1024 * 1024), // 2GB + kgo.MaxBufferedRecords(1_000_000), + kgo.ProducerBatchMaxBytes(16_000_000), + kgo.RecordPartitioner(kgo.ManualPartitioner()), + kgo.ProduceRequestTimeout(30 * time.Second), + kgo.MetadataMaxAge(60 * time.Second), + kgo.DialTimeout(10 * time.Second), + kgo.RequiredAcks(kgo.AllISRAcks()), + kgo.RequestRetries(5), + } + + if
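// Editorial note (wire-format sketch, not part of the original diff): every record built
// by this publisher wraps its payload in PublishableMessagePayload and is routed to a
// per-chain topic; for chain 1 and block 12345 the result (see createRecord below) is
// roughly:
//
//	topic: "insight.commit.blocks.1"
//	key:   "1:block_data:12345"
//	value: {"data": { ...block data... }, "type": "block_data", "timestamp": "..."}
//
// with chain_id, block_number, type, timestamp and schema_version repeated as record
// headers, and all records forced onto partition 0 via the manual partitioner.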
cfg.Username != "" && cfg.Password != "" { + opts = append(opts, kgo.SASL(plain.Auth{ + User: cfg.Username, + Pass: cfg.Password, + }.AsMechanism())) + } + + if cfg.EnableTLS { + tlsDialer := &tls.Dialer{NetDialer: &net.Dialer{Timeout: 10 * time.Second}} + opts = append(opts, kgo.Dialer(tlsDialer.DialContext)) + } + + client, err := kgo.NewClient(opts...) + if err != nil { + return nil, fmt.Errorf("failed to create Kafka client: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := client.Ping(ctx); err != nil { + client.Close() + return nil, fmt.Errorf("failed to connect to Kafka: %v", err) + } + + publisher := &KafkaPublisher{ + client: client, + } + + return publisher, nil +} + +func (p *KafkaPublisher) PublishBlockData(blockData []common.BlockData) error { + return p.publishBlockData(blockData, false) +} + +func (p *KafkaPublisher) PublishReorg(oldData []common.BlockData, newData []common.BlockData) error { + chainId := newData[0].Block.ChainId.Uint64() + newHead := newData[0].Block.Number.Uint64() + // Publish a revert down to newHead - 1 so that the updated block data can be re-processed + if err := p.publishBlockRevert(chainId, newHead-1); err != nil { + return fmt.Errorf("failed to revert: %v", err) + } + + if err := p.publishBlockData(oldData, true); err != nil { + return fmt.Errorf("failed to publish old block data: %v", err) + } + + if err := p.publishBlockData(newData, false); err != nil { + return fmt.Errorf("failed to publish new block data: %v", err) + } + return nil +} + +func (p *KafkaPublisher) Close() error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.client != nil { + p.client.Close() + log.Debug().Msg("Publisher client closed") + } + return nil +} + +func (p *KafkaPublisher) publishMessages(ctx context.Context, messages []*kgo.Record) error { + if len(messages) == 0 { + return nil + } + + // Lock for the entire transaction lifecycle to ensure thread safety + p.mu.Lock() + defer p.mu.Unlock() + + if p.client == nil { + return fmt.Errorf("no kafka client configured") + } + + // Start a new transaction + if err := p.client.BeginTransaction(); err != nil { + return fmt.Errorf("failed to begin transaction: %v", err) + } + + // Produce all messages in the transaction + for _, msg := range messages { + p.client.Produce(ctx, msg, nil) + } + + // Flush all messages + if err := p.client.Flush(ctx); err != nil { + p.client.EndTransaction(ctx, kgo.TryAbort) + return fmt.Errorf("failed to flush messages: %v", err) + } + + // Commit the transaction + if err := p.client.EndTransaction(ctx, kgo.TryCommit); err != nil { + return fmt.Errorf("failed to commit transaction: %v", err) + } + + return nil +} + +func (p *KafkaPublisher) publishBlockRevert(chainId uint64, blockNumber uint64) error { + publishStart := time.Now() + + // Prepare the single revert message for this block + blockMessages := make([]*kgo.Record, 1) + + // Block message + if blockMsg, err := p.createBlockRevertMessage(chainId, blockNumber); err == nil { + blockMessages[0] = blockMsg + } else { + return fmt.Errorf("failed to create block revert message: %v", err) + } + + if err := p.publishMessages(context.Background(), blockMessages); err != nil { + return fmt.Errorf("failed to publish block revert messages: %v", err) + } + + log.Debug().Str("metric", "publish_duration").Msgf("Publisher.publishBlockRevert duration: %f", time.Since(publishStart).Seconds()) + return nil +} + +func (p *KafkaPublisher)
publishBlockData(blockData []common.BlockData, isDeleted bool) error { + if len(blockData) == 0 { + return nil + } + + publishStart := time.Now() + + // Prepare messages for blocks, events, transactions and traces + blockMessages := make([]*kgo.Record, len(blockData)) + + for i, data := range blockData { + // Block message + if blockMsg, err := p.createBlockDataMessage(data, isDeleted); err == nil { + blockMessages[i] = blockMsg + } else { + return fmt.Errorf("failed to create block message: %v", err) + } + } + + if err := p.publishMessages(context.Background(), blockMessages); err != nil { + return fmt.Errorf("failed to publish block messages: %v", err) + } + + log.Debug().Str("metric", "publish_duration").Msgf("Publisher.PublishBlockData duration: %f", time.Since(publishStart).Seconds()) + return nil +} + +func (p *KafkaPublisher) createBlockDataMessage(block common.BlockData, isDeleted bool) (*kgo.Record, error) { + timestamp := time.Now() + + data := PublishableMessageBlockData{ + BlockData: block, + ChainId: block.Block.ChainId.Uint64(), + IsDeleted: 0, + InsertTimestamp: timestamp, + } + if isDeleted { + data.IsDeleted = 1 + } + + msg := PublishableMessagePayload{ + Data: data, + Type: data.GetType(), + Timestamp: timestamp, + } + + msgJson, err := json.Marshal(msg) + if err != nil { + return nil, fmt.Errorf("failed to marshal block data: %v", err) + } + + return p.createRecord(data.GetType(), data.ChainId, block.Block.Number.Uint64(), timestamp, msgJson) +} + +func (p *KafkaPublisher) createBlockRevertMessage(chainId uint64, blockNumber uint64) (*kgo.Record, error) { + timestamp := time.Now() + + data := PublishableMessageRevert{ + ChainId: chainId, + BlockNumber: blockNumber, + IsDeleted: 0, + InsertTimestamp: timestamp, + } + + msg := PublishableMessagePayload{ + Data: data, + Type: data.GetType(), + Timestamp: timestamp, + } + + msgJson, err := json.Marshal(msg) + if err != nil { + return nil, fmt.Errorf("failed to marshal block data: %v", err) + } + + return p.createRecord(data.GetType(), chainId, blockNumber, timestamp, msgJson) +} + +func (p *KafkaPublisher) createRecord(msgType MessageType, chainId uint64, blockNumber uint64, timestamp time.Time, msgJson []byte) (*kgo.Record, error) { + // Create headers with metadata + headers := []kgo.RecordHeader{ + {Key: "chain_id", Value: []byte(fmt.Sprintf("%d", chainId))}, + {Key: "block_number", Value: []byte(fmt.Sprintf("%d", blockNumber))}, + {Key: "type", Value: []byte(fmt.Sprintf("%s", msgType))}, + {Key: "timestamp", Value: []byte(timestamp.Format(time.RFC3339Nano))}, + {Key: "schema_version", Value: []byte("1")}, + } + + return &kgo.Record{ + Topic: fmt.Sprintf("insight.commit.blocks.%d", chainId), + Key: []byte(fmt.Sprintf("%d:%s:%d", chainId, msgType, blockNumber)), + Value: msgJson, + Headers: headers, + Partition: 0, + }, nil +} diff --git a/internal/storage/postgres.go b/internal/storage/postgres.go index 1476c44..fb0748d 100644 --- a/internal/storage/postgres.go +++ b/internal/storage/postgres.go @@ -388,6 +388,35 @@ func (p *PostgresConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockN return err } +func (p *PostgresConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'commit' AND chain_id = $1` + + var blockNumberString string + err := p.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) + if err != nil { + if err == sql.ErrNoRows { + return big.NewInt(0), nil + } + return nil, err + } + + blockNumber, ok := 
new(big.Int).SetString(blockNumberString, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) + } + return blockNumber, nil +} + +func (p *PostgresConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) + VALUES ($1, 'commit', $2) + ON CONFLICT (chain_id, cursor_type) + DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` + + _, err := p.db.Exec(query, chainId.String(), blockNumber.String()) + return err +} + func (p *PostgresConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStart *big.Int, rangeEnd *big.Int) (*big.Int, error) { query := `SELECT MAX(block_number) FROM block_data WHERE 1=1` @@ -431,7 +460,7 @@ func (p *PostgresConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeStar return blockNumber, nil } -func (p *PostgresConnector) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { +func (p *PostgresConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { query := `DELETE FROM block_data WHERE ctid IN ( SELECT ctid diff --git a/internal/storage/redis.go b/internal/storage/redis.go new file mode 100644 index 0000000..bb71810 --- /dev/null +++ b/internal/storage/redis.go @@ -0,0 +1,134 @@ +package storage + +import ( + "context" + "crypto/tls" + "fmt" + "math/big" + "time" + + "github.com/redis/go-redis/v9" + config "github.com/thirdweb-dev/indexer/configs" +) + +// Redis key namespace constants for better organization and maintainability +const ( + // Cursor keys for tracking positions + KeyCursorReorg = "cursor:reorg" // String: cursor:reorg:{chainId} + KeyCursorPublish = "cursor:publish" // String: cursor:publish:{chainId} + KeyCursorCommit = "cursor:commit" // String: cursor:commit:{chainId} +) + +// RedisConnector uses Redis for metadata storage +type RedisConnector struct { + redisClient *redis.Client + cfg *config.RedisConfig +} + +func NewRedisConnector(cfg *config.RedisConfig) (*RedisConnector, error) { + // Connect to Redis + var tlsConfig *tls.Config + if cfg.EnableTLS { + tlsConfig = &tls.Config{ + MinVersion: tls.VersionTLS12, // Ensure a secure TLS version + } + } + + redisClient := redis.NewClient(&redis.Options{ + Addr: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port), + Password: cfg.Password, + DB: cfg.DB, + TLSConfig: tlsConfig, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := redisClient.Ping(ctx).Err(); err != nil { + return nil, fmt.Errorf("failed to connect to redis: %w", err) + } + + return &RedisConnector{ + redisClient: redisClient, + cfg: cfg, + }, nil +} + +// Orchestrator Storage Implementation +func (kr *RedisConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + + return blockNumber, nil +} + +func (kr *RedisConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +func (kr 
*RedisConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *RedisConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +func (kr *RedisConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + + val, err := kr.redisClient.Get(ctx, key).Result() + if err == redis.Nil { + return big.NewInt(0), nil + } else if err != nil { + return nil, err + } + + blockNumber, ok := new(big.Int).SetString(val, 10) + if !ok { + return nil, fmt.Errorf("failed to parse block number: %s", val) + } + return blockNumber, nil +} + +func (kr *RedisConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ctx := context.Background() + key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) + return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() +} + +// Close closes the Redis connection +func (kr *RedisConnector) Close() error { + return kr.redisClient.Close() +} diff --git a/internal/storage/s3.go b/internal/storage/s3.go new file mode 100644 index 0000000..2e37aa6 --- /dev/null +++ b/internal/storage/s3.go @@ -0,0 +1,1172 @@ +package storage + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "math/big" + "sort" + "strings" + "sync" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + awsconfig "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/parquet-go/parquet-go" + "github.com/rs/zerolog/log" + config "github.com/thirdweb-dev/indexer/configs" + "github.com/thirdweb-dev/indexer/internal/common" +) + +type S3Connector struct { + client *s3.Client + config *config.S3StorageConfig + formatter DataFormatter + buffer IBlockBuffer + + // Flush control + stopCh chan struct{} + flushCh chan struct{} + flushDoneCh chan struct{} // Signals when flush is complete + flushTimer *time.Timer + timerMu sync.Mutex + lastAddTime time.Time + wg sync.WaitGroup + closeOnce sync.Once +} + +// DataFormatter interface for different file formats +type DataFormatter interface { + FormatBlockData(data []common.BlockData) ([]byte, error) + GetFileExtension() string + GetContentType() string +} + +// ParquetBlockData represents the complete block data in Parquet format +type ParquetBlockData struct { + ChainId uint64 `parquet:"chain_id"` + BlockNumber uint64 `parquet:"block_number"` // Numeric for efficient min/max queries + BlockHash string `parquet:"block_hash"` + BlockTimestamp int64 `parquet:"block_timestamp"` + Block []byte `parquet:"block_json"` + Transactions []byte `parquet:"transactions_json"` + Logs []byte `parquet:"logs_json"` + Traces []byte `parquet:"traces_json"` +} + +func NewS3Connector(cfg *config.S3StorageConfig) (*S3Connector, error) { + awsCfg, err := awsconfig.LoadDefaultConfig(context.Background(), + 
awsconfig.WithRegion(cfg.Region), + ) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + // Override with explicit credentials if provided + if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { + awsCfg.Credentials = aws.CredentialsProviderFunc(func(ctx context.Context) (aws.Credentials, error) { + return aws.Credentials{ + AccessKeyID: cfg.AccessKeyID, + SecretAccessKey: cfg.SecretAccessKey, + }, nil + }) + } + + s3Client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { + if cfg.Endpoint != "" { + o.BaseEndpoint = aws.String(cfg.Endpoint) + } + }) + + // Set defaults + if cfg.Format == "" { + cfg.Format = "parquet" + } + + // Initialize parquet config with defaults if using parquet + if cfg.Format == "parquet" && cfg.Parquet == nil { + cfg.Parquet = &config.ParquetConfig{ + Compression: "snappy", + RowGroupSize: 256, // MB + PageSize: 8192, // KB + } + } + + // Set buffer defaults + if cfg.BufferSize == 0 { + cfg.BufferSize = 1024 // 1GB default + } + if cfg.BufferTimeout == 0 { + cfg.BufferTimeout = 300 // 5 minutes default + } + + // Create formatter based on format + var formatter DataFormatter + switch cfg.Format { + case "parquet": + formatter = &ParquetFormatter{config: cfg.Parquet} + default: + return nil, fmt.Errorf("unsupported format: %s", cfg.Format) + } + + // Create buffer with configured settings + var buffer IBlockBuffer + buffer, err = NewBadgerBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) + if err != nil { + // fallback + log.Error().Err(err).Msg("Failed to create Badger buffer, falling back to in-memory buffer") + buffer = NewBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) + } + + s3c := &S3Connector{ + client: s3Client, + config: cfg, + formatter: formatter, + buffer: buffer, + stopCh: make(chan struct{}), + flushCh: make(chan struct{}, 1), + flushDoneCh: make(chan struct{}), + } + + // Start background flush worker + s3c.wg.Add(1) + go s3c.flushWorker() + + return s3c, nil +} + +func (s *S3Connector) InsertBlockData(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + // Add to buffer and check if flush is needed + shouldFlush := s.buffer.Add(data) + + // Start or reset timer when first data is added + s.timerMu.Lock() + _, blockCount := s.buffer.Size() + // Check if this is the first batch added (buffer was empty before) + if blockCount == len(data) && s.config.BufferTimeout > 0 { + // First data added to buffer, track time and start timer + s.lastAddTime = time.Now() + if s.flushTimer != nil { + s.flushTimer.Stop() + } + s.flushTimer = time.AfterFunc(time.Duration(s.config.BufferTimeout)*time.Second, func() { + select { + case s.flushCh <- struct{}{}: + default: + } + }) + } + s.timerMu.Unlock() + + if shouldFlush { + // Stop timer and trigger flush + s.stopFlushTimer() + select { + case s.flushCh <- struct{}{}: + default: + } + } + + return nil +} + +// flushWorker runs in background and handles buffer flushes +func (s *S3Connector) flushWorker() { + defer s.wg.Done() + + // Check periodically for expired buffers + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + + for { + select { + case <-s.stopCh: + // Final flush before stopping + s.flushBuffer() + return + case <-s.flushCh: + s.flushBuffer() + // Signal flush completion + select { + case s.flushDoneCh <- struct{}{}: + default: + } + case <-ticker.C: + // Check if buffer has expired based on our own tracking + if s.isBufferExpired() { + s.flushBuffer() + } + } + } +} + +// stopFlushTimer stops the flush timer if it's running 
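// Editorial note (not part of the original diff): BufferTimeout is interpreted in
// seconds (default 300, i.e. 5 minutes) and BufferSize in MB (default 1024). The timer
// set in InsertBlockData is armed when the first batch lands in an empty buffer, so a
// quiet buffer is flushed roughly BufferTimeout seconds after that first Add, with the
// 10-second ticker in flushWorker acting as a backstop via isBufferExpired.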
+func (s *S3Connector) stopFlushTimer() { + s.timerMu.Lock() + defer s.timerMu.Unlock() + + if s.flushTimer != nil { + s.flushTimer.Stop() + s.flushTimer = nil + } +} + +// isBufferExpired checks if the buffer has exceeded the timeout duration +func (s *S3Connector) isBufferExpired() bool { + s.timerMu.Lock() + defer s.timerMu.Unlock() + + if s.config.BufferTimeout <= 0 || s.lastAddTime.IsZero() || s.buffer.IsEmpty() { + return false + } + + return time.Since(s.lastAddTime) > time.Duration(s.config.BufferTimeout)*time.Second +} + +// flushBuffer writes buffered data to S3 +func (s *S3Connector) flushBuffer() error { + data := s.buffer.Flush() + if len(data) == 0 { + return nil + } + + // Stop timer and reset last add time since we're flushing + s.stopFlushTimer() + s.timerMu.Lock() + s.lastAddTime = time.Time{} + s.timerMu.Unlock() + + return s.uploadBatchData(data) +} + +// uploadBatchData handles uploading batched data to S3, grouped by chain +func (s *S3Connector) uploadBatchData(data []common.BlockData) error { + // Group blocks by chain to generate appropriate keys + chainGroups := make(map[uint64][]common.BlockData) + for _, block := range data { + chainId := block.Block.ChainId.Uint64() + chainGroups[chainId] = append(chainGroups[chainId], block) + } + + for _, blocks := range chainGroups { + // Sort blocks by number + sort.Slice(blocks, func(i, j int) bool { + return blocks[i].Block.Number.Cmp(blocks[j].Block.Number) < 0 + }) + + // Process in chunks if MaxBlocksPerFile is set, otherwise upload all at once + if s.config.MaxBlocksPerFile > 0 { + // Split into chunks based on MaxBlocksPerFile + for i := 0; i < len(blocks); i += s.config.MaxBlocksPerFile { + end := i + s.config.MaxBlocksPerFile + if end > len(blocks) { + end = len(blocks) + } + + chunk := blocks[i:end] + if err := s.uploadBatch(chunk); err != nil { + log.Error().Err(err).Msg("Failed to upload batch to S3") + return err + } + } + } else { + // No block limit, upload entire buffer as one file + if err := s.uploadBatch(blocks); err != nil { + log.Error().Err(err).Msg("Failed to upload batch to S3") + return err + } + } + } + + return nil +} + +// Flush manually triggers a buffer flush and waits for completion +func (s *S3Connector) Flush() error { + // Check if buffer has data + if s.buffer.IsEmpty() { + return nil + } + + // Clear any pending flush completion signals + select { + case <-s.flushDoneCh: + default: + } + + // Trigger flush + select { + case s.flushCh <- struct{}{}: + // Wait for flush to complete + select { + case <-s.flushDoneCh: + return nil + case <-time.After(60 * time.Second): + return fmt.Errorf("flush timeout after 60 seconds") + } + default: + // Flush channel is full, likely a flush is already in progress + // Wait for it to complete + select { + case <-s.flushDoneCh: + return nil + case <-time.After(60 * time.Second): + return fmt.Errorf("flush timeout after 60 seconds") + } + } +} + +// Close closes the S3 connector and flushes any remaining data +func (s *S3Connector) Close() error { + var closeErr error + + s.closeOnce.Do(func() { + // Stop the flush timer + s.stopFlushTimer() + + // First, ensure any pending data is flushed + if err := s.Flush(); err != nil { + log.Error().Err(err).Msg("Error flushing buffer during close") + closeErr = err + } + + // Signal stop + close(s.stopCh) + + // Wait for worker to finish + s.wg.Wait() + + // Clean up buffer resources + if err := s.buffer.Close(); err != nil { + log.Error().Err(err).Msg("Error closing buffer") + if closeErr == nil { + closeErr = err + 
} + } + }) + + return closeErr +} + +func (s *S3Connector) uploadBatch(data []common.BlockData) error { + if len(data) == 0 { + return nil + } + + chainId := data[0].Block.ChainId.Uint64() + startBlock := data[0].Block.Number + endBlock := data[len(data)-1].Block.Number + // Use the first block's timestamp for year partitioning + blockTimestamp := data[0].Block.Timestamp + + // Format data using the configured formatter + formattedData, err := s.formatter.FormatBlockData(data) + if err != nil { + return fmt.Errorf("failed to format block data: %w", err) + } + + // Generate S3 key with chain_id/year partitioning based on block timestamp + key := s.generateS3Key(chainId, startBlock, endBlock, blockTimestamp) + + // Upload to S3 + ctx := context.Background() + _, err = s.client.PutObject(ctx, &s3.PutObjectInput{ + Bucket: aws.String(s.config.Bucket), + Key: aws.String(key), + Body: bytes.NewReader(formattedData), + ContentType: aws.String(s.formatter.GetContentType()), + Metadata: map[string]string{ + "chain_id": fmt.Sprintf("%d", chainId), + "start_block": startBlock.String(), + "end_block": endBlock.String(), + "block_count": fmt.Sprintf("%d", len(data)), + "timestamp": blockTimestamp.Format(time.RFC3339), + "checksum": s.calculateChecksum(formattedData), + "file_size": fmt.Sprintf("%d", len(formattedData)), + }, + }) + + if err != nil { + return fmt.Errorf("failed to upload to S3: %w", err) + } + + log.Info(). + Uint64("chain_id", chainId). + Str("min_block", startBlock.String()). + Str("max_block", endBlock.String()). + Int("block_count", len(data)). + Int("file_size_mb", len(formattedData)/(1024*1024)). + Str("s3_key", key). + Msg("Successfully uploaded buffered blocks to S3") + + return nil +} + +func (s *S3Connector) generateS3Key(chainID uint64, startBlock, endBlock *big.Int, blockTimestamp time.Time) string { + // Use the block's timestamp for year partitioning + year := blockTimestamp.Year() + if len(s.config.Prefix) > 0 { + return fmt.Sprintf("%s/chain_%d/year=%d/blocks_%s_%s%s", + s.config.Prefix, + chainID, + year, + startBlock.String(), + endBlock.String(), + s.formatter.GetFileExtension(), + ) + } + return fmt.Sprintf("chain_%d/year=%d/blocks_%s_%s%s", + chainID, + year, + startBlock.String(), + endBlock.String(), + s.formatter.GetFileExtension(), + ) +} + +// ParquetFormatter implements DataFormatter for Parquet format +type ParquetFormatter struct { + config *config.ParquetConfig +} + +func (f *ParquetFormatter) FormatBlockData(data []common.BlockData) ([]byte, error) { + var parquetData []ParquetBlockData + + for _, d := range data { + // Serialize each component to JSON + blockJSON, err := json.Marshal(d.Block) + if err != nil { + return nil, fmt.Errorf("failed to marshal block: %w", err) + } + + // Default transactions to empty array if nil + var txJSON []byte + if d.Transactions == nil { + txJSON, err = json.Marshal([]common.Transaction{}) + } else { + txJSON, err = json.Marshal(d.Transactions) + } + if err != nil { + return nil, fmt.Errorf("failed to marshal transactions: %w", err) + } + + // Default logs to empty array if nil + var logsJSON []byte + if d.Logs == nil { + logsJSON, err = json.Marshal([]common.Log{}) + } else { + logsJSON, err = json.Marshal(d.Logs) + } + if err != nil { + return nil, fmt.Errorf("failed to marshal logs: %w", err) + } + + // Default traces to empty array if nil + var tracesJSON []byte + if d.Traces == nil { + tracesJSON, err = json.Marshal([]common.Trace{}) + } else { + tracesJSON, err = json.Marshal(d.Traces) + } + if err != nil { + return 
nil, fmt.Errorf("failed to marshal traces: %w", err) + } + + // Convert block number to uint64 for efficient queries + blockNum := d.Block.Number.Uint64() + if d.Block.Number.BitLen() > 64 { + return nil, fmt.Errorf("block number exceeds uint64 is not supported") + } + + pd := ParquetBlockData{ + ChainId: d.Block.ChainId.Uint64(), + BlockNumber: blockNum, + BlockHash: d.Block.Hash, + BlockTimestamp: d.Block.Timestamp.Unix(), + Block: blockJSON, + Transactions: txJSON, + Logs: logsJSON, + Traces: tracesJSON, + } + + parquetData = append(parquetData, pd) + } + + var buf bytes.Buffer + + // Configure writer with compression and statistics for efficient queries + writerOptions := []parquet.WriterOption{ + f.getCompressionCodec(), + // Enable page statistics for query optimization (min/max per page) + parquet.DataPageStatistics(true), + // Set page buffer size for better statistics granularity + parquet.PageBufferSize(8 * 1024 * 1024), // 8MB pages + // Configure sorting for optimal query performance + // Sort by block_number first, then block_timestamp for efficient range queries + parquet.SortingWriterConfig( + parquet.SortingColumns( + parquet.Ascending("block_number"), + parquet.Ascending("block_timestamp"), + ), + ), + // Set column index size limit (enables column indexes for all columns) + parquet.ColumnIndexSizeLimit(16 * 1024), // 16KB limit for column index + } + + writer := parquet.NewGenericWriter[ParquetBlockData](&buf, writerOptions...) + + // Write all data at once for better compression and statistics + if _, err := writer.Write(parquetData); err != nil { + return nil, fmt.Errorf("failed to write parquet data: %w", err) + } + + if err := writer.Close(); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func (f *ParquetFormatter) GetFileExtension() string { + return ".parquet" +} + +func (f *ParquetFormatter) GetContentType() string { + return "application/octet-stream" +} + +func (f *ParquetFormatter) getCompressionCodec() parquet.WriterOption { + switch f.config.Compression { + case "gzip": + return parquet.Compression(&parquet.Gzip) + case "zstd": + return parquet.Compression(&parquet.Zstd) + default: + return parquet.Compression(&parquet.Snappy) + } +} + +func (s *S3Connector) calculateChecksum(data []byte) string { + hash := sha256.Sum256(data) + return hex.EncodeToString(hash[:]) +} + +// Implement remaining IMainStorage methods with empty implementations +// These will return errors indicating they're not supported + +func (s *S3Connector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { + return nil, fmt.Errorf("ReplaceBlockData not supported by S3 connector") +} + +func (s *S3Connector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { + return QueryResult[common.Block]{}, fmt.Errorf("GetBlocks not supported by S3 connector - use Athena or similar") +} + +func (s *S3Connector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { + return QueryResult[common.Transaction]{}, fmt.Errorf("GetTransactions not supported by S3 connector - use Athena or similar") +} + +func (s *S3Connector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { + return QueryResult[common.Log]{}, fmt.Errorf("GetLogs not supported by S3 connector - use Athena or similar") +} + +func (s *S3Connector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { + return QueryResult[common.Trace]{}, fmt.Errorf("GetTraces not supported by S3 
connector") +} + +func (s *S3Connector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { + return QueryResult[interface{}]{}, fmt.Errorf("GetAggregations not supported by S3 connector") +} + +func (s *S3Connector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { + return QueryResult[common.TokenBalance]{}, fmt.Errorf("GetTokenBalances not supported by S3 connector") +} + +func (s *S3Connector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { + return QueryResult[common.TokenTransfer]{}, fmt.Errorf("GetTokenTransfers not supported by S3 connector") +} + +func (s *S3Connector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { + // First check the buffer for blocks from this chain + maxBlock := s.buffer.GetMaxBlockNumber(chainId) + if maxBlock == nil { + maxBlock = big.NewInt(0) + } + + // Then check S3 for the maximum block number + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list S3 objects: %w", err) + } + + for _, obj := range page.Contents { + // Extract block range from filename: blocks_{start}_{end}.parquet + if obj.Key == nil { + continue + } + _, endBlock := s.extractBlockRangeFromKey(*obj.Key) + if endBlock != nil && endBlock.Cmp(maxBlock) > 0 { + maxBlock = endBlock + } + } + } + + return maxBlock, nil +} + +func (s *S3Connector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + maxBlock := big.NewInt(0) + foundAny := false + + // First check the buffer for blocks in this range + bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) + for _, block := range bufferBlocks { + blockNum := block.Block.Number + if !foundAny || blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) + foundAny = true + } + } + + // Then check S3 files + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil { + continue + } + fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) + if fileStart == nil || fileEnd == nil { + continue + } + + // Check if this file overlaps with our range + if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { + // The maximum block in this file that's within our range + maxInFile := new(big.Int).Set(fileEnd) + if maxInFile.Cmp(endBlock) > 0 { + maxInFile = endBlock + } + + if !foundAny || maxInFile.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(maxInFile) + foundAny = true + } + } + } + } + + if !foundAny { + return big.NewInt(0), nil + } + + return maxBlock, nil +} + +func (s *S3Connector) GetBlockCount(chainId *big.Int, startBlock *big.Int, 
endBlock *big.Int) (*big.Int, error) { + minBlock := big.NewInt(0) + maxBlock := big.NewInt(0) + count := big.NewInt(0) + foundAny := false + + // First check the buffer for blocks in this range + bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) + for _, block := range bufferBlocks { + blockNum := block.Block.Number + count.Add(count, big.NewInt(1)) + + if !foundAny { + minBlock = new(big.Int).Set(blockNum) + maxBlock = new(big.Int).Set(blockNum) + foundAny = true + } else { + if blockNum.Cmp(minBlock) < 0 { + minBlock = new(big.Int).Set(blockNum) + } + if blockNum.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(blockNum) + } + } + } + + // Then check S3 files + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil { + continue + } + fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) + if fileStart == nil || fileEnd == nil { + continue + } + + // Check if this file overlaps with our range + if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { + // Calculate the effective range within our query bounds + effectiveStart := new(big.Int).Set(fileStart) + if effectiveStart.Cmp(startBlock) < 0 { + effectiveStart = startBlock + } + effectiveEnd := new(big.Int).Set(fileEnd) + if effectiveEnd.Cmp(endBlock) > 0 { + effectiveEnd = endBlock + } + + // Update min/max blocks + if !foundAny { + minBlock = new(big.Int).Set(effectiveStart) + maxBlock = new(big.Int).Set(effectiveEnd) + foundAny = true + } else { + if effectiveStart.Cmp(minBlock) < 0 { + minBlock = new(big.Int).Set(effectiveStart) + } + if effectiveEnd.Cmp(maxBlock) > 0 { + maxBlock = new(big.Int).Set(effectiveEnd) + } + } + + // Add the count of blocks in this file's overlapping range + // Note: This assumes contiguous blocks in the file + blocksInRange := new(big.Int).Sub(effectiveEnd, effectiveStart) + blocksInRange.Add(blocksInRange, big.NewInt(1)) // Add 1 because range is inclusive + count.Add(count, blocksInRange) + } + } + } + + return count, nil +} + +func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { + var headers []common.BlockHeader + + // First get headers from buffer + bufferData := s.buffer.GetData() + for _, block := range bufferData { + if block.Block.ChainId.Cmp(chainId) == 0 { + // Check if block is in range (if from is specified) + if from != nil && block.Block.Number.Cmp(from) > 0 { + continue + } + // Apply limit if specified + if to != nil && len(headers) >= int(to.Int64()) { + break + } + headers = append(headers, common.BlockHeader{ + Number: block.Block.Number, + Hash: block.Block.Hash, + ParentHash: block.Block.ParentHash, + }) + } + } + + // If we need more headers, get from S3 + if to == nil || len(headers) < int(to.Int64()) { + // Download relevant parquet files and extract block headers + files, err := s.findFilesInRange(chainId, big.NewInt(0), from) // from 0 to 'from' block + if err != nil { + return nil, err + } + + for _, file := range files { + fileHeaders, err := s.extractBlockHeadersFromFile(file, chainId, 
from, to) + if err != nil { + log.Warn().Err(err).Str("file", file).Msg("Failed to extract headers from file") + continue + } + headers = append(headers, fileHeaders...) + } + } + + // Sort in descending order + sort.Slice(headers, func(i, j int) bool { + return headers[i].Number.Cmp(headers[j].Number) > 0 + }) + + // Apply limit if specified + if to != nil && len(headers) > int(to.Int64()) { + headers = headers[:to.Int64()] + } + + return headers, nil +} + +func (s *S3Connector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { + if startBlock == nil || endBlock == nil { + return nil, fmt.Errorf("start block and end block must not be nil") + } + + if startBlock.Cmp(endBlock) > 0 { + return nil, fmt.Errorf("start block must be less than or equal to end block") + } + + // First check buffer for blocks in range + blockData := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) + + // Then find and download relevant files from S3 + files, err := s.findFilesInRange(chainId, startBlock, endBlock) + if err != nil { + return nil, err + } + + for _, file := range files { + fileData, err := s.downloadAndParseFile(file, chainId, startBlock, endBlock) + if err != nil { + log.Warn().Err(err).Str("file", file).Msg("Failed to parse file") + continue + } + blockData = append(blockData, fileData...) + } + + // Sort by block number + sort.Slice(blockData, func(i, j int) bool { + return blockData[i].Block.Number.Cmp(blockData[j].Block.Number) < 0 + }) + + return blockData, nil +} + +func (s *S3Connector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { + // Build a set of all block numbers we have + blockSet := make(map[string]bool) + + // First add blocks from buffer + bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) + for _, block := range bufferBlocks { + blockSet[block.Block.Number.String()] = true + } + + // Then check S3 files in range + files, err := s.findFilesInRange(chainId, startBlock, endBlock) + if err != nil { + return nil, err + } + + for _, file := range files { + fileStart, fileEnd := s.extractBlockRangeFromKey(file) + if fileStart == nil || fileEnd == nil { + continue + } + + // Add all blocks in this file's range to our set + for i := new(big.Int).Set(fileStart); i.Cmp(fileEnd) <= 0; i.Add(i, big.NewInt(1)) { + if i.Cmp(startBlock) >= 0 && i.Cmp(endBlock) <= 0 { + blockSet[i.String()] = true + } + } + } + + // Find missing blocks + var missing []*big.Int + for i := new(big.Int).Set(startBlock); i.Cmp(endBlock) <= 0; i.Add(i, big.NewInt(1)) { + if !blockSet[i.String()] { + missing = append(missing, new(big.Int).Set(i)) + } + } + + return missing, nil +} + +func (s *S3Connector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { + if len(blockNumbers) == 0 { + return nil, nil + } + + // Create a map for quick lookup + blockNumMap := make(map[string]bool) + for _, bn := range blockNumbers { + blockNumMap[bn.String()] = true + } + + var result []common.BlockData + + // First check buffer for requested blocks + bufferData := s.buffer.GetData() + for _, block := range bufferData { + if block.Block.ChainId.Cmp(chainId) == 0 { + if blockNumMap[block.Block.Number.String()] { + result = append(result, block) + // Remove from map so we don't fetch it from S3 + delete(blockNumMap, block.Block.Number.String()) + } + } + } + + // If all blocks were in buffer, return early + if len(blockNumMap) == 0 { + return result, nil 
+ } + + // Sort remaining block numbers to optimize file access + var remainingBlocks []*big.Int + for blockStr := range blockNumMap { + bn, _ := new(big.Int).SetString(blockStr, 10) + remainingBlocks = append(remainingBlocks, bn) + } + sort.Slice(remainingBlocks, func(i, j int) bool { + return remainingBlocks[i].Cmp(remainingBlocks[j]) < 0 + }) + + if len(remainingBlocks) == 0 { + return result, nil + } + + minBlock := remainingBlocks[0] + maxBlock := remainingBlocks[len(remainingBlocks)-1] + + // Find relevant files for remaining blocks + files, err := s.findFilesInRange(chainId, minBlock, maxBlock) + if err != nil { + return nil, err + } + + for _, file := range files { + fileData, err := s.downloadAndParseFile(file, chainId, minBlock, maxBlock) + if err != nil { + log.Warn().Err(err).Str("file", file).Msg("Failed to parse file") + continue + } + + // Filter to only requested blocks + for _, bd := range fileData { + if blockNumMap[bd.Block.Number.String()] { + result = append(result, bd) + } + } + } + + return result, nil +} + +// Helper functions + +func (s *S3Connector) extractBlockRangeFromKey(key string) (*big.Int, *big.Int) { + // Extract block range from key like: chain_1/year=2024/blocks_1000_2000.parquet + parts := strings.Split(key, "/") + if len(parts) == 0 { + return nil, nil + } + + filename := parts[len(parts)-1] + if !strings.HasPrefix(filename, "blocks_") || !strings.HasSuffix(filename, s.formatter.GetFileExtension()) { + return nil, nil + } + + // Remove prefix and extension + rangeStr := strings.TrimPrefix(filename, "blocks_") + rangeStr = strings.TrimSuffix(rangeStr, s.formatter.GetFileExtension()) + + // Split by underscore to get start and end + rangeParts := strings.Split(rangeStr, "_") + if len(rangeParts) != 2 { + return nil, nil + } + + startBlock, ok1 := new(big.Int).SetString(rangeParts[0], 10) + endBlock, ok2 := new(big.Int).SetString(rangeParts[1], 10) + if !ok1 || !ok2 { + return nil, nil + } + + return startBlock, endBlock +} + +func (s *S3Connector) findFilesInRange(chainId *big.Int, startBlock, endBlock *big.Int) ([]string, error) { + prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) + if s.config.Prefix != "" { + prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) + } + + ctx := context.Background() + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(s.config.Bucket), + Prefix: aws.String(prefix), + }) + + var relevantFiles []string + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list objects: %w", err) + } + + for _, obj := range page.Contents { + if obj.Key == nil { + continue + } + + fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) + if fileStart == nil || fileEnd == nil { + continue + } + + // Check if this file's range overlaps with our query range + if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { + relevantFiles = append(relevantFiles, *obj.Key) + } + } + } + + return relevantFiles, nil +} + +func (s *S3Connector) downloadAndParseFile(key string, chainId *big.Int, startBlock, endBlock *big.Int) ([]common.BlockData, error) { + ctx := context.Background() + + // Download the file + result, err := s.client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(s.config.Bucket), + Key: aws.String(key), + }) + if err != nil { + return nil, fmt.Errorf("failed to download file: %w", err) + } + defer result.Body.Close() + + // Read entire file into memory (required for parquet reader) + data, err := 
io.ReadAll(result.Body) + if err != nil { + return nil, fmt.Errorf("failed to read file data: %w", err) + } + + // Read the parquet file + reader := parquet.NewGenericReader[ParquetBlockData](bytes.NewReader(data)) + defer reader.Close() + + var blockData []common.BlockData + parquetRows := make([]ParquetBlockData, 100) // Read in batches + + for { + n, err := reader.Read(parquetRows) + if err != nil && err.Error() != "EOF" { + return nil, fmt.Errorf("failed to read parquet: %w", err) + } + if n == 0 { + break + } + + for i := 0; i < n; i++ { + pd := parquetRows[i] + + // Convert uint64 block number to big.Int + blockNum := new(big.Int).SetUint64(pd.BlockNumber) + + // Filter by range if specified + if startBlock != nil && blockNum.Cmp(startBlock) < 0 { + continue + } + if endBlock != nil && blockNum.Cmp(endBlock) > 0 { + continue + } + + // Unmarshal JSON data + var block common.Block + if err := json.Unmarshal(pd.Block, &block); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal block") + continue + } + + var transactions []common.Transaction + if len(pd.Transactions) > 0 { + if err := json.Unmarshal(pd.Transactions, &transactions); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal transactions") + } + } + + var logs []common.Log + if len(pd.Logs) > 0 { + if err := json.Unmarshal(pd.Logs, &logs); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal logs") + } + } + + var traces []common.Trace + if len(pd.Traces) > 0 { + if err := json.Unmarshal(pd.Traces, &traces); err != nil { + log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal traces") + } + } + + blockData = append(blockData, common.BlockData{ + Block: block, + Transactions: transactions, + Logs: logs, + Traces: traces, + }) + } + } + + return blockData, nil +} + +func (s *S3Connector) extractBlockHeadersFromFile(key string, chainId *big.Int, from, to *big.Int) ([]common.BlockHeader, error) { + // Download and parse only the block headers + blockData, err := s.downloadAndParseFile(key, chainId, from, to) + if err != nil { + return nil, err + } + + headers := make([]common.BlockHeader, 0, len(blockData)) + for _, bd := range blockData { + headers = append(headers, common.BlockHeader{ + Number: bd.Block.Number, + Hash: bd.Block.Hash, + ParentHash: bd.Block.ParentHash, + }) + } + + return headers, nil +} diff --git a/internal/tools/clickhouse/0004_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql similarity index 54% rename from internal/tools/clickhouse/0004_clickhouse_create_blocks_table.sql rename to internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql index 68bee0e..1bab7b8 100644 --- a/internal/tools/clickhouse/0004_clickhouse_create_blocks_table.sql +++ b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql @@ -21,10 +21,29 @@ CREATE TABLE IF NOT EXISTS blocks ( `gas_used` UInt256, `withdrawals_root` FixedString(66), `base_fee_per_gas` Nullable(UInt64), + `insert_timestamp` DateTime DEFAULT now(), - `sign` Int8 DEFAULT 1, - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, - INDEX idx_hash hash TYPE bloom_filter GRANULARITY 3, -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) + `is_deleted` UInt8 DEFAULT 0, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, + + PROJECTION chain_state_projection + ( 
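+        -- Pre-aggregates per-chain statistics (row count, distinct block count, min/max block number and timestamp) so chain-state lookups avoid scanning the full table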
+ SELECT + chain_id, + count() AS count, + uniqExact(block_number) AS unique_block_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY + chain_id + ) + + +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) ORDER BY (chain_id, block_number) -PARTITION BY chain_id; \ No newline at end of file +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0001_clickhouse_create_block_failures_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_block_failures_table.sql deleted file mode 100644 index 669842d..0000000 --- a/internal/tools/clickhouse/0001_clickhouse_create_block_failures_table.sql +++ /dev/null @@ -1,12 +0,0 @@ -CREATE TABLE IF NOT EXISTS block_failures ( - `chain_id` UInt256, - `block_number` UInt256, - `last_error_timestamp` UInt64 CODEC(Delta, ZSTD), - `count` UInt16, - `reason` String, - `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - INDEX idx_block_number block_number TYPE minmax GRANULARITY 1, -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, block_number) -SETTINGS allow_experimental_replacing_merge_with_cleanup = 1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql new file mode 100644 index 0000000..562f339 --- /dev/null +++ b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql @@ -0,0 +1,98 @@ +CREATE TABLE IF NOT EXISTS transactions ( + `chain_id` UInt256, + `hash` FixedString(66), + `nonce` UInt64, + `block_hash` FixedString(66), + `block_number` UInt256, + `block_timestamp` DateTime CODEC(Delta, ZSTD), + `transaction_index` UInt64, + `from_address` FixedString(42), + `to_address` FixedString(42), + `value` UInt256, + `gas` UInt64, + `gas_price` UInt256, + `data` String, + `function_selector` FixedString(10), + `max_fee_per_gas` UInt128, + `max_priority_fee_per_gas` UInt128, + `max_fee_per_blob_gas` UInt256, + `blob_versioned_hashes` Array(String), + `transaction_type` UInt8, + `r` UInt256, + `s` UInt256, + `v` UInt256, + `access_list` Nullable(String), + `authorization_list` Nullable(String), + `contract_address` Nullable(FixedString(42)), + `gas_used` Nullable(UInt64), + `cumulative_gas_used` Nullable(UInt64), + `effective_gas_price` Nullable(UInt256), + `blob_gas_used` Nullable(UInt64), + `blob_gas_price` Nullable(UInt256), + `logs_bloom` Nullable(String), + `status` Nullable(UInt64), + + `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` UInt8 DEFAULT 0, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, + INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_function_selector function_selector TYPE bloom_filter GRANULARITY 2, + + PROJECTION from_address_projection + ( + SELECT + _part_offset + ORDER BY + chain_id, + from_address, + block_number, + hash + ), + PROJECTION to_address_projection + ( + SELECT + _part_offset + ORDER BY + chain_id, + to_address, + block_number, + hash + ), + 
PROJECTION from_address_state_projection + ( + SELECT + chain_id, + from_address, + count() AS tx_count, + uniqExact(hash) AS unique_tx_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY + chain_id, + from_address + ), + PROJECTION to_address_state_projection + ( + SELECT + chain_id, + to_address, + count() AS tx_count, + uniqExact(hash) AS unique_tx_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY + chain_id, + to_address + ) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) +ORDER BY (chain_id, block_number, hash) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0002_clickhouse_create_cursors_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_cursors_table.sql deleted file mode 100644 index 6574a3b..0000000 --- a/internal/tools/clickhouse/0002_clickhouse_create_cursors_table.sql +++ /dev/null @@ -1,7 +0,0 @@ -CREATE TABLE IF NOT EXISTS cursors ( - `chain_id` UInt256, - `cursor_type` String, - `cursor_value` String, - `insert_timestamp` DateTime DEFAULT now(), -) ENGINE = ReplacingMergeTree(insert_timestamp) -ORDER BY (chain_id, cursor_type); diff --git a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql new file mode 100644 index 0000000..d4e202c --- /dev/null +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -0,0 +1,71 @@ +CREATE TABLE IF NOT EXISTS logs ( + `chain_id` UInt256, + `block_number` UInt256, + `block_hash` FixedString(66), + `block_timestamp` DateTime CODEC(Delta, ZSTD), + `transaction_hash` FixedString(66), + `transaction_index` UInt64, + `log_index` UInt64, + `address` FixedString(42), + `data` String, + `topic_0` String, + `topic_1` String, + `topic_2` String, + `topic_3` String, + + `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` UInt8 DEFAULT 0, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, + INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 2, + INDEX idx_address address TYPE bloom_filter GRANULARITY 3, + INDEX idx_topic0 topic_0 TYPE bloom_filter GRANULARITY 3, + INDEX idx_topic1 topic_1 TYPE bloom_filter GRANULARITY 4, + INDEX idx_topic2 topic_2 TYPE bloom_filter GRANULARITY 4, + INDEX idx_topic3 topic_3 TYPE bloom_filter GRANULARITY 4, + + PROJECTION chain_address_topic0_projection + ( + SELECT + _part_offset + ORDER BY + chain_id, + address, + topic_0, + block_number, + transaction_index, + log_index + ), + PROJECTION chain_topic0_projection + ( + SELECT + _part_offset + ORDER BY + chain_id, + topic_0, + block_number, + transaction_index, + log_index, + address + ), + PROJECTION address_topic0_state_projection + ( + SELECT + chain_id, + address, + topic_0, + count() AS log_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY + chain_id, + address, + topic_0 + ) +) ENGINE = 
ReplacingMergeTree(insert_timestamp, is_deleted) +ORDER BY (chain_id, block_number, transaction_hash, log_index) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; diff --git a/internal/tools/clickhouse/0003_clickhouse_create_staging_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_staging_table.sql deleted file mode 100644 index cd015ac..0000000 --- a/internal/tools/clickhouse/0003_clickhouse_create_staging_table.sql +++ /dev/null @@ -1,11 +0,0 @@ -CREATE TABLE IF NOT EXISTS block_data ( - `chain_id` UInt256, - `block_number` UInt256, - `data` String, - `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - INDEX idx_block_number block_number TYPE minmax GRANULARITY 1, -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, block_number) -PARTITION BY chain_id -SETTINGS allow_experimental_replacing_merge_with_cleanup = 1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0007_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql similarity index 56% rename from internal/tools/clickhouse/0007_clickhouse_create_traces_table.sql rename to internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql index f3dc25d..6b65467 100644 --- a/internal/tools/clickhouse/0007_clickhouse_create_traces_table.sql +++ b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql @@ -20,13 +20,39 @@ CREATE TABLE IF NOT EXISTS traces ( `author` Nullable(FixedString(42)), `reward_type` LowCardinality(Nullable(String)), `refund_address` Nullable(FixedString(42)), - `sign` Int8 DEFAULT 1, + `insert_timestamp` DateTime DEFAULT now(), - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, - INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 1, - INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 1, -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) -ORDER BY (chain_id, block_number, transaction_hash, trace_address) -PARTITION BY chain_id; \ No newline at end of file + `is_deleted` UInt8 DEFAULT 0, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 2, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 3, + + PROJECTION from_address_projection + ( + SELECT + _part_offset + ORDER BY + chain_id, + from_address, + block_number, + transaction_hash, + trace_address + ), + PROJECTION to_address_projection + ( + SELECT + _part_offset + ORDER BY + chain_id, + to_address, + block_number, + transaction_hash, + trace_address + ) + +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) +ORDER BY (chain_id, transaction_hash, trace_address) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; diff --git a/internal/tools/clickhouse/0000_clickhouse_create_insert_null_table.sql b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql similarity index 95% rename from 
internal/tools/clickhouse/0000_clickhouse_create_insert_null_table.sql rename to internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql index 25fc90e..8597fcd 100644 --- a/internal/tools/clickhouse/0000_clickhouse_create_insert_null_table.sql +++ b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql @@ -1,5 +1,6 @@ -CREATE TABLE IF NOT EXISTS inserts_null_table ( +CREATE TABLE IF NOT EXISTS insert_null_block_data ( chain_id UInt256, + block Tuple( block_number UInt256, block_timestamp DateTime, @@ -92,8 +93,7 @@ CREATE TABLE IF NOT EXISTS inserts_null_table ( reward_type LowCardinality(Nullable(String)), refund_address Nullable(FixedString(42)) )), + insert_timestamp DateTime DEFAULT now(), - sign Int8 DEFAULT 1 -) ENGINE = MergeTree -ORDER BY (chain_id, insert_timestamp) -PARTITION BY chain_id; + is_deleted UInt8 DEFAULT 0 +) ENGINE = Null; diff --git a/internal/tools/clickhouse/0008_clickhouse_create_insert_mvs.sql b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql similarity index 85% rename from internal/tools/clickhouse/0008_clickhouse_create_insert_mvs.sql rename to internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql index 0ea2673..b10c379 100644 --- a/internal/tools/clickhouse/0008_clickhouse_create_insert_mvs.sql +++ b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_blocks_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_blocks_mv TO blocks AS SELECT @@ -25,10 +25,10 @@ SELECT block.20 AS withdrawals_root, block.21 AS base_fee_per_gas, insert_timestamp, - sign -FROM inserts_null_table; + is_deleted +FROM insert_null_block_data; -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_transactions_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_transactions_mv TO transactions AS SELECT @@ -65,11 +65,11 @@ SELECT t.30 AS logs_bloom, t.31 AS status, insert_timestamp, - sign -FROM inserts_null_table + is_deleted +FROM insert_null_block_data ARRAY JOIN transactions AS t; -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_logs_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_logs_mv TO logs AS SELECT @@ -87,11 +87,11 @@ SELECT l.11 AS topic_2, l.12 AS topic_3, insert_timestamp, - sign -FROM inserts_null_table + is_deleted +FROM insert_null_block_data ARRAY JOIN logs AS l; -CREATE MATERIALIZED VIEW IF NOT EXISTS mv_traces_inserts +CREATE MATERIALIZED VIEW IF NOT EXISTS insert_traces_mv TO traces AS SELECT @@ -117,6 +117,6 @@ SELECT tr.19 AS reward_type, tr.20 AS refund_address, insert_timestamp, - sign -FROM inserts_null_table + is_deleted +FROM insert_null_block_data ARRAY JOIN traces AS tr; diff --git a/internal/tools/clickhouse/0006_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0006_clickhouse_create_logs_table.sql deleted file mode 100644 index b1d3db3..0000000 --- a/internal/tools/clickhouse/0006_clickhouse_create_logs_table.sql +++ /dev/null @@ -1,39 +0,0 @@ -CREATE TABLE IF NOT EXISTS logs ( - `chain_id` UInt256, - `block_number` UInt256, - `block_hash` FixedString(66), - `block_timestamp` DateTime CODEC(Delta, ZSTD), - `transaction_hash` FixedString(66), - `transaction_index` UInt64, - `log_index` UInt64, - `address` FixedString(42), - `data` String, - `topic_0` String, - `topic_1` String, - `topic_2` String, - `topic_3` String, - `insert_timestamp` DateTime DEFAULT now(), - `sign` Int8 DEFAULT 1, - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, - INDEX idx_block_hash block_hash TYPE bloom_filter 
GRANULARITY 3, - INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_address address TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic0 topic_0 TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic1 topic_1 TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic2 topic_2 TYPE bloom_filter GRANULARITY 1, - INDEX idx_topic3 topic_3 TYPE bloom_filter GRANULARITY 1, - PROJECTION logs_chainid_topic0_address - ( - SELECT * - ORDER BY - chain_id, - topic_0, - address, - block_number, - transaction_index, - log_index - ) -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) -ORDER BY (chain_id, block_number, transaction_hash, log_index) -PARTITION BY chain_id -SETTINGS deduplicate_merge_projection_mode = 'drop', lightweight_mutation_projection_mode = 'rebuild'; diff --git a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql new file mode 100644 index 0000000..edb92cb --- /dev/null +++ b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql @@ -0,0 +1,114 @@ +CREATE TABLE IF NOT EXISTS token_transfers +( + `chain_id` UInt256, + `token_type` LowCardinality(String), + `token_address` FixedString(42), + `token_id` UInt256, + `from_address` FixedString(42), + `to_address` FixedString(42), + `block_number` UInt256, + `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), + `transaction_hash` FixedString(66), + `transaction_index` UInt64, + `amount` UInt256, + `log_index` UInt64, + `batch_index` Nullable(UInt16) DEFAULT NULL, + + `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` UInt8 DEFAULT 0, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 4, + + PROJECTION from_address_projection ( + SELECT + _part_offset + ORDER BY + chain_id, + from_address, + block_number, + transaction_index, + log_index + ), + PROJECTION to_address_projection ( + SELECT + _part_offset + ORDER BY + chain_id, + to_address, + block_number, + transaction_index, + log_index + ), + PROJECTION token_id_projection ( + SELECT + _part_offset + ORDER BY + chain_id, + token_address, + token_id, + block_number, + transaction_index, + log_index + ), + PROJECTION from_address_state_projection ( + SELECT + chain_id, + from_address, + token_address, + token_type, + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY + chain_id, + from_address, + token_address, + token_type + ), + PROJECTION to_address_state_projection ( + SELECT + chain_id, + to_address, + token_address, + token_type, + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY + chain_id, + to_address, + token_address, + token_type + ), + PROJECTION token_state_projection ( + SELECT + chain_id, + token_address, + token_id, + token_type, + count() AS transfer_count, + sum(toInt256(amount)) AS total_volume, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + 
max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY + chain_id, + token_address, + token_id, + token_type + ) +) +ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) +ORDER BY (chain_id, token_address, block_number, transaction_index, log_index) +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql new file mode 100644 index 0000000..30d01a5 --- /dev/null +++ b/internal/tools/clickhouse/0007_clickhouse_create_token_transfers_mv.sql @@ -0,0 +1,158 @@ +-- ERC20 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc20_mv +TO token_transfers +AS +SELECT + chain_id, + address AS token_address, + 'erc20' AS token_type, + 0 AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + insert_timestamp, + is_deleted +FROM logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' -- Transfer + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND topic_3 = '' + AND length(data) = 66; + +-- ERC721 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc721_mv +TO token_transfers +AS +SELECT + chain_id, + address AS token_address, + 'erc721' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + toUInt8(1) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + insert_timestamp, + is_deleted +FROM logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND length(topic_3) = 66 AND startsWith(topic_3, '0x') + AND length(data) = 2; + +-- ERC1155 (single) +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc1155_single_mv +TO token_transfers +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + toNullable(toUInt16(0)) AS batch_index, + insert_timestamp, + is_deleted +FROM logs +WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' -- TransferSingle + AND length(topic_2) = 66 AND length(topic_3) = 66 + AND length(data) = (2 + 2*64); + +-- ERC1155 (batch) +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc1155_batch_mv +TO token_transfers +AS +SELECT + chain_id, + address AS token_address, + 
'erc1155' AS token_type, + reinterpretAsUInt256(reverse(substring(bin, (ids_base + ((i - 1) * 32)) + 1, 32))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(substring(bin, (am_base + ((i - 1) * 32)) + 1, 32))) AS amount, + log_index, + toNullable(toUInt16(i - 1)) AS batch_index, + insert_timestamp, + is_deleted +FROM ( + SELECT + chain_id, + address, + topic_2, + topic_3, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + is_deleted, + insert_timestamp, + unhex(substring(data, 3)) AS bin, + length(unhex(substring(data, 3))) AS bin_len, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), 1, 32)))) AS ids_off, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), 33, 32)))) AS am_off, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), ids_off + 1, 32)))) AS ids_len, + toUInt32(reinterpretAsUInt256(reverse(substring(unhex(substring(data, 3)), am_off + 1, 32)))) AS am_len, + ids_off + 32 AS ids_base, + am_off + 32 AS am_base +FROM default.logs +WHERE (topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') + AND (length(topic_2) = 66) + AND (length(topic_3) = 66) + AND (ids_len = am_len) + AND (ids_len > 0) + AND ((ids_off + 32) <= bin_len) + AND ((am_off + 32) <= bin_len) + AND ((ids_base + (ids_len * 32)) <= bin_len) + AND ((am_base + (am_len * 32)) <= bin_len) +) ARRAY JOIN range(1, ids_len + 1) AS i; + + +-- ERC6909 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_transfers_erc6909_mv +TO token_transfers +AS +SELECT + chain_id, + address AS token_address, + 'erc6909' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + insert_timestamp, + is_deleted +FROM logs +WHERE topic_0 = '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' + AND length(topic_1) = 66 + AND length(topic_2) = 66 + AND length(data) == 2 + 128; \ No newline at end of file diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql new file mode 100644 index 0000000..49444f1 --- /dev/null +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql @@ -0,0 +1,67 @@ +CREATE TABLE IF NOT EXISTS token_balances +( + `chain_id` UInt256, + `token_type` LowCardinality(String), + `token_address` FixedString(42), + `owner_address` FixedString(42), + `token_id` UInt256, + + -- Normalized delta: positive for incoming, negative for outgoing + `balance_delta` Int256, + + -- Transaction details for ordering and deduplication + `block_number` UInt256, + `block_timestamp` DateTime, + `transaction_hash` FixedString(66), + `transaction_index` UInt64, + `log_index` UInt64, + `direction` Enum8('from' = 1, 'to' = 2), -- To make each transfer create 2 unique rows + + `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` UInt8 DEFAULT 0, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_token_address 
token_address TYPE bloom_filter GRANULARITY 3, + INDEX idx_owner_address owner_address TYPE bloom_filter GRANULARITY 3, + + PROJECTION owner_balances_projection + ( + SELECT + chain_id, + owner_address, + token_address, + token_id, + sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY chain_id, owner_address, token_address, token_id + ), + + PROJECTION token_balances_projection + ( + SELECT + chain_id, + token_address, + token_id, + owner_address, + sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, + minState(block_number) AS min_block_number_state, + minState(block_timestamp) AS min_block_timestamp_state, + maxState(block_number) AS max_block_number_state, + maxState(block_timestamp) AS max_block_timestamp_state + GROUP BY chain_id, token_address, token_id, owner_address + ), + + PROJECTION token_projection + ( + SELECT + _part_offset + ORDER BY chain_id, token_address, token_id, owner_address + ) +) +ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) +PARTITION BY chain_id +ORDER BY (chain_id, owner_address, token_address, token_id, block_number, transaction_index, log_index, direction) +SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql index c9e54cb..63e523e 100644 --- a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql +++ b/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql @@ -1,117 +1,161 @@ -CREATE TABLE IF NOT EXISTS token_balances -( - `token_type` String, - `chain_id` UInt256, - `owner` FixedString(42), - `address` FixedString(42), - `token_id` UInt256, - `balance` Int256, - PROJECTION address_projection - ( - SELECT * - ORDER BY - token_type, - chain_id, - address, - token_id - ) -) -ENGINE = SummingMergeTree -ORDER BY (token_type, chain_id, owner, address, token_id) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; +-- ERC20 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_mv +TO token_balances +AS +-- FROM side (outgoing, negative delta) +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc20' +UNION ALL +-- TO side (incoming, positive delta) +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc20'; -CREATE MATERIALIZED VIEW IF NOT EXISTS single_token_transfers_mv TO token_balances AS -SELECT chain_id, owner, address, token_type, token_id, sum(amount) as balance -FROM -( - SELECT - chain_id, - address, - (topic_0 = 
'0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' AND topic_3 = '') as is_erc20, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' AND topic_3 != '') as is_erc721, - (topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62') as is_erc1155, - if(is_erc1155, concat('0x', substring(topic_2, 27, 40)), concat('0x', substring(topic_1, 27, 40))) AS sender_address, -- ERC20 & ERC721 both have topic_1 as sender - if(is_erc1155, concat('0x', substring(topic_3, 27, 40)), concat('0x', substring(topic_2, 27, 40))) AS receiver_address, -- ERC20 & ERC721 both have topic_2 as receiver - multiIf(is_erc20, 'erc20', is_erc721, 'erc721', 'erc1155') as token_type, - multiIf( - is_erc1155, - reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))), - is_erc721, - reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))), - toUInt256(0) -- other - ) AS token_id, - multiIf( - is_erc20 AND length(data) = 66, - reinterpretAsInt256(reverse(unhex(substring(data, 3)))), - is_erc721, - toInt256(1), - is_erc1155, - if(length(data) = 130, reinterpretAsInt256(reverse(unhex(substring(data, 67, 64)))), toInt256(1)), - toInt256(0) -- unknown - ) AS transfer_amount, - (sign * transfer_amount) as amount - FROM logs - WHERE - topic_0 IN ( - '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', - '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' - ) -) -array join - [chain_id, chain_id] AS chain_id, - [sender_address, receiver_address] AS owner, - [-amount, amount] as amount, - [token_type, token_type] AS token_type, - [token_id, token_id] AS token_id, - [address, address] AS address -GROUP BY chain_id, owner, address, token_type, token_id; +-- ERC721 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_mv +TO token_balances +AS +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -1 AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc721' +UNION ALL +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + 1 AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc721'; -CREATE MATERIALIZED VIEW IF NOT EXISTS erc1155_batch_token_transfers_mv TO token_balances AS -SELECT chain_id, owner, address, token_type, token_id, sum(amount) as balance -FROM ( - WITH - metadata as ( - SELECT - *, - 3 + 2 * 64 as ids_length_idx, - ids_length_idx + 64 as ids_values_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, ids_length_idx, 64)))) AS ids_length, - ids_length_idx + 64 + (ids_length * 64) as amounts_length_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, amounts_length_idx, 64)))) AS amounts_length, - amounts_length_idx + 64 as amounts_values_idx - FROM logs - WHERE topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb' AND topic_2 != '' AND topic_3 != '' AND ids_length = amounts_length - ), - decoded AS ( - SELECT - *, - arrayMap( - x -> substring(data, ids_values_idx + (x - 1) * 64, 64), - range(1, ids_length + 1) - ) AS ids_hex, - arrayMap( - x -> substring(data, amounts_values_idx + (x - 1) * 64, 64), - range(1, amounts_length + 1) - ) AS amounts_hex - FROM metadata - ) - SELECT - 
chain_id, - address, - concat('0x', substring(topic_2, 27, 40)) AS sender_address, - concat('0x', substring(topic_3, 27, 40)) AS receiver_address, - 'erc1155' as token_type, - reinterpretAsUInt256(reverse(unhex(substring(hex_id, 1, 64)))) AS token_id, - reinterpretAsInt256(reverse(unhex(substring(hex_amount, 1, 64)))) AS transfer_amount, - (sign * transfer_amount) as amount - FROM decoded - ARRAY JOIN ids_hex AS hex_id, amounts_hex AS hex_amount -) -array join - [chain_id, chain_id] AS chain_id, - [sender_address, receiver_address] AS owner, - [-amount, amount] as amount, - [token_type, token_type] AS token_type, - [token_id, token_id] AS token_id, - [address, address] AS address -GROUP BY chain_id, owner, address, token_type, token_id; \ No newline at end of file +-- ERC1155 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_mv +TO token_balances +AS +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc1155' +UNION ALL +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc1155'; + +-- ERC6909 +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_mv +TO token_balances +AS +SELECT + chain_id, + token_type, + token_address, + from_address AS owner_address, + token_id, + -toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'from' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc6909' +UNION ALL +SELECT + chain_id, + token_type, + token_address, + to_address AS owner_address, + token_id, + toInt256(amount) AS balance_delta, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + log_index, + 'to' AS direction, + insert_timestamp, + is_deleted +FROM token_transfers +WHERE token_type = 'erc6909'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0005_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql similarity index 58% rename from internal/tools/clickhouse/0005_clickhouse_create_transactions_table.sql rename to internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql index 48f1e0d..f546f40 100644 --- a/internal/tools/clickhouse/0005_clickhouse_create_transactions_table.sql +++ b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql @@ -1,4 +1,4 @@ -CREATE TABLE IF NOT EXISTS transactions ( +CREATE TABLE IF NOT EXISTS address_transactions ( `chain_id` UInt256, `hash` FixedString(66), `nonce` UInt64, @@ -6,6 +6,8 @@ CREATE TABLE IF NOT EXISTS transactions ( `block_number` UInt256, `block_timestamp` DateTime CODEC(Delta, ZSTD), `transaction_index` UInt64, + `address` FixedString(42), + `address_type` Enum8('from' = 1, 'to' = 2), `from_address` FixedString(42), `to_address` FixedString(42), `value` UInt256, @@ -31,32 +33,31 @@ CREATE TABLE IF NOT EXISTS transactions ( `blob_gas_price` Nullable(UInt256), `logs_bloom` Nullable(String), `status` Nullable(UInt64), - `sign` Int8 DEFAULT 1, + 
`insert_timestamp` DateTime DEFAULT now(), - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 3, - INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_hash hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 1, - INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 1, - INDEX idx_function_selector function_selector TYPE bloom_filter GRANULARITY 1, - PROJECTION txs_chainid_from_address + `is_deleted` UInt8 DEFAULT 0, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, + + PROJECTION address_total_count_projection ( - SELECT * - ORDER BY + SELECT chain_id, - from_address, - block_number - ), - PROJECTION txs_chainid_to_address - ( - SELECT * - ORDER BY + address, + count() AS tx_count, + uniqExact(hash) AS unique_tx_count, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY chain_id, - to_address, - block_number, - hash + address ) -) ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) -ORDER BY (chain_id, block_number, hash) -PARTITION BY chain_id -SETTINGS deduplicate_merge_projection_mode = 'drop', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) +ORDER BY (chain_id, address, block_number, hash, transaction_index) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0010_clickhouse_create_token_transfers_mv.sql b/internal/tools/clickhouse/0010_clickhouse_create_token_transfers_mv.sql deleted file mode 100644 index b87e35b..0000000 --- a/internal/tools/clickhouse/0010_clickhouse_create_token_transfers_mv.sql +++ /dev/null @@ -1,211 +0,0 @@ -CREATE TABLE IF NOT EXISTS token_transfers -( - `token_type` LowCardinality(String), - `chain_id` UInt256, - `token_address` FixedString(42), - `from_address` FixedString(42), - `to_address` FixedString(42), - `block_number` UInt256, - `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), - `transaction_hash` FixedString(66), - `token_id` UInt256, - `amount` UInt256, - `log_index` UInt64, - `sign` Int8 DEFAULT 1, - `insert_timestamp` DateTime DEFAULT now(), - - INDEX minmax_block_number block_number TYPE minmax GRANULARITY 16, - INDEX minmax_block_timestamp block_timestamp TYPE minmax GRANULARITY 16, - - PROJECTION from_address_projection - ( - SELECT * - ORDER BY - chain_id, - token_type, - from_address, - block_number, - log_index - ), - PROJECTION to_address_projection - ( - SELECT * - ORDER BY - chain_id, - token_type, - to_address, - block_number, - log_index - ), - PROJECTION transaction_hash_projection - ( - SELECT * - ORDER BY - chain_id, - token_type, - transaction_hash, - block_number, - log_index - ), - PROJECTION token_aggregation_projection - ( - SELECT - chain_id, - token_type, - max(block_number) AS max_block_number, - count() AS total_count - GROUP BY - chain_id, - token_type - ) -) -ENGINE = VersionedCollapsingMergeTree(sign, insert_timestamp) -PARTITION BY chain_id -ORDER BY (chain_id, token_type, 
token_address, block_number, log_index) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild'; - -CREATE MATERIALIZED VIEW IF NOT EXISTS logs_to_token_transfers TO token_transfers -( - `chain_id` UInt256, - `token_address` FixedString(42), - `from_address` String, - `to_address` String, - `token_type` String, - `block_number` UInt256, - `block_timestamp` DateTime, - `transaction_hash` FixedString(66), - `log_index` UInt64, - `sign` Int8, - `insert_timestamp` DateTime, - `token_id` UInt256, - `amount` UInt256 -) -AS WITH - transfer_logs AS - ( - SELECT - chain_id, - address AS token_address, - topic_0, - topic_1, - topic_2, - topic_3, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 = '') AS is_erc20, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 != '') AS is_erc721, - topic_0 IN ('0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AS is_erc1155, - multiIf(is_erc20, 'erc20', is_erc721, 'erc721', 'erc1155') AS token_type, - if(is_erc1155, concat('0x', substring(topic_2, 27, 40)), concat('0x', substring(topic_1, 27, 40))) AS from_address, - if(is_erc1155, concat('0x', substring(topic_3, 27, 40)), concat('0x', substring(topic_2, 27, 40))) AS to_address, - data, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp - FROM logs - WHERE topic_0 IN ('0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') - ), - batch_transfer_metadata AS - ( - SELECT - *, - 3 + (2 * 64) AS ids_length_idx, - ids_length_idx + 64 AS ids_values_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, ids_length_idx, 64)))) AS ids_length, - (ids_length_idx + 64) + (ids_length * 64) AS amounts_length_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, amounts_length_idx, 64)))) AS amounts_length, - amounts_length_idx + 64 AS amounts_values_idx - FROM transfer_logs - WHERE (topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AND (length(topic_1) = 66) AND (length(topic_2) = 66) AND (length(topic_3) = 66) AND (length(data) != (258 + ((ids_length + amounts_length) * 64))) AND (ids_length = amounts_length) - ), - batch_transfer_logs AS - ( - SELECT - *, - arrayMap(x -> substring(data, ids_values_idx + ((x - 1) * 64), 64), range(1, toInt32(ids_length) + 1)) AS ids_hex, - arrayMap(x -> substring(data, amounts_values_idx + ((x - 1) * 64), 64), range(1, toInt32(amounts_length) + 1)) AS amounts_hex - FROM batch_transfer_metadata - ) -SELECT - chain_id, - token_address, - from_address, - to_address, - token_type, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp, - multiIf(is_erc1155, reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))), is_erc721, reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))), toUInt256(0)) AS token_id, - multiIf(is_erc20 AND (length(data) = 66), reinterpretAsUInt256(reverse(unhex(substring(data, 3)))), is_erc721, toUInt256(1), is_erc1155, if(length(data) = 130, reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))), toUInt256(1)), toUInt256(0)) AS amount -FROM transfer_logs -WHERE topic_0 IN 
('0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62') -UNION ALL -WITH - transfer_logs AS - ( - SELECT - chain_id, - address AS token_address, - topic_0, - topic_1, - topic_2, - topic_3, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 = '') AS is_erc20, - (topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef') AND (topic_3 != '') AS is_erc721, - topic_0 IN ('0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AS is_erc1155, - multiIf(is_erc20, 'erc20', is_erc721, 'erc721', 'erc1155') AS token_type, - if(is_erc1155, concat('0x', substring(topic_2, 27, 40)), concat('0x', substring(topic_1, 27, 40))) AS from_address, - if(is_erc1155, concat('0x', substring(topic_3, 27, 40)), concat('0x', substring(topic_2, 27, 40))) AS to_address, - data, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp - FROM logs - WHERE topic_0 IN ('0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') - ), - batch_transfer_metadata AS - ( - SELECT - *, - 3 + (2 * 64) AS ids_length_idx, - ids_length_idx + 64 AS ids_values_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, ids_length_idx, 64)))) AS ids_length, - (ids_length_idx + 64) + (ids_length * 64) AS amounts_length_idx, - reinterpretAsUInt64(reverse(unhex(substring(data, amounts_length_idx, 64)))) AS amounts_length, - amounts_length_idx + 64 AS amounts_values_idx - FROM transfer_logs - WHERE (topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb') AND (length(topic_1) = 66) AND (length(topic_2) = 66) AND (length(topic_3) = 66) AND (length(data) != (258 + ((ids_length + amounts_length) * 64))) AND (ids_length = amounts_length) - ), - batch_transfer_logs AS - ( - SELECT - *, - arrayMap(x -> substring(data, ids_values_idx + ((x - 1) * 64), 64), range(1, toInt32(ids_length) + 1)) AS ids_hex, - arrayMap(x -> substring(data, amounts_values_idx + ((x - 1) * 64), 64), range(1, toInt32(amounts_length) + 1)) AS amounts_hex - FROM batch_transfer_metadata - ) -SELECT - chain_id, - token_address, - from_address, - to_address, - token_type, - block_number, - block_timestamp, - transaction_hash, - log_index, - sign, - insert_timestamp, - reinterpretAsUInt256(reverse(unhex(substring(hex_id, 1, 64)))) AS token_id, - reinterpretAsUInt256(reverse(unhex(substring(hex_amount, 1, 64)))) AS amount -FROM batch_transfer_logs -ARRAY JOIN - ids_hex AS hex_id, - amounts_hex AS hex_amount \ No newline at end of file diff --git a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql new file mode 100644 index 0000000..c5b5ac7 --- /dev/null +++ b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql @@ -0,0 +1,44 @@ +CREATE MATERIALIZED VIEW address_transactions_mv +TO address_transactions +AS +SELECT + chain_id, + hash, + nonce, + block_hash, + block_number, + block_timestamp, + transaction_index, + address_tuple.1 AS address, + address_tuple.2 AS address_type, + from_address, + to_address, + value, + gas, + gas_price, + data, + function_selector, + max_fee_per_gas, + max_priority_fee_per_gas, + 
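+    -- Note: the ARRAY JOIN over arrayZip([from_address, to_address], ['from', 'to']) at the end of
+    -- this view (see below) fans each source transaction out into two rows, one per involved address,
+    -- with address_type recording which side ('from' or 'to') the address was on.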
max_fee_per_blob_gas, + blob_versioned_hashes, + transaction_type, + r, + s, + v, + access_list, + authorization_list, + contract_address, + gas_used, + cumulative_gas_used, + effective_gas_price, + blob_gas_used, + blob_gas_price, + logs_bloom, + status, + + insert_timestamp, + is_deleted +FROM transactions +ARRAY JOIN + arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql new file mode 100644 index 0000000..c130e70 --- /dev/null +++ b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql @@ -0,0 +1,67 @@ +CREATE TABLE IF NOT EXISTS address_transfers ( + `chain_id` UInt256, + `token_type` LowCardinality(String), + `token_address` FixedString(42), + `token_id` UInt256, + `address` FixedString(42), + `address_type` Enum8('from' = 1, 'to' = 2), + `from_address` FixedString(42), + `to_address` FixedString(42), + `block_number` UInt256, + `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), + `transaction_hash` FixedString(66), + `transaction_index` UInt64, + `amount` UInt256, + `log_index` UInt64, + `batch_index` Nullable(UInt16) DEFAULT NULL, + + `insert_timestamp` DateTime DEFAULT now(), + `is_deleted` UInt8 DEFAULT 0, + + INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, + INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, + INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, + INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, + + PROJECTION address_state_projection ( + SELECT + chain_id, + address, + address_type, + token_address, + token_type, + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY + chain_id, + address, + address_type, + token_address, + token_type + ), + PROJECTION address_total_state_projection ( + SELECT + chain_id, + address, + token_address, + token_type, + count() AS transfer_count, + sum(toInt256(amount)) AS total_amount, + min(block_number) AS min_block_number, + min(block_timestamp) AS min_block_timestamp, + max(block_number) AS max_block_number, + max(block_timestamp) AS max_block_timestamp + GROUP BY + chain_id, + address, + token_address, + token_type + ) +) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) +ORDER BY (chain_id, address, block_number, transaction_hash, transaction_index) +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) +SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql new file mode 100644 index 0000000..0a7d2cc --- /dev/null +++ b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql @@ -0,0 +1,24 @@ +CREATE MATERIALIZED VIEW address_transfers_mv +TO address_transfers +AS +SELECT + chain_id, + token_type, + token_address, + token_id, + address_tuple.1 AS address, + address_tuple.2 AS address_type, + from_address, + to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + amount, + log_index, + batch_index, + insert_timestamp, + is_deleted +FROM 
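+-- Illustrative read pattern for the address_transfers table defined above (a sketch, not part of
+-- this migration; the chain_id and address values are placeholders): because the table is ordered by
+-- (chain_id, address, ...), per-address history becomes a prefix scan, e.g.
+--   SELECT token_address, token_type, token_id, amount, block_number
+--   FROM address_transfers FINAL
+--   WHERE chain_id = 1 AND address = '0x0000000000000000000000000000000000000000'
+--   ORDER BY block_number DESC
+--   LIMIT 100;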
token_transfers +ARRAY JOIN + arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file diff --git a/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql b/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql new file mode 100644 index 0000000..c2090e6 --- /dev/null +++ b/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql @@ -0,0 +1,202 @@ +CREATE TABLE IF NOT EXISTS backfill_logs +( + `chain_id` UInt256, + `block_number` UInt256, + `block_hash` FixedString(66), + `block_timestamp` DateTime CODEC(Delta, ZSTD), + `transaction_hash` FixedString(66), + `transaction_index` UInt64, + `log_index` UInt64, + `address` FixedString(42), + `data` String, + `topic_0` String, + `topic_1` String, + `topic_2` String, + `topic_3` String, + + `sign` Int8 DEFAULT 1, + `insert_timestamp` DateTime DEFAULT now(), +) ENGINE = Null; + + +--- Materialize view running to the correct tables +-- ERC20 +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc20_mv +TO token_transfers +AS +SELECT + chain_id, + address AS token_address, + 'erc20' AS token_type, + 0 AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND topic_3 = '' + AND length(data) = 66; + +-- ERC721 +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc721_mv +TO token_transfers +AS +SELECT + chain_id, + address AS token_address, + 'erc721' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + concat('0x', substring(topic_1, 27, 40)) AS from_address, + concat('0x', substring(topic_2, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + toUInt8(1) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' + AND length(topic_1) = 66 AND startsWith(topic_1, '0x') + AND length(topic_2) = 66 AND startsWith(topic_2, '0x') + AND length(topic_3) = 66 AND startsWith(topic_3, '0x') + AND length(data) = 2; + +-- ERC1155 (single) +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc1155_single_mv +TO token_transfers +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + toNullable(toUInt16(0)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' -- TransferSingle + AND length(topic_2) = 66 AND length(topic_3) = 66 + AND length(data) = (2 + 2*64); + +-- ERC1155 (batch) +CREATE MATERIALIZED VIEW IF 
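+-- For reference: TransferBatch data is ABI-encoded as two dynamic arrays. The first two 32-byte
+-- words are byte offsets to the ids and amounts arrays, and each array is a 32-byte length followed
+-- by its 32-byte elements. In the 0x-prefixed hex string, the word at byte offset N starts at
+-- character 3 + N*2, which is where the ids_offset / amounts_offset / length arithmetic in the
+-- view below comes from.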
NOT EXISTS bf__token_transfers_erc1155_batch_mv +TO token_transfers +AS +SELECT + chain_id, + address AS token_address, + 'erc1155' AS token_type, + reinterpretAsUInt256(reverse(unhex(id_hex))) AS token_id, + concat('0x', substring(topic_2, 27, 40)) AS from_address, + concat('0x', substring(topic_3, 27, 40)) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(amount_hex))) AS amount, + log_index, + toNullable(toUInt16(array_index - 1)) AS batch_index, + sign, + insert_timestamp +FROM ( + SELECT + chain_id, address, topic_2, topic_3, + block_number, block_timestamp, transaction_hash, transaction_index, log_index, sign, insert_timestamp, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64))))) AS ids_offset, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64))))) AS amounts_offset, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + ids_offset * 2, 64))))) AS ids_length, + toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + amounts_offset * 2, 64))))) AS amounts_length, + arrayMap(i -> substring(data, 3 + ids_offset * 2 + 64 + (i-1)*64, 64), range(1, least(ids_length, 10000) + 1)) AS ids_array, + arrayMap(i -> substring(data, 3 + amounts_offset * 2 + 64 + (i-1)*64, 64), range(1, least(amounts_length, 10000) + 1)) AS amounts_array + FROM backfill_logs + WHERE topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb' + AND length(topic_2) = 66 + AND length(topic_3) = 66 + AND ids_length = amounts_length +) +ARRAY JOIN + ids_array AS id_hex, + amounts_array AS amount_hex, + arrayEnumerate(ids_array) AS array_index; + + +-- ERC6909 +CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc6909_mv +TO token_transfers +AS +SELECT + chain_id, + lower(address) AS token_address, + 'erc6909' AS token_type, + reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, + lower(concat('0x', substring(topic_1, 27, 40))) AS from_address, + lower(concat('0x', substring(topic_2, 27, 40))) AS to_address, + block_number, + block_timestamp, + transaction_hash, + transaction_index, + reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, + log_index, + CAST(NULL AS Nullable(UInt16)) AS batch_index, + sign, + insert_timestamp +FROM backfill_logs +WHERE topic_0 = '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' + AND length(topic_1) = 66 + AND length(topic_2) = 66 + AND length(data) == 2 + 128; + +--- INITIATE BACKFILL BY RUNNING: +-- INSERT INTO backfill_logs +-- SELECT +-- chain_id, +-- block_number, +-- block_hash, +-- block_timestamp, +-- transaction_hash , +-- transaction_index, +-- log_index, +-- address, +-- data, +-- topic_0, +-- topic_1, +-- topic_2, +-- topic_3, +-- sign, +-- insert_timestamp, +-- FROM logs +-- WHERE 1=1 +-- AND chain_id = 1 +-- AND block_number >= 0 AND block_number < 10000000 +-- AND topic_0 IN ( +-- '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', -- 20/721 +-- '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', -- 1155 single +-- '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb', -- 1155 batch +-- '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' -- 6909 +-- ); + +-- DROP TABLE token_transfers, token_balance; +-- DROP TABLE bf__token_transfers_erc20_mv, bf__token_transfers_erc721_mv, bf__token_transfers_erc1155_mv, bf__token_transfers_erc6909_mv; +-- DROP TABLE token_transfers_erc20_mv, 
token_transfers_erc721_mv, token_transfers_erc1155_mv, token_transfers_erc6909_mv; +-- DROP TABLE token_balance_erc20_mv, token_balance_erc721_mv, token_balance_erc1155_mv, token_balance_erc6909_mv; \ No newline at end of file diff --git a/internal/worker/worker.go b/internal/worker/worker.go index d25294f..fd94bab 100644 --- a/internal/worker/worker.go +++ b/internal/worker/worker.go @@ -2,6 +2,7 @@ package worker import ( "context" + "fmt" "math/big" "sort" "sync" @@ -12,138 +13,334 @@ import ( "github.com/thirdweb-dev/indexer/internal/common" "github.com/thirdweb-dev/indexer/internal/metrics" "github.com/thirdweb-dev/indexer/internal/rpc" + "github.com/thirdweb-dev/indexer/internal/source" ) +// SourceType represents the type of data source +type SourceType string + +const ( + // SourceTypeRPC represents RPC data source + SourceTypeRPC SourceType = "rpc" + // SourceTypeArchive represents archive data source (e.g., S3) + SourceTypeArchive SourceType = "archive" +) + +// String returns the string representation of the source type +func (s SourceType) String() string { + return string(s) +} + +// Worker handles block data fetching from RPC and optional archive type Worker struct { - rpc rpc.IRPCClient + rpc rpc.IRPCClient + archive source.ISource // Optional alternative source + rpcSemaphore chan struct{} // Limit concurrent RPC requests } func NewWorker(rpc rpc.IRPCClient) *Worker { return &Worker{ - rpc: rpc, + rpc: rpc, + rpcSemaphore: make(chan struct{}, 20), + } +} + +// NewWorkerWithArchive creates a new Worker with optional archive support +func NewWorkerWithArchive(rpc rpc.IRPCClient, source source.ISource) *Worker { + return &Worker{ + rpc: rpc, + archive: source, + rpcSemaphore: make(chan struct{}, 20), + } +} + +// fetchFromRPC fetches blocks directly from RPC +func (w *Worker) fetchFromRPC(ctx context.Context, blocks []*big.Int) []rpc.GetFullBlockResult { + // Acquire semaphore for rate limiting + select { + case w.rpcSemaphore <- struct{}{}: + defer func() { <-w.rpcSemaphore }() + case <-ctx.Done(): + return nil + } + + return w.rpc.GetFullBlocks(ctx, blocks) +} + +// fetchFromArchive fetches blocks from archive if available +func (w *Worker) fetchFromArchive(ctx context.Context, blocks []*big.Int) []rpc.GetFullBlockResult { + if w.archive == nil { + return nil } + return w.archive.GetFullBlocks(ctx, blocks) } -func (w *Worker) processChunkWithRetry(ctx context.Context, chunk []*big.Int, resultsCh chan<- []rpc.GetFullBlockResult, sem chan struct{}) { +// processChunkWithRetry processes a chunk with automatic retry on failure +func (w *Worker) processChunkWithRetry(ctx context.Context, chunk []*big.Int, fetchFunc func(context.Context, []*big.Int) []rpc.GetFullBlockResult) []rpc.GetFullBlockResult { select { case <-ctx.Done(): - return + // Return error results for all blocks if context cancelled + var results []rpc.GetFullBlockResult + for _, block := range chunk { + results = append(results, rpc.GetFullBlockResult{ + BlockNumber: block, + Error: fmt.Errorf("context cancelled"), + }) + } + return results default: } - // Acquire semaphore only for the RPC request - sem <- struct{}{} - results := w.rpc.GetFullBlocks(ctx, chunk) - <-sem // Release semaphore immediately after RPC request + // Fetch the chunk + results := fetchFunc(ctx, chunk) - if len(chunk) == 1 { - // chunk size 1 is the minimum, so we return whatever we get - resultsCh <- results - return + // If we got all results, return them + if len(results) == len(chunk) { + allSuccess := true + for _, r := range results { 
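+    // Any failed block here skips the early return and falls through to the retry/split logic below.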
+ if r.Error != nil { + allSuccess = false + break + } + } + if allSuccess { + return results + } } - // Check for failed blocks + // Separate successful and failed + successMap := make(map[string]rpc.GetFullBlockResult) var failedBlocks []*big.Int - var successfulResults []rpc.GetFullBlockResult for i, result := range results { - if result.Error != nil { - failedBlocks = append(failedBlocks, chunk[i]) - } else { - successfulResults = append(successfulResults, result) + if i < len(chunk) { + if result.Error == nil { + successMap[chunk[i].String()] = result + } else { + failedBlocks = append(failedBlocks, chunk[i]) + } } } - log.Debug().Msgf("Out of %d blocks, %d successful, %d failed", len(results), len(successfulResults), len(failedBlocks)) - // If we have successful results, send them - if len(successfulResults) > 0 { - resultsCh <- successfulResults - } + // If only one block failed, retry once more + if len(failedBlocks) == 1 { + retryResults := fetchFunc(ctx, failedBlocks) + if len(retryResults) > 0 { + if retryResults[0].Error == nil { + successMap[failedBlocks[0].String()] = retryResults[0] + } else { + // Keep the error result + successMap[failedBlocks[0].String()] = rpc.GetFullBlockResult{ + BlockNumber: failedBlocks[0], + Error: retryResults[0].Error, + } + } + } + } else if len(failedBlocks) > 1 { + // Split failed blocks and retry recursively + mid := len(failedBlocks) / 2 + leftChunk := failedBlocks[:mid] + rightChunk := failedBlocks[mid:] - // If no blocks failed, we're done - if len(failedBlocks) == 0 { - return - } + log.Debug(). + Int("failed_count", len(failedBlocks)). + Int("left_chunk", len(leftChunk)). + Int("right_chunk", len(rightChunk)). + Msg("Splitting failed blocks for retry") - // can't split any further, so try one last time - if len(failedBlocks) == 1 { - w.processChunkWithRetry(ctx, failedBlocks, resultsCh, sem) - return - } + // Process both halves + leftResults := w.processChunkWithRetry(ctx, leftChunk, fetchFunc) + rightResults := w.processChunkWithRetry(ctx, rightChunk, fetchFunc) - // Split failed blocks in half and retry - mid := len(failedBlocks) / 2 - leftChunk := failedBlocks[:mid] - rightChunk := failedBlocks[mid:] + // Add results to map + for _, r := range leftResults { + if r.BlockNumber != nil { + successMap[r.BlockNumber.String()] = r + } + } + for _, r := range rightResults { + if r.BlockNumber != nil { + successMap[r.BlockNumber.String()] = r + } + } + } - log.Debug().Msgf("Splitting %d failed blocks into chunks of %d and %d", len(failedBlocks), len(leftChunk), len(rightChunk)) + // Build final results in original order + var finalResults []rpc.GetFullBlockResult + for _, block := range chunk { + if result, ok := successMap[block.String()]; ok { + finalResults = append(finalResults, result) + } else { + // Add error result for missing blocks + finalResults = append(finalResults, rpc.GetFullBlockResult{ + BlockNumber: block, + Error: fmt.Errorf("failed to fetch block"), + }) + } + } - var wg sync.WaitGroup - wg.Add(2) + return finalResults +} - go func() { - defer wg.Done() - w.processChunkWithRetry(ctx, leftChunk, resultsCh, sem) - }() +// processBatch processes a batch of blocks from a specific source +func (w *Worker) processBatch(ctx context.Context, blocks []*big.Int, sourceType SourceType, fetchFunc func(context.Context, []*big.Int) []rpc.GetFullBlockResult) []rpc.GetFullBlockResult { + if len(blocks) == 0 { + return nil + } - go func() { - defer wg.Done() - w.processChunkWithRetry(ctx, rightChunk, resultsCh, sem) - }() + // Determine 
chunk size based on source + chunkSize := w.rpc.GetBlocksPerRequest().Blocks + if sourceType == SourceTypeArchive && w.archive != nil { + chunkSize = len(blocks) // Fetch all at once from archive + } - wg.Wait() -} + chunks := common.SliceToChunks(blocks, chunkSize) -func (w *Worker) Run(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { - blockCount := len(blockNumbers) - chunks := common.SliceToChunks(blockNumbers, w.rpc.GetBlocksPerRequest().Blocks) + log.Debug(). + Str("source", sourceType.String()). + Int("total_blocks", len(blocks)). + Int("chunks", len(chunks)). + Int("chunk_size", chunkSize). + Msg("Processing blocks") + var allResults []rpc.GetFullBlockResult + var mu sync.Mutex var wg sync.WaitGroup - resultsCh := make(chan []rpc.GetFullBlockResult, blockCount) - // Create a semaphore channel to limit concurrent goroutines - sem := make(chan struct{}, 20) - - log.Debug().Msgf("Worker Processing %d blocks in %d chunks of max %d blocks", blockCount, len(chunks), w.rpc.GetBlocksPerRequest().Blocks) + batchDelay := time.Duration(config.Cfg.RPC.Blocks.BatchDelay) * time.Millisecond for i, chunk := range chunks { - if i > 0 { - time.Sleep(time.Duration(config.Cfg.RPC.Blocks.BatchDelay) * time.Millisecond) + // Check context before starting new work + if ctx.Err() != nil { + log.Debug().Msg("Context canceled, skipping remaining chunks") + break // Don't start new chunks, but let existing ones finish } - select { - case <-ctx.Done(): - log.Debug().Msg("Context canceled, stopping Worker") - return nil - default: - // continue processing + + // Add delay between batches for RPC (except first batch) + if i > 0 && sourceType == SourceTypeRPC && batchDelay > 0 { + select { + case <-ctx.Done(): + log.Debug().Msg("Context canceled during batch delay") + break + case <-time.After(batchDelay): + // Continue after delay + } } wg.Add(1) go func(chunk []*big.Int) { defer wg.Done() - w.processChunkWithRetry(ctx, chunk, resultsCh, sem) + results := w.processChunkWithRetry(ctx, chunk, fetchFunc) + + mu.Lock() + allResults = append(allResults, results...) + mu.Unlock() }(chunk) } - go func() { - wg.Wait() - close(resultsCh) - }() + // Wait for all started goroutines to complete + wg.Wait() + + // Sort results by block number (only if we have results) + if len(allResults) > 0 { + sort.Slice(allResults, func(i, j int) bool { + return allResults[i].BlockNumber.Cmp(allResults[j].BlockNumber) < 0 + }) + } + + return allResults +} - results := make([]rpc.GetFullBlockResult, 0, blockCount) - for batchResults := range resultsCh { - results = append(results, batchResults...) 
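+// Illustrative usage (a sketch, not part of this file): assuming an rpc.IRPCClient and a
+// source.ISource implementation have been constructed elsewhere, the worker is wired up
+// roughly like this:
+//
+//	w := NewWorkerWithArchive(rpcClient, archiveSource)
+//	defer w.Close()
+//	results := w.Run(ctx, blockNumbers) // archive is used only when it covers every requested block
+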
+// shouldUseArchive determines if ALL requested blocks are within archive range +func (w *Worker) shouldUseArchive(ctx context.Context, blockNumbers []*big.Int) bool { + // Check if archive is configured and we have blocks to process + if w.archive == nil || len(blockNumbers) == 0 { + return false } - // Sort results by block number - sort.Slice(results, func(i, j int) bool { - return results[i].BlockNumber.Cmp(results[j].BlockNumber) < 0 - }) + // Get archive block range + minArchive, maxArchive, err := w.archive.GetSupportedBlockRange(ctx) + if err != nil { + log.Warn().Err(err).Msg("Failed to get archive block range") + return false + } - // track the last fetched block number + // Check if ALL blocks are within archive range + for _, block := range blockNumbers { + if block.Cmp(minArchive) < 0 || block.Cmp(maxArchive) > 0 { + // At least one block is outside archive range + return false + } + } + + // All blocks are within archive range + return true +} + +// Run processes blocks using either archive OR rpc +func (w *Worker) Run(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { + if len(blockNumbers) == 0 { + return nil + } + + var results []rpc.GetFullBlockResult + + // Determine which source to use + sourceType := SourceTypeRPC + fetchFunc := w.fetchFromRPC + + if w.shouldUseArchive(ctx, blockNumbers) { + sourceType = SourceTypeArchive + fetchFunc = w.fetchFromArchive + log.Debug(). + Int("count", len(blockNumbers)). + Str("source", sourceType.String()). + Msg("Using archive for all blocks") + } else { + log.Debug(). + Int("count", len(blockNumbers)). + Str("source", sourceType.String()). + Msg("Using RPC for all blocks") + } + + // Process all blocks with the selected source + results = w.processBatch(ctx, blockNumbers, sourceType, fetchFunc) + + // Update metrics and log summary if len(results) > 0 { lastBlockNumberFloat, _ := results[len(results)-1].BlockNumber.Float64() metrics.LastFetchedBlock.Set(lastBlockNumberFloat) + + // Count successes and failures + successful := 0 + failed := 0 + for _, r := range results { + if r.Error == nil { + successful++ + } else { + failed++ + } + } + + log.Debug(). + Int("total", len(results)). + Int("successful", successful). + Int("failed", failed). + Str("source", sourceType.String()). + Msg("Block fetching complete") } + return results } + +// Close gracefully shuts down the worker and cleans up resources +func (w *Worker) Close() error { + // Close archive if it exists + if w.archive != nil { + log.Debug().Msg("Closing archive connection") + w.archive.Close() + } + + log.Debug().Msg("Worker closed successfully") + return nil +} diff --git a/test/mocks/MockIMainStorage.go b/test/mocks/MockIMainStorage.go index 679345c..e13e4ee 100644 --- a/test/mocks/MockIMainStorage.go +++ b/test/mocks/MockIMainStorage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. 
//go:build !production @@ -26,6 +26,51 @@ func (_m *MockIMainStorage) EXPECT() *MockIMainStorage_Expecter { return &MockIMainStorage_Expecter{mock: &_m.Mock} } +// Close provides a mock function with no fields +func (_m *MockIMainStorage) Close() error { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for Close") + } + + var r0 error + if rf, ok := ret.Get(0).(func() error); ok { + r0 = rf() + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIMainStorage_Close_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Close' +type MockIMainStorage_Close_Call struct { + *mock.Call +} + +// Close is a helper method to define mock.On call +func (_e *MockIMainStorage_Expecter) Close() *MockIMainStorage_Close_Call { + return &MockIMainStorage_Close_Call{Call: _e.mock.On("Close")} +} + +func (_c *MockIMainStorage_Close_Call) Run(run func()) *MockIMainStorage_Close_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockIMainStorage_Close_Call) Return(_a0 error) *MockIMainStorage_Close_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIMainStorage_Close_Call) RunAndReturn(run func() error) *MockIMainStorage_Close_Call { + _c.Call.Return(run) + return _c +} + // FindMissingBlockNumbers provides a mock function with given fields: chainId, startBlock, endBlock func (_m *MockIMainStorage) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { ret := _m.Called(chainId, startBlock, endBlock) @@ -143,6 +188,66 @@ func (_c *MockIMainStorage_GetAggregations_Call) RunAndReturn(run func(string, s return _c } +// GetBlockCount provides a mock function with given fields: chainId, startBlock, endBlock +func (_m *MockIMainStorage) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { + ret := _m.Called(chainId, startBlock, endBlock) + + if len(ret) == 0 { + panic("no return value specified for GetBlockCount") + } + + var r0 *big.Int + var r1 error + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) (*big.Int, error)); ok { + return rf(chainId, startBlock, endBlock) + } + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) *big.Int); ok { + r0 = rf(chainId, startBlock, endBlock) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*big.Int) + } + } + + if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { + r1 = rf(chainId, startBlock, endBlock) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockIMainStorage_GetBlockCount_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlockCount' +type MockIMainStorage_GetBlockCount_Call struct { + *mock.Call +} + +// GetBlockCount is a helper method to define mock.On call +// - chainId *big.Int +// - startBlock *big.Int +// - endBlock *big.Int +func (_e *MockIMainStorage_Expecter) GetBlockCount(chainId interface{}, startBlock interface{}, endBlock interface{}) *MockIMainStorage_GetBlockCount_Call { + return &MockIMainStorage_GetBlockCount_Call{Call: _e.mock.On("GetBlockCount", chainId, startBlock, endBlock)} +} + +func (_c *MockIMainStorage_GetBlockCount_Call) Run(run func(chainId *big.Int, startBlock *big.Int, endBlock *big.Int)) *MockIMainStorage_GetBlockCount_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) + }) + return _c +} + +func (_c *MockIMainStorage_GetBlockCount_Call) Return(blockCount *big.Int, 
err error) *MockIMainStorage_GetBlockCount_Call { + _c.Call.Return(blockCount, err) + return _c +} + +func (_c *MockIMainStorage_GetBlockCount_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) (*big.Int, error)) *MockIMainStorage_GetBlockCount_Call { + _c.Call.Return(run) + return _c +} + // GetBlockHeadersDescending provides a mock function with given fields: chainId, from, to func (_m *MockIMainStorage) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { ret := _m.Called(chainId, from, to) diff --git a/test/mocks/MockIOrchestratorStorage.go b/test/mocks/MockIOrchestratorStorage.go index fe382f0..c8d0932 100644 --- a/test/mocks/MockIOrchestratorStorage.go +++ b/test/mocks/MockIOrchestratorStorage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. //go:build !production @@ -8,9 +8,6 @@ import ( big "math/big" mock "github.com/stretchr/testify/mock" - common "github.com/thirdweb-dev/indexer/internal/common" - - storage "github.com/thirdweb-dev/indexer/internal/storage" ) // MockIOrchestratorStorage is an autogenerated mock type for the IOrchestratorStorage type @@ -26,17 +23,17 @@ func (_m *MockIOrchestratorStorage) EXPECT() *MockIOrchestratorStorage_Expecter return &MockIOrchestratorStorage_Expecter{mock: &_m.Mock} } -// DeleteBlockFailures provides a mock function with given fields: failures -func (_m *MockIOrchestratorStorage) DeleteBlockFailures(failures []common.BlockFailure) error { - ret := _m.Called(failures) +// Close provides a mock function with no fields +func (_m *MockIOrchestratorStorage) Close() error { + ret := _m.Called() if len(ret) == 0 { - panic("no return value specified for DeleteBlockFailures") + panic("no return value specified for Close") } var r0 error - if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { - r0 = rf(failures) + if rf, ok := ret.Get(0).(func() error); ok { + r0 = rf() } else { r0 = ret.Error(0) } @@ -44,57 +41,56 @@ func (_m *MockIOrchestratorStorage) DeleteBlockFailures(failures []common.BlockF return r0 } -// MockIOrchestratorStorage_DeleteBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteBlockFailures' -type MockIOrchestratorStorage_DeleteBlockFailures_Call struct { +// MockIOrchestratorStorage_Close_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Close' +type MockIOrchestratorStorage_Close_Call struct { *mock.Call } -// DeleteBlockFailures is a helper method to define mock.On call -// - failures []common.BlockFailure -func (_e *MockIOrchestratorStorage_Expecter) DeleteBlockFailures(failures interface{}) *MockIOrchestratorStorage_DeleteBlockFailures_Call { - return &MockIOrchestratorStorage_DeleteBlockFailures_Call{Call: _e.mock.On("DeleteBlockFailures", failures)} +// Close is a helper method to define mock.On call +func (_e *MockIOrchestratorStorage_Expecter) Close() *MockIOrchestratorStorage_Close_Call { + return &MockIOrchestratorStorage_Close_Call{Call: _e.mock.On("Close")} } -func (_c *MockIOrchestratorStorage_DeleteBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIOrchestratorStorage_DeleteBlockFailures_Call { +func (_c *MockIOrchestratorStorage_Close_Call) Run(run func()) *MockIOrchestratorStorage_Close_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].([]common.BlockFailure)) + run() }) return _c } -func (_c *MockIOrchestratorStorage_DeleteBlockFailures_Call) 
Return(_a0 error) *MockIOrchestratorStorage_DeleteBlockFailures_Call { +func (_c *MockIOrchestratorStorage_Close_Call) Return(_a0 error) *MockIOrchestratorStorage_Close_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIOrchestratorStorage_DeleteBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIOrchestratorStorage_DeleteBlockFailures_Call { +func (_c *MockIOrchestratorStorage_Close_Call) RunAndReturn(run func() error) *MockIOrchestratorStorage_Close_Call { _c.Call.Return(run) return _c } -// GetBlockFailures provides a mock function with given fields: qf -func (_m *MockIOrchestratorStorage) GetBlockFailures(qf storage.QueryFilter) ([]common.BlockFailure, error) { - ret := _m.Called(qf) +// GetLastCommittedBlockNumber provides a mock function with given fields: chainId +func (_m *MockIOrchestratorStorage) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { + ret := _m.Called(chainId) if len(ret) == 0 { - panic("no return value specified for GetBlockFailures") + panic("no return value specified for GetLastCommittedBlockNumber") } - var r0 []common.BlockFailure + var r0 *big.Int var r1 error - if rf, ok := ret.Get(0).(func(storage.QueryFilter) ([]common.BlockFailure, error)); ok { - return rf(qf) + if rf, ok := ret.Get(0).(func(*big.Int) (*big.Int, error)); ok { + return rf(chainId) } - if rf, ok := ret.Get(0).(func(storage.QueryFilter) []common.BlockFailure); ok { - r0 = rf(qf) + if rf, ok := ret.Get(0).(func(*big.Int) *big.Int); ok { + r0 = rf(chainId) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).([]common.BlockFailure) + r0 = ret.Get(0).(*big.Int) } } - if rf, ok := ret.Get(1).(func(storage.QueryFilter) error); ok { - r1 = rf(qf) + if rf, ok := ret.Get(1).(func(*big.Int) error); ok { + r1 = rf(chainId) } else { r1 = ret.Error(1) } @@ -102,30 +98,88 @@ func (_m *MockIOrchestratorStorage) GetBlockFailures(qf storage.QueryFilter) ([] return r0, r1 } -// MockIOrchestratorStorage_GetBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlockFailures' -type MockIOrchestratorStorage_GetBlockFailures_Call struct { +// MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetLastCommittedBlockNumber' +type MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call struct { *mock.Call } -// GetBlockFailures is a helper method to define mock.On call -// - qf storage.QueryFilter -func (_e *MockIOrchestratorStorage_Expecter) GetBlockFailures(qf interface{}) *MockIOrchestratorStorage_GetBlockFailures_Call { - return &MockIOrchestratorStorage_GetBlockFailures_Call{Call: _e.mock.On("GetBlockFailures", qf)} +// GetLastCommittedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +func (_e *MockIOrchestratorStorage_Expecter) GetLastCommittedBlockNumber(chainId interface{}) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { + return &MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call{Call: _e.mock.On("GetLastCommittedBlockNumber", chainId)} } -func (_c *MockIOrchestratorStorage_GetBlockFailures_Call) Run(run func(qf storage.QueryFilter)) *MockIOrchestratorStorage_GetBlockFailures_Call { +func (_c *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call) Run(run func(chainId *big.Int)) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(storage.QueryFilter)) + run(args[0].(*big.Int)) }) return _c } -func (_c 
*MockIOrchestratorStorage_GetBlockFailures_Call) Return(_a0 []common.BlockFailure, _a1 error) *MockIOrchestratorStorage_GetBlockFailures_Call { - _c.Call.Return(_a0, _a1) +func (_c *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call) Return(blockNumber *big.Int, err error) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { + _c.Call.Return(blockNumber, err) + return _c +} + +func (_c *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call) RunAndReturn(run func(*big.Int) (*big.Int, error)) *MockIOrchestratorStorage_GetLastCommittedBlockNumber_Call { + _c.Call.Return(run) + return _c +} + +// GetLastPublishedBlockNumber provides a mock function with given fields: chainId +func (_m *MockIOrchestratorStorage) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { + ret := _m.Called(chainId) + + if len(ret) == 0 { + panic("no return value specified for GetLastPublishedBlockNumber") + } + + var r0 *big.Int + var r1 error + if rf, ok := ret.Get(0).(func(*big.Int) (*big.Int, error)); ok { + return rf(chainId) + } + if rf, ok := ret.Get(0).(func(*big.Int) *big.Int); ok { + r0 = rf(chainId) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*big.Int) + } + } + + if rf, ok := ret.Get(1).(func(*big.Int) error); ok { + r1 = rf(chainId) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetLastPublishedBlockNumber' +type MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call struct { + *mock.Call +} + +// GetLastPublishedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +func (_e *MockIOrchestratorStorage_Expecter) GetLastPublishedBlockNumber(chainId interface{}) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { + return &MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call{Call: _e.mock.On("GetLastPublishedBlockNumber", chainId)} +} + +func (_c *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int)) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*big.Int)) + }) + return _c +} + +func (_c *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call) Return(blockNumber *big.Int, err error) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { + _c.Call.Return(blockNumber, err) return _c } -func (_c *MockIOrchestratorStorage_GetBlockFailures_Call) RunAndReturn(run func(storage.QueryFilter) ([]common.BlockFailure, error)) *MockIOrchestratorStorage_GetBlockFailures_Call { +func (_c *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int) (*big.Int, error)) *MockIOrchestratorStorage_GetLastPublishedBlockNumber_Call { _c.Call.Return(run) return _c } @@ -188,12 +242,12 @@ func (_c *MockIOrchestratorStorage_GetLastReorgCheckedBlockNumber_Call) RunAndRe return _c } -// SetLastReorgCheckedBlockNumber provides a mock function with given fields: chainId, blockNumber -func (_m *MockIOrchestratorStorage) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { +// SetLastCommittedBlockNumber provides a mock function with given fields: chainId, blockNumber +func (_m *MockIOrchestratorStorage) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { ret := _m.Called(chainId, blockNumber) if len(ret) == 0 { - panic("no return value specified for SetLastReorgCheckedBlockNumber") + panic("no return value 
specified for SetLastCommittedBlockNumber") } var r0 error @@ -206,46 +260,46 @@ func (_m *MockIOrchestratorStorage) SetLastReorgCheckedBlockNumber(chainId *big. return r0 } -// MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastReorgCheckedBlockNumber' -type MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call struct { +// MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastCommittedBlockNumber' +type MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call struct { *mock.Call } -// SetLastReorgCheckedBlockNumber is a helper method to define mock.On call +// SetLastCommittedBlockNumber is a helper method to define mock.On call // - chainId *big.Int // - blockNumber *big.Int -func (_e *MockIOrchestratorStorage_Expecter) SetLastReorgCheckedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { - return &MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call{Call: _e.mock.On("SetLastReorgCheckedBlockNumber", chainId, blockNumber)} +func (_e *MockIOrchestratorStorage_Expecter) SetLastCommittedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { + return &MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call{Call: _e.mock.On("SetLastCommittedBlockNumber", chainId, blockNumber)} } -func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { +func (_c *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { _c.Call.Run(func(args mock.Arguments) { run(args[0].(*big.Int), args[1].(*big.Int)) }) return _c } -func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { +func (_c *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { +func (_c *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastCommittedBlockNumber_Call { _c.Call.Return(run) return _c } -// StoreBlockFailures provides a mock function with given fields: failures -func (_m *MockIOrchestratorStorage) StoreBlockFailures(failures []common.BlockFailure) error { - ret := _m.Called(failures) +// SetLastPublishedBlockNumber provides a mock function with given fields: chainId, blockNumber +func (_m *MockIOrchestratorStorage) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ret := _m.Called(chainId, blockNumber) if len(ret) == 0 { - panic("no return value specified for StoreBlockFailures") + panic("no return value specified for SetLastPublishedBlockNumber") } var r0 error - if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { - r0 = rf(failures) + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { + r0 = rf(chainId, 
blockNumber) } else { r0 = ret.Error(0) } @@ -253,30 +307,78 @@ func (_m *MockIOrchestratorStorage) StoreBlockFailures(failures []common.BlockFa return r0 } -// MockIOrchestratorStorage_StoreBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'StoreBlockFailures' -type MockIOrchestratorStorage_StoreBlockFailures_Call struct { +// MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastPublishedBlockNumber' +type MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call struct { *mock.Call } -// StoreBlockFailures is a helper method to define mock.On call -// - failures []common.BlockFailure -func (_e *MockIOrchestratorStorage_Expecter) StoreBlockFailures(failures interface{}) *MockIOrchestratorStorage_StoreBlockFailures_Call { - return &MockIOrchestratorStorage_StoreBlockFailures_Call{Call: _e.mock.On("StoreBlockFailures", failures)} +// SetLastPublishedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +// - blockNumber *big.Int +func (_e *MockIOrchestratorStorage_Expecter) SetLastPublishedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { + return &MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call{Call: _e.mock.On("SetLastPublishedBlockNumber", chainId, blockNumber)} } -func (_c *MockIOrchestratorStorage_StoreBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIOrchestratorStorage_StoreBlockFailures_Call { +func (_c *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].([]common.BlockFailure)) + run(args[0].(*big.Int), args[1].(*big.Int)) }) return _c } -func (_c *MockIOrchestratorStorage_StoreBlockFailures_Call) Return(_a0 error) *MockIOrchestratorStorage_StoreBlockFailures_Call { +func (_c *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIOrchestratorStorage_StoreBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIOrchestratorStorage_StoreBlockFailures_Call { +func (_c *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastPublishedBlockNumber_Call { + _c.Call.Return(run) + return _c +} + +// SetLastReorgCheckedBlockNumber provides a mock function with given fields: chainId, blockNumber +func (_m *MockIOrchestratorStorage) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { + ret := _m.Called(chainId, blockNumber) + + if len(ret) == 0 { + panic("no return value specified for SetLastReorgCheckedBlockNumber") + } + + var r0 error + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { + r0 = rf(chainId, blockNumber) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastReorgCheckedBlockNumber' +type MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call struct { + *mock.Call +} + +// SetLastReorgCheckedBlockNumber is a helper method to define mock.On call +// - chainId *big.Int +// - blockNumber *big.Int 
+func (_e *MockIOrchestratorStorage_Expecter) SetLastReorgCheckedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { + return &MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call{Call: _e.mock.On("SetLastReorgCheckedBlockNumber", chainId, blockNumber)} +} + +func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*big.Int), args[1].(*big.Int)) + }) + return _c +} + +func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) Return(_a0 error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIOrchestratorStorage_SetLastReorgCheckedBlockNumber_Call { _c.Call.Return(run) return _c } diff --git a/test/mocks/MockIRPCClient.go b/test/mocks/MockIRPCClient.go index 42f37ef..f7045c4 100644 --- a/test/mocks/MockIRPCClient.go +++ b/test/mocks/MockIRPCClient.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. //go:build !production diff --git a/test/mocks/MockIStagingStorage.go b/test/mocks/MockIStagingStorage.go index 14f8e68..53964d3 100644 --- a/test/mocks/MockIStagingStorage.go +++ b/test/mocks/MockIStagingStorage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.50.4. DO NOT EDIT. +// Code generated by mockery v2.53.5. DO NOT EDIT. //go:build !production @@ -26,6 +26,97 @@ func (_m *MockIStagingStorage) EXPECT() *MockIStagingStorage_Expecter { return &MockIStagingStorage_Expecter{mock: &_m.Mock} } +// Close provides a mock function with no fields +func (_m *MockIStagingStorage) Close() error { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for Close") + } + + var r0 error + if rf, ok := ret.Get(0).(func() error); ok { + r0 = rf() + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIStagingStorage_Close_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Close' +type MockIStagingStorage_Close_Call struct { + *mock.Call +} + +// Close is a helper method to define mock.On call +func (_e *MockIStagingStorage_Expecter) Close() *MockIStagingStorage_Close_Call { + return &MockIStagingStorage_Close_Call{Call: _e.mock.On("Close")} +} + +func (_c *MockIStagingStorage_Close_Call) Run(run func()) *MockIStagingStorage_Close_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockIStagingStorage_Close_Call) Return(_a0 error) *MockIStagingStorage_Close_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIStagingStorage_Close_Call) RunAndReturn(run func() error) *MockIStagingStorage_Close_Call { + _c.Call.Return(run) + return _c +} + +// DeleteBlockFailures provides a mock function with given fields: failures +func (_m *MockIStagingStorage) DeleteBlockFailures(failures []common.BlockFailure) error { + ret := _m.Called(failures) + + if len(ret) == 0 { + panic("no return value specified for DeleteBlockFailures") + } + + var r0 error + if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { + r0 = rf(failures) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockIStagingStorage_DeleteBlockFailures_Call is a *mock.Call that shadows Run/Return 
methods with type explicit version for method 'DeleteBlockFailures' +type MockIStagingStorage_DeleteBlockFailures_Call struct { + *mock.Call +} + +// DeleteBlockFailures is a helper method to define mock.On call +// - failures []common.BlockFailure +func (_e *MockIStagingStorage_Expecter) DeleteBlockFailures(failures interface{}) *MockIStagingStorage_DeleteBlockFailures_Call { + return &MockIStagingStorage_DeleteBlockFailures_Call{Call: _e.mock.On("DeleteBlockFailures", failures)} +} + +func (_c *MockIStagingStorage_DeleteBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIStagingStorage_DeleteBlockFailures_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].([]common.BlockFailure)) + }) + return _c +} + +func (_c *MockIStagingStorage_DeleteBlockFailures_Call) Return(_a0 error) *MockIStagingStorage_DeleteBlockFailures_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockIStagingStorage_DeleteBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIStagingStorage_DeleteBlockFailures_Call { + _c.Call.Return(run) + return _c +} + // DeleteStagingData provides a mock function with given fields: data func (_m *MockIStagingStorage) DeleteStagingData(data []common.BlockData) error { ret := _m.Called(data) @@ -72,107 +163,107 @@ func (_c *MockIStagingStorage_DeleteStagingData_Call) RunAndReturn(run func([]co return _c } -// GetLastPublishedBlockNumber provides a mock function with given fields: chainId -func (_m *MockIStagingStorage) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - ret := _m.Called(chainId) +// DeleteStagingDataOlderThan provides a mock function with given fields: chainId, blockNumber +func (_m *MockIStagingStorage) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { + ret := _m.Called(chainId, blockNumber) if len(ret) == 0 { - panic("no return value specified for GetLastPublishedBlockNumber") - } - - var r0 *big.Int - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int) (*big.Int, error)); ok { - return rf(chainId) - } - if rf, ok := ret.Get(0).(func(*big.Int) *big.Int); ok { - r0 = rf(chainId) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*big.Int) - } + panic("no return value specified for DeleteStagingDataOlderThan") } - if rf, ok := ret.Get(1).(func(*big.Int) error); ok { - r1 = rf(chainId) + var r0 error + if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { + r0 = rf(chainId, blockNumber) } else { - r1 = ret.Error(1) + r0 = ret.Error(0) } - return r0, r1 + return r0 } -// MockIStagingStorage_GetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetLastPublishedBlockNumber' -type MockIStagingStorage_GetLastPublishedBlockNumber_Call struct { +// MockIStagingStorage_DeleteStagingDataOlderThan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteStagingDataOlderThan' +type MockIStagingStorage_DeleteStagingDataOlderThan_Call struct { *mock.Call } -// GetLastPublishedBlockNumber is a helper method to define mock.On call +// DeleteStagingDataOlderThan is a helper method to define mock.On call // - chainId *big.Int -func (_e *MockIStagingStorage_Expecter) GetLastPublishedBlockNumber(chainId interface{}) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { - return &MockIStagingStorage_GetLastPublishedBlockNumber_Call{Call: _e.mock.On("GetLastPublishedBlockNumber", chainId)} +// - blockNumber *big.Int +func (_e *MockIStagingStorage_Expecter) 
DeleteStagingDataOlderThan(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { + return &MockIStagingStorage_DeleteStagingDataOlderThan_Call{Call: _e.mock.On("DeleteStagingDataOlderThan", chainId, blockNumber)} } -func (_c *MockIStagingStorage_GetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int)) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int)) + run(args[0].(*big.Int), args[1].(*big.Int)) }) return _c } -func (_c *MockIStagingStorage_GetLastPublishedBlockNumber_Call) Return(maxBlockNumber *big.Int, err error) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { - _c.Call.Return(maxBlockNumber, err) +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Return(_a0 error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { + _c.Call.Return(_a0) return _c } -func (_c *MockIStagingStorage_GetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int) (*big.Int, error)) *MockIStagingStorage_GetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { _c.Call.Return(run) return _c } -// SetLastPublishedBlockNumber provides a mock function with given fields: chainId, blockNumber -func (_m *MockIStagingStorage) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ret := _m.Called(chainId, blockNumber) +// GetBlockFailures provides a mock function with given fields: qf +func (_m *MockIStagingStorage) GetBlockFailures(qf storage.QueryFilter) ([]common.BlockFailure, error) { + ret := _m.Called(qf) if len(ret) == 0 { - panic("no return value specified for SetLastPublishedBlockNumber") + panic("no return value specified for GetBlockFailures") } - var r0 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { - r0 = rf(chainId, blockNumber) + var r0 []common.BlockFailure + var r1 error + if rf, ok := ret.Get(0).(func(storage.QueryFilter) ([]common.BlockFailure, error)); ok { + return rf(qf) + } + if rf, ok := ret.Get(0).(func(storage.QueryFilter) []common.BlockFailure); ok { + r0 = rf(qf) } else { - r0 = ret.Error(0) + if ret.Get(0) != nil { + r0 = ret.Get(0).([]common.BlockFailure) + } } - return r0 + if rf, ok := ret.Get(1).(func(storage.QueryFilter) error); ok { + r1 = rf(qf) + } else { + r1 = ret.Error(1) + } + + return r0, r1 } -// MockIStagingStorage_SetLastPublishedBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SetLastPublishedBlockNumber' -type MockIStagingStorage_SetLastPublishedBlockNumber_Call struct { +// MockIStagingStorage_GetBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlockFailures' +type MockIStagingStorage_GetBlockFailures_Call struct { *mock.Call } -// SetLastPublishedBlockNumber is a helper method to define mock.On call -// - chainId *big.Int -// - blockNumber *big.Int -func (_e *MockIStagingStorage_Expecter) SetLastPublishedBlockNumber(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { - return &MockIStagingStorage_SetLastPublishedBlockNumber_Call{Call: _e.mock.On("SetLastPublishedBlockNumber", chainId, 
blockNumber)} +// GetBlockFailures is a helper method to define mock.On call +// - qf storage.QueryFilter +func (_e *MockIStagingStorage_Expecter) GetBlockFailures(qf interface{}) *MockIStagingStorage_GetBlockFailures_Call { + return &MockIStagingStorage_GetBlockFailures_Call{Call: _e.mock.On("GetBlockFailures", qf)} } -func (_c *MockIStagingStorage_SetLastPublishedBlockNumber_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_GetBlockFailures_Call) Run(run func(qf storage.QueryFilter)) *MockIStagingStorage_GetBlockFailures_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int)) + run(args[0].(storage.QueryFilter)) }) return _c } -func (_c *MockIStagingStorage_SetLastPublishedBlockNumber_Call) Return(_a0 error) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { - _c.Call.Return(_a0) +func (_c *MockIStagingStorage_GetBlockFailures_Call) Return(_a0 []common.BlockFailure, _a1 error) *MockIStagingStorage_GetBlockFailures_Call { + _c.Call.Return(_a0, _a1) return _c } -func (_c *MockIStagingStorage_SetLastPublishedBlockNumber_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_SetLastPublishedBlockNumber_Call { +func (_c *MockIStagingStorage_GetBlockFailures_Call) RunAndReturn(run func(storage.QueryFilter) ([]common.BlockFailure, error)) *MockIStagingStorage_GetBlockFailures_Call { _c.Call.Return(run) return _c } @@ -341,17 +432,17 @@ func (_c *MockIStagingStorage_InsertStagingData_Call) RunAndReturn(run func([]co return _c } -// DeleteOlderThan provides a mock function with given fields: chainId, blockNumber -func (_m *MockIStagingStorage) DeleteOlderThan(chainId *big.Int, blockNumber *big.Int) error { - ret := _m.Called(chainId, blockNumber) +// StoreBlockFailures provides a mock function with given fields: failures +func (_m *MockIStagingStorage) StoreBlockFailures(failures []common.BlockFailure) error { + ret := _m.Called(failures) if len(ret) == 0 { - panic("no return value specified for DeleteOlderThan") + panic("no return value specified for StoreBlockFailures") } var r0 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { - r0 = rf(chainId, blockNumber) + if rf, ok := ret.Get(0).(func([]common.BlockFailure) error); ok { + r0 = rf(failures) } else { r0 = ret.Error(0) } @@ -359,31 +450,30 @@ func (_m *MockIStagingStorage) DeleteOlderThan(chainId *big.Int, blockNumber *bi return r0 } -// MockIStagingStorage_DeleteOlderThan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteOlderThan' -type MockIStagingStorage_DeleteOlderThan_Call struct { +// MockIStagingStorage_StoreBlockFailures_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'StoreBlockFailures' +type MockIStagingStorage_StoreBlockFailures_Call struct { *mock.Call } -// DeleteOlderThan is a helper method to define mock.On call -// - chainId *big.Int -// - blockNumber *big.Int -func (_e *MockIStagingStorage_Expecter) DeleteOlderThan(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_DeleteOlderThan_Call { - return &MockIStagingStorage_DeleteOlderThan_Call{Call: _e.mock.On("DeleteOlderThan", chainId, blockNumber)} +// StoreBlockFailures is a helper method to define mock.On call +// - failures []common.BlockFailure +func (_e *MockIStagingStorage_Expecter) StoreBlockFailures(failures interface{}) *MockIStagingStorage_StoreBlockFailures_Call { + return 
&MockIStagingStorage_StoreBlockFailures_Call{Call: _e.mock.On("StoreBlockFailures", failures)} } -func (_c *MockIStagingStorage_DeleteOlderThan_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_DeleteOlderThan_Call { +func (_c *MockIStagingStorage_StoreBlockFailures_Call) Run(run func(failures []common.BlockFailure)) *MockIStagingStorage_StoreBlockFailures_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int)) + run(args[0].([]common.BlockFailure)) }) return _c } -func (_c *MockIStagingStorage_DeleteOlderThan_Call) Return(_a0 error) *MockIStagingStorage_DeleteOlderThan_Call { +func (_c *MockIStagingStorage_StoreBlockFailures_Call) Return(_a0 error) *MockIStagingStorage_StoreBlockFailures_Call { _c.Call.Return(_a0) return _c } -func (_c *MockIStagingStorage_DeleteOlderThan_Call) RunAndReturn(run func(*big.Int, *big.Int) error) *MockIStagingStorage_DeleteOlderThan_Call { +func (_c *MockIStagingStorage_StoreBlockFailures_Call) RunAndReturn(run func([]common.BlockFailure) error) *MockIStagingStorage_StoreBlockFailures_Call { _c.Call.Return(run) return _c }
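
Note for reviewers: the regenerated MockIStagingStorage exposes the typed expecter API shown in the hunks above (EXPECT(), GetBlockFailures, StoreBlockFailures, and their Return signatures). The sketch below is illustrative only and not part of the patch; it assumes the generated mocks are importable from github.com/thirdweb-dev/indexer/test/mocks and that common.BlockFailure comes from the project's internal common package, and the test name and direct mock invocations are hypothetical stand-ins for real code under test.

package mocks_test

import (
	"testing"

	"github.com/stretchr/testify/mock"

	"github.com/thirdweb-dev/indexer/internal/common" // assumed import path for common.BlockFailure
	"github.com/thirdweb-dev/indexer/internal/storage"
	"github.com/thirdweb-dev/indexer/test/mocks" // assumed import path for the generated mocks
)

func TestStagingStorageFailureExpectations(t *testing.T) {
	// The generated mock embeds testify's mock.Mock, so a zero value is usable directly.
	stagingStorage := &mocks.MockIStagingStorage{}

	// Stub the two block-failure methods added in this diff using the typed expecter API.
	stagingStorage.EXPECT().
		GetBlockFailures(mock.Anything).
		Return([]common.BlockFailure{}, nil)
	stagingStorage.EXPECT().
		StoreBlockFailures(mock.Anything).
		Return(nil)

	// In a real test the mock would be handed to the component under test;
	// here it is invoked directly just to show the stubbed behavior.
	var qf storage.QueryFilter
	failures, err := stagingStorage.GetBlockFailures(qf)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if err := stagingStorage.StoreBlockFailures(failures); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Verify that every expectation declared above was exercised.
	stagingStorage.AssertExpectations(t)
}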