#!/usr/bin/env python3
"""
Real-world test: Load ERC20 transfers into Snowflake using parallel streaming.

Usage:
    python app/test_erc20_parallel_load.py [--blocks BLOCKS] [--workers WORKERS]

Example:
    python app/test_erc20_parallel_load.py --blocks 100000 --workers 8
"""

import argparse
import os
import time
from datetime import datetime

from amp.client import Client
from amp.streaming.parallel import ParallelConfig


| 21 | +def get_recent_block_range(client: Client, num_blocks: int = 100_000): |
| 22 | + """Query amp server to get recent block range.""" |
| 23 | + print(f'\n🔍 Detecting recent block range ({num_blocks:,} blocks)...') |
| 24 | + |
| 25 | + query = 'SELECT MAX(block_num) as max_block FROM eth_firehose.logs' |
| 26 | + result = client.get_sql(query, read_all=True) |
| 27 | + |
| 28 | + if result.num_rows == 0: |
| 29 | + raise RuntimeError('No data found in eth_firehose.logs') |
| 30 | + |
| 31 | + max_block = result.column('max_block')[0].as_py() |
| 32 | + if max_block is None: |
| 33 | + raise RuntimeError('No blocks found in eth_firehose.logs') |
| 34 | + |
| 35 | + min_block = max(0, max_block - num_blocks) |
| 36 | + |
| 37 | + print(f'✅ Block range: {min_block:,} to {max_block:,} ({max_block - min_block:,} blocks)') |
| 38 | + return min_block, max_block |
| 39 | + |
| 40 | + |
def _build_erc20_query() -> str:
    """Return SQL that decodes ERC20 Transfer logs into flat columns.

    The inner select matches logs on the Transfer topic and decodes the
    payload; `topic3 IS NULL` keeps only two-indexed-argument events (ERC20),
    excluding ERC721 Transfers, which index the token id as a third topic.
    """
    # ERC20 Transfer event signature, used for both topic matching and decoding.
    transfer_sig = 'Transfer(address indexed from, address indexed to, uint256 value)'

    return f"""
    select
        pc.block_num,
        pc.block_hash,
        pc.timestamp,
        pc.tx_hash,
        pc.tx_index,
        pc.log_index,
        pc.dec['from'] as from_address,
        pc.dec['to'] as to_address,
        pc.dec['value'] as value
    from (
        select
            l.block_num,
            l.block_hash,
            l.tx_hash,
            l.tx_index,
            l.log_index,
            l.timestamp,
            evm_decode(l.topic1, l.topic2, l.topic3, l.data, '{transfer_sig}') as dec
        from eth_firehose.logs l
        where
            l.topic0 = evm_topic('{transfer_sig}') and
            l.topic3 IS NULL) pc
    """


def _configure_snowflake(client: Client) -> None:
    """Register the 'snowflake_erc20' connection from SNOWFLAKE_* env vars."""
    snowflake_config = {
        'account': os.getenv('SNOWFLAKE_ACCOUNT'),
        'user': os.getenv('SNOWFLAKE_USER'),
        'warehouse': os.getenv('SNOWFLAKE_WAREHOUSE'),
        'database': os.getenv('SNOWFLAKE_DATABASE'),
        'private_key': os.getenv('SNOWFLAKE_PRIVATE_KEY'),
        'loading_method': 'stage',  # Use fast bulk loading via COPY INTO
    }
    client.configure_connection(name='snowflake_erc20', loader='snowflake', config=snowflake_config)


def _print_summary(
    *,
    table_name: str,
    min_block: int,
    max_block: int,
    total_rows: int,
    duration: float,
    rows_per_sec: float,
    successful_workers: int,
    failed_workers: int,
    num_workers: int,
) -> None:
    """Print the post-load statistics banner."""
    print(f'\n{"=" * 70}')
    print('🎉 ERC20 Parallel Load Complete!')
    print(f'{"=" * 70}')
    print(f'📊 Table name: {table_name}')
    print(f'📦 Block range: {min_block:,} to {max_block:,}')
    print(f'📈 Rows loaded: {total_rows:,}')
    print(f'⏱️ Duration: {duration:.2f}s')
    print(f'🚀 Throughput: {rows_per_sec:,.0f} rows/sec')
    print(f'👷 Workers: {successful_workers}/{num_workers} succeeded')
    if failed_workers > 0:
        print(f'⚠️ Failed workers: {failed_workers}')
    # Guard against a zero-width block range (e.g. --blocks 0), which would
    # otherwise raise ZeroDivisionError.
    blocks_spanned = max_block - min_block
    avg_rows_per_block = total_rows / blocks_spanned if blocks_spanned > 0 else 0
    print(f'📊 Avg rows/block: {avg_rows_per_block:.0f}')
    print(f'{"=" * 70}')


def load_erc20_transfers(num_blocks: int = 100_000, num_workers: int = 8):
    """Stream recent ERC20 Transfer events into a fresh Snowflake table.

    Connects to the amp server (AMP_SERVER_URL env var, with a built-in
    default), detects the most recent *num_blocks* block window, decodes
    Transfer logs server-side, and bulk-loads them into a timestamped table
    using *num_workers* parallel streaming workers.

    Args:
        num_blocks: Size of the recent block window to load.
        num_workers: Number of parallel streaming workers.

    Returns:
        Tuple of (table_name, total_rows, duration_seconds).
    """
    # Initialize client
    server_url = os.getenv('AMP_SERVER_URL', 'grpc://34.27.238.174:80')
    client = Client(server_url)
    print(f'📡 Connected to amp server: {server_url}')

    # Get recent block range
    min_block, max_block = get_recent_block_range(client, num_blocks)

    # Unique table name per run so repeated test loads never collide.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    table_name = f'erc20_transfers_{timestamp}'
    print(f'\n📊 Target table: {table_name}')

    erc20_query = _build_erc20_query()
    _configure_snowflake(client)

    # Partition the block range across workers on block_num.
    parallel_config = ParallelConfig(
        num_workers=num_workers,
        table_name='eth_firehose.logs',
        min_block=min_block,
        max_block=max_block,
        block_column='block_num',
    )

    print(f'\n🚀 Starting parallel load with {num_workers} workers...\n')

    start_time = time.time()

    # Load data in parallel (will stop after processing the block range)
    results = list(
        client.sql(erc20_query).load(
            connection='snowflake_erc20', destination=table_name, stream=True, parallel_config=parallel_config
        )
    )

    duration = time.time() - start_time

    # Aggregate per-worker results; a worker that produced no partition_id
    # metadata is counted as failed.
    total_rows = sum(r.rows_loaded for r in results if r.success)
    rows_per_sec = total_rows / duration if duration > 0 else 0
    partitions = [r for r in results if 'partition_id' in r.metadata]
    successful_workers = len(partitions)
    failed_workers = num_workers - successful_workers

    _print_summary(
        table_name=table_name,
        min_block=min_block,
        max_block=max_block,
        total_rows=total_rows,
        duration=duration,
        rows_per_sec=rows_per_sec,
        successful_workers=successful_workers,
        failed_workers=failed_workers,
        num_workers=num_workers,
    )

    print(f'\n✅ Table "{table_name}" is ready in Snowflake for testing!')
    print(f'   Query it with: SELECT * FROM {table_name} LIMIT 10;')

    return table_name, total_rows, duration
| 147 | + |
| 148 | + |
if __name__ == '__main__':
    # CLI entry point: parse the block-window size and worker count, then run
    # the load, reporting interrupts and errors distinctly.
    cli = argparse.ArgumentParser(description='Load ERC20 transfers into Snowflake using parallel streaming')
    cli.add_argument(
        '--blocks', type=int, default=100_000, help='Number of recent blocks to load (default: 100,000)'
    )
    cli.add_argument('--workers', type=int, default=8, help='Number of parallel workers (default: 8)')
    opts = cli.parse_args()

    try:
        load_erc20_transfers(num_blocks=opts.blocks, num_workers=opts.workers)
    except KeyboardInterrupt:
        print('\n\n⚠️  Interrupted by user')
    except Exception as e:
        print(f'\n\n❌ Error: {e}')
        raise