@@ -61,9 +61,7 @@ def configure_logging(verbose: bool = False):
6161 """
6262 # Configure root logger first
6363 logging .basicConfig (
64- level = logging .INFO ,
65- format = '%(asctime)s | %(levelname)s | %(name)s | %(message)s' ,
66- datefmt = '%Y-%m-%d %H:%M:%S'
64+ level = logging .INFO , format = '%(asctime)s | %(levelname)s | %(name)s | %(message)s' , datefmt = '%Y-%m-%d %H:%M:%S'
6765 )
6866
6967 if not verbose :
@@ -223,8 +221,16 @@ def print_configuration(args, min_block: int, max_block: int, has_labels: bool):
223221 print (f'🏷️ Label Joining: ENABLED ({ args .label_name } )' )
224222
225223
226- def print_results (results , table_name : str , min_block : int , max_block : int ,
227- duration : float , num_workers : int , has_labels : bool , label_columns : str = '' ):
224+ def print_results (
225+ results ,
226+ table_name : str ,
227+ min_block : int ,
228+ max_block : int ,
229+ duration : float ,
230+ num_workers : int ,
231+ has_labels : bool ,
232+ label_columns : str = '' ,
233+ ):
228234 """Print execution results and sample queries."""
229235 # Calculate statistics
230236 total_rows = sum (r .rows_loaded for r in results if r .success )
@@ -268,131 +274,81 @@ def main():
268274 parser = argparse .ArgumentParser (
269275 description = 'Load data into Snowflake using parallel streaming with custom SQL queries' ,
270276 formatter_class = argparse .RawDescriptionHelpFormatter ,
271- epilog = __doc__
277+ epilog = __doc__ ,
272278 )
273279
274280 # Required arguments
275281 required = parser .add_argument_group ('required arguments' )
276- required .add_argument (
277- '--query-file' ,
278- required = True ,
279- help = 'Path to SQL query file to execute'
280- )
281- required .add_argument (
282- '--table-name' ,
283- required = True ,
284- help = 'Destination Snowflake table name'
285- )
282+ required .add_argument ('--query-file' , required = True , help = 'Path to SQL query file to execute' )
283+ required .add_argument ('--table-name' , required = True , help = 'Destination Snowflake table name' )
286284
287285 # Block range arguments (mutually exclusive groups)
288286 block_range = parser .add_argument_group ('block range' )
289- block_range .add_argument (
290- '--blocks' ,
291- type = int ,
292- help = 'Number of recent blocks to load (auto-detect range)'
293- )
294- block_range .add_argument (
295- '--min-block' ,
296- type = int ,
297- help = 'Explicit start block (requires --max-block)'
298- )
299- block_range .add_argument (
300- '--max-block' ,
301- type = int ,
302- help = 'Explicit end block (requires --min-block)'
303- )
287+ block_range .add_argument ('--blocks' , type = int , help = 'Number of recent blocks to load (auto-detect range)' )
288+ block_range .add_argument ('--min-block' , type = int , help = 'Explicit start block (requires --max-block)' )
289+ block_range .add_argument ('--max-block' , type = int , help = 'Explicit end block (requires --min-block)' )
304290 block_range .add_argument (
305291 '--source-table' ,
306292 default = 'eth_firehose.logs' ,
307- help = 'Table for block range detection (default: eth_firehose.logs)'
293+ help = 'Table for block range detection (default: eth_firehose.logs)' ,
308294 )
309295 block_range .add_argument (
310- '--block-column' ,
311- default = 'block_num' ,
312- help = 'Column name for block partitioning (default: block_num)'
296+ '--block-column' , default = 'block_num' , help = 'Column name for block partitioning (default: block_num)'
313297 )
314298
315299 # Label configuration (all optional)
316300 labels = parser .add_argument_group ('label configuration (optional)' )
317- labels .add_argument (
318- '--label-csv' ,
319- help = 'Path to CSV file with label data'
320- )
321- labels .add_argument (
322- '--label-name' ,
323- help = 'Label identifier (required if --label-csv provided)'
324- )
325- labels .add_argument (
326- '--label-key' ,
327- help = 'CSV column for joining (required if --label-csv provided)'
328- )
329- labels .add_argument (
330- '--stream-key' ,
331- help = 'Stream column for joining (required if --label-csv provided)'
332- )
301+ labels .add_argument ('--label-csv' , help = 'Path to CSV file with label data' )
302+ labels .add_argument ('--label-name' , help = 'Label identifier (required if --label-csv provided)' )
303+ labels .add_argument ('--label-key' , help = 'CSV column for joining (required if --label-csv provided)' )
304+ labels .add_argument ('--stream-key' , help = 'Stream column for joining (required if --label-csv provided)' )
333305
334306 # Snowflake configuration
335307 snowflake = parser .add_argument_group ('snowflake configuration' )
336308 snowflake .add_argument (
337- '--connection-name' ,
338- help = 'Snowflake connection name (default: auto-generated from table name)'
309+ '--connection-name' , help = 'Snowflake connection name (default: auto-generated from table name)'
339310 )
340311 snowflake .add_argument (
341312 '--loading-method' ,
342313 choices = ['snowpipe_streaming' , 'stage' , 'insert' ],
343314 default = 'snowpipe_streaming' ,
344- help = 'Snowflake loading method (default: snowpipe_streaming)'
315+ help = 'Snowflake loading method (default: snowpipe_streaming)' ,
345316 )
346317 snowflake .add_argument (
347318 '--preserve-reorg-history' ,
348319 action = 'store_true' ,
349320 default = True ,
350- help = 'Enable reorg history preservation (default: enabled)'
321+ help = 'Enable reorg history preservation (default: enabled)' ,
351322 )
352323 snowflake .add_argument (
353324 '--no-preserve-reorg-history' ,
354325 action = 'store_false' ,
355326 dest = 'preserve_reorg_history' ,
356- help = 'Disable reorg history preservation'
357- )
358- snowflake .add_argument (
359- '--disable-state' ,
360- action = 'store_true' ,
361- help = 'Disable state management (job resumption)'
362- )
363- snowflake .add_argument (
364- '--pool-size' ,
365- type = int ,
366- help = 'Connection pool size (default: workers + 2)'
327+ help = 'Disable reorg history preservation' ,
367328 )
329+ snowflake .add_argument ('--disable-state' , action = 'store_true' , help = 'Disable state management (job resumption)' )
330+ snowflake .add_argument ('--pool-size' , type = int , help = 'Connection pool size (default: workers + 2)' )
368331
369332 # Parallel execution configuration
370333 parallel = parser .add_argument_group ('parallel execution' )
371- parallel .add_argument (
372- '--workers' ,
373- type = int ,
374- default = 4 ,
375- help = 'Number of parallel workers (default: 4)'
376- )
334+ parallel .add_argument ('--workers' , type = int , default = 4 , help = 'Number of parallel workers (default: 4)' )
377335 parallel .add_argument (
378336 '--flush-interval' ,
379337 type = float ,
380338 default = 1.0 ,
381- help = 'Snowpipe Streaming buffer flush interval in seconds (default: 1.0)'
339+ help = 'Snowpipe Streaming buffer flush interval in seconds (default: 1.0)' ,
382340 )
383341
384342 # Server configuration
385343 parser .add_argument (
386344 '--server' ,
387345 default = os .getenv ('AMP_SERVER_URL' , 'grpc://34.27.238.174:80' ),
388- help = 'AMP server URL (default: from AMP_SERVER_URL env or grpc://34.27.238.174:80)'
346+ help = 'AMP server URL (default: from AMP_SERVER_URL env or grpc://34.27.238.174:80)' ,
389347 )
390348
391349 # Logging configuration
392350 parser .add_argument (
393- '--verbose' ,
394- action = 'store_true' ,
395- help = 'Enable verbose logging from Snowflake libraries (default: suppressed)'
351+ '--verbose' , action = 'store_true' , help = 'Enable verbose logging from Snowflake libraries (default: suppressed)'
396352 )
397353
398354 args = parser .parse_args ()
@@ -445,8 +401,7 @@ def main():
445401
446402 # Print results
447403 label_columns = f'{ args .label_key } joined columns' if has_labels else ''
448- print_results (results , args .table_name , min_block , max_block , duration ,
449- args .workers , has_labels , label_columns )
404+ print_results (results , args .table_name , min_block , max_block , duration , args .workers , has_labels , label_columns )
450405
451406 return args .table_name , sum (r .rows_loaded for r in results if r .success ), duration
452407
@@ -456,6 +411,7 @@ def main():
456411 except Exception as e :
457412 print (f'\n \n ❌ Error: { e } ' )
458413 import traceback
414+
459415 traceback .print_exc ()
460416 sys .exit (1 )
461417
0 commit comments