@@ -2,7 +2,7 @@
from argparse import ArgumentTypeError
from typing import Callable

- from django.core.management.base import BaseCommand
+ from django.core.management.base import BaseCommand, CommandParser
from pyspark.sql import SparkSession

from usaspending_api.common.etl.spark import create_ref_temp_views
@@ -35,7 +35,9 @@
    load_object_class_program_activity_incremental,
    object_class_program_activity_schema,
)
- from usaspending_api.download.delta_models.transaction_download import transaction_download_schema
+ from usaspending_api.download.delta_models.transaction_download import (
+     transaction_download_schema,
+ )
from usaspending_api.etl.table_specs import QueryTableSpec
from usaspending_api.recipient.delta_models import (
    RECIPIENT_LOOKUP_POSTGRES_COLUMNS,
@@ -58,7 +60,10 @@
    AWARD_SEARCH_POSTGRES_GOLD_COLUMNS,
    award_search_create_sql_string,
)
- from usaspending_api.search.delta_models.dataframes.award_search import load_award_search, load_award_search_incremental
+ from usaspending_api.search.delta_models.dataframes.award_search import (
+     load_award_search,
+     load_award_search_incremental,
+ )
from usaspending_api.search.delta_models.dataframes.transaction_search import (
    load_transaction_search,
    load_transaction_search_incremental,
@@ -70,7 +75,12 @@
    subaward_search_create_sql_string,
    subaward_search_load_sql_string,
)
- from usaspending_api.search.models import AwardSearch, SubawardSearch, SummaryStateView, TransactionSearch
+ from usaspending_api.search.models import (
+     AwardSearch,
+     SubawardSearch,
+     SummaryStateView,
+     TransactionSearch,
+ )
from usaspending_api.settings import HOST
from usaspending_api.transactions.delta_models import (
    SUMMARY_STATE_VIEW_COLUMNS,
226236 "partition_keys" : ["is_fpds" ],
227237 "partitioning_form" : "LIST" ,
228238 "partitions" : [
229- {"table_suffix" : "_fpds" , "partitioning_clause" : "FOR VALUES IN (TRUE)" },
230- {"table_suffix" : "_fabs" , "partitioning_clause" : "FOR VALUES IN (FALSE)" },
239+ {
240+ "table_suffix" : "_fpds" ,
241+ "partitioning_clause" : "FOR VALUES IN (TRUE)" ,
242+ },
243+ {
244+ "table_suffix" : "_fabs" ,
245+ "partitioning_clause" : "FOR VALUES IN (FALSE)" ,
246+ },
231247 ],
232248 },
233249 }
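
The LIST spec above is what the downstream Postgres side splits on: one child table per `is_fpds` value. A minimal sketch of the DDL such a spec expands to, assuming a hypothetical parent table name (neither the name nor the expansion loop comes from this repo):

```python
# Illustrative only: expand the partition spec into Postgres DDL strings.
# `parent` and this loop are assumptions, not the repo's actual helper.
spec = {
    "partition_keys": ["is_fpds"],
    "partitioning_form": "LIST",
    "partitions": [
        {"table_suffix": "_fpds", "partitioning_clause": "FOR VALUES IN (TRUE)"},
        {"table_suffix": "_fabs", "partitioning_clause": "FOR VALUES IN (FALSE)"},
    ],
}
parent = "transaction_search"  # hypothetical parent table name
for partition in spec["partitions"]:
    print(
        f"CREATE TABLE {parent}{partition['table_suffix']} "
        f"PARTITION OF {parent} {partition['partitioning_clause']};"
    )
```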
286302 "partition_column_type" : "numeric" ,
287303 "delta_table_create_sql" : account_balances_schema ,
288304 "delta_table_create_options" : {"delta.enableChangeDataFeed" : True },
289- "column_names" : list (),
290- "delta_table_create_partitions" : ["reporting_fiscal_year" , "funding_toptier_agency_id" ],
305+ "column_names" : [],
306+ "delta_table_create_partitions" : [
307+ "reporting_fiscal_year" ,
308+ "funding_toptier_agency_id" ,
309+ ],
291310 }
292311 ),
293312 "award_financial_download" : QueryTableSpec (
299318 "partition_column_type" : "numeric" ,
300319 "delta_table_create_sql" : award_financial_schema ,
301320 "delta_table_create_options" : {"delta.enableChangeDataFeed" : True },
302- "column_names" : list (),
303- "delta_table_create_partitions" : ["reporting_fiscal_year" , "funding_toptier_agency_id" ],
321+ "column_names" : [],
322+ "delta_table_create_partitions" : [
323+ "reporting_fiscal_year" ,
324+ "funding_toptier_agency_id" ,
325+ ],
304326 }
305327 ),
306328 "object_class_program_activity_download" : QueryTableSpec (
312334 "partition_column_type" : "numeric" ,
313335 "delta_table_create_sql" : object_class_program_activity_schema ,
314336 "delta_table_create_options" : {"delta.enableChangeDataFeed" : True },
315- "column_names" : list (),
316- "delta_table_create_partitions" : ["reporting_fiscal_year" , "funding_toptier_agency_id" ],
337+ "column_names" : [],
338+ "delta_table_create_partitions" : [
339+ "reporting_fiscal_year" ,
340+ "funding_toptier_agency_id" ,
341+ ],
317342 }
318343 ),
319344 "transaction_download" : QueryTableSpec (
323348 "partition_column_type" : "numeric" ,
324349 "delta_table_create_sql" : transaction_download_schema ,
325350 "delta_table_create_options" : {"delta.enableChangeDataFeed" : True },
326- "column_names" : list (),
327- "delta_table_create_partitions" : ["awarding_agency_code" , "is_fpds" , "action_date_fiscal_year" ],
351+ "column_names" : [],
352+ "delta_table_create_partitions" : [
353+ "awarding_agency_code" ,
354+ "is_fpds" ,
355+ "action_date_fiscal_year" ,
356+ ],
328357 }
329358 ),
330359}
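
Each download entry above is a `QueryTableSpec` whose fields `handle()` later reads by attribute (`table_spec.destination_database`, `getattr(table_spec, source_query_key)`). A rough sketch of the shape those accesses imply; the real class lives in `usaspending_api.etl.table_specs` and may differ:

```python
# Sketch of fields implied by the dict keys and attribute accesses in this
# file; NOT the repo's actual QueryTableSpec definition.
from dataclasses import dataclass, field
from typing import Any, Callable, Optional, Union


@dataclass
class QueryTableSpecSketch:
    destination_database: str
    partition_column_type: str
    delta_table_create_sql: str
    delta_table_create_options: dict[str, Any] = field(default_factory=dict)
    column_names: list[str] = field(default_factory=list)
    delta_table_create_partitions: list[str] = field(default_factory=list)
    source_query: Optional[Union[str, Callable]] = None
    source_query_incremental: Optional[Union[str, Callable]] = None
```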
@@ -342,7 +371,8 @@ class Command(BaseCommand):
    destination_table_name: str
    spark: SparkSession

-     def add_arguments(self, parser):
+     @staticmethod
+     def add_arguments(parser: CommandParser) -> None:
        parser.add_argument(
            "--destination-table",
            type=str,
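
For orientation, a hedged sketch of how a command with these arguments is invoked; the command name is inferred here and may not match the actual module name:

```python
# Assumed invocation (command name hypothetical; flags taken from
# add_arguments above and the --incremental option below):
# $ python manage.py load_query_to_delta --destination-table transaction_download
# $ python manage.py load_query_to_delta --destination-table transaction_download --incremental
```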
@@ -370,7 +400,7 @@ def add_arguments(self, parser):
            help="Whether or not the table will be updated incrementally",
        )

-     def handle(self, *args, **options):
+     def handle(self, *args, **options) -> None:
        extra_conf = {
            # Config for Delta Lake tables and SQL. Need these to keep Delta table metadata in the metastore
            "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
@@ -385,17 +415,25 @@ def handle(self, *args, **options):
        spark_created_by_command = False
        if not self.spark:
            spark_created_by_command = True
-             self.spark = configure_spark_session(**extra_conf, spark_context=self.spark)  # type: SparkSession
+             self.spark = configure_spark_session(
+                 **extra_conf, spark_context=self.spark
+             )  # type: SparkSession

        # Resolve Parameters
        destination_table = options["destination_table"]
        table_spec = TABLE_SPEC[destination_table]
        self.destination_database = options["alt_db"] or table_spec.destination_database
-         self.destination_table_name = options["alt_name"] or destination_table.split(".")[-1]
-         source_query_key = "source_query_incremental" if options["incremental"] else "source_query"
+         self.destination_table_name = (
+             options["alt_name"] or destination_table.split(".")[-1]
+         )
+         source_query_key = (
+             "source_query_incremental" if options["incremental"] else "source_query"
+         )
        load_query = getattr(table_spec, source_query_key)
        if load_query is None:
-             raise ArgumentTypeError(f"Invalid source query. `{source_query_key}` must be specified in the TABLE_SPEC.")
+             raise ArgumentTypeError(
+                 f"Invalid source query. `{source_query_key}` must be specified in the TABLE_SPEC."
+             )

        # Set the database that will be interacted with for all Delta Lake table Spark-based activity
        logger.info(f"Using Spark Database: {self.destination_database}")
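
The `source_query` vs. `source_query_incremental` lookup pairs naturally with the DataFrame loaders imported at the top of this file. A hedged illustration of how a spec might wire both (not a literal entry from this file, which the diff does not show):

```python
# Hedged illustration: --incremental switches loader functions rather than
# SQL text when a spec points both attributes at callables.
spec_kwargs = {
    "source_query": load_award_search,
    "source_query_incremental": load_award_search_incremental,
}
```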
@@ -405,15 +443,19 @@ def handle(self, *args, **options):

        if isinstance(load_query, list):
            for index, query in enumerate(load_query):
-                 logger.info(f"Running query number: {index + 1}\nPreview of query: {query[:100]}")
+                 logger.info(
+                     f"Running query number: {index + 1}\nPreview of query: {query[:100]}"
+                 )
                self.run_spark_sql(query)
        else:
            self.run_spark_sql(load_query)

        if spark_created_by_command:
            self.spark.stop()

-     def run_spark_sql(self, query: str | Callable[[SparkSession, str, str], None]):
+     def run_spark_sql(
+         self, query: str | Callable[[SparkSession, str, str], None]
+     ) -> None:
        if isinstance(query, str):
            jdbc_conn_props = get_jdbc_connection_properties()
            self.spark.sql(
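
The string branch renders the SQL template before executing it; the actual arguments to `self.spark.sql(` are collapsed out of this hunk. A hedged sketch of the shape of that call (placeholder names are hypothetical):

```python
# Sketch only; the real format placeholders are collapsed in this diff.
self.spark.sql(
    query.format(
        DESTINATION_DATABASE=self.destination_database,
        DESTINATION_TABLE=self.destination_table_name,
    )
)
```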
@@ -430,4 +472,6 @@ def run_spark_sql(self, query: str | Callable[[SparkSession, str, str], None]):
        elif isinstance(query, Callable):
            query(self.spark, self.destination_database, self.destination_table_name)
        else:
-             raise ArgumentTypeError(f"Invalid query. `{query}` must be a string or a Callable.")
+             raise ArgumentTypeError(
+                 f"Invalid query. `{query}` must be a string or a Callable."
+             )
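
The `Callable` branch pins down the loader contract via the annotation `Callable[[SparkSession, str, str], None]`: a function taking the session, the destination database, and the destination table, returning nothing. A minimal conforming example (name and body are illustrative, not one of this repo's loaders):

```python
from pyspark.sql import SparkSession


def example_loader(spark: SparkSession, destination_db: str, destination_table: str) -> None:
    # Illustrative body: read a hypothetical source view and write it out
    # as a managed Delta table in the destination database.
    df = spark.table("global_temp.some_source_view")  # hypothetical source
    df.write.format("delta").mode("overwrite").saveAsTable(
        f"{destination_db}.{destination_table}"
    )
```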