Skip to content

Commit c19512d

Browse files
authored
Merge pull request #4556 from fedspendingtransparency/ftr/dev-13938-custom-account-paritioning
[DEV-13938] Partition the Custom Account download Delta tables
2 parents 9629626 + e113bbf commit c19512d

File tree

5 files changed

+37
-34
lines changed

5 files changed

+37
-34
lines changed

usaspending_api/download/delta_models/account_balances_download.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
from delta.tables import DeltaTable
2-
from usaspending_api.download.helpers.delta_models_helpers import fy_quarter_period
3-
from pyspark.sql import DataFrame, functions as sf, SparkSession
2+
from pyspark.sql import DataFrame, SparkSession
3+
from pyspark.sql import functions as sf
44
from pyspark.sql.types import (
55
BooleanType,
66
DateType,
77
DecimalType,
88
IntegerType,
9+
LongType,
910
StringType,
1011
StructField,
1112
StructType,
1213
TimestampType,
13-
LongType,
1414
)
1515

16+
from usaspending_api.download.helpers.delta_models_helpers import fy_quarter_period
17+
1618
account_balances_schema = StructType(
1719
[
1820
StructField("funding_toptier_agency_id", IntegerType()),

usaspending_api/download/delta_models/award_financial_download.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
from delta.tables import DeltaTable
2-
from pyspark.sql import SparkSession, functions as sf
2+
from pyspark.sql import SparkSession
3+
from pyspark.sql import functions as sf
34
from pyspark.sql.functions import expr
45
from pyspark.sql.types import (
56
BooleanType,
67
DateType,
78
DecimalType,
89
IntegerType,
10+
LongType,
911
StringType,
1012
StructField,
1113
StructType,
12-
LongType,
1314
)
15+
1416
from usaspending_api.download.helpers.delta_models_helpers import fy_quarter_period
1517
from usaspending_api.download.helpers.download_annotation_functions import AWARD_URL
1618

usaspending_api/download/delta_models/object_class_program_activity_download.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
from delta.tables import DeltaTable
2-
from pyspark.sql import SparkSession, functions as sf
2+
from pyspark.sql import SparkSession
3+
from pyspark.sql import functions as sf
34
from pyspark.sql.types import (
45
BooleanType,
56
DateType,
67
DecimalType,
78
IntegerType,
9+
LongType,
810
StringType,
911
StructField,
1012
StructType,
11-
LongType,
1213
)
13-
from usaspending_api.download.helpers.delta_models_helpers import fy_quarter_period
1414

15+
from usaspending_api.download.helpers.delta_models_helpers import fy_quarter_period
1516

1617
object_class_program_activity_schema = StructType(
1718
[

usaspending_api/etl/management/commands/create_delta_table.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
from pyspark.sql.types import StructType
55

66
from usaspending_api.awards.delta_models.award_id_lookup import AWARD_ID_LOOKUP_SCHEMA
7-
from usaspending_api.common.spark.configs import DEFAULT_EXTRA_CONF
8-
from usaspending_api.config import CONFIG
97
from usaspending_api.common.helpers.spark_helpers import (
108
configure_spark_session,
119
get_active_spark_session,
1210
)
11+
from usaspending_api.common.spark.configs import DEFAULT_EXTRA_CONF
12+
from usaspending_api.config import CONFIG
1313
from usaspending_api.etl.management.commands.archive_table_in_delta import TABLE_SPEC as ARCHIVE_TABLE_SPEC
1414
from usaspending_api.etl.management.commands.load_query_to_delta import TABLE_SPEC as LOAD_QUERY_TABLE_SPEC
1515
from usaspending_api.etl.management.commands.load_table_to_delta import TABLE_SPEC as LOAD_TABLE_TABLE_SPEC
@@ -33,7 +33,6 @@
3333

3434

3535
class Command(BaseCommand):
36-
3736
help = """
3837
This command creates an empty Delta Table based on the provided --destination-table argument.
3938
"""

usaspending_api/etl/management/commands/load_query_to_delta.py

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,20 @@
2020
covid_faba_spending_load_sql_strings,
2121
)
2222
from usaspending_api.disaster.models import CovidFABASpending
23+
from usaspending_api.download.delta_models.account_balances_download import (
24+
account_balances_schema,
25+
load_account_balances,
26+
load_account_balances_incremental,
27+
)
2328
from usaspending_api.download.delta_models.award_financial_download import (
29+
award_financial_schema,
2430
load_award_financial,
2531
load_award_financial_incremental,
26-
award_financial_schema,
2732
)
2833
from usaspending_api.download.delta_models.object_class_program_activity_download import (
29-
object_class_program_activity_schema,
3034
load_object_class_program_activity,
3135
load_object_class_program_activity_incremental,
32-
)
33-
from usaspending_api.download.delta_models.account_balances_download import (
34-
load_account_balances,
35-
load_account_balances_incremental,
36-
account_balances_schema,
36+
object_class_program_activity_schema,
3737
)
3838
from usaspending_api.download.delta_models.transaction_download import transaction_download_schema
3939
from usaspending_api.recipient.delta_models import (
@@ -84,7 +84,6 @@
8484
transaction_search_create_sql_string,
8585
)
8686

87-
8887
AWARD_URL = f"{HOST}/award/" if "localhost" in HOST else f"https://{HOST}/award/"
8988

9089
logger = logging.getLogger(__name__)
@@ -105,14 +104,14 @@
105104
"is_partition_column_unique": True,
106105
"delta_table_create_sql": award_search_create_sql_string,
107106
"delta_table_create_options": None,
108-
"delta_table_create_partitions": None,
109107
"source_schema": AWARD_SEARCH_POSTGRES_COLUMNS,
110108
"custom_schema": "recipient_hash STRING, federal_accounts STRING, cfdas ARRAY<STRING>,"
111109
" tas_components ARRAY<STRING>",
112110
"column_names": list(AWARD_SEARCH_COLUMNS),
113111
"postgres_seq_name": None,
114112
"tsvectors": None,
115113
"postgres_partition_spec": None,
114+
"delta_table_create_partitions": None,
116115
},
117116
"award_search_gold": {
118117
"model": AwardSearch,
@@ -129,14 +128,14 @@
129128
"is_partition_column_unique": True,
130129
"delta_table_create_sql": award_search_create_sql_string,
131130
"delta_table_create_options": None,
132-
"delta_table_create_partitions": None,
133131
"source_schema": AWARD_SEARCH_POSTGRES_GOLD_COLUMNS,
134132
"custom_schema": "recipient_hash STRING, federal_accounts STRING, cfdas ARRAY<STRING>,"
135133
" tas_components ARRAY<STRING>",
136134
"column_names": list(AWARD_SEARCH_POSTGRES_GOLD_COLUMNS),
137135
"postgres_seq_name": None,
138136
"tsvectors": None,
139137
"postgres_partition_spec": None,
138+
"delta_table_create_partitions": None,
140139
},
141140
"recipient_lookup": {
142141
"model": RecipientLookup,
@@ -153,13 +152,13 @@
153152
"is_partition_column_unique": True,
154153
"delta_table_create_sql": rpt_recipient_lookup_create_sql_string,
155154
"delta_table_create_options": None,
156-
"delta_table_create_partitions": None,
157155
"source_schema": RECIPIENT_LOOKUP_POSTGRES_COLUMNS,
158156
"custom_schema": "recipient_hash STRING",
159157
"column_names": list(RPT_RECIPIENT_LOOKUP_DELTA_COLUMNS),
160158
"postgres_seq_name": "recipient_lookup_id_seq",
161159
"tsvectors": None,
162160
"postgres_partition_spec": None,
161+
"delta_table_create_partitions": None,
163162
},
164163
"recipient_profile": {
165164
"model": RecipientProfile,
@@ -176,13 +175,13 @@
176175
"is_partition_column_unique": False,
177176
"delta_table_create_sql": recipient_profile_create_sql_string,
178177
"delta_table_create_options": None,
179-
"delta_table_create_partitions": None,
180178
"source_schema": RECIPIENT_PROFILE_POSTGRES_COLUMNS,
181179
"custom_schema": "recipient_hash STRING",
182180
"column_names": list(RPT_RECIPIENT_PROFILE_DELTA_COLUMNS),
183181
"postgres_seq_name": "recipient_profile_id_seq",
184182
"tsvectors": None,
185183
"postgres_partition_spec": None,
184+
"delta_table_create_partitions": None,
186185
},
187186
"summary_state_view": {
188187
"model": SummaryStateView,
@@ -199,13 +198,13 @@
199198
"is_partition_column_unique": True,
200199
"delta_table_create_sql": summary_state_view_create_sql_string,
201200
"delta_table_create_options": None,
202-
"delta_table_create_partitions": None,
203201
"source_schema": SUMMARY_STATE_VIEW_POSTGRES_COLUMNS,
204202
"custom_schema": "duh STRING",
205203
"column_names": list(SUMMARY_STATE_VIEW_COLUMNS),
206204
"postgres_seq_name": None,
207205
"tsvectors": None,
208206
"postgres_partition_spec": None,
207+
"delta_table_create_partitions": None,
209208
},
210209
"sam_recipient": {
211210
"model": None,
@@ -222,13 +221,13 @@
222221
"is_partition_column_unique": True,
223222
"delta_table_create_sql": sam_recipient_create_sql_string,
224223
"delta_table_create_options": None,
225-
"delta_table_create_partitions": None,
226224
"source_schema": SAM_RECIPIENT_POSTGRES_COLUMNS,
227225
"custom_schema": None,
228226
"column_names": list(SAM_RECIPIENT_COLUMNS),
229227
"postgres_seq_name": None,
230228
"tsvectors": None,
231229
"postgres_partition_spec": None,
230+
"delta_table_create_partitions": None,
232231
},
233232
"transaction_search": {
234233
"model": TransactionSearch,
@@ -245,13 +244,13 @@
245244
"is_partition_column_unique": True,
246245
"delta_table_create_sql": transaction_search_create_sql_string,
247246
"delta_table_create_options": None,
248-
"delta_table_create_partitions": None,
249247
"source_schema": TRANSACTION_SEARCH_POSTGRES_COLUMNS,
250248
"custom_schema": "recipient_hash STRING, federal_accounts STRING, parent_recipient_hash STRING",
251249
"column_names": list(TRANSACTION_SEARCH_POSTGRES_COLUMNS),
252250
"postgres_seq_name": None,
253251
"tsvectors": None,
254252
"postgres_partition_spec": None,
253+
"delta_table_create_partitions": None,
255254
},
256255
"transaction_search_gold": {
257256
"model": TransactionSearch,
@@ -268,7 +267,6 @@
268267
"is_partition_column_unique": True,
269268
"delta_table_create_sql": transaction_search_create_sql_string,
270269
"delta_table_create_options": None,
271-
"delta_table_create_partitions": None,
272270
"source_schema": TRANSACTION_SEARCH_POSTGRES_GOLD_COLUMNS,
273271
"custom_schema": "recipient_hash STRING, federal_accounts STRING, parent_recipient_hash STRING",
274272
"column_names": list(TRANSACTION_SEARCH_POSTGRES_GOLD_COLUMNS),
@@ -282,6 +280,7 @@
282280
{"table_suffix": "_fabs", "partitioning_clause": "FOR VALUES IN (FALSE)"},
283281
],
284282
},
283+
"delta_table_create_partitions": None,
285284
},
286285
"transaction_current_cd_lookup": {
287286
"model": None,
@@ -298,13 +297,13 @@
298297
"is_partition_column_unique": True,
299298
"delta_table_create_sql": transaction_current_cd_lookup_create_sql_string,
300299
"delta_table_create_options": None,
301-
"delta_table_create_partitions": None,
302300
"source_schema": TRANSACTION_CURRENT_CD_LOOKUP_COLUMNS,
303301
"custom_schema": "",
304302
"column_names": list(TRANSACTION_CURRENT_CD_LOOKUP_COLUMNS),
305303
"postgres_seq_name": None,
306304
"tsvectors": None,
307305
"postgres_partition_spec": None,
306+
"delta_table_create_partitions": None,
308307
},
309308
"subaward_search": {
310309
"model": SubawardSearch,
@@ -321,13 +320,13 @@
321320
"is_partition_column_unique": True,
322321
"delta_table_create_sql": subaward_search_create_sql_string,
323322
"delta_table_create_options": None,
324-
"delta_table_create_partitions": None,
325323
"source_schema": SUBAWARD_SEARCH_POSTGRES_COLUMNS,
326324
"custom_schema": "treasury_account_identifiers ARRAY<INTEGER>",
327325
"column_names": list(SUBAWARD_SEARCH_COLUMNS),
328326
"postgres_seq_name": None,
329327
"tsvectors": SUBAWARD_SEARCH_POSTGRES_VECTORS,
330328
"postgres_partition_spec": None,
329+
"delta_table_create_partitions": None,
331330
},
332331
"covid_faba_spending": {
333332
"model": CovidFABASpending,
@@ -344,13 +343,13 @@
344343
"is_partition_column_unique": False,
345344
"delta_table_create_sql": covid_faba_spending_create_sql_string,
346345
"delta_table_create_options": None,
347-
"delta_table_create_partitions": None,
348346
"source_schema": COVID_FABA_SPENDING_POSTGRES_COLUMNS,
349347
"custom_schema": None,
350348
"column_names": list(COVID_FABA_SPENDING_DELTA_COLUMNS),
351349
"postgres_seq_name": None,
352350
"tsvectors": None,
353351
"postgres_partition_spec": None,
352+
"delta_table_create_partitions": None,
354353
},
355354
"account_balances_download": {
356355
"model": None,
@@ -367,13 +366,13 @@
367366
"is_partition_column_unique": False,
368367
"delta_table_create_sql": account_balances_schema,
369368
"delta_table_create_options": {"delta.enableChangeDataFeed": True},
370-
"delta_table_create_partitions": None,
371369
"source_schema": None,
372370
"custom_schema": None,
373371
"column_names": list(),
374372
"postgres_seq_name": None,
375373
"tsvectors": None,
376374
"postgres_partition_spec": None,
375+
"delta_table_create_partitions": ["reporting_fiscal_year", "funding_toptier_agency_id"],
377376
},
378377
"award_financial_download": {
379378
"model": None,
@@ -390,13 +389,13 @@
390389
"is_partition_column_unique": False,
391390
"delta_table_create_sql": award_financial_schema,
392391
"delta_table_create_options": {"delta.enableChangeDataFeed": True},
393-
"delta_table_create_partitions": None,
394392
"source_schema": None,
395393
"custom_schema": None,
396394
"column_names": list(),
397395
"postgres_seq_name": None,
398396
"tsvectors": None,
399397
"postgres_partition_spec": None,
398+
"delta_table_create_partitions": ["reporting_fiscal_year", "funding_toptier_agency_id"],
400399
},
401400
"object_class_program_activity_download": {
402401
"model": None,
@@ -413,13 +412,13 @@
413412
"is_partition_column_unique": False,
414413
"delta_table_create_sql": object_class_program_activity_schema,
415414
"delta_table_create_options": {"delta.enableChangeDataFeed": True},
416-
"delta_table_create_partitions": None,
417415
"source_schema": None,
418416
"custom_schema": None,
419417
"column_names": list(),
420418
"postgres_seq_name": None,
421419
"tsvectors": None,
422420
"postgres_partition_spec": None,
421+
"delta_table_create_partitions": ["reporting_fiscal_year", "funding_toptier_agency_id"],
423422
},
424423
"transaction_download": {
425424
"model": None,
@@ -436,13 +435,13 @@
436435
"is_partition_column_unique": False,
437436
"delta_table_create_sql": transaction_download_schema,
438437
"delta_table_create_options": {"delta.enableChangeDataFeed": True},
439-
"delta_table_create_partitions": ["awarding_agency_code", "is_fpds", "action_date_fiscal_year"],
440438
"source_schema": None,
441439
"custom_schema": None,
442440
"column_names": list(),
443441
"postgres_seq_name": None,
444442
"tsvectors": None,
445443
"postgres_partition_spec": None,
444+
"delta_table_create_partitions": ["awarding_agency_code", "is_fpds", "action_date_fiscal_year"],
446445
},
447446
}
448447

0 commit comments

Comments
 (0)