Skip to content

Commit 97f4d06

Browse files
[DEV-12639] fix duplicate awards
1 parent 4d757fe commit 97f4d06

File tree

4 files changed

+10
-44
lines changed

4 files changed

+10
-44
lines changed

usaspending_api/broker/helpers/last_load_date.py

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import logging
22

33
from datetime import timedelta
4-
from itertools import islice
54

65
from usaspending_api.broker import lookups
76
from usaspending_api.broker.models import ExternalDataLoadDate
@@ -99,34 +98,3 @@ def update_last_load_date(key, last_load_date):
9998
external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT[key],
10099
defaults={"last_load_date": cast_datetime_to_utc(last_load_date)},
101100
)
102-
103-
def get_second_to_last_load_data(key, lookback_minutes=None, default=None, format_func: callable = (lambda _: _)):
104-
"""
105-
Retrieve the second_to_last_load_date from the USAspending database.
106-
107-
Valid keys are dictated by the keys in EXTERNAL_DATA_TYPE_DICT.
108-
109-
lookback_minutes is used to provide some protection against gaps caused by
110-
long transactions or race conditions. It will be subtracted from
111-
second_to_last_load_data. NOTE: It will not be subtracted from the default in the
112-
case where no second_to_last_load_data is found.
113-
114-
default will be returned if no second_to_last_load_data is found in the database.
115-
"""
116-
external_data_type_id = lookups.EXTERNAL_DATA_TYPE_DICT[key]
117-
load_date = (
118-
ExternalDataLoadDate.objects.filter(external_data_type_id=external_data_type_id)
119-
.values_list("last_load_date", flat=True)
120-
)
121-
if load_date is None:
122-
logger.warning(format_func(f"No record of a previous run for `{key}` was found!"))
123-
return default
124-
second_to_last_load_data = next(islice(load_date.iterator(), 1, 2), None)
125-
if second_to_last_load_data is None:
126-
logger.warning(format_func(f"No record of a previous run for `{key}` was found!"))
127-
return default
128-
else:
129-
logger.info(format_func(f"Value for previous `{key}` ETL: {second_to_last_load_data}"))
130-
if lookback_minutes is not None:
131-
second_to_last_load_data -= timedelta(minutes=lookback_minutes)
132-
return second_to_last_load_data

usaspending_api/download/tests/integration/test_account_download_dataframe_builder.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,4 +250,3 @@ def test_account_balances(mock_get_submission_ids_for_periods, spark, account_do
250250
assert ta_builder.account_balances.count() == 2
251251
fa_builder = FederalAccountDownloadDataFrameBuilder(spark, account_download_filter)
252252
assert fa_builder.account_balances.count() == 2
253-

usaspending_api/etl/elasticsearch_loader_helpers/delete_data.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import re
23
from datetime import datetime
34
from time import perf_counter
45
from typing import Dict, List, Optional, Union
@@ -14,7 +15,6 @@
1415
from usaspending_api.broker.helpers.last_load_date import (
1516
get_last_load_date,
1617
get_latest_load_date,
17-
get_second_to_last_load_data
1818
)
1919
from usaspending_api.common.helpers.s3_helpers import (
2020
access_s3_object,
@@ -316,15 +316,11 @@ def delete_awards(
316316
317317
Returns: Number of ES docs deleted in the index
318318
"""
319-
es_delete_window_start = get_second_to_last_load_data(
320-
"es_deletes", format_func=(lambda log_msg: format_log(log_msg, action="Delete"))
321-
)
322-
323319
delete_window_start = get_last_load_date(
324320
"es_deletes", format_func=(lambda log_msg: format_log(log_msg, action="Delete"))
325321
)
326322
deleted_tx_keys = _gather_deleted_transaction_keys(
327-
config, es_delete_window_start, fabs_external_data_load_date_key, fpds_external_data_load_date_key
323+
config, delete_window_start, fabs_external_data_load_date_key, fpds_external_data_load_date_key
328324
)
329325
awards_to_delete = []
330326

@@ -393,14 +389,12 @@ def delete_transactions(
393389

394390
tx_keys_to_delete = []
395391

396-
es_delete_window_start = get_second_to_last_load_data("es_deletes", format_func=(lambda log_msg: format_log(log_msg, action="Delete")))
397-
398392
delete_window_start = get_last_load_date(
399393
"es_deletes", format_func=(lambda log_msg: format_log(log_msg, action="Delete"))
400394
)
401395

402396
deleted_tx_keys = _gather_deleted_transaction_keys(
403-
config, es_delete_window_start, fabs_external_data_load_date_key, fpds_external_data_load_date_key
397+
config, delete_window_start, fabs_external_data_load_date_key, fpds_external_data_load_date_key
404398
)
405399
tx_keys_to_delete.extend(deleted_tx_keys)
406400

@@ -471,6 +465,12 @@ def _gather_deleted_transaction_keys(
471465
and not x.key.startswith("staging")
472466
and delete_window_start <= x.last_modified <= end_date
473467
)
468+
# matches the pattern YYYY-MM-DD_FABSdeletions_{TIME_IN_SECONDS}.csv
469+
or (
470+
re.search(r"^\d{4}-(0[1-9]|1[0-2])-(0?[1-9]|[12][0-9]|3[01])_FABSdeletions_.*.csv", x.key)
471+
and not x.key.startswith("staging")
472+
and delete_window_start <= x.last_modified
473+
)
474474
]
475475

476476
logger.info(format_log(f"Found {len(filtered_csv_list)} CSV files in the date range", action="Delete"))

usaspending_api/etl/management/commands/load_transactions_in_delta.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,7 @@ class Command(BaseCommand):
8686
FROM aidlu_fabs AS aidlu LEFT JOIN raw.published_fabs AS pfabs ON (
8787
aidlu.transaction_unique_id = ucase(pfabs.afa_generated_unique)
8888
)
89-
WHERE pfabs.afa_generated_unique IS NULL AND NOT EXISTS (SELECT 1 FROM aidlu_fabs
90-
WHERE aidlu.transaction_unique_id = ucase(pfabs.afa_generated_unique) AND pfabs.correction_delete_indicatr = null)
89+
WHERE pfabs.afa_generated_unique IS NULL
9190
"""
9291

9392
def add_arguments(self, parser):

0 commit comments

Comments
 (0)