Skip to content

Commit 177fbcc

Browse files
author
Dan Kelley
committed
Merge branch 'nginx_setup' of https://github.com/CodeForPhilly/paws-data-pipeline into nginx_setup
2 parents c52ae81 + a58c493 commit 177fbcc

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

src/server/pipeline/clean_and_load_data.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
from datasource_manager import DATASOURCE_MAPPING, SOURCE_NORMALIZATION_MAPPING
88
from flask import current_app
9+
import sqlalchemy
910
from config import CURRENT_SOURCE_FILES_PATH
1011

1112

@@ -35,7 +36,12 @@ def start(connection, pdp_contacts_df, file_path_list):
3536
result = pd.concat([result, source_df])
3637

3738
else:
38-
df.to_sql(table_name, connection, index=False, if_exists='append')
39+
if table_name in sqlalchemy.inspect(connection).get_table_names():
40+
# Only retain new/updated records in secondary tables (shifts, donations, etc.)
41+
current_app.logger.info(' - Deduplicating old records')
42+
old_data = pd.read_sql_table(table_name, connection)
43+
df = old_data.append(df, ignore_index=True).drop_duplicates()
44+
df.to_sql(table_name, connection, index=False, if_exists='replace')
3945

4046
current_app.logger.info(' - Finish load_paws_data on: ' + uploaded_file)
4147

0 commit comments

Comments
 (0)