Skip to content

Commit a2f5beb

Browse files
committed
Merge branch 'master' of github.com:CodeForPhilly/paws-data-pipeline into issue-166-360Page
2 parents a7a9557 + 030bd8a commit a2f5beb

File tree

2 files changed: 10 additions and 4 deletions

2 files changed: 10 additions and 4 deletions

src/server/api/common_api.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,9 @@ def get_contacts(search_text):
1515

1616
names = search_text.split(" ")
1717
if len(names) == 2:
18-
query = text("select * from pdp_contacts WHERE archived_date is null AND \
19-
(lower(first_name) like lower(:name1) AND lower(last_name) like lower(:name2) \
20-
OR lower(first_name) like lower(:name2) AND lower(last_name) like lower(:name1))")
18+
query = text("select * from pdp_contacts where archived_date is null AND\
19+
lower(first_name) like lower(:name1) and lower(last_name) like lower(:name2) \
20+
OR lower(first_name) like lower(:name2) and lower(last_name) like lower(:name1)")
2121
query_result = connection.execute(query, name1='{}%'.format(names[0]), name2='{}%'.format(names[1]))
2222
elif len(names) == 1:
2323
query = text("select * from pdp_contacts \

src/server/pipeline/clean_and_load_data.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
from datasource_manager import DATASOURCE_MAPPING, SOURCE_NORMALIZATION_MAPPING
88
from flask import current_app
9+
import sqlalchemy
910
from config import CURRENT_SOURCE_FILES_PATH
1011

1112

@@ -35,7 +36,12 @@ def start(connection, pdp_contacts_df, file_path_list):
3536
result = pd.concat([result, source_df])
3637

3738
else:
38-
df.to_sql(table_name, connection, index=False, if_exists='append')
39+
if table_name in sqlalchemy.inspect(connection).get_table_names():
40+
# Only retain new/updated records in secondary tables (shifts, donations, etc.)
41+
current_app.logger.info(' - Deduplicating old records')
42+
old_data = pd.read_sql_table(table_name, connection)
43+
df = old_data.append(df, ignore_index=True).drop_duplicates()
44+
df.to_sql(table_name, connection, index=False, if_exists='replace')
3945

4046
current_app.logger.info(' - Finish load_paws_data on: ' + uploaded_file)
4147

0 commit comments

Comments
 (0)