Skip to content

Commit 71161b2

Browse files
committed
added manual matches as part of automated matching flow
1 parent 0f89881 commit 71161b2

File tree

2 files changed

+9
-3
lines changed

2 files changed

+9
-3
lines changed

src/server/pipeline/clean_and_load_data.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ def start(connection, pdp_contacts_df, file_path_list):
1717
for uploaded_file in file_path_list:
1818
file_path = os.path.join(CURRENT_SOURCE_FILES_PATH, uploaded_file)
1919
table_name = file_path.split('/')[-1].split('-')[0]
20-
if table_name == 'manual_matches':
20+
if table_name == 'manualmatches':
2121
manual_matches_df = pd.read_csv((io.BytesIO(open(file_path, "rb").read())), encoding='iso-8859-1')
22+
manual_matches_df[["volgistics", "shelterluvpeople"]] = manual_matches_df[["volgistics", "shelterluvpeople"]].fillna(0).astype(int).astype(str)
2223
continue
2324

2425
current_app.logger.info('Running load_paws_data on: ' + uploaded_file)

src/server/pipeline/match_data.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@ def start(connection, added_or_updated_rows, manual_matches_df):
2222
current_app.logger.info('Start record matching')
2323
# Will need to consider updating the existing row contents (filter by active), deactivate,
2424
# try to match, and merge previous matching groups if applicable
25-
current_app.logger.info('Manual matches DF: ' + manual_matches_df)
26-
2725
job_id = str(int(time.time()))
2826
log_db.log_exec_status(job_id, {'status': 'starting', 'at_row': 0, 'of_rows': 0})
2927
current_app.logger.info("***** Running execute job ID " + job_id + " *****")
@@ -73,6 +71,13 @@ def start(connection, added_or_updated_rows, manual_matches_df):
7371
((pdp_contacts["email_normalized"] == row["email_normalized"]) | (pdp_contacts["mobile"] == row["mobile"]))
7472
)
7573
]
74+
#collect other linked ids from manual matches source
75+
linked_ids = manual_matches_df[(manual_matches_df[row["source_type"]] == row["source_id"])]
76+
ids = linked_ids.to_dict(orient="records")
77+
for id_num, row_dict in enumerate(ids):
78+
for column, value in row_dict.items():
79+
row_matches = row_matches.append(pdp_contacts[(pdp_contacts["source_type"] == column) & (pdp_contacts["source_id"] == value)])
80+
7681
if row_matches.empty: # new record, no matching rows
7782
max_matching_group += 1
7883
row_group = max_matching_group

0 commit comments

Comments
 (0)