Skip to content

Commit 2a35e79

Browse files
committed
Allow the pipeline to run when no manual matches are provided
1 parent 71161b2 commit 2a35e79

File tree

2 files changed

+9
-6
lines changed

2 files changed

+9
-6
lines changed

src/server/pipeline/clean_and_load_data.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
def start(connection, pdp_contacts_df, file_path_list):
1414
result = pd.DataFrame(columns=pdp_contacts_df.columns)
1515
json_rows = pd.DataFrame(columns=["source_type", "source_id", "json"])
16-
16+
manual_matches_df = None
17+
1718
for uploaded_file in file_path_list:
1819
file_path = os.path.join(CURRENT_SOURCE_FILES_PATH, uploaded_file)
1920
table_name = file_path.split('/')[-1].split('-')[0]

src/server/pipeline/match_data.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,13 @@ def start(connection, added_or_updated_rows, manual_matches_df):
7272
)
7373
]
7474
#collect other linked ids from manual matches source
75-
linked_ids = manual_matches_df[(manual_matches_df[row["source_type"]] == row["source_id"])]
76-
ids = linked_ids.to_dict(orient="records")
77-
for id_num, row_dict in enumerate(ids):
78-
for column, value in row_dict.items():
79-
row_matches = row_matches.append(pdp_contacts[(pdp_contacts["source_type"] == column) & (pdp_contacts["source_id"] == value)])
75+
if manual_matches_df != None:
76+
linked_ids = manual_matches_df[(manual_matches_df[row["source_type"]] == row["source_id"])]
77+
ids = linked_ids.to_dict(orient="records")
78+
for row_dict in enumerate(ids):
79+
for column, value in row_dict.items():
80+
row_matches = row_matches.append(pdp_contacts[(pdp_contacts["source_type"] == column) & (pdp_contacts["source_id"] == value)])
81+
8082

8183
if row_matches.empty: # new record, no matching rows
8284
max_matching_group += 1

0 commit comments

Comments
 (0)