Skip to content

Commit a16d144

Browse files
committed
Clean up counting
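Use one-based indexing for matching_id (in part to reserve zero), and fix a minor visual bug in row_print_freq: it is now clamped to at least 1, so the progress-log check no longer takes a modulo by zero (and prints strangely) when there are fewer than 20 rows.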
1 parent ed22a08 commit a16d144

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

src/server/pipeline/match_data.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def start(connection, added_or_updated_rows):
1818
if pdp_contacts["matching_id"].dropna().size == 0:
1919
max_matching_group = 0
2020
else:
21-
max_matching_group = max(pdp_contacts["matching_id"].dropna()) + 1
21+
max_matching_group = max(pdp_contacts["matching_id"].dropna())
2222

2323
# Initialize column metadata we'll write to pdp_contacts
2424
items_to_update["matching_id"] = 0 # initializing an int and overwrite in the loop
@@ -28,7 +28,7 @@ def start(connection, added_or_updated_rows):
2828
del row["_id"] # avoid specifying the _id field, so postgres will auto-increment for us
2929

3030
rows = items_to_update.to_dict(orient="records")
31-
row_print_freq = np.floor_divide(len(rows), 20) # approx every 5%
31+
row_print_freq = max(1, np.floor_divide(len(rows), 20)) # approx every 5% (or every row if small)
3232
for row_num, row in enumerate(rows):
3333
if row_num % row_print_freq == 0:
3434
current_app.logger.info("- Matching rows {}-{} of {}".format(
@@ -42,8 +42,8 @@ def start(connection, added_or_updated_rows):
4242
(pdp_contacts["email"] == row["email"]) # TODO: could transform this line into an "or" with phone number
4343
]
4444
if row_matches.empty: # new record, no matching rows
45-
row_group = max_matching_group
4645
max_matching_group += 1
46+
row_group = max_matching_group
4747
else: # existing match(es)
4848
row_group = row_matches["matching_id"].values[0]
4949
if not all(row_matches["matching_id"] == row_group):

0 commit comments

Comments
 (0)