@@ -15,14 +15,13 @@ def normalize_before_match(value):
15
15
return result
16
16
17
17
18
- def start (connection , added_or_updated_rows ):
18
+ def start (connection , added_or_updated_rows , manual_matches_df ):
19
19
# Match new records to each other and existing pdp_contacts data.
20
20
# Assigns matching ID's to records, as well.
21
21
# WARNING: not thread-safe and could lead to concurrency issues if two users /execute simultaneously
22
22
current_app .logger .info ('Start record matching' )
23
23
# Will need to consider updating the existing row contents (filter by active), deactivate,
24
24
# try to match, and merge previous matching groups if applicable
25
-
26
25
job_id = str (int (time .time ()))
27
26
log_db .log_exec_status (job_id , {'status' : 'starting' , 'at_row' : 0 , 'of_rows' : 0 })
28
27
current_app .logger .info ("***** Running execute job ID " + job_id + " *****" )
@@ -63,15 +62,24 @@ def start(connection, added_or_updated_rows):
63
62
# Exact matches based on specified columns
64
63
row_matches = pdp_contacts [
65
64
(
66
- ((pdp_contacts ["first_name_normalized" ] == row ["first_name_normalized" ]) &
67
- (pdp_contacts ["last_name_normalized" ] == row ["last_name_normalized" ]))
68
- |
69
- ((pdp_contacts ["first_name_normalized" ] == row ["last_name_normalized" ]) &
70
- (pdp_contacts ["last_name_normalized" ] == row ["first_name_normalized" ]))
71
- &
72
- ((pdp_contacts ["email_normalized" ] == row ["email_normalized" ]) | (pdp_contacts ["mobile" ] == row ["mobile" ]))
65
+ ((pdp_contacts ["first_name_normalized" ] == row ["first_name_normalized" ]) &
66
+ (pdp_contacts ["last_name_normalized" ] == row ["last_name_normalized" ]))
67
+ |
68
+ ((pdp_contacts ["first_name_normalized" ] == row ["last_name_normalized" ]) &
69
+ (pdp_contacts ["last_name_normalized" ] == row ["first_name_normalized" ]))
70
+ &
71
+ ((pdp_contacts ["email_normalized" ] == row ["email_normalized" ]) | (pdp_contacts ["mobile" ] == row ["mobile" ]))
73
72
)
74
73
]
74
+ #collect other linked ids from manual matches source
75
+ if manual_matches_df != None :
76
+ linked_ids = manual_matches_df [(manual_matches_df [row ["source_type" ]] == row ["source_id" ])]
77
+ ids = linked_ids .to_dict (orient = "records" )
78
+ for row_dict in enumerate (ids ):
79
+ for column , value in row_dict .items ():
80
+ row_matches = row_matches .append (pdp_contacts [(pdp_contacts ["source_type" ] == column ) & (pdp_contacts ["source_id" ] == value )])
81
+
82
+
75
83
if row_matches .empty : # new record, no matching rows
76
84
max_matching_group += 1
77
85
row_group = max_matching_group
0 commit comments