@@ -1,8 +1,10 @@
-import datetime
+import datetime, time
 import pandas as pd
 import numpy as np

 from flask import current_app
+from pipeline import log_db
+


 def start(connection, added_or_updated_rows):
@@ -12,6 +14,9 @@ def start(connection, added_or_updated_rows):
     current_app.logger.info('Start record matching')
     # Will need to consider updating the existing row contents (filter by active), deactivate,
     # try to match, and merge previous matching groups if applicable
+
+    job_id = str(int(time.time()))
+    log_db.log_exec_status(job_id, {'status': 'starting', 'at_row': 0, 'of_rows': 0})
     items_to_update = pd.concat([added_or_updated_rows["new"], added_or_updated_rows["updated"]], ignore_index=True)
     pdp_contacts = pd.read_sql_table('pdp_contacts', connection)

@@ -32,6 +37,7 @@ def start(connection, added_or_updated_rows):
             current_app.logger.info("- Matching rows {}-{} of {}".format(
                 row_num + 1, min(len(rows), row_num + row_print_freq), len(rows))
             )
+            log_db.log_exec_status(job_id, {'status': 'executing', 'at_row': row_num + 1, 'of_rows': len(rows)})

         # Exact matches based on specified columns
         row_matches = pdp_contacts[
@@ -59,3 +65,5 @@ def start(connection, added_or_updated_rows):
     current_app.logger.info("- Writing data to pdp_contacts table")
     items_to_update.to_sql('pdp_contacts', connection, index=False, if_exists='append')
     current_app.logger.info("- Finished load to pdp_contacts table")
+
+    log_db.log_exec_status(job_id, {'status': 'complete', 'at_row': len(rows), 'of_rows': len(rows)})
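The added calls assume a log_exec_status(job_id, status_dict) helper in the pipeline's log_db module, which this commit does not show. A minimal sketch of such a helper, assuming a SQLAlchemy engine and a job_status table keyed by a unique job_id column (both hypothetical names here, not taken from the repository), could look like:

import json
import sqlalchemy as sa

# Assumed connection string and table name; the real pipeline may configure these differently.
engine = sa.create_engine("postgresql://user:password@localhost/paws")

def log_exec_status(job_id, exec_status):
    """Insert or update the latest execution status for a matching job."""
    with engine.begin() as conn:  # begin() commits automatically on success
        conn.execute(
            sa.text(
                "INSERT INTO job_status (job_id, status_json) "
                "VALUES (:job_id, :status_json) "
                "ON CONFLICT (job_id) DO UPDATE SET status_json = :status_json"
            ),
            {"job_id": job_id, "status_json": json.dumps(exec_status)},
        )

With a helper along these lines, the three calls added above record the 'starting', 'executing', and 'complete' progress counts for the job identified by its epoch-seconds job_id.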