Logs match_data progress to DB

c-simpson · c-simpson · commit 4360780c8e2a · 2021-03-22T16:42:49.000-04:00
diff --git a/src/server/pipeline/log_db.py b/src/server/pipeline/log_db.py
@@ -0,0 +1,40 @@
+from datetime import datetime
+import json
+from sqlalchemy.sql import text
+from flask import  current_app
+
+from sqlalchemy.dialects.postgresql import insert
+from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, exc, select
+
+from config import engine
+
+
+metadata = MetaData()
+
+kvt = Table("kv_unique", metadata, autoload=True, autoload_with=engine)
+
+
+
+def log_exec_status(job_id: str, job_status: dict):
+    """Upsert the JSON-encoded ``job_status`` into ``kv_unique`` under key ``'job-' + job_id``.
+
+    A failed execute is logged through the Flask app logger and is not re-raised.
+    See Alembic Revision ID: 05e0693f8cbb for the table definition.
+    """
+    with engine.connect() as connection:
+        row_key = 'job-' + job_id
+        payload = json.dumps(job_status)
+
+        # Postgres-specific insert(); on a key conflict the stored value is updated instead
+        stmt = insert(kvt).values(keycol=row_key, valcol=payload).on_conflict_do_update(
+            constraint='kv_unique_keycol_key',
+            set_={'valcol': payload},
+        )
+
+        try:
+            connection.execute(stmt)
+        except Exception as e:
+            current_app.logger.error("Insert/Update failed Execution status")
+            current_app.logger.exception(e)
+
+
diff --git a/src/server/pipeline/match_data.py b/src/server/pipeline/match_data.py
@@ -1,8 +1,10 @@
-import datetime
+import datetime, time
 import pandas as pd
 import numpy as np
 
 from flask import current_app
+from pipeline import log_db
+
 
 
 def start(connection, added_or_updated_rows):
@@ -12,6 +14,9 @@ def start(connection, added_or_updated_rows):
     current_app.logger.info('Start record matching')
     # Will need to consider updating the existing row contents (filter by active), deactivate,
     # try to match, and merge previous matching groups if applicable
+
+    job_id = str(int(time.time()))
+    log_db.log_exec_status(job_id,{'status': 'starting', 'at_row':  0, 'of_rows':0})
     items_to_update = pd.concat([added_or_updated_rows["new"], added_or_updated_rows["updated"]], ignore_index=True)
     pdp_contacts = pd.read_sql_table('pdp_contacts', connection)
 
@@ -32,6 +37,7 @@ def start(connection, added_or_updated_rows):
             current_app.logger.info("- Matching rows {}-{} of {}".format(
                 row_num+1, min(len(rows), row_num+row_print_freq), len(rows))
             )
+            log_db.log_exec_status(job_id,{'status': 'executing', 'at_row':  row_num+1, 'of_rows':len(rows)})
         
         # Exact matches based on specified columns
         row_matches = pdp_contacts[
@@ -59,3 +65,5 @@ def start(connection, added_or_updated_rows):
     current_app.logger.info("- Writing data to pdp_contacts table")
     items_to_update.to_sql('pdp_contacts', connection, index=False, if_exists='append')
     current_app.logger.info("- Finished load to pdp_contacts table")
+
+    log_db.log_exec_status(job_id,{'status': 'complete', 'at_row':  len(rows), 'of_rows':len(rows)})