Skip to content

Commit ca3cfed

Browse files
authored
Merge pull request #232 from CodeForPhilly/table_for_file
Uses DB for storing Last Execution stats instead of file
2 parents 9a7a2ec + 4691e4d commit ca3cfed

File tree

6 files changed

+135
-27
lines changed

6 files changed

+135
-27
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""key/value table
2+
3+
Revision ID: 05e0693f8cbb
4+
Revises: 6b8cf99be000
5+
Create Date: 2021-03-18 11:35:43.512082
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = '05e0693f8cbb'
14+
down_revision = '6b8cf99be000'
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
20+
op.create_table(
21+
'kv_unique',
22+
sa.Column('_id', sa.Integer, primary_key=True),
23+
sa.Column('keycol', sa.String(50), nullable=False, unique=True),
24+
sa.Column('valcol', sa.String(65536), nullable=True),
25+
)
26+
27+
# op.create_index('kvk_ix', 'kv_unique', ['key'], unique=True)
28+
29+
30+
def downgrade():
31+
pass

src/server/api/admin_api.py

Lines changed: 52 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
from datetime import datetime
55
import json
66
from sqlalchemy.sql import text
7+
8+
from sqlalchemy.dialects.postgresql import insert
9+
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, exc, select
710
from pipeline import flow_script
811
from config import engine
912
from flask import request, redirect, jsonify, current_app, abort
@@ -16,10 +19,14 @@
1619

1720
ALLOWED_EXTENSIONS = {"csv", "xlsx"}
1821

22+
metadata = MetaData()
1923

2024
def __allowed_file(filename):
2125
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
2226

27+
kvt = Table("kv_unique", metadata, autoload=True, autoload_with=engine)
28+
29+
2330

2431
# file upload tutorial
2532
@admin_api.route("/api/file", methods=["POST"])
@@ -62,15 +69,32 @@ def execute():
6269
statistics = get_statistics()
6370

6471
last_execution_details = {"executionTime": current_time, "stats": statistics}
72+
last_ex_json = (json.dumps(last_execution_details))
6573

66-
last_execution_file = open(LOGS_PATH + "last_execution.json", "w")
67-
last_execution_file.write(json.dumps(last_execution_details))
68-
last_execution_file.close()
74+
# Write Last Execution stats to DB
75+
# See Alembic Revision ID: 05e0693f8cbb for table definition
76+
with engine.connect() as connection:
77+
ins_stmt = insert(kvt).values( # Postgres-specific insert() supporting ON CONFLICT
78+
keycol = 'last_execution_time',
79+
valcol = last_ex_json,
80+
)
81+
# If key already present in DB, do update instead
82+
upsert = ins_stmt.on_conflict_do_update(
83+
constraint='kv_unique_keycol_key',
84+
set_=dict(valcol=last_ex_json)
85+
)
86+
87+
try:
88+
connection.execute(upsert)
89+
except Exception as e:
90+
current_app.logger.error("Insert/Update failed on Last Execution stats")
91+
current_app.logger.exception(e)
6992

7093
return jsonify(success=True)
7194

7295

7396
def get_statistics():
97+
7498
with engine.connect() as connection:
7599
query_matches = text("SELECT count(*) FROM (SELECT distinct matching_id from pdp_contacts) as a;")
76100
query_total_count = text("SELECT count(*) FROM pdp_contacts;")
@@ -88,26 +112,36 @@ def get_statistics():
88112

89113
@admin_api.route("/api/statistics", methods=["GET"])
90114
def list_statistics():
91-
try:
92-
last_execution_file = open(LOGS_PATH + "last_execution.json", "r")
93-
last_execution_details = json.loads(last_execution_file.read())
94-
last_execution_file.close()
115+
""" Pull Last Execution stats from DB. """
116+
current_app.logger.info("list_statistics() request")
117+
last_execution_details = '{}' # Empty but valid JSON
95118

96-
except (FileNotFoundError):
97-
current_app.logger.error("last_execution.json file was missing")
98-
return abort(500)
119+
try: # See Alembic Revision ID: 05e0693f8cbb for table definition
120+
with engine.connect() as connection:
121+
s = text("select valcol from kv_unique where keycol = 'last_execution_time';")
122+
result = connection.execute(s)
123+
last_execution_details = result.fetchone()[0]
99124

100-
except (json.JSONDecodeError):
101-
current_app.logger.error(
102-
"last_execution.json could not be decoded - possible corruption"
103-
)
104-
return abort(500)
105125

106126
except Exception as e:
107-
current_app.logger.error("Failure reading last_execution.json: ", e)
108-
return abort(500)
127+
current_app.logger.error("Failure reading Last Execution stats from DB")
128+
# return abort(500) # Weird but not worth a 500
129+
130+
return last_execution_details
131+
132+
133+
@admin_api.route("/api/get_execution_status/<int:job_id>", methods=["GET"])
134+
def get_exec_status(job_id):
135+
kvt = Table("kv_unique", metadata, autoload=True, autoload_with=engine)
136+
with engine.connect() as connection:
137+
s_jobid = 'job-' + str(job_id)
138+
s = text("select valcol from kv_unique where keycol = :j ;")
139+
s = s.bindparams(j=s_jobid)
140+
result = connection.execute(s)
141+
exec_status = result.fetchone()[0]
142+
143+
return exec_status
109144

110-
return jsonify(last_execution_details)
111145

112146

113147
"""

src/server/app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
app.secret_key = APP_SECRET_KEY
1717
app.config["MAX_CONTENT_LENGTH"] = 500 * 1024 * 1024 # 500 Megs
1818
app.config["SEND_FILE_MAX_AGE_DEFAULT"] = 0
19+
1920
from api.admin_api import admin_api
2021
from api.common_api import common_api
2122
from api.user_api import user_api
@@ -24,8 +25,8 @@
2425
app.register_blueprint(common_api)
2526
app.register_blueprint(user_api)
2627

27-
app.logger.setLevel('INFO') # By default, Docker appears to set at INFO but VSCode at WARNING
2828

29+
app.logger.setLevel('INFO') # By default, Docker appears to set at INFO but VSCode at WARNING
2930

3031
# init_db_schema.start(connection)
3132

src/server/config.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,3 @@
7676
os.makedirs(CURRENT_SOURCE_FILES_PATH, exist_ok=True)
7777
os.makedirs(REPORT_PATH, exist_ok=True)
7878
os.makedirs(ZIPPED_FILES, exist_ok=True)
79-
80-
if not (os.path.exists(LOGS_PATH + "last_execution.json")):
81-
f = open(
82-
LOGS_PATH + "last_execution.json", "w"
83-
) # Prevent 500 error from /api/statistics
84-
f.write("{}")
85-
f.close()

src/server/pipeline/log_db.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from datetime import datetime
2+
import json
3+
from sqlalchemy.sql import text
4+
from flask import current_app
5+
6+
from sqlalchemy.dialects.postgresql import insert
7+
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, exc, select
8+
9+
from config import engine
10+
11+
12+
metadata = MetaData()
13+
14+
kvt = Table("kv_unique", metadata, autoload=True, autoload_with=engine)
15+
16+
17+
18+
def log_exec_status(job_id: str, job_status: dict):
    """Upsert execution status for a job into the kv_unique table.

    Stores job_status (JSON-encoded) under key 'job-<job_id>'.
    See Alembic Revision ID: 05e0693f8cbb for the table definition.
    Failures are logged, never raised -- status logging is best-effort
    and must not abort the pipeline run.
    """
    status_json = json.dumps(job_status)  # encode once; reused by insert and update

    with engine.connect() as connection:
        # Postgres-specific insert() supporting ON CONFLICT
        ins_stmt = insert(kvt).values(
            keycol='job-' + job_id,
            valcol=status_json,
        )
        # If key already present in DB, do update instead
        upsert = ins_stmt.on_conflict_do_update(
            constraint='kv_unique_keycol_key',
            set_=dict(valcol=status_json),
        )

        try:
            connection.execute(upsert)
        except Exception as e:
            current_app.logger.error("Insert/Update failed Execution status")
            current_app.logger.exception(e)
39+
40+

src/server/pipeline/match_data.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
import datetime
1+
import datetime, time
22
import pandas as pd
33
import numpy as np
44

55
from flask import current_app
6+
from pipeline import log_db
7+
68

79

810
def start(connection, added_or_updated_rows):
@@ -12,6 +14,10 @@ def start(connection, added_or_updated_rows):
1214
current_app.logger.info('Start record matching')
1315
# Will need to consider updating the existing row contents (filter by active), deactivate,
1416
# try to match, and merge previous matching groups if applicable
17+
18+
job_id = str(int(time.time()))
19+
log_db.log_exec_status(job_id,{'status': 'starting', 'at_row': 0, 'of_rows':0})
20+
current_app.logger.info("Running execute job ID " + job_id)
1521
items_to_update = pd.concat([added_or_updated_rows["new"], added_or_updated_rows["updated"]], ignore_index=True)
1622
pdp_contacts = pd.read_sql_table('pdp_contacts', connection)
1723

@@ -32,6 +38,7 @@ def start(connection, added_or_updated_rows):
3238
current_app.logger.info("- Matching rows {}-{} of {}".format(
3339
row_num+1, min(len(rows), row_num+row_print_freq), len(rows))
3440
)
41+
log_db.log_exec_status(job_id,{'status': 'executing', 'at_row': row_num+1, 'of_rows':len(rows)})
3542

3643
# Exact matches based on specified columns
3744
row_matches = pdp_contacts[
@@ -59,3 +66,5 @@ def start(connection, added_or_updated_rows):
5966
current_app.logger.info("- Writing data to pdp_contacts table")
6067
items_to_update.to_sql('pdp_contacts', connection, index=False, if_exists='append')
6168
current_app.logger.info("- Finished load to pdp_contacts table")
69+
70+
log_db.log_exec_status(job_id,{'status': 'complete', 'at_row': len(rows), 'of_rows':len(rows)})

0 commit comments

Comments
 (0)