- fix last-updated file time: parse the filename timestamp as UTC before converting it to local time

urirot · urirot · commit 30a510683337 · 2021-03-30T22:24:49.000-04:00
- improve matching efficiency by standardizing column strings to lower case once, before the row-matching loop
diff --git a/src/client/package-lock.json b/src/client/package-lock.json
diff --git a/src/client/src/pages/Admin.js b/src/client/src/pages/Admin.js
@@ -146,7 +146,7 @@ class Admin extends Component {
                                             const fileName = file.split("-")[0];
                                             let fileDate = file.split("-").slice(1).join().split(".")[0];
                                             let fileDateOnlyNumbers = fileDate.replaceAll(",", "");
-                                            let fileDateFormatted = moment(fileDateOnlyNumbers, "YYYYMMDDhmmss").local().format("MMMM Do YYYY, h:mm:ss a");
+                                            let fileDateFormatted = moment.utc(fileDateOnlyNumbers, "YYYYMMDDhmmss").local().format("MMMM Do YYYY, h:mm:ss a");
 
                                             return (
                                                 <TableRow key={index}>
diff --git a/src/client/src/pages/DataView360/DataView360.js b/src/client/src/pages/DataView360/DataView360.js
@@ -202,7 +202,7 @@ class DataView360 extends Component {
                     <Paper className={styles.main} elevation={1} style={{"padding": "1em"}}>
                         <ContactInfo participant={_.get(this.state, 'participantData.contact_details')}/>
                         <Grid container direction="row" justify="center">
-                            <Grid item style={{"marginTop": "1em", "position": "fixed"}}>
+                            <Grid item style={{"marginTop": "1em"}}>
                                 <Button variant="contained" color="primary"
                                         onClick={() => {
                                             this.setState({
diff --git a/src/server/pipeline/match_data.py b/src/server/pipeline/match_data.py
@@ -39,8 +39,18 @@ def start(connection, added_or_updated_rows):
     items_to_update["archived_date"] = np.nan
     items_to_update["created_date"] = datetime.datetime.now()
 
+    # Create Normalized columns for matching
+    items_to_update["first_name_normalized"] = items_to_update["first_name"].apply(normalize_before_match)
+    items_to_update["last_name_normalized"] = items_to_update["last_name"].apply(normalize_before_match)
+    items_to_update["email_normalized"] = items_to_update["email"].apply(normalize_before_match)
+
+    pdp_contacts["first_name_normalized"] = pdp_contacts["first_name"].apply(normalize_before_match)
+    pdp_contacts["last_name_normalized"] = pdp_contacts["last_name"].apply(normalize_before_match)
+    pdp_contacts["email_normalized"] = pdp_contacts["email"].apply(normalize_before_match)
+
     rows = items_to_update.to_dict(orient="records")
     row_print_freq = max(1, np.floor_divide(len(rows), 20))  # approx every 5% (or every row if small)
+
     for row_num, row in enumerate(rows):
         if row_num % row_print_freq == 0:
             current_app.logger.info("- Matching rows {}-{} of {}".format(
@@ -51,24 +61,15 @@ def start(connection, added_or_updated_rows):
             })
 
         # Exact matches based on specified columns
-
         row_matches = pdp_contacts[
             (
-                    ((pdp_contacts["first_name"].apply(lambda x: normalize_before_match(x)) == normalize_before_match(
-                        row["first_name"])) &
-                     (pdp_contacts["last_name"].apply(lambda x: normalize_before_match(x)) == normalize_before_match(
-                         row["last_name"])))
+                    ((pdp_contacts["first_name_normalized"] == row["first_name_normalized"]) &
+                     (pdp_contacts["last_name_normalized"] == row["last_name_normalized"]))
                     |
-                    ((pdp_contacts["first_name"].apply(lambda x: normalize_before_match(x)) == normalize_before_match(
-                        row[
-                            "last_name"])) &
-                     (pdp_contacts["last_name"].apply(lambda x: normalize_before_match(x)) == normalize_before_match(
-                         row[
-                             "first_name"])))
+                    ((pdp_contacts["first_name_normalized"] == row["last_name_normalized"]) &
+                     (pdp_contacts["last_name_normalized"] == row["first_name_normalized"]))
                     &
-                    ((pdp_contacts["email"].apply(lambda x: normalize_before_match(x)) == normalize_before_match(
-                        row["email"])) | (
-                             pdp_contacts["mobile"] == row["mobile"]))
+                    ((pdp_contacts["email_normalized"] == row["email_normalized"]) | (pdp_contacts["mobile"] == row["mobile"]))
             )
         ]
         if row_matches.empty:  # new record, no matching rows
@@ -89,6 +90,8 @@ def start(connection, added_or_updated_rows):
 
     # Write new data and matching ID's to postgres in bulk, instead of line-by-line
     current_app.logger.info("- Writing data to pdp_contacts table")
+    items_to_update = items_to_update.drop(
+        columns=["first_name_normalized", "last_name_normalized", "email_normalized"])
     items_to_update.to_sql('pdp_contacts', connection, index=False, if_exists='append')
     current_app.logger.info("- Finished load to pdp_contacts table")