Skip to content

Commit 4dd8ee2

Browse files
committed
Add reversed first/last name order to contact matching
Restore setupProxy and merge in reading statistics from the DB instead of from files
1 parent 9d58f84 commit 4dd8ee2

File tree

4 files changed

+30
-19
lines changed

4 files changed

+30
-19
lines changed

src/client/src/pages/Admin.js

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ class Admin extends Component {
3939
statistics: undefined,
4040
filesInput: undefined,
4141
fileListHtml: undefined,
42-
lastExecution: undefined
42+
lastExecution: undefined,
43+
serverBusy: false
4344
}
4445

4546
this.handleIndexChange = this.handleIndexChange.bind(this);
@@ -85,15 +86,15 @@ class Admin extends Component {
8586
this.setState({isLoading: false});
8687
};
8788

88-
async handleExecute(event) {
89+
handleExecute(event) {
8990
event.preventDefault();
9091

9192
this.setState({isLoading: true});
9293
fetch('/api/execute');
9394

9495
setTimeout(() => {
9596
this.refreshPage();
96-
}, 3000);
97+
}, 1500);
9798
}
9899

99100
async handleGetStatistics() {
@@ -184,7 +185,9 @@ class Admin extends Component {
184185
<b>Last Analysis</b>
185186
</TableCell>
186187
<TableCell align="left">
187-
<b>{moment(this.state.lastExecution, "dddd MMMM Do h:mm:ss YYYY").local().format("MMMM Do YYYY, h:mm:ss a")}</b>
188+
<b>
189+
{moment(this.state.lastExecution, "dddd MMMM Do h:mm:ss YYYY").local().format("MMMM Do YYYY, h:mm:ss a")}
190+
</b>
188191
</TableCell>
189192
</TableRow>
190193
{this.state.statistics.map((row, index) => (

src/client/src/setupProxy.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
const proxy = require('http-proxy-middleware');
22

3+
34
module.exports = function(app) {
45
app.use(proxy('/api/**', {
5-
target: process.env.IS_LOCAL === 'true' ? 'http://localhost:5000' : 'http://server:5000',
6-
changeOrigin: true,
6+
target: process.env.IS_LOCAL === 'true' ? 'http://localhost:3333' : 'http://server:5000'
77
}
88
));
99
}

src/server/api/admin_api.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,14 @@
66
from sqlalchemy.sql import text
77

88
from sqlalchemy.dialects.postgresql import insert
9-
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, exc, select
9+
from sqlalchemy import Table, MetaData
1010
from pipeline import flow_script
1111
from config import engine
12-
from flask import request, redirect, jsonify, current_app, abort
12+
from flask import request, redirect, jsonify, current_app
1313
from api.file_uploader import validate_and_arrange_upload
1414
from config import (
1515
RAW_DATA_PATH,
16-
CURRENT_SOURCE_FILES_PATH,
17-
LOGS_PATH,
16+
CURRENT_SOURCE_FILES_PATH
1817
)
1918

2019
ALLOWED_EXTENSIONS = {"csv", "xlsx"}
@@ -120,8 +119,10 @@ def list_statistics():
120119
with engine.connect() as connection:
121120
s = text("select valcol from kv_unique where keycol = 'last_execution_time';")
122121
result = connection.execute(s)
123-
last_execution_details = result.fetchone()[0]
124122

123+
fetch_result = result.fetchone()
124+
if fetch_result:
125+
last_execution_details = result.fetchone()[0]
125126

126127
except Exception as e:
127128
current_app.logger.error("Failure reading Last Execution stats from DB")

src/server/pipeline/match_data.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,27 @@ def start(connection, added_or_updated_rows):
2424
items_to_update["matching_id"] = 0 # initializing an int and overwrite in the loop
2525
items_to_update["archived_date"] = np.nan
2626
items_to_update["created_date"] = datetime.datetime.now()
27-
27+
2828
rows = items_to_update.to_dict(orient="records")
2929
row_print_freq = max(1, np.floor_divide(len(rows), 20)) # approx every 5% (or every row if small)
3030
for row_num, row in enumerate(rows):
3131
if row_num % row_print_freq == 0:
3232
current_app.logger.info("- Matching rows {}-{} of {}".format(
33-
row_num+1, min(len(rows), row_num+row_print_freq), len(rows))
33+
row_num + 1, min(len(rows), row_num + row_print_freq), len(rows))
3434
)
35-
35+
3636
# Exact matches based on specified columns
3737
row_matches = pdp_contacts[
38-
(pdp_contacts["first_name"] == row["first_name"]) &
39-
(pdp_contacts["last_name"] == row["last_name"]) &
40-
((pdp_contacts["email"] == row["email"]) | (pdp_contacts["mobile"] == row["mobile"]))
38+
(
39+
((pdp_contacts["first_name"] == row["first_name"]) &
40+
(pdp_contacts["last_name"] == row["last_name"]))
41+
|
42+
((pdp_contacts["first_name"] == row["last_name"]) &
43+
(pdp_contacts["last_name"] == row["first_name"]))
44+
&
45+
((pdp_contacts["email"] == row["email"]) | (pdp_contacts["mobile"] == row["mobile"]))
46+
)
47+
4148
]
4249
if row_matches.empty: # new record, no matching rows
4350
max_matching_group += 1
@@ -47,14 +54,14 @@ def start(connection, added_or_updated_rows):
4754
if not all(row_matches["matching_id"] == row_group):
4855
current_app.logger.warning(
4956
"Source {} with ID {} is matching multiple groups in pdp_contacts ({})"
50-
.format(row["source_type"], row["source_id"], str(row_matches["matching_id"].drop_duplicates()))
57+
.format(row["source_type"], row["source_id"], str(row_matches["matching_id"].drop_duplicates()))
5158
)
5259
items_to_update.loc[row_num, "matching_id"] = row_group
5360
# Updating local pdp_contacts dataframe instead of a roundtrip to postgres within the loop.
5461
# Indexing by iloc and vector of rows to keep the pd.DataFrame class and avoid implicit
5562
# casting to a single-typed pd.Series.
5663
pdp_contacts = pdp_contacts.append(items_to_update.iloc[[row_num], :], ignore_index=True)
57-
64+
5865
# Write new data and matching ID's to postgres in bulk, instead of line-by-line
5966
current_app.logger.info("- Writing data to pdp_contacts table")
6067
items_to_update.to_sql('pdp_contacts', connection, index=False, if_exists='append')

0 commit comments

Comments
 (0)