Skip to content

Commit 951e496

Browse files
committed
Remove the dependency on Dropbox for getting Shelterluv data; make sure the data pull is all done in the same transaction
1 parent ebc0417 commit 951e496

File tree

6 files changed

+36
-71
lines changed

6 files changed

+36
-71
lines changed
Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
from api.API_ingest import shelterluv_api_handler
1+
from api.API_ingest import shelterluv_api_handler, salesforce_api_handler
22
import structlog
33
logger = structlog.get_logger()
44

5-
def start(conn):
5+
def start(session):
66
logger.debug("Start Fetching raw data from different API sources")
77
# Run each source and store its output in the database through the shared session
8-
shelterluv_api_handler.store_shelterluv_people_all(conn)
8+
shelterluv_api_handler.store_shelterluv_people_all(session)
9+
salesforce_api_handler.store_contacts_all(session)
910
logger.debug("Finish Fetching raw data from different API sources")

src/server/api/API_ingest/salesforce_api_handler.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,8 @@
55
from config import engine
66
from models import SalesForceContacts
77

8-
def ingest_data():
8+
def store_contacts_all(session):
99

10-
Session = sessionmaker(engine)
11-
12-
session = Session()
1310
session.execute("TRUNCATE TABLE salesforcecontacts")
1411

1512
sf = Salesforce(domain=os.getenv('SALESFORCE_DOMAIN'), password=os.getenv('SALESFORCE_PW'), username=os.getenv('SALESFORCE_USERNAME'), organizationId=os.getenv('SALESFORCE_ORGANIZATION_ID'), security_token=os.getenv('SALESFORCE_SECURITY_TOKEN'))
@@ -33,6 +30,4 @@ def ingest_data():
3330
session.add(contact)
3431
done = results['done']
3532
if not done:
36-
results = sf.query_more(results['nextRecordsUrl'])
37-
38-
session.commit()
33+
results = sf.query_more(results['nextRecordsUrl'])
Lines changed: 23 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,4 @@
1-
import csv
2-
import os
3-
import time
4-
5-
import requests
6-
import pandas as pd
7-
from api.API_ingest.dropbox_handler import upload_file_to_dropbox
8-
from constants import RAW_DATA_PATH
1+
import requests, os
92
from models import ShelterluvPeople
103
import structlog
114
logger = structlog.get_logger()
@@ -25,31 +18,8 @@
2518
logger.error("Couldn't get SHELTERLUV_SECRET_TOKEN from file or environment")
2619

2720

28-
def write_csv(json_data):
29-
now = time.localtime()
30-
now_date = time.strftime("%Y-%m-%d--%H-%M-%S", now)
31-
32-
path = RAW_DATA_PATH + "shelterluvpeople-" + now_date + ".csv" # store file name to use for dropbox
33-
34-
file_handle = open(path, "w")
35-
36-
csv_writer = csv.writer(file_handle)
37-
38-
count = 0
39-
for item in json_data:
40-
if count == 0:
41-
# Writing headers of CSV file
42-
header = item.keys()
43-
csv_writer.writerow(header)
44-
count += 1
45-
46-
# Writing data of CSV file
47-
csv_writer.writerow(item.values())
48-
49-
file_handle.close()
50-
51-
return path
5221

22+
TEST_MODE=os.getenv("TEST_MODE") # if not present, has value None
5323
#################################
5424
# This script is used to fetch data from the Shelterluv API.
5525
# Please be mindful of your usage.
@@ -64,39 +34,37 @@ def write_csv(json_data):
6434

6535
''' Iterate over all Shelterluv people and store them in the shelterluvpeople table.
6636
We fetch 100 items in each request, since that is the limit based on our research '''
67-
def store_shelterluv_people_all(conn):
37+
def store_shelterluv_people_all(session):
6838
offset = 0
6939
LIMIT = 100
7040
has_more = True
71-
shelterluv_people = []
41+
42+
session.execute("TRUNCATE TABLE shelterluvpeople")
7243

7344
logger.debug("Start getting shelterluv contacts from people table")
7445

7546
while has_more:
7647
r = requests.get("http://shelterluv.com/api/v1/people?limit={}&offset={}".format(LIMIT, offset),
7748
headers={"x-api-key": SHELTERLUV_SECRET_TOKEN})
7849
response = r.json()
79-
shelterluv_people += response["people"]
80-
has_more = response["has_more"]
81-
offset += 100
82-
83-
logger.debug("Finish getting shelterluv contacts from people table")
50+
for person in response["people"]:
51+
#todo: Does this need more "null checks"?
52+
session.add(ShelterluvPeople(firstname=person["Firstname"],
53+
lastname=person["Lastname"],
54+
id=person["ID"] if "ID" in person else None,
55+
internal_id=person["Internal-ID"],
56+
associated=person["Associated"],
57+
street=person["Street"],
58+
apartment=person["Apartment"],
59+
city=person["City"],
60+
state=person["State"],
61+
zip=person["Zip"],
62+
email=person["Email"],
63+
phone=person["Phone"],
64+
animal_ids=person["Animal_ids"]))
65+
offset += LIMIT
66+
has_more = response["has_more"] if not TEST_MODE else response["has_more"] and offset < 1000
8467

85-
logger.debug("Start storing latest shelterluvpeople results to container")
86-
if os.listdir(RAW_DATA_PATH):
87-
for file_name in os.listdir(RAW_DATA_PATH):
88-
file_path = os.path.join(RAW_DATA_PATH, file_name)
89-
file_name_striped = file_path.split('-')[0].split('/')[-1]
9068

91-
if file_name_striped == "shelterluvpeople":
92-
os.remove(file_path)
93-
94-
file_path = write_csv(shelterluv_people)
95-
logger.debug("Finish storing latest shelterluvpeople results to container")
96-
97-
logger.debug("Start storing " + '/shelterluv/' + "results to dropbox")
98-
upload_file_to_dropbox(file_path, '/shelterluv/' + file_path.split('/')[-1])
99-
logger.debug("Finish storing " + '/shelterluv/' + "results to dropbox")
69+
logger.debug("Finish getting shelterluv contacts from people table")
10070

101-
logger.debug("Uploading shelterluvpeople csv to database")
102-
ShelterluvPeople.insert_from_df(pd.read_csv(file_path, dtype="string"), conn)

src/server/api/admin_api.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,12 @@
99
from sqlalchemy import Table, MetaData
1010
from pipeline import flow_script
1111
from config import engine
12-
from flask import request, redirect, jsonify, current_app
12+
from flask import request, redirect, jsonify
1313
from api.file_uploader import validate_and_arrange_upload
14-
from sqlalchemy.orm import Session, sessionmaker
14+
from sqlalchemy.orm import sessionmaker
1515

1616
from api import jwt_ops
1717
from config import RAW_DATA_PATH
18-
from api.API_ingest.salesforce_api_handler import ingest_data
1918

2019
import structlog
2120
logger = structlog.get_logger()

src/server/api/internal_api.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from datetime import datetime
55
from api.API_ingest import ingest_sources_from_api
66
from rfm_funcs.create_scores import create_scores
7+
from sqlalchemy.orm import sessionmaker
78

89
import structlog
910
logger = structlog.get_logger()
@@ -28,8 +29,10 @@ def user_test2():
2829
@internal_api.route("/api/ingestRawData", methods=["GET"])
2930
def ingest_raw_data():
3031
try:
31-
with engine.begin() as conn:
32-
ingest_sources_from_api.start(conn)
32+
Session = sessionmaker(engine)
33+
with Session() as session:
34+
ingest_sources_from_api.start(session)
35+
session.commit()
3336
except Exception as e:
3437
logger.error(e)
3538

src/server/secrets_dict.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
SD_COMMENT="This is for local development"
2-
SHELTERLUV_SECRET_TOKEN=""
32
APP_SECRET_KEY="ASKASK"
43
JWT_SECRET="JWTSECRET"
54
POSTGRES_PASSWORD="thispasswordisverysecure"

0 commit comments

Comments
 (0)