Skip to content

Commit b496176

Browse files
committed
Merge branch 'master' of https://github.com/CodeForPhilly/paws-data-pipeline into issue-166-360Page
2 parents f15de91 + ad5f82a commit b496176

File tree

11 files changed

+14253
-98
lines changed

11 files changed

+14253
-98
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,3 +18,4 @@ start_env.sh
1818

1919
/src/server/venv/
2020
/src/local_files/
21+
/src/server/secrets.py

src/client/.gitignore

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,5 +20,4 @@
2020

2121
npm-debug.log*
2222
yarn-debug.log*
23-
yarn-error.log*
24-
package-lock.json
23+
yarn-error.log*

src/client/package-lock.json

Lines changed: 14174 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/client/src/pages/Admin.js

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -175,15 +175,9 @@ class Admin extends Component {
175175
: _.isEmpty(this.state.statistics) !== true &&
176176
<TableContainer component={Paper} className="statisticsData">
177177
<Table aria-label="simple table" className={classes.table}>
178-
<TableHead>
179-
<TableRow>
180-
<TableCell>Sources Matched</TableCell>
181-
<TableCell align="left">Number of Matches</TableCell>
182-
</TableRow>
183-
</TableHead>
184178
<TableBody>
185-
{this.state.statistics.map((row) => (
186-
<TableRow key={row[0]}>
179+
{this.state.statistics.map((row, index) => (
180+
<TableRow key={index}>
187181
<TableCell align="left" component="th" scope="row">
188182
{row[0]}
189183
</TableCell>

src/server/api/admin_api.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -105,8 +105,8 @@ def get_statistics():
105105

106106
# Need to iterate over the results proxy
107107
results = {
108-
"matches count": [dict(row) for row in matches_count_query_result][0]["count"],
109-
"total contacts count": [dict(row) for row in total_count_query_result][0]["count"]
108+
"Distinct Matching Groups Count": [dict(row) for row in matches_count_query_result][0]["count"],
109+
"Total Contacts Count": [dict(row) for row in total_count_query_result][0]["count"]
110110
}
111111

112112
return results

src/server/api/common_api.py

Lines changed: 41 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,9 @@
22
from config import engine
33
from flask import jsonify
44
from sqlalchemy.sql import text
5+
import requests
6+
import json
7+
from secrets import SHELTERLUV_SECRET_TOKEN
58

69

710
@common_api.route('/api/contacts/<search_text>', methods=['GET'])
@@ -37,24 +40,45 @@ def get_360(matching_id):
3740
query_result = connection.execute(query, matching_id=matching_id)
3841

3942
result["contact_details"] = [dict(row) for row in query_result]
40-
result["shifts"] = []
41-
result["donations"] = []
42-
result["adoptions"] = []
4343

44-
# todo: complete retrieving details for response
45-
for row in query_result:
44+
for row in result["contact_details"]:
45+
if row["source_type"] == "salesforcecontacts":
46+
donations_query = text("select * from salesforcedonations where contact_id like :salesforcecontacts_id")
47+
salesforce_contacts_query_result = connection.execute(donations_query,
48+
salesforcecontacts_id=row["source_id"] + "%")
49+
salesforce_donations_results = [dict(row) for row in salesforce_contacts_query_result]
50+
result['donations'] = salesforce_donations_results
51+
4652
if row["source_type"] == "volgistics":
47-
query = text("select * from volgisticsshifts where number = :volgistics_id")
48-
query_result = connection.execute(query, volgistics_id=row["source_id"])
49-
result["shifts"] += [dict(row) for row in query_result]
50-
51-
'''
52-
query = text("select * from salesforcedonations where contact_id = :salesforcecontacts_id")
53-
query_result = connection.execute(query, salesforcecontacts_id=salesforcecontacts_id)
54-
salesforcedonations_results = [dict(row) for row in query_result]
55-
56-
if salesforcedonations_results:
57-
result['salesforcedonations'] = salesforcedonations_results
58-
'''
53+
shifts_query = text("select * from volgisticsshifts where number = :volgistics_id")
54+
volgistics_shifts_query_result = connection.execute(shifts_query, volgistics_id=row["source_id"])
55+
volgisticsshifts_results = [dict(row) for row in volgistics_shifts_query_result]
56+
result['shifts'] = volgisticsshifts_results
57+
58+
if row["source_type"] == "shelterluvpeople":
59+
adoptions = []
60+
person = requests.get("http://shelterluv.com/api/v1/people/{}".format(row["source_id"]),
61+
headers={"x-api-key": SHELTERLUV_SECRET_TOKEN})
62+
person_json = person.json()
63+
animal_ids = person_json["Animal_ids"]
64+
65+
for animal_id in animal_ids:
66+
animal_events = requests.get("http://shelterluv.com/api/v1/animals/{}/events".format(animal_id),
67+
headers={"x-api-key": SHELTERLUV_SECRET_TOKEN})
68+
animal_events_json = animal_events.json()
69+
70+
for event in animal_events_json["events"]:
71+
for adoption in event["AssociatedRecords"]:
72+
if adoption["Type"] == "Person" and adoption["Id"] == row["source_id"]:
73+
del event["AssociatedRecords"]
74+
animal_details = requests.get(
75+
"http://shelterluv.com/api/v1/animals/{}".format(animal_id),
76+
headers={"x-api-key": SHELTERLUV_SECRET_TOKEN})
77+
78+
animal_details_json = animal_details.json()
79+
event["animal_details"] = animal_details_json
80+
adoptions.append(event)
81+
82+
result['adoptions'] = adoptions
5983

6084
return jsonify({'result': result})

src/server/datasource_manager.py

Lines changed: 13 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -94,18 +94,18 @@ def volgistics_address(index, street):
9494
"state": "mailing_state_province",
9595
"zip": "mailing_zip_postal_code",
9696
"others": {
97-
"additional_sources": [{
98-
"salesforcedonations": {
99-
'should_drop_first_column': True
100-
}
101-
}
102-
],
10397
"should_drop_first_column": True
10498
}
10599

106100
},
101+
"salesforcedonations": {
102+
"parent": "salesforcecontacts",
103+
"others": {
104+
"should_drop_first_column": True
105+
}
106+
},
107107
"shelterluvpeople": {
108-
"source_id": "id",
108+
"source_id": "internal-id",
109109
"first_name": "firstname",
110110
"last_name": "lastname",
111111
"email": "email",
@@ -131,19 +131,13 @@ def volgistics_address(index, street):
131131
"state": "state",
132132
"zip": "zip",
133133
"others": {
134-
"additional_sources": [{
135-
"volgisticsshifts": {
136-
'should_drop_first_column': True
137-
}
138-
}
139-
],
140134
"should_drop_first_column": True
141135
}
142-
136+
},
137+
"volgisticsshifts": {
138+
"parent": "volgistics",
139+
"others": {
140+
"should_drop_first_column": True
141+
}
143142
}
144143
}
145-
146-
147-
148-
149-

src/server/models.py

Lines changed: 1 addition & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import datetime
22

3-
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
3+
from sqlalchemy import Column, Integer, String, DateTime
44
from sqlalchemy.dialects.postgresql import JSONB
55
from sqlalchemy.ext.declarative import declarative_base
66

@@ -89,42 +89,4 @@ class Volgistics(Base):
8989
json = Column(JSONB)
9090

9191

92-
class SalesForceDonations(Base):
93-
__tablename__ = "salesforcedonations"
9492

95-
_id = Column(Integer, primary_key=True)
96-
recurring_donor = Column(String)
97-
opportunity_owner = Column(String)
98-
account_id = Column(String)
99-
account_name = Column(String)
100-
opportunity_id = Column(String)
101-
opportunity_name = Column(String)
102-
stage = Column(String)
103-
fiscal_period = Column(String)
104-
amount = Column(String)
105-
probability = Column(String)
106-
age = Column(String)
107-
close_date = Column(String)
108-
created_date = Column(String)
109-
next_step = Column(String)
110-
lead_source = Column(String)
111-
type = Column(String)
112-
source = Column(String)
113-
contact_id = Column(String)
114-
primary_campaign_source = Column(String)
115-
116-
117-
class Volgistics_Shifts(Base):
118-
__tablename__ = 'volgisticsshifts'
119-
120-
_id = Column(Integer, primary_key=True)
121-
number = Column(String)
122-
site = Column(String)
123-
place = Column(String)
124-
assignment = Column(String)
125-
role = Column(String)
126-
from_date = Column('from', DateTime)
127-
to = Column(DateTime)
128-
spare_date = Column(String)
129-
spare_chechbox = Column(String)
130-
coordinator = Column(String)

src/server/pipeline/clean_and_load_data.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from config import CURRENT_SOURCE_FILES_PATH
1010

1111

12-
def start(pdp_contacts_df, file_path_list):
12+
def start(connection, pdp_contacts_df, file_path_list):
1313
result = pd.DataFrame(columns=pdp_contacts_df.columns)
1414

1515
for uploaded_file in file_path_list:
@@ -26,12 +26,16 @@ def start(pdp_contacts_df, file_path_list):
2626
normalization_without_others = copy.deepcopy(SOURCE_NORMALIZATION_MAPPING[table_name])
2727
normalization_without_others.pop("others") # copy avoids modifying the imported mapping
2828

29-
source_df = create_normalized_df(df, normalization_without_others, table_name)
29+
if "parent" not in normalization_without_others:
30+
source_df = create_normalized_df(df, normalization_without_others, table_name)
31+
32+
if result.empty:
33+
result = source_df
34+
else:
35+
result = pd.concat([result, source_df])
3036

31-
if result.empty:
32-
result = source_df
3337
else:
34-
result = pd.concat([result, source_df])
38+
df.to_sql(table_name, connection, index=False, if_exists='append')
3539

3640
current_app.logger.info(' - Finish load_paws_data on: ' + uploaded_file)
3741

src/server/pipeline/flow_script.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ def start_flow():
2323
# Clean the input data and normalize
2424
# input - existing files in path
2525
# output - normalized object of all entries
26-
normalized_data = clean_and_load_data.start(pdp_contacts_df, file_path_list)
26+
normalized_data = clean_and_load_data.start(connection, pdp_contacts_df, file_path_list)
2727

2828
# Standardize column data types
2929
# If additional inconsistencies are encountered, may need to enforce the schema of

0 commit comments

Comments (0)