Commit 369c004

feat: allow qa environment to use prod database contents (#1153)
1 parent e9e9c9e commit 369c004

File tree

.github/workflows/duplicate-prod-db.yml
scripts/duplicate-prod-db/copy-prod-db-to-qa.sh
scripts/duplicate-prod-db/create-dump-bucket.sh
scripts/duplicate-prod-db/post-import.sh

4 files changed: 227 additions, 0 deletions
.github/workflows/duplicate-prod-db.yml (99 additions, 0 deletions)

```yaml
name: Copy PROD DB to QA
on:
  workflow_dispatch: # Supports manual deployment
    inputs:
      dest_database_name:
        description: 'The name of the destination database (using MobilityDatabase will overwrite the current QA DB)'
        required: false
        default: 'MobilityDatabaseProdDuplicate'
      backup_db:
        description: 'Backup the current QA DB before importing the dump'
        required: false
        default: 'true'

jobs:
  run-script:
    runs-on: ubuntu-latest
    env:
      SOURCE_PROJECT_ID: ${{ vars.PROD_MOBILITY_FEEDS_PROJECT_ID }}
      DEST_PROJECT_ID: ${{ vars.QA_MOBILITY_FEEDS_PROJECT_ID }}
      DUMP_BUCKET_NAME: "mobilitydata-database-dump-qa"
      BUCKET_PROJECT_ID: ${{ vars.QA_MOBILITY_FEEDS_PROJECT_ID }}
      GCP_REGION: ${{ vars.MOBILITY_FEEDS_REGION }}
      DB_INSTANCE_NAME: ${{ secrets.DB_INSTANCE_NAME }}
      DEST_DATABASE_PASSWORD: ${{ secrets.QA_POSTGRE_USER_PASSWORD }}
      DUMP_FILE_NAME: "prod-db-dump.sql"
      SOURCE_DATABASE_NAME: ${{ vars.PROD_POSTGRE_SQL_DB_NAME }}
      DEST_DATABASE_NAME: ${{ inputs.dest_database_name || 'MobilityDatabaseProdDuplicate' }}
      DEST_DATABASE_USER: ${{ secrets.QA_POSTGRE_USER_NAME }}
      DEST_DATABASE_IMPORT_USER: ${{ secrets.PROD_POSTGRE_USER_NAME }}
      GCP_FEED_BASTION_SSH_KEY: ${{ secrets.GCP_FEED_BASTION_SSH_KEY }}
      BACKUP_DB: ${{ inputs.backup_db || 'true' }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Authenticate to Google Cloud PROD project
        id: gcloud_auth_prod
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.PROD_GCP_MOBILITY_FEEDS_SA_KEY }}

      - name: GCloud Setup PROD
        uses: google-github-actions/setup-gcloud@v2

      - name: Get PROD SQL service account
        run: |
          SERVICE_ACCOUNT=$(gcloud sql instances describe "mobilitydata-database-instance" --project=$SOURCE_PROJECT_ID --format="value(serviceAccountEmailAddress)")
          echo "SOURCE_SQL_SERVICE_ACCOUNT=$SERVICE_ACCOUNT" >> $GITHUB_ENV
          echo "Source SQL Service Account: $SERVICE_ACCOUNT"

      - name: Authenticate to Google Cloud QA project
        id: gcloud_auth_qa
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.QA_GCP_MOBILITY_FEEDS_SA_KEY }}

      - name: GCloud Setup QA
        uses: google-github-actions/setup-gcloud@v2

      - name: Create DB dump bucket and give permissions
        run: bash scripts/duplicate-prod-db/create-dump-bucket.sh

      - name: Authenticate to Google Cloud PROD project again
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.PROD_GCP_MOBILITY_FEEDS_SA_KEY }}

      - name: GCloud Setup PROD again
        uses: google-github-actions/setup-gcloud@v2

      - name: Dump the PROD DB
        run: |
          gcloud sql export sql $DB_INSTANCE_NAME gs://$DUMP_BUCKET_NAME/$DUMP_FILE_NAME --database=$SOURCE_DATABASE_NAME --quiet

      - name: Authenticate to Google Cloud QA project again
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.QA_GCP_MOBILITY_FEEDS_SA_KEY }}

      - name: GCloud Setup QA again
        uses: google-github-actions/setup-gcloud@v2

      - name: QA backup and import dump into the QA DB
        run: bash scripts/duplicate-prod-db/copy-prod-db-to-qa.sh

      - name: Load secrets from 1Password
        uses: 1password/load-secrets-action@v2
        with:
          export-env: true # Export loaded secrets as environment variables
        env:
          OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
          GCP_FEED_SSH_USER: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/GCP_FEED_SSH_USER/username"
          GCP_FEED_BASTION_NAME: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/GCP_FEED_BASTION_NAME/username"
          GCP_FEED_BASTION_SSH_KEY: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/GCP_FEED_BASTION_SSH_KEY/private key"

      - name: Tunnel and run SQL scripts on imported database
        run: bash scripts/duplicate-prod-db/post-import.sh
```
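Since the workflow is `workflow_dispatch`-only, nothing triggers it automatically; it has to be started from the Actions tab or the GitHub CLI. A minimal sketch of the CLI route, assuming the workflow file is named `duplicate-prod-db.yml` (the name the scripts below refer to) and that `gh` is authenticated against the repo:

```bash
# Hypothetical invocation via the GitHub CLI; the workflow file name is assumed.
# Default run: import into MobilityDatabaseProdDuplicate, backing up the QA DB first.
gh workflow run duplicate-prod-db.yml

# Overwrite the live QA database and skip the backup (use with care):
gh workflow run duplicate-prod-db.yml \
  -f dest_database_name=MobilityDatabase \
  -f backup_db=false
```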
scripts/duplicate-prod-db/copy-prod-db-to-qa.sh (52 additions, 0 deletions)

```bash
#!/bin/bash
# This script is used by the duplicate-prod-db.yml workflow.
# It imports the PROD DB dump (exported by the workflow) into the QA DB.
# It can also make a backup of the current QA DB before importing.

# Validate required environment variables
REQUIRED_VARS=(
  "DB_INSTANCE_NAME"
  "DUMP_BUCKET_NAME"
  "SOURCE_DATABASE_NAME"
  "DEST_DATABASE_NAME"
  "DEST_DATABASE_PASSWORD"
  "DEST_DATABASE_IMPORT_USER"
  "DUMP_FILE_NAME"
  "BACKUP_DB"
)

for VAR in "${REQUIRED_VARS[@]}"; do
  if [ -z "${!VAR}" ]; then
    echo "Error: Environment variable $VAR is not set."
    exit 1
  fi
done

if [ "$BACKUP_DB" == "true" ]; then
  echo "Dump the QA database as a backup"
  # According to chatgpt, this is Google's recommended, safe method and doesn't
  # require direct access to the DB. It runs the export in a way that avoids
  # locking the database and works from GCP itself (so no traffic leaves GCP).
  gcloud sql export sql $DB_INSTANCE_NAME gs://$DUMP_BUCKET_NAME/qa-db-dump-backup.sql --database=$SOURCE_DATABASE_NAME --quiet
else
  echo "Skipping backup of the QA database as it was not requested"
fi

echo "Deleting the existing $DEST_DATABASE_NAME database"
gcloud sql databases delete $DEST_DATABASE_NAME --instance=$DB_INSTANCE_NAME --quiet

echo "Recreating the $DEST_DATABASE_NAME database"
gcloud sql databases create $DEST_DATABASE_NAME --instance=$DB_INSTANCE_NAME

echo "Importing the dump into the QA database"
# The exported sql contains statements that require authentication as user postgres.
# In theory we could dump the DB without these statements, with:
# pg_dump --no-owner --no-privileges -d your_database > clean_dump.sql

# The dumped DB refers to the PROD database user (data_feeds_user), so we need to be this user when importing.
export PGPASSWORD=$DEST_DATABASE_PASSWORD
gcloud sql import sql $DB_INSTANCE_NAME gs://$DUMP_BUCKET_NAME/$DUMP_FILE_NAME --database=$DEST_DATABASE_NAME --user=$DEST_DATABASE_IMPORT_USER --quiet

echo "Deleting the dump file from the bucket"
gsutil rm gs://$DUMP_BUCKET_NAME/$DUMP_FILE_NAME
```
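Because the script drops and recreates the destination database before importing, the optional backup is the only way back. A rough rollback sketch, assuming the same environment variables as the script above and that `qa-db-dump-backup.sql` has not been removed from the bucket:

```bash
# Hypothetical rollback: re-import the QA backup taken before the overwrite,
# targeting the same database the backup was exported from.
# Assumes gcloud is authenticated against the QA project and DB_INSTANCE_NAME,
# DUMP_BUCKET_NAME, and SOURCE_DATABASE_NAME are set as in the workflow.
gcloud sql import sql $DB_INSTANCE_NAME \
  gs://$DUMP_BUCKET_NAME/qa-db-dump-backup.sql \
  --database=$SOURCE_DATABASE_NAME \
  --quiet
```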
scripts/duplicate-prod-db/create-dump-bucket.sh (41 additions, 0 deletions)

```bash
#!/bin/bash
# This script is used by the duplicate-prod-db.yml workflow.
# It creates a bucket to house the dump of the production database.
# It also grants permissions on the dump bucket so the SQL instances in PROD and QA can use it.

# Validate required environment variables
REQUIRED_VARS=(
  "DEST_PROJECT_ID"
  "DUMP_BUCKET_NAME"
  "GCP_REGION"
  "BUCKET_PROJECT_ID"
  "SOURCE_SQL_SERVICE_ACCOUNT"
  "DB_INSTANCE_NAME"
)

for VAR in "${REQUIRED_VARS[@]}"; do
  if [ -z "${!VAR}" ]; then
    echo "Error: Environment variable $VAR is not set."
    exit 1
  fi
done

BUCKET_PROJECT_ID=$DEST_PROJECT_ID

echo "Checking if bucket exists..."
if ! gsutil ls -b "gs://${DUMP_BUCKET_NAME}" &> /dev/null; then
  echo "Bucket doesn't exist. Creating..."
  gsutil mb -l $GCP_REGION -p $BUCKET_PROJECT_ID "gs://${DUMP_BUCKET_NAME}"
else
  echo "Bucket already exists."
fi

echo "Giving permission for the source SQL instance to read-write to the bucket"
gsutil iam ch serviceAccount:$SOURCE_SQL_SERVICE_ACCOUNT:objectAdmin gs://$DUMP_BUCKET_NAME

echo "Getting the service account for the QA DB to give permission to the bucket"
DEST_SQL_SERVICE_ACCOUNT=$(gcloud sql instances describe $DB_INSTANCE_NAME --format="value(serviceAccountEmailAddress)")
echo "Destination SQL Service Account: $DEST_SQL_SERVICE_ACCOUNT"

echo "Giving permission for the dest SQL instance to read-write to the bucket"
gsutil iam ch serviceAccount:$DEST_SQL_SERVICE_ACCOUNT:objectAdmin gs://$DUMP_BUCKET_NAME
```
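If a run fails with a permissions error on the export or import, it is worth confirming that both instance service accounts actually hold the binding. A small check, under the same variable assumptions as the script above:

```bash
# Hypothetical check: print the bucket's IAM policy and confirm both SQL
# instance service accounts appear with roles/storage.objectAdmin.
gsutil iam get gs://$DUMP_BUCKET_NAME \
  | grep -E "$SOURCE_SQL_SERVICE_ACCOUNT|$DEST_SQL_SERVICE_ACCOUNT"
```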
scripts/duplicate-prod-db/post-import.sh (35 additions, 0 deletions)

```bash
#!/bin/bash
# This script is used by the duplicate-prod-db.yml workflow.
# It executes some SQL scripts on the imported DB:
# - Give the postgres user permission on the tables.
# - Modify the email addresses in the DB so we can't accidentally email real people.

# Validate required environment variables
REQUIRED_VARS=(
  "GCP_FEED_BASTION_SSH_KEY"
  "DEST_PROJECT_ID"
  "GCP_REGION"
  "GCP_FEED_BASTION_NAME"
  "GCP_FEED_SSH_USER"
  "DB_INSTANCE_NAME"
  "DEST_DATABASE_PASSWORD"
  "DEST_DATABASE_IMPORT_USER"
  "DEST_DATABASE_NAME"
)

for VAR in "${REQUIRED_VARS[@]}"; do
  if [ -z "${!VAR}" ]; then
    echo "Error: Environment variable $VAR is not set."
    exit 1
  fi
done

echo "Tunnelling"
mkdir -p ~/.ssh
echo "$GCP_FEED_BASTION_SSH_KEY" > ~/.ssh/id_rsa
chmod 600 ~/.ssh/id_rsa
./scripts/tunnel-create.sh -project_id $DEST_PROJECT_ID -zone ${GCP_REGION}-a -instance ${GCP_FEED_BASTION_NAME}-qa -target_account ${GCP_FEED_SSH_USER} -db_instance ${DB_INSTANCE_NAME} -port 5454
sleep 10 # Wait for the tunnel to establish

echo "Giving new role to postgres user"
export PGPASSWORD=$DEST_DATABASE_PASSWORD
psql -h localhost -p 5454 -U $DEST_DATABASE_IMPORT_USER -d $DEST_DATABASE_NAME -c "GRANT data_feeds_user TO postgres;"

echo "Redirecting email addresses to mobilitydata.org"
cat <<'EOF' | psql -h localhost -p 5454 -U postgres -d $DEST_DATABASE_NAME
UPDATE feed
SET feed_contact_email = REPLACE(feed_contact_email, '@', '_at_') || '@mobilitydata.org'
WHERE feed_contact_email IS NOT NULL
  AND TRIM(feed_contact_email) <> '';
EOF
```
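After the rewrite, an address like `alice@example.com` becomes `alice_at_example.com@mobilitydata.org`, so any mail the QA environment sends can only land at mobilitydata.org. A hypothetical spot check through the same tunnel, verifying that no address escaped the rewrite:

```bash
# Hypothetical verification over the tunnel opened above (port 5454).
# Counts contact emails that were not redirected; the expected result is 0.
psql -h localhost -p 5454 -U postgres -d $DEST_DATABASE_NAME -c \
  "SELECT COUNT(*) FROM feed
   WHERE feed_contact_email IS NOT NULL
     AND TRIM(feed_contact_email) <> ''
     AND feed_contact_email NOT LIKE '%@mobilitydata.org';"
```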
