Skip to content

Commit 8789ff6

Browse files
Add offline backup script with setup recipe (#92)
* Add initial backup scripts * Add requirements & basic script * Move to backups folder * Add timer target * Add recipe for enabling backup service * Lint * Make sure timer is started on first run * Add matrix webhook alerting for backup completion
1 parent 292682d commit 8789ff6

File tree

6 files changed

+109
-3
lines changed

6 files changed

+109
-3
lines changed

justfile

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@ set dotenv-load := false
44
default:
55
@just --list --unsorted
66

7-
IS_PROD := env_var_or_default("IS_PROD", "")
7+
IS_PROD := env("IS_PROD", "")
88
DOCKER_FILES := "--file=docker-compose.yml" + (
99
if IS_PROD != "true" {" --file=docker-compose.dev.yml"}
1010
else {" --file=docker-compose.prod.yml"}
1111
)
12-
SERVICE := env_var_or_default("SERVICE", "scheduler")
12+
SERVICE := env("SERVICE", "scheduler")
13+
SCRIPT_DIR := justfile_directory() + "/techbloc_airflow/scripts"
1314

1415
export PROJECT_PY_VERSION := `grep '# PYTHON' requirements_prod.txt | awk -F= '{print $2}'`
1516
export PROJECT_AIRFLOW_VERSION := `grep '^apache-airflow' requirements_prod.txt | awk -F= '{print $3}'`
16-
export SSH_DIRECTORY := env_var_or_default("SSH_DIRECTORY", "")
17+
export SSH_DIRECTORY := env("SSH_DIRECTORY", "")
1718

1819
# Print the required Python version
1920
@py-version:
@@ -122,3 +123,21 @@ init:
122123
# Launch a sqlite3 shell on the Airflow metadata database (/opt/airflow/db/airflow.db)
123124
db-shell:
124125
@just run bash -c 'sqlite3 /opt/airflow/db/airflow.db'
126+
127+
# Installs the backup script, its virtualenv and its environment file under
# /opt/techbloc/, then installs, starts and enables the systemd timer.
# The environment file holds the Spaces keys and the webhook API key, so it is
# installed with mode 600 (owner-only) instead of being copied with the
# default, usually world-readable, permissions.
#
# Set up the offsite backup systemd service (requires sudo)
backup-setup:
    git update-index --assume-unchanged techbloc_airflow/scripts/backups/techbloc-backup.env
    @echo "Please edit the environment file at {{ SCRIPT_DIR }}/backups/techbloc-backup.env before continuing"
    @read -p "Press Enter to continue" REPLY
    sudo mkdir -p /opt/techbloc/
    sudo chown $USER /opt/techbloc/
    python3 -m venv /opt/techbloc/venv
    /opt/techbloc/venv/bin/pip install -r {{ SCRIPT_DIR }}/backups/requirements.txt
    install -m 600 {{ SCRIPT_DIR }}/backups/techbloc-backup.env /opt/techbloc/techbloc-backup.env
    cp {{ SCRIPT_DIR }}/backups/techbloc-backup.py /opt/techbloc/techbloc-backup.py
    sudo cp {{ SCRIPT_DIR }}/backups/techbloc-backup.service /etc/systemd/system/techbloc-backup.service
    sudo cp {{ SCRIPT_DIR }}/backups/techbloc-backup.timer /etc/systemd/system/techbloc-backup.timer
    sudo systemctl daemon-reload
    sudo systemctl start techbloc-backup.timer
    sudo systemctl enable techbloc-backup.timer
    @echo "Backup service set up successfully!"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
boto3
2+
requests
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Environment file for techbloc-backups
2+
SPACES_ACCESS_KEY={spaces_access_key}
3+
SPACES_SECRET_KEY={spaces_secret_key}
4+
SPACES_BUCKET_NAME={spaces_bucket_name}
5+
DOWNLOAD_DIR={download_dir}
6+
MATRIX_WEBHOOK_URL=http://webhook
7+
MATRIX_WEBHOOK_API_KEY=api_key
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#!/usr/bin/env python
2+
3+
import logging
4+
import os
5+
import socket
6+
7+
import boto3
8+
import requests
9+
10+
11+
# Names of the backup archives to download.
FILENAMES = [prefix + "-backup.tar.bz2" for prefix in ("matrix", "openoversight")]

# Key prefix under which the archives are stored in the bucket.
SPACES_BASE_PREFIX = "monolith-backups"

# Runtime configuration read from the environment; each is None when unset.
DOWNLOAD_DIR = os.environ.get("DOWNLOAD_DIR")
SPACES_BUCKET_NAME = os.environ.get("SPACES_BUCKET_NAME")
MATRIX_WEBHOOK_URL = os.environ.get("MATRIX_WEBHOOK_URL")
MATRIX_WEBHOOK_API_KEY = os.environ.get("MATRIX_WEBHOOK_API_KEY")


log = logging.getLogger(__name__)
20+
21+
22+
def get_s3_client():
    """Build a boto3 S3 client pointed at the DigitalOcean Spaces sfo3 endpoint.

    The credentials are read from the ``SPACES_ACCESS_KEY`` and
    ``SPACES_SECRET_KEY`` environment variables at call time.
    """
    credentials = {
        "aws_access_key_id": os.getenv("SPACES_ACCESS_KEY"),
        "aws_secret_access_key": os.getenv("SPACES_SECRET_KEY"),
    }
    location = {
        "endpoint_url": "https://sfo3.digitaloceanspaces.com",
        "region_name": "sfo3",
    }
    return boto3.client("s3", **credentials, **location)
30+
31+
32+
def alert_matrix(filename: str, *, timeout: float = 30.0) -> None:
    """Post a "backup complete" message for *filename* to the Matrix webhook.

    The message names the local host and the file, and is sent as JSON to
    ``MATRIX_WEBHOOK_URL`` together with ``MATRIX_WEBHOOK_API_KEY``.

    Args:
        filename: Name of the backup file that finished downloading.
        timeout: Seconds to wait for the webhook before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            backup run on an unresponsive webhook.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            webhook answers with an HTTP error status.
    """
    text = f"⬇ Backup on `{socket.gethostname()}` for `{filename}` complete."
    log.info("Sending message to Matrix: %s", text)
    response = requests.post(
        MATRIX_WEBHOOK_URL,
        json={
            "key": MATRIX_WEBHOOK_API_KEY,
            "body": text,
        },
        timeout=timeout,
    )
    # Surface 4xx/5xx answers instead of treating them as a delivered alert.
    response.raise_for_status()
43+
44+
45+
def download_files() -> None:
    """Download every archive in ``FILENAMES`` from the Spaces bucket.

    Each file is fetched from ``SPACES_BASE_PREFIX/<filename>`` in
    ``SPACES_BUCKET_NAME`` into ``DOWNLOAD_DIR``, and a Matrix alert is sent
    after each completed download.

    Raises:
        RuntimeError: If ``DOWNLOAD_DIR`` or ``SPACES_BUCKET_NAME`` is unset
            or empty.
        requests.RequestException: If a Matrix alert could not be sent. The
            error is raised only after all downloads have been attempted.
    """
    # Fail early with a clear message rather than downloading to "None/<file>".
    required = {
        "DOWNLOAD_DIR": DOWNLOAD_DIR,
        "SPACES_BUCKET_NAME": SPACES_BUCKET_NAME,
    }
    missing = [name for name, value in required.items() if not value]
    if missing:
        raise RuntimeError(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    client = get_s3_client()
    alert_error = None
    for filename in FILENAMES:
        key = f"{SPACES_BASE_PREFIX}/{filename}"
        download_path = f"{DOWNLOAD_DIR}/{filename}"
        log.info("Downloading %s to %s", filename, download_path)

        client.download_file(SPACES_BUCKET_NAME, key, download_path)
        try:
            alert_matrix(filename)
        except requests.RequestException as err:
            # A failed notification must not prevent the remaining backups
            # from being downloaded; remember it and report it at the end.
            log.exception("Could not send Matrix alert for %s", filename)
            if alert_error is None:
                alert_error = err
    log.info("Downloaded %s files", len(FILENAMES))

    # Still fail the run so a broken webhook does not go unnoticed.
    if alert_error is not None:
        raise alert_error
55+
56+
57+
if __name__ == "__main__":
    # Configure logging at INFO level before starting the downloads.
    log_config = {
        "level": logging.INFO,
        "format": "[%(asctime)s - %(name)s - %(lineno)3d][%(levelname)s] %(message)s",
    }
    logging.basicConfig(**log_config)
    download_files()
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[Unit]
Description=TechBloc Backup Service
# The backup script downloads from a remote endpoint and posts to a webhook,
# so wait until the network is up. This matters when the persistent timer
# triggers a missed run right after boot.
Wants=network-online.target
After=network-online.target

[Service]
Type=oneshot
ExecStart=/opt/techbloc/venv/bin/python3 /opt/techbloc/techbloc-backup.py
# Supplies the environment variables the script reads (keys, bucket, download dir, webhook).
EnvironmentFile=/opt/techbloc/techbloc-backup.env
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[Unit]
2+
Description=TechBloc Backup Service (weekly)
3+
4+
[Timer]
5+
OnCalendar=Sun *-*-* 05:00:00
6+
Persistent=true
7+
8+
[Install]
9+
WantedBy=timers.target

0 commit comments

Comments
 (0)