
Commit f253db2

Add backup and restore functionality

Add Docker volume backup functionality

1 parent 887596d · commit f253db2

File tree

5 files changed (+321 −153 lines)


Dockerfile

Lines changed: 2 additions & 0 deletions

```diff
@@ -2,6 +2,8 @@ FROM python:3.10.7-slim-buster
 
 WORKDIR /app
 
+RUN mkdir -p /tmp/outputs
+
 COPY requirements.txt .
 
 RUN pip install --no-cache-dir -r requirements.txt
```
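
A note on why this line matters: main.py (below) attaches a `logging.FileHandler` inside `OUTPUT_DIR` at startup, and `FileHandler` raises `FileNotFoundError` if the parent directory does not exist. `RUN mkdir -p /tmp/outputs` guarantees the default output directory is present even when no host volume is mounted over it. A minimal sketch of the behavior, with the path assumed from the compose files below:

```python
import logging
import os

output_dir = "/tmp/outputs"  # default OUTPUT_DIR in the compose files below

# Without this, logging.FileHandler("/tmp/outputs/backup.log") raises
# FileNotFoundError; it is what `RUN mkdir -p /tmp/outputs` guarantees.
os.makedirs(output_dir, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    handlers=[logging.FileHandler(os.path.join(output_dir, "backup.log")),
              logging.StreamHandler()])
logging.info("file handler opens cleanly once the directory exists")
```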

README.md

Lines changed: 41 additions & 1 deletion

````diff
@@ -1 +1,41 @@
-# docker-volume-backup
+# docker-volume-backup
+
+Example
+
+```yaml
+version: "3.8"
+services:
+  app:
+    image: ubuntu
+    volumes:
+      - app:/app
+      - app1:/app1
+    working_dir: /app
+    stdin_open: true
+    tty: true
+    command: tail -F anything
+
+  backup:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    volumes:
+      - app:/tmp/backups/app:ro
+      - app1:/tmp/backups/app1:ro
+      - ./outputs:/tmp/outputs
+      # save logs to host
+
+    environment:
+      - BACKUP_DIR=/tmp/backups
+      - OUTPUT_DIR=/tmp/outputs
+      - S3_ENDPOINT=
+      - S3_BUCKET=backups
+      - S3_ACCESS_KEY=
+      - S3_SECRET_KEY=
+      - S3_PREFIX=docker
+      - SECOND_INTERVAL=60 # backup interval in seconds
+
+volumes:
+  app:
+  app1:
+```
````
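
For orientation, each backup run uploads the per-volume archives under a timestamped prefix; assuming the helper this commit moves into utils keeps the key layout of the deleted `backup_directory` code in the main.py diff below, the resulting object keys look like this (a sketch, using the `S3_PREFIX=docker` value from the example):

```python
import time

# Key layout from the backup upload loop:
#   <S3_PREFIX>/<timestamp>/<volume>.tar.gz
S3_PREFIX = "docker"
timestamp = time.strftime("%Y%m%d-%H%M%S")  # same format as get_timestamp()
for volume in ("app", "app1"):
    print(f"{S3_PREFIX}/{timestamp}/{volume}.tar.gz")
# e.g. docker/20230101-120000/app.tar.gz
```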

docker-compose.yml

Lines changed: 36 additions & 0 deletions

```diff
@@ -0,0 +1,36 @@
+version: '3.8'
+services:
+  app:
+    image: ubuntu
+    volumes:
+      - app:/app
+      - app1:/app1
+    working_dir: /app
+    stdin_open: true
+    tty: true
+    command: tail -F anything
+
+  backup:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    volumes:
+      - app:/tmp/backups/app:ro
+      - app1:/tmp/backups/app1:ro
+      - ./outputs:/tmp/outputs
+      # save logs to host
+
+    environment:
+      - BACKUP_DIR=/tmp/backups
+      - OUTPUT_DIR=/tmp/outputs
+      - S3_ENDPOINT=https://0c5059a262cdf86340651b0d9a085c9e.r2.cloudflarestorage.com
+      - S3_BUCKET=backups
+      - S3_ACCESS_KEY=a37a6cd9f857e427818e7aae139689e4
+      - S3_SECRET_KEY=7d8ab79a602a5a236d12e6e3e4bd6339665feea70f187775ff8d1914236e9eab
+      - S3_PREFIX=docker
+      - SECOND_INTERVAL=60
+
+
+volumes:
+  app:
+  app1:
```
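
One caveat worth flagging: the app volumes are mounted read-only (`:ro`) into the backup service, which is right for taking backups, but restore mode (see main.py below) appears to extract archives back into `BACKUP_DIR=/tmp/backups`. If that reading is correct, a restore run against this exact compose file would fail with a read-only filesystem error. A quick in-container check, as a sketch:

```python
import os

# On a volume mounted with `:ro`, access(2) reports no write permission
# (EROFS on Linux), so this prints False inside the backup container.
print(os.access("/tmp/backups/app", os.W_OK))
```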

main.py

Lines changed: 30 additions & 152 deletions

```diff
@@ -1,170 +1,48 @@
+from utils import *
 from botocore.exceptions import NoCredentialsError
 import os
-import tarfile
-import boto3
 from dotenv import load_dotenv
 import os
 import time
 import schedule
 import logging
-import hashlib
+
 # Load the .env file
 load_dotenv(".env")
 
-
-# ... rest of your code ...
-
-def backup_directory(parent_dir, output_dir):
-    """
-    Archive each subdirectory of a parent directory to a .tar.gz file.
-
-    Parameters:
-    parent_dir (str): The parent directory containing the subdirectories to archive.
-    output_dir (str): The directory to save the .tar.gz files to.
-    """
-    # Create the output directory if it doesn't exist
-    timestamp = get_timestamp()
-    logging.info(f"Backup started at {timestamp}")
-    os.makedirs(output_dir, exist_ok=True)
-    # clear the output directory
-    for file_name in os.listdir(output_dir):
-        file_path = os.path.join(output_dir, file_name)
-        try:
-            if os.path.isfile(file_path):
-                os.remove(file_path)
-        except Exception as e:
-            logging.error(e)
-
-    for dir_name in os.listdir(parent_dir):
-        dir_path = os.path.join(parent_dir, dir_name)
-        # Check if it's a directory
-        if os.path.isdir(dir_path):
-            logging.info(f"Archiving {dir_path}")
-            with tarfile.open(os.path.join(output_dir, f"{dir_name}.tar.gz"), "w:gz") as tar:
-                tar.add(dir_path, arcname=dir_name)
-
-    S3_ENDPOINT_URL = os.getenv("S3_ENDPOINT")
-    S3_ACCESS_KEY = os.getenv("S3_ACCESS_KEY")
-    S3_SECRET_KEY = os.getenv("S3_SECRET_KEY")
-    S3_BUCKET = os.getenv("S3_BUCKET")
-    S3_PREFIX = os.getenv("S3_PREFIX")
-    if (not S3_ENDPOINT_URL or not S3_ACCESS_KEY or not S3_SECRET_KEY or not S3_BUCKET):
-        logging.error("S3 credentials not found")
-        return
-    s3 = connect_s3(S3_ENDPOINT_URL, S3_ACCESS_KEY, S3_SECRET_KEY)
-    for file_name in os.listdir(output_dir):
-        file_path = os.path.join(output_dir, file_name)
-        print(f"Uploading {file_path} to S3 bucket {S3_BUCKET}")
-        upload_to_s3(s3, file_path, S3_BUCKET,
-                     S3_PREFIX+'/'+timestamp + '/'+file_name)
-    logging.info(f"Backup finished at {get_timestamp()}")
-
-
-def restore_tar_gz(tar_path, restore_dir):
-    """
-    Restore the contents of a .tar.gz file to a specified directory.
-
-    Parameters:
-    tar_path (str): The path to the .tar.gz file.
-    restore_dir (str): The directory to restore the contents to.
-    """
-    # Create the restore directory if it doesn't exist
-    os.makedirs(restore_dir, exist_ok=True)
-
-    # Open the .tar.gz file
-    with tarfile.open(tar_path, "r:gz") as tar:
-        # Extract all files to the restore directory
-        tar.extractall(path=restore_dir)
-
-
-def connect_s3(endpoint_url, access_key, secret_key):
-    """
-    Connect to AWS S3.
-
-    Parameters:
-    access_key (str): Your AWS access key.
-    secret_key (str): Your AWS secret key.
-
-    Returns:
-    s3: The boto3 S3 client.
-    """
-    s3 = boto3.client('s3', aws_access_key_id=access_key,
-                      aws_secret_access_key=secret_key, endpoint_url=endpoint_url)
-    return s3
-
-
-def upload_to_s3(s3, file_name, bucket, object_name=None):
-    """
-    Upload a file to an S3 bucket.
-
-    Parameters:
-    s3: The boto3 S3 client.
-    file_name (str): The file to upload.
-    bucket (str): The S3 bucket to upload to.
-    object_name (str): The name of the object in the S3 bucket. If not specified, file_name is used.
-    """
-    if object_name is None:
-        object_name = file_name
-
-    try:
-        s3.upload_file(file_name, bucket, object_name)
-        # calc file sha256 then save to log
-        file_hash = hashlib.sha256()
-        with open(file_name, "rb") as f:
-            for byte_block in iter(lambda: f.read(4096), b""):
-                file_hash.update(byte_block)
-        logging.info(f"File {file_name} SHA256: {file_hash.hexdigest()}")
-
-        logging.info(
-            f"Upload {file_name} Successful with object name {object_name} to bucket {bucket}")
-        return True
-    except FileNotFoundError:
-        logging.error(f"The file {file_name} was not found")
-        return False
-    except NoCredentialsError:
-        logging.error("Credentials not available")
-        return False
-
-
-def download_from_s3(s3, bucket, object_name, file_name=None):
-    """
-    Download a file from an S3 bucket.
-
-    Parameters:
-    s3: The boto3 S3 client.
-    bucket (str): The S3 bucket to download from.
-    object_name (str): The name of the object in the S3 bucket.
-    file_name (str): The file to download to. If not specified, object_name is used.
-    """
-    if file_name is None:
-        file_name = object_name
-
-    try:
-        s3.download_file(bucket, object_name, file_name)
-        print("Download Successful")
-        return True
-    except NoCredentialsError:
-        print("Credentials not available")
-        return False
-
-
-def get_timestamp():
-    return time.strftime("%Y%m%d-%H%M%S")
-
-
 # main
 if __name__ == "__main__":
-    # Load configuration from .env file
-    SECOND_INTERVAL = os.getenv("SECOND_INTERVAL")
+    # Load configuration from .env file
+    mode = os.getenv("MODE")
     backup_dir = os.getenv("BACKUP_DIR")
     output_dir = os.getenv("OUTPUT_DIR")
     # Set up logging
     logging.basicConfig(level=logging.INFO,
                         format='%(asctime)s - %(levelname)s - %(message)s',
                         handlers=[logging.FileHandler(os.path.join(output_dir, 'backup.log')), logging.StreamHandler()])
-    backup_directory(backup_dir, output_dir)
-    schedule.every(int(SECOND_INTERVAL)).seconds.do(
-        backup_directory, backup_dir, output_dir)
-    while True:
-        schedule.run_pending()
-        time.sleep(1)
+
+    if mode == "backup":
+        SECOND_INTERVAL = os.getenv("SECOND_INTERVAL")
+
+        backup_directory(backup_dir, output_dir)
+        schedule.every(int(SECOND_INTERVAL)).seconds.do(
+            backup_directory, backup_dir, output_dir)
+        while True:
+            schedule.run_pending()
+            time.sleep(1)
+
+    if mode == "restore":
+        logging.info("Restore mode")
+        S3_ENDPOINT_URL = os.getenv("S3_ENDPOINT")
+        S3_ACCESS_KEY = os.getenv("S3_ACCESS_KEY")
+        S3_SECRET_KEY = os.getenv("S3_SECRET_KEY")
+        S3_BUCKET = os.getenv("S3_BUCKET")
+        S3_PREFIX = os.getenv("S3_PREFIX")
+        RESTORE_DIR = os.getenv("RESTORE_DIR")
+        if (not S3_ENDPOINT_URL or not S3_ACCESS_KEY or not S3_SECRET_KEY or not S3_BUCKET):
+            logging.error("S3 credentials not found")
+        s3 = connect_s3(S3_ENDPOINT_URL, S3_ACCESS_KEY, S3_SECRET_KEY)
+        download_dir_from_s3(
+            s3, S3_BUCKET, RESTORE_DIR, output_dir)
+        restore_directory(output_dir, backup_dir)
+        logging.info(f"Restore finished at {get_timestamp()}")
```
