Skip to content

Commit e7a33bb

Browse files
authored
Merge pull request #30 from galaxyproject/overhaul-backups
Overhaul and modernize backups
2 parents fc1e5c2 + 63d9daf commit e7a33bb

File tree

7 files changed

+366
-190
lines changed

7 files changed

+366
-190
lines changed

README.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,14 +104,13 @@ Role Variables
104104
105105
- `postgresql_backup_mail_recipient`: User or address that should receive mail from the backup scripts.
106106
107-
- `postgresql_backup_remote_rsync_path`: Path to `rsync` on the remote system.
108-
109107
- `postgresql_backup_post_command`: Arbitrary command to run after successful completion of a scheduled backup.
110108
111109
Dependencies
112110
------------
113111
114-
None
112+
Backup functionality requires Python 2.7 or 3.5+, psycopg2, and rsync. Note that if installing PGDG versions of
113+
PostgreSQL on Enterprise Linux, corresponding psycopg2 packages are available from the PGDG yum repositories.
115114
116115
Example Playbook
117116
----------------

defaults/main.yml

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,32 @@
11
---
22

33
postgresql_default_version: 10
4-
postgresql_backup_local_dir: ~postgres/backup
5-
postgresql_backup_active_dir: "{{ postgresql_backup_local_dir }}/active"
6-
postgresql_backup_mail_recipient: postgres
7-
postgresql_backup_rotate: true
84
postgresql_user_name: postgres
95

6+
# Point-In-Time Recovery (PITR) backup options
7+
# https://www.postgresql.org/docs/current/continuous-archiving.html
8+
postgresql_backup_local_dir: >-
9+
{{ '/var/lib/pgsql' if ansible_os_family == 'RedHat' else (
10+
'/var/lib/postgresql' if ansible_os_family == 'Debian' else '~postgres') }}/backup
11+
postgresql_create_backup_dir: true
12+
13+
# Options used for the WAL archive command - do not change this unless you have read the PITR documentation and
14+
# understand how this command must work.
1015
postgresql_archive_wal_rsync_args: '--ignore-existing -ptg --info=skip1'
16+
17+
# These options are passed to all calls of rsync (in addition to backups, rsync is used to clean up old backups)
18+
postgresql_backup_rsync_connect_opts: ''
19+
# These options are passed only to the call of rsync that performs the backup
20+
postgresql_backup_rsync_backup_opts: '-rptg'
21+
22+
# Keep this many old backups
23+
postgresql_backup_keep: 30
24+
25+
__postgresql_pgdg_bin_dir: "{{ '/usr/pgsql-' ~ (postgresql_version | replace('.', '')) ~ '/bin' }}"
26+
postgresql_backup_command: >-
27+
{{ postgresql_backup_local_dir | quote }}/bin/backup.py
28+
{{ '--rsync-connect-opts ' ~ (postgresql_backup_rsync_connect_opts | quote) if postgresql_backup_rsync_connect_opts else '' }}
29+
--rsync-backup-opts {{ postgresql_backup_rsync_backup_opts | quote }}
30+
--keep {{ postgresql_backup_keep | quote }}
31+
{{ '--pg-bin-dir ' ~ __postgresql_pgdg_bin_dir if ansible_os_family == 'RedHat' else '' }}
32+
--backup --clean-archive {{ postgresql_backup_dir | quote }}

files/backup.py

Lines changed: 313 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,313 @@
1+
#!/usr/bin/env python
2+
"""
3+
Perform PostgreSQL PITR backups
4+
5+
See the documentation for details:
6+
https://www.postgresql.org/docs/current/continuous-archiving.html
7+
8+
This is intended to be run locally on the PostgreSQL server as the postgres
9+
user, with an appropriate environment to connect to the desired PostgreSQL
10+
instance. If necessary, set the libpq environment variables (e.g. PGHOST, PGPORT, PGUSER) to control the connection.
11+
12+
This script is compatible with Python 2.7 and Python 3.5+
13+
"""
14+
from __future__ import print_function
15+
16+
import argparse
17+
import bisect
18+
import datetime
19+
import errno
20+
import logging
21+
import os
22+
import re
23+
import shlex
24+
import subprocess
25+
import sys
26+
import time
27+
import tempfile
28+
try:
29+
from shlex import quote as shlex_quote
30+
except ImportError:
31+
from pipes import quote as shlex_quote
32+
33+
import psycopg2
34+
35+
36+
# Patterns for recognising backup directory names and for pulling the WAL
# segment file name out of a backup_label file's "START WAL LOCATION" line.
BACKUP_LABEL_RE = re.compile(r"\d{8}T\d{6}Z")
LAST_SEGMENT_RE = re.compile(r"START WAL LOCATION:.*\(file ([^)]+)\)")

# SQL statements that bracket the file-level copy. The backup label is bound
# as a query parameter; the literal boolean arguments are sent as-is (see the
# PostgreSQL backup control function documentation for their meaning).
START_BACKUP_SQL = "SELECT pg_start_backup(%(label)s, false, false)"
STOP_BACKUP_SQL = "SELECT * FROM pg_stop_backup(false, true)"

# Patterns handed to rsync --exclude when copying the data directory.
RSYNC_EXCLUDES = (
    # WAL directory contents, under either name
    'pg_wal/*',  # >= 10
    'pg_xlog/*',  # < 10
    # postmaster files
    'postmaster.pid',
    'postmaster.opts',
    # remaining excluded directory contents and files
    'pg_replslot/*',
    'pg_dynshmem/*',
    'pg_notify/*',
    'pg_serial/*',
    'pg_snapshots/*',
    'pg_stat_tmp/*',
    'pg_subtrans/*',
    'pg_tmp*',
    'pg_internal.init',
)

# Module-wide logger; stays None until configure_logging() has been called.
log = None
57+
58+
59+
class Label(object):
    """Sortable wrapper around a backup label of the form ``YYYYMMDDTHHMMSSZ``.

    The label is split on ``T`` (after stripping the trailing ``Z``) into an
    integer ``date`` and an integer ``time``. Instances compare
    chronologically, i.e. by ``(date, time)``: the date decides first and the
    time only breaks ties between labels from the same day.
    """

    def __init__(self, label):
        self.label = label
        self.date, self.time = [int(x) for x in label.rstrip('Z').split('T')]

    def _key(self):
        # Tuple comparison is lexicographic, which is exactly chronological
        # order for (date, time).
        return (self.date, self.time)

    def __str__(self):
        return self.label

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self.label)

    def __hash__(self):
        # Defined alongside __eq__ so instances stay hashable on Python 3.
        return hash(self._key())

    def __eq__(self, other):
        if not isinstance(other, Label):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive __ne__ from __eq__.
        if not isinstance(other, Label):
            return NotImplemented
        return self._key() != other._key()

    def __lt__(self, other):
        if not isinstance(other, Label):
            return NotImplemented
        return self._key() < other._key()

    def __le__(self, other):
        if not isinstance(other, Label):
            return NotImplemented
        return self._key() <= other._key()

    def __gt__(self, other):
        if not isinstance(other, Label):
            return NotImplemented
        return self._key() > other._key()

    def __ge__(self, other):
        if not isinstance(other, Label):
            return NotImplemented
        return self._key() >= other._key()
82+
83+
84+
class State(object):
    """Lazily-created resources shared by the backup steps.

    Holds the database connection, its cursor, the backup label and the
    rsync connection options. The connection, cursor and label are created
    on first access and reused afterwards.
    """

    def __init__(self):
        self._conn = None
        self._cursor = None
        self._label = None
        self._rsync_opts = None

    def set_rsync_opts(self, opts):
        """Store the option string to prepend to every rsync invocation."""
        self._rsync_opts = opts

    @property
    def rsync_cmd(self):
        """Return a new argv list: ``rsync`` plus the stored connect options.

        A fresh list is built on every access, so callers may extend it
        without affecting later commands.
        """
        cmd = ['rsync']
        if self._rsync_opts:
            # The options arrive as a single shell-style string; split them
            # into separate argv entries.
            cmd.extend(shlex.split(self._rsync_opts))
        return cmd

    @property
    def conn(self):
        """Return the database connection, opening it on first use."""
        if not self._conn:
            log.info('Connecting to database')
            self._conn = psycopg2.connect('dbname=postgres')
        return self._conn

    @property
    def cursor(self):
        """Return a cursor on the shared connection, creating it on first use."""
        if not self._cursor:
            self._cursor = self.conn.cursor()
        return self._cursor

    @property
    def label(self):
        """Return the backup label (UTC timestamp), generated on first use."""
        if not self._label:
            self._label = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
            log.info('Backup label is: %s', self._label)
        return self._label


# Single shared instance used by the module-level functions.
state = State()
123+
124+
125+
def parse_args(argv):
    """Parse the command line given as a list of argument strings.

    Returns the populated argparse namespace. A usage error is reported
    through ``parser.error()`` when --clean-archive is combined with a backup
    path containing ``:``.
    """
    parser = argparse.ArgumentParser(description='Utility for performing PostgreSQL PITR backups')
    add = parser.add_argument

    add('--backup', action='store_true', default=False, help='Perform backup')
    add('--keep', type=int, default=-1, help='Keep this many backups (default: all)')
    add('--clean-archive', action='store_true', default=False, help='Clean WAL archive')
    add(
        '--rsync-connect-opts',
        default=None,
        help='Options to always pass to rsync (e.g. for connection parameters)',
    )
    add(
        '--rsync-backup-opts',
        default='-rptg',
        help='Options to pass to rsync for backup (default: -rptg)',
    )
    add(
        '--pg-bin-dir',
        default=None,
        help='Directory containing PostgreSQL auxiliary binaries if not on $PATH',
    )
    add('-v', '--verbose', action='store_true', default=False, help='Verbose output')
    add('backup_path', help='Backup to location (rsync-compatible string)')

    parsed = parser.parse_args(argv)
    # A colon marks the path as a remote rsync location.
    path_is_remote = ':' in parsed.backup_path
    if parsed.clean_archive and path_is_remote:
        parser.error('--clean-archive cannot be used with remote backup directories')
    return parsed
139+
140+
141+
def configure_logging(verbose):
    """Configure logging and bind the module-level ``log`` to the root logger.

    The level is DEBUG when ``verbose`` is true, INFO otherwise.
    """
    global log
    if verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level)
    log = logging.getLogger()
147+
148+
149+
def log_command(cmd):
    """Log an argv list at DEBUG level as one shell-quoted command line."""
    quoted_args = [shlex_quote(arg) for arg in cmd]
    log.debug('command is: %s', ' '.join(quoted_args))
151+
152+
153+
def initiate_backup():
    """Start the backup by executing START_BACKUP_SQL with the backup label."""
    log.info("Initiating backup with pg_start_backup()")
    # Fetch the cursor before the label so first-use connection and label
    # creation happen in the same order as the log output expects.
    cursor = state.cursor
    params = {'label': state.label}
    cursor.execute(START_BACKUP_SQL, params)
156+
157+
158+
def perform_backup(backup_path, rsync_backup_opts):
    """Copy the server's data directory to ``<backup_path>/<label>`` with rsync.

    The data directory is read from the server with ``SHOW data_directory``.
    ``rsync_backup_opts`` is a shell-style option string that is split and
    added to the command line. A failing rsync raises
    ``subprocess.CalledProcessError``, except for exit status 24, which is
    ignored.
    """
    cursor = state.cursor
    cursor.execute("SHOW data_directory")
    data_dir = cursor.fetchone()[0]

    # Exactly one trailing separator on the source directory.
    source = data_dir.rstrip('/') + os.sep
    destination = os.path.join(backup_path, state.label)

    # assemble rsync command line
    cmd = state.rsync_cmd
    cmd += shlex.split(rsync_backup_opts)
    cmd += ['--delete', '--delete-delay']
    for pattern in RSYNC_EXCLUDES:
        cmd += ['--exclude', pattern]
    cmd += [source, destination]

    log.info('Performing rsync backup from %s to %s', source, destination)
    log_command(cmd)
    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError as exc:
        # Exit status 24 is tolerated (rsync documents it as source files
        # vanishing during the transfer); everything else is re-raised.
        if exc.returncode == 24:
            return
        raise
178+
179+
180+
def write_backup_file(backup_path, file_contents, file_name):
    """Write ``file_contents`` to ``<backup_path>/<label>/<file_name>`` via rsync.

    The contents are staged in a local temporary file and copied with rsync,
    since the path might be remote. The temporary file is opened in text mode
    for ``str`` contents and in binary mode otherwise.
    """
    destination = os.path.join(backup_path, state.label, file_name)
    rsync = state.rsync_cmd

    if isinstance(file_contents, str):
        open_mode = 'w'
    else:
        open_mode = 'wb'

    with tempfile.NamedTemporaryFile(mode=open_mode, prefix='postgresql_backup_') as staged:
        staged.write(file_contents)
        # Flush so rsync sees the complete contents on disk.
        staged.flush()
        rsync.extend([staged.name, destination])
        log.info('Writing backup file at path: %s', destination)
        log_command(rsync)
        subprocess.check_call(rsync)
192+
193+
194+
def finalize_backup(backup_path):
    """Finish the backup and store the files returned by the server.

    Executes STOP_BACKUP_SQL and reads three columns from the single result
    row. The second is written as ``backup_label``; the third is written as
    ``tablespace_map`` only when it is non-empty.
    """
    log.info("Finalizing backup with pg_stop_backup()")
    cursor = state.cursor
    cursor.execute(STOP_BACKUP_SQL)
    result = cursor.fetchone()
    last_segment, backup_label, tablespace_map = result[0], result[1], result[2]
    log.info('Last WAL segment for this backup is: %s', last_segment)

    write_backup_file(backup_path, backup_label, 'backup_label')
    if not tablespace_map:
        return
    write_backup_file(backup_path, tablespace_map, 'tablespace_map')
205+
206+
207+
def get_current_labels(backup_path):
    """Return the labels of the backups in ``backup_path``, oldest first.

    The directory is listed with ``rsync --list-only`` so that remote backup
    paths work too. Only entries whose whole name is a backup label
    (``YYYYMMDDTHHMMSSZ``) are returned, as strings in chronological order.

    Raises ``subprocess.CalledProcessError`` if rsync fails.
    """
    cmd = state.rsync_cmd
    cmd.extend(['--list-only', backup_path.rstrip('/') + '/'])
    out = subprocess.check_output(cmd)
    if sys.version_info > (3,):
        out = out.decode('utf-8')
    labels = []
    # there doesn't appear to be a way to format rsync --list-only output,
    # so take the last whitespace-separated field of each line as the name
    for line in out.splitlines():
        fields = line.split()
        if not fields:
            # blank line: nothing to parse
            continue
        entry = fields[-1]
        match = BACKUP_LABEL_RE.match(entry)
        # The pattern is not anchored at the end, so require the match to
        # cover the whole entry; a name with trailing junk is not a backup
        # directory and would not parse as a Label.
        if match and match.group(0) == entry:
            labels.append(Label(entry))
    # Sort on an explicit chronological key: date first, then time of day.
    labels.sort(key=lambda item: (item.date, item.time))
    return [str(label) for label in labels]
221+
222+
223+
def rsync_delete_dirs(backup_path, labels):
    """Remove the backup directories named by ``labels`` under ``backup_path``.

    Deletion is done with rsync (not ssh) by syncing from an empty temporary
    directory: each backup directory is emptied first, then the emptied
    directories are removed with a single rsync call. The temporary directory
    is always removed afterwards.
    """
    # can't use ssh here since I don't want to write a translator from rsync connect params to ssh
    empty_dir = tempfile.mkdtemp(prefix="postgresql_backup_empty_")
    empty_source = empty_dir + '/'
    try:
        # empty the dirs first, unfortunately this has to be done one-by-one
        for label in labels:
            empty_cmd = state.rsync_cmd + [
                '-r', '--delete', empty_source, os.path.join(backup_path, label),
            ]
            log_command(empty_cmd)
            subprocess.check_call(empty_cmd)

        # then all the empty dirs can be deleted at once
        remove_cmd = state.rsync_cmd
        for label in labels:
            remove_cmd += ['--include', label]
        remove_cmd += ['--exclude', '*', '-d', '--delete']
        remove_cmd += [empty_source, backup_path]
        log_command(remove_cmd)
        subprocess.check_call(remove_cmd)
    finally:
        os.rmdir(empty_dir)
242+
243+
244+
def cleanup_old_backups(backup_path, keep):
    """Delete the oldest backups so that at most ``keep`` remain.

    Does nothing when there are ``keep`` or fewer backups.
    """
    current = get_current_labels(backup_path)
    excess = len(current) - keep
    if excess <= 0:
        return
    # Labels are listed oldest first, so the surplus is at the front.
    doomed = current[:excess]
    log.debug('The following backups will be removed due to --keep=%s: %s', keep, ', '.join(doomed))
    rsync_delete_dirs(backup_path, doomed)
250+
251+
252+
def extract_last_segment_from_backup_label(backup_label):
    """Return the WAL file name from the first matching line of ``backup_label``.

    Each line is matched against LAST_SEGMENT_RE (the ``START WAL LOCATION``
    line); the captured file name of the first match is returned, or ``None``
    if no line matches.
    """
    attempts = (LAST_SEGMENT_RE.match(line) for line in backup_label.splitlines())
    first_hit = next((hit for hit in attempts if hit), None)
    if first_hit is None:
        return None
    return first_hit.group(1)
258+
259+
260+
def cleanup_wal_archive(backup_path, pg_bin_dir):
    """Remove WAL archive files that the oldest remaining backup does not need.

    Reads ``backup_label`` from the oldest backup under ``backup_path``,
    extracts the WAL segment file name from it and runs ``pg_archivecleanup``
    on ``<backup_path>/wal_archive`` with that segment. Cleaning is
    best-effort: if there are no backups, or the label cannot be read or
    parsed, a message is logged and nothing is removed.

    ``pg_bin_dir`` is the directory containing ``pg_archivecleanup``; when
    falsy the executable is looked up on $PATH.

    Raises ``ValueError`` for a remote (``:``-containing) backup path,
    ``OSError`` if ``pg_archivecleanup`` cannot be executed, and
    ``subprocess.CalledProcessError`` if it exits non-zero.
    """
    # The argument parser normally rejects this combination; check explicitly
    # rather than with assert, which is stripped under `python -O`.
    if ':' in backup_path:
        raise ValueError('WAL archive cannot be cleaned on a remote backup path: %s' % backup_path)
    labels = get_current_labels(backup_path)
    if not labels:
        log.warning("No backups found, cannot clean WAL archive")
        return
    oldest_label = labels[0]
    backup_label_path = os.path.join(backup_path, oldest_label, 'backup_label')
    try:
        with open(backup_label_path) as fh:
            backup_label = fh.read()
    except (IOError, OSError, UnicodeError):
        # Only I/O and decoding failures are treated as "cannot read";
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        log.exception("Cannot read backup_label from oldest backup, WAL archive will not be cleaned")
        return
    last_segment = extract_last_segment_from_backup_label(backup_label)
    if not last_segment:
        log.warning("Could not determine last segment from oldest backup, WAL archive will not be cleaned")
        return
    log.info("Last segment in oldest backup (%s): %s", oldest_label, last_segment)
    log.info("Running pg_archivecleanup")
    wal_archive_path = os.path.join(backup_path, 'wal_archive')
    if pg_bin_dir:
        cmd = [os.path.join(pg_bin_dir, 'pg_archivecleanup')]
    else:
        cmd = ['pg_archivecleanup']
    cmd.extend(['-d', wal_archive_path, last_segment])
    log_command(cmd)
    try:
        subprocess.check_call(cmd)
    except OSError as exc:
        # Add a hint for the common "executable not found" case, then let
        # the original error propagate.
        if exc.errno == errno.ENOENT:
            log.error("Cannot find pg_archivecleanup (see --pg-bin-dir option)")
        raise
292+
293+
294+
def main(argv):
    """Run the steps selected on the command line and log the elapsed time.

    In order: the backup itself (--backup), removal of old backups (only when
    --keep is greater than zero), and WAL archive cleanup (--clean-archive).
    """
    opts = parse_args(argv)
    configure_logging(opts.verbose)
    state.set_rsync_opts(opts.rsync_connect_opts)

    started_at = time.time()
    target = opts.backup_path

    if opts.backup:
        initiate_backup()
        perform_backup(target, opts.rsync_backup_opts)
        finalize_backup(target)
        log.info("Backup complete")

    if opts.keep > 0:
        cleanup_old_backups(target, opts.keep)

    if opts.clean_archive:
        cleanup_wal_archive(target, opts.pg_bin_dir)

    log.info("Completed in %d seconds", time.time() - started_at)


if __name__ == '__main__':
    main(sys.argv[1:])

0 commit comments

Comments
 (0)