Skip to content

Commit 5bb336d

Browse files
committed
Overhaul backups to use the preferred non-exclusive PITR method and to store the WAL segments in a separate
archive.
1 parent fc1e5c2 commit 5bb336d

File tree

7 files changed

+363
-190
lines changed

7 files changed

+363
-190
lines changed

README.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,14 +104,13 @@ Role Variables
104104
105105
- `postgresql_backup_mail_recipient`: User or address that should receive mail from the backup scripts.
106106
107-
- `postgresql_backup_remote_rsync_path`: Path to `rsync` on the remote system.
108-
109107
- `postgresql_backup_post_command`: Arbitrary command to run after successful completion of a scheduled backup.
110108
111109
Dependencies
112110
------------
113111
114-
None
112+
Backup functionality requires Python 2.7 or 3.5+, psycopg2, and rsync. Note that if installing PGDG versions of
113+
PostgreSQL on Enterprise Linux, corresponding psycopg2 packages are available from the PGDG yum repositories.
115114
116115
Example Playbook
117116
----------------

defaults/main.yml

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,32 @@
11
---
22

33
postgresql_default_version: 10
4-
postgresql_backup_local_dir: ~postgres/backup
5-
postgresql_backup_active_dir: "{{ postgresql_backup_local_dir }}/active"
6-
postgresql_backup_mail_recipient: postgres
7-
postgresql_backup_rotate: true
84
postgresql_user_name: postgres
95

6+
# Point-In-Time Recovery (PITR) backup options
7+
# https://www.postgresql.org/docs/current/continuous-archiving.html
8+
postgresql_backup_local_dir: >-
9+
{{ '/var/lib/pgsql' if ansible_os_family == 'RedHat' else (
10+
'/var/lib/postgresql' if ansible_os_family == 'Debian' else '~postgres') }}/backup
11+
postgresql_create_backup_dir: true
12+
13+
# Options used for the WAL archive command - do not change this unless you have read the PITR documentation and
14+
# understand how this command must work.
1015
postgresql_archive_wal_rsync_args: '--ignore-existing -ptg --info=skip1'
16+
17+
# These options are passed to all calls of rsync (in addition to backups, rsync is used to clean up old backups)
18+
postgresql_backup_rsync_connect_opts: ''
19+
# These options are passed only to the call of rsync that performs the backup
20+
postgresql_backup_rsync_backup_opts: '-rptg'
21+
22+
# Keep this many old backups
23+
postgresql_backup_keep: 30
24+
25+
__postgresql_pgdg_bin_dir: "{{ '/usr/pgsql-' ~ (postgresql_version | replace('.', '')) ~ '/bin' }}"
26+
postgresql_backup_command: >-
27+
{{ postgresql_backup_local_dir | quote }}/bin/backup.py
28+
{{ '--rsync-connect-opts ' ~ (postgresql_backup_rsync_connect_opts | quote) if postgresql_backup_rsync_connect_opts else '' }}
29+
--rsync-backup-opts {{ postgresql_backup_rsync_backup_opts | quote }}
30+
--keep {{ postgresql_backup_keep | quote }}
31+
{{ '--pg-bin-dir ' ~ __postgresql_pgdg_bin_dir if ansible_os_family == 'RedHat' else '' }}
32+
--backup --clean-archive {{ postgresql_backup_dir | quote }}

files/backup.py

Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
#!/usr/bin/env python
2+
"""
3+
Perform PostgreSQL PITR backups
4+
5+
See the documentation for details:
6+
https://www.postgresql.org/docs/current/continuous-archiving.html
7+
8+
This is intended to be run locally on the PostgreSQL server as the postgres
9+
user, with an appropriate environment to connect to the desired PostgreSQL
10+
instance. See the environment variables that control this if necessary.
11+
12+
This script is compatible with Python 2.7 and Python 3.5+
13+
"""
14+
from __future__ import print_function
15+
16+
import argparse
17+
import bisect
18+
import datetime
19+
import errno
20+
import logging
21+
import os
22+
import re
23+
import shlex
24+
import subprocess
25+
import sys
26+
import time
27+
import tempfile
28+
try:
29+
from shlex import quote as shlex_quote
30+
except ImportError:
31+
from pipes import quote as shlex_quote
32+
33+
import psycopg2
34+
35+
36+
# SQL used to bracket the file-level copy.  ``%(label)s`` is bound by psycopg2
# from the dict passed to ``cursor.execute()``.  Per the PostgreSQL
# documentation for pg_start_backup(label, fast, exclusive) the two ``false``
# arguments request a non-fast, non-exclusive backup.
START_BACKUP_SQL = "SELECT pg_start_backup(%(label)s, false, false)"
STOP_BACKUP_SQL = "SELECT * FROM pg_stop_backup(false, true)"

# Patterns handed to rsync as ``--exclude`` arguments when copying the data
# directory.
RSYNC_EXCLUDES = (
    'pg_wal/*',     # >= 10
    'pg_xlog/*',    # < 10
    'postmaster.pid',
    'postmaster.opts',
    'pg_replslot/*',
    'pg_dynshmem/*',
    'pg_notify/*',
    'pg_serial/*',
    'pg_snapshots/*',
    'pg_stat_tmp/*',
    'pg_subtrans/*',
    'pg_tmp*',
    'pg_internal.init',
)

# Matches a backup directory name as produced by strftime('%Y%m%dT%H%M%SZ').
# Anchored at the end (``\Z``) so that names which merely *start* with a label
# (e.g. "20190101T000000Z.old") are not mistaken for backups.
BACKUP_LABEL_RE = re.compile(r"\d{8}T\d{6}Z\Z")
# Captures the WAL file name from the "START WAL LOCATION" line of a
# backup_label file.
LAST_SEGMENT_RE = re.compile(r"START WAL LOCATION:.*\(file ([^)]+)\)")

# Root logger; assigned by configure_logging().
log = None
57+
58+
59+
class Label(object):
    """Sortable wrapper around a backup label of the form ``YYYYMMDDTHHMMSSZ``.

    The label is split on ``T`` into an integer date and an integer time;
    instances order chronologically by ``(date, time)``.

    :param label: label string, e.g. ``'20190101T235959Z'``
    :raises ValueError: if either half of the label is not an integer
    """

    def __init__(self, label):
        self.label = label
        self.date, self.time = [int(x) for x in label.rstrip('Z').split('T')]

    def _key(self):
        # Compare as a tuple so the time only breaks ties between equal dates.
        # Comparing date and time independently (and AND-ing the results)
        # mis-orders e.g. 20190101T235959Z vs 20190102T000000Z.
        return (self.date, self.time)

    def __str__(self):
        return self.label

    def __hash__(self):
        return hash(self._key())

    def __eq__(self, other):
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==
        return self._key() != other._key()

    def __lt__(self, other):
        return self._key() < other._key()

    def __le__(self, other):
        return self._key() <= other._key()

    def __gt__(self, other):
        return self._key() > other._key()

    def __ge__(self, other):
        return self._key() >= other._key()
82+
83+
84+
class State(object):
    """Lazily-initialised state shared by the backup steps.

    Holds the database connection and cursor, the label of the current
    backup, and the rsync options that every rsync invocation must carry.
    Each of ``conn``, ``cursor`` and ``label`` is created on first access and
    then reused.
    """

    def __init__(self):
        self._conn = None
        self._cursor = None
        self._label = None
        self._rsync_opts = None

    def set_rsync_opts(self, opts):
        """Store the option string (shell syntax) to pass to every rsync call."""
        self._rsync_opts = opts

    @property
    def rsync_cmd(self):
        """Return a new argv list: ``rsync`` plus the stored common options.

        A fresh list is built on every access, so callers may extend it.
        """
        cmd = ['rsync']
        if self._rsync_opts:
            # was `rsync_opts` (undefined name -> NameError whenever
            # connection options were supplied)
            cmd.extend(shlex.split(self._rsync_opts))
        return cmd

    @property
    def conn(self):
        """Return the psycopg2 connection, opening it on first use."""
        if not self._conn:
            log.info('Connecting to database')
            self._conn = psycopg2.connect('dbname=postgres')
        return self._conn

    @property
    def cursor(self):
        """Return the single cursor used for all statements of this run."""
        if not self._cursor:
            self._cursor = self.conn.cursor()
        return self._cursor

    @property
    def label(self):
        """Return the backup label, a UTC timestamp fixed on first access."""
        if not self._label:
            self._label = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
            log.info('Backup label is: %s', self._label)
        return self._label


state = State()
123+
124+
125+
def parse_args(argv):
    """Parse the command line.

    :param argv: list of argument strings (without the program name)
    :returns: the populated ``argparse.Namespace``
    :raises SystemExit: on invalid usage, including ``--clean-archive``
        combined with a backup path containing ``:``
    """
    cli = argparse.ArgumentParser(
        description='Utility for performing PostgreSQL PITR backups',
    )
    cli.add_argument(
        '--backup', action='store_true', default=False,
        help='Perform backup',
    )
    cli.add_argument(
        '--keep', type=int, default=-1,
        help='Keep this many backups (default: all)',
    )
    cli.add_argument(
        '--clean-archive', action='store_true', default=False,
        help='Clean WAL archive',
    )
    cli.add_argument(
        '--rsync-connect-opts', default=None,
        help='Options to always pass to rsync (e.g. for connection parameters)',
    )
    cli.add_argument(
        '--rsync-backup-opts', default='-rptg',
        help='Options to pass to rsync for backup (default: -rptg)',
    )
    cli.add_argument(
        '--pg-bin-dir', default=None,
        help='Directory containing PostgreSQL auxiliary binaries if not on $PATH',
    )
    cli.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='Verbose output',
    )
    cli.add_argument(
        'backup_path',
        help='Backup to location (rsync-compatible string)',
    )

    parsed = cli.parse_args(argv)

    # a ':' in the path is treated as marking a remote rsync destination
    looks_remote = ':' in parsed.backup_path
    if looks_remote and parsed.clean_archive:
        cli.error('--clean-archive cannot be used with remote backup directories')
    return parsed
139+
140+
141+
def configure_logging(verbose):
    """Configure logging and bind the module-level ``log`` to the root logger.

    :param verbose: when true the level is DEBUG, otherwise INFO
    """
    global log
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(level=level)
    log = logging.getLogger()
147+
148+
149+
def log_command(cmd):
    """Log an argv list at DEBUG level as a single shell-quoted line."""
    rendered = ' '.join(shlex_quote(part) for part in cmd)
    log.debug('command is: %s', rendered)
151+
152+
153+
def initiate_backup():
    """Run START_BACKUP_SQL on the shared cursor, passing the backup label."""
    log.info("Initiating backup with pg_start_backup()")
    # Resolve the cursor before the label so the connection is opened (and
    # logged) before the label is generated (and logged).
    run_sql = state.cursor.execute
    run_sql(START_BACKUP_SQL, {'label': state.label})
156+
157+
158+
def perform_backup(backup_path, rsync_backup_opts):
    """Copy the server's data directory to ``<backup_path>/<label>`` with rsync.

    :param backup_path: rsync-compatible destination root
    :param rsync_backup_opts: extra rsync options as one shell-syntax string
    :raises subprocess.CalledProcessError: if rsync exits non-zero with any
        status other than 24
    """
    cur = state.cursor
    cur.execute("SHOW data_directory")
    data_dir = cur.fetchone()[0]
    # trailing separator: rsync copies the directory's contents, not the
    # directory itself
    source = data_dir.rstrip('/') + os.sep
    destination = os.path.join(backup_path, state.label)

    # assemble rsync command line
    cmd = state.rsync_cmd
    cmd += shlex.split(rsync_backup_opts)
    cmd += ['--delete', '--delete-delay']
    for pattern in RSYNC_EXCLUDES:
        cmd += ['--exclude', pattern]
    cmd += [source, destination]

    log.info('Performing rsync backup from %s to %s', source, destination)
    log_command(cmd)
    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError as exc:
        # Exit status 24 is tolerated (rsync documents it as "partial
        # transfer due to vanished source files"); anything else is fatal.
        if exc.returncode == 24:
            return
        raise
178+
179+
180+
def write_backup_file(backup_path, file_contents, file_name):
    """Create ``<backup_path>/<label>/<file_name>`` with the given contents.

    The contents are written to a local temporary file which is then copied
    into place with rsync, since the backup path might be remote.

    :param backup_path: rsync-compatible destination root
    :param file_contents: text or bytes to store
    :param file_name: name of the file inside the backup directory
    :raises subprocess.CalledProcessError: if rsync fails
    """
    file_path = os.path.join(backup_path, state.label, file_name)
    cmd = state.rsync_cmd
    # The temp file is opened in binary mode; on Python 3 the database driver
    # hands back text (str), which cannot be written to it directly.
    # NOTE(review): UTF-8 is assumed for the encoding -- confirm if tablespace
    # paths may contain non-ASCII characters in another encoding.
    if not isinstance(file_contents, bytes):
        file_contents = file_contents.encode('utf-8')
    # use a tempfile with rsync since the path might be remote
    with tempfile.NamedTemporaryFile(mode='wb', prefix='postgresql_backup_') as fh:
        fh.write(file_contents)
        # make sure the data is on disk before rsync reads the file by name
        fh.flush()
        cmd.extend([fh.name, file_path])
        log.info('Writing backup file at path: %s', file_path)
        log_command(cmd)
        subprocess.check_call(cmd)
191+
192+
193+
def finalize_backup(backup_path):
    """Run STOP_BACKUP_SQL and store the files it returns in the backup.

    The second column of the result is written as ``backup_label``; the third,
    when non-empty, as ``tablespace_map``.

    :param backup_path: rsync-compatible destination root
    """
    log.info("Finalizing backup with pg_stop_backup()")
    cur = state.cursor
    cur.execute(STOP_BACKUP_SQL)
    result = cur.fetchone()
    last_segment, backup_label, tablespace_map = result[0], result[1], result[2]
    log.info('Last WAL segment for this backup is: %s', last_segment)

    write_backup_file(backup_path, backup_label, 'backup_label')
    if not tablespace_map:
        return
    write_backup_file(backup_path, tablespace_map, 'tablespace_map')
204+
205+
206+
def get_current_labels(backup_path):
    """List the backup labels present under ``backup_path``, oldest first.

    The directory is listed with ``rsync --list-only`` so that remote paths
    work too; the last whitespace-separated field of each output line is
    taken as the entry name.

    :param backup_path: rsync-compatible backup root
    :returns: list of label strings sorted chronologically
    :raises subprocess.CalledProcessError: if rsync fails
    """
    cmd = state.rsync_cmd
    cmd.extend(['--list-only', backup_path.rstrip('/') + '/'])
    out = subprocess.check_output(cmd)
    # check_output() returns bytes on Python 3; the label regex is a text
    # pattern, so decode before matching (no-op on Python 2 where str is bytes)
    if not isinstance(out, str):
        out = out.decode('utf-8', 'replace')
    labels = []
    # there doesn't appear to be a way to format rsync --list-only output
    for line in out.splitlines():
        fields = line.split()
        if not fields:
            # blank line: nothing to inspect
            continue
        entry = fields[-1]
        match = BACKUP_LABEL_RE.match(entry)
        # require the whole name to be a label so that e.g.
        # "20190101T000000Z.old" is ignored instead of crashing Label()
        if match and match.group(0) == entry:
            labels.append(Label(entry))
    # explicit chronological key: date first, time as the tie-breaker
    labels.sort(key=lambda item: (item.date, item.time))
    return [str(item) for item in labels]
218+
219+
220+
def rsync_delete_dirs(backup_path, labels):
    """Delete the named backup directories under ``backup_path`` using rsync.

    An empty temporary directory is synced with ``--delete`` over each backup
    to empty it, then one more rsync call removes the emptied directories.

    :param backup_path: rsync-compatible backup root
    :param labels: names of the backup directories to remove
    :raises subprocess.CalledProcessError: if any rsync call fails
    """
    # can't use ssh here since I don't want to write a translator from rsync connect params to ssh
    empty_dir = tempfile.mkdtemp(prefix="postgresql_backup_empty_")
    empty_src = empty_dir + '/'
    try:
        # empty the dirs first, unfortunately this has to be done one-by-one
        for label in labels:
            target = os.path.join(backup_path, label)
            purge_cmd = state.rsync_cmd + ['-r', '--delete', empty_src, target]
            log_command(purge_cmd)
            subprocess.check_call(purge_cmd)

        # then all the empty dirs can be deleted at once
        remove_cmd = state.rsync_cmd
        for label in labels:
            remove_cmd += ['--include', label]
        remove_cmd += ['--exclude', '*', '-d', '--delete']
        remove_cmd += [empty_src, backup_path]
        log_command(remove_cmd)
        subprocess.check_call(remove_cmd)
    finally:
        os.rmdir(empty_dir)
239+
240+
241+
def cleanup_old_backups(backup_path, keep):
    """Remove the oldest backups so that at most ``keep`` remain.

    :param backup_path: rsync-compatible backup root
    :param keep: number of most recent backups to retain
    """
    labels = get_current_labels(backup_path)
    surplus = len(labels) - keep
    if surplus <= 0:
        return
    # labels are ordered oldest first, so the surplus is at the front
    doomed = labels[:surplus]
    log.debug('The following backups will be removed due to --keep=%s: %s', keep, ', '.join(doomed))
    rsync_delete_dirs(backup_path, doomed)
247+
248+
249+
def extract_last_segment_from_backup_label(backup_label):
    """Return the WAL file name from a backup_label's START WAL LOCATION line.

    :param backup_label: full text of a ``backup_label`` file
    :returns: the captured file name from the first matching line, or ``None``
        if no line matches
    """
    candidates = (LAST_SEGMENT_RE.match(text) for text in backup_label.splitlines())
    hit = next((found for found in candidates if found), None)
    return hit.group(1) if hit else None
255+
256+
257+
def cleanup_wal_archive(backup_path, pg_bin_dir):
    """Run pg_archivecleanup on ``<backup_path>/wal_archive``.

    The WAL file name is read from the ``backup_label`` of the oldest backup
    and passed to pg_archivecleanup.  If the label cannot be read or parsed
    the archive is left untouched and a message is logged.

    :param backup_path: local backup root (must not contain ``:``)
    :param pg_bin_dir: directory containing ``pg_archivecleanup``, or a false
        value to rely on ``$PATH``
    :raises subprocess.CalledProcessError: if pg_archivecleanup exits non-zero
    :raises OSError: if pg_archivecleanup cannot be executed
    """
    assert ':' not in backup_path  # this should be handled by the parser
    labels = get_current_labels(backup_path)
    if not labels:
        log.warning("No backups found, cannot clean WAL archive")
        return
    oldest_label = labels[0]
    backup_label_path = os.path.join(backup_path, oldest_label, 'backup_label')
    try:
        # context manager so the handle is closed even if read() fails
        with open(backup_label_path) as fh:
            backup_label = fh.read()
    except Exception:
        # Best effort: any read/decode failure skips the cleanup.  Unlike a
        # bare ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        log.exception("Cannot read backup_label from oldest backup, WAL archive will not be cleaned")
        return
    last_segment = extract_last_segment_from_backup_label(backup_label)
    if not last_segment:
        log.warning("Could not determine last segment from oldest backup, WAL archive will not be cleaned")
        return
    log.info("Last segment in oldest backup (%s): %s", oldest_label, last_segment)
    log.info("Running pg_archivecleanup")
    wal_archive_path = os.path.join(backup_path, 'wal_archive')
    if pg_bin_dir:
        cmd = [os.path.join(pg_bin_dir, 'pg_archivecleanup')]
    else:
        cmd = ['pg_archivecleanup']
    cmd.extend(['-d', wal_archive_path, last_segment])
    log_command(cmd)
    try:
        subprocess.check_call(cmd)
    except OSError as exc:
        # add a hint for the common "binary not on $PATH" case, then re-raise
        if exc.errno == errno.ENOENT:
            log.error("Cannot find pg_archivecleanup (see --pg-bin-dir option)")
        raise
289+
290+
291+
def main(argv):
    """Entry point: run the steps selected on the command line.

    In order: the backup itself (``--backup``), pruning of old backups (when
    ``--keep`` is positive), and WAL archive cleanup (``--clean-archive``).

    :param argv: list of argument strings (without the program name)
    """
    args = parse_args(argv)
    configure_logging(args.verbose)
    state.set_rsync_opts(args.rsync_connect_opts)

    destination = args.backup_path
    started_at = time.time()

    if args.backup:
        initiate_backup()
        perform_backup(destination, args.rsync_backup_opts)
        finalize_backup(destination)
        log.info("Backup complete")

    if args.keep > 0:
        cleanup_old_backups(destination, args.keep)

    if args.clean_archive:
        cleanup_wal_archive(destination, args.pg_bin_dir)

    log.info("Completed in %d seconds", time.time() - started_at)


if __name__ == '__main__':
    main(sys.argv[1:])

0 commit comments

Comments
 (0)