Skip to content

Commit de185ef

Browse files
authored
Merge pull request #341 from github/separate-pruning-snapshots
Remove ghe-prune-snapshots from ghe-backup so it can be called separately. Incremental logic handling to be done outside of this PR.
2 parents de192ea + d7d39a7 commit de185ef

File tree

5 files changed

+65
-16
lines changed

5 files changed

+65
-16
lines changed

backup.config-example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ GHE_DATA_DIR="data"
1616
# be available for the past N days ...
1717
GHE_NUM_SNAPSHOTS=10
1818

19+
# Pruning snapshots can be scheduled outside of the backup process. If set to 'yes',
20+
# ghe-prune-snapshots will need to be invoked separately via cron
21+
#GHE_PRUNING_SCHEDULED=yes
22+
1923
# The hostname of the GitHub appliance to restore. If you've set up a separate
2024
# GitHub appliance to act as a standby for recovery, specify its IP or hostname
2125
# here. The host to restore to may also be specified directly when running

bin/ghe-backup

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,11 @@ if [ -z "$failures" ]; then
327327
rm -f "../current"
328328
ln -s "$GHE_SNAPSHOT_TIMESTAMP" "../current"
329329

330-
ghe-prune-snapshots
330+
if [[ $GHE_PRUNING_SCHEDULED != "yes" ]]; then
331+
ghe-prune-snapshots
332+
else
333+
log_info "Expired and incomplete snapshots to be pruned separately"
334+
fi
331335
else
332336
log_info "Skipping pruning snapshots, since some backups failed..."
333337
fi

docs/scheduling-backups.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Scheduling backups
1+
# Scheduling backups & snapshot pruning
22

33
Regular backups should be scheduled using `cron(8)` or similar command
44
scheduling service on the backup host. The backup frequency will dictate the
@@ -17,6 +17,13 @@ based on the frequency of backups. The ten most recent snapshots are retained by
1717
default. The number should be adjusted based on backup frequency and available
1818
storage.
1919

20+
By default all expired and incomplete snapshots are deleted at the end of the main
21+
backup process `ghe-backup`. If pruning these snapshots takes a long time, you can
22+
choose to disable the pruning process from the backup run and schedule it separately.
23+
This can be achieved by enabling the `GHE_PRUNING_SCHEDULED` option in `backup.config`.
24+
Please note that if this option is enabled, you will need to schedule the pruning script `ghe-prune-snapshots`
25+
using `cron` or a similar command scheduling service on the backup host.
26+
2027
To schedule hourly backup snapshots with verbose informational output written to
2128
a log file and errors generating an email:
2229

@@ -30,5 +37,10 @@ To schedule nightly backup snapshots instead, use:
3037

3138
0 0 * * * /opt/backup-utils/bin/ghe-backup -v 1>>/opt/backup-utils/backup.log 2>&1
3239

40+
To schedule daily snapshot pruning, use:
41+
42+
43+
44+
0 3 * * * /opt/backup-utils/share/github-backup-utils/ghe-prune-snapshots 1>>/opt/backup-utils/prune-snapshots.log 2>&1
3345

3446
[1]: https://en.wikipedia.org/wiki/Recovery_point_objective

share/github-backup-utils/ghe-prune-snapshots

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,30 @@ prune_snapshot() {
2020
done
2121
}
2222

23-
# First prune all incomplete / failed snapshot directories
24-
prune_dirs="$(ls -1 "$GHE_DATA_DIR"/[0-9]*/incomplete 2>/dev/null || true)"
25-
prune_num=$(echo "$prune_dirs" | grep -v '^$' | wc -l)
23+
# Prune if backup is not running
24+
#if [ ! -f "$GHE_DATA_DIR/in-progress" ] && [ ! -f "$GHE_DATA_DIR/in-progress-restore" ]; then
25+
# Check for backup or restore in-progress file
26+
# Quote the data dir so paths containing spaces or glob characters reach find as a single argument.
inprogress_file=$(find "$GHE_DATA_DIR" -maxdepth 1 -type f \( -name "in-progress" -o -name "in-progress-restore" \) -print -quit)
27+
if [[ "$CALLING_SCRIPT" == "ghe-backup" ]] || [ -z "$inprogress_file" ]; then
28+
# First prune all incomplete / failed snapshot directories
29+
prune_dirs="$(ls -1 "$GHE_DATA_DIR"/[0-9]*/incomplete 2>/dev/null || true)"
30+
prune_num=$(echo "$prune_dirs" | grep -v '^$' | wc -l)
2631

27-
if [ $prune_num -gt 0 ]; then
28-
log_info Pruning $prune_num "failed snapshot(s) ..."
29-
echo "$prune_dirs" | sed 's@/incomplete$@@' | prune_snapshot
30-
fi
32+
if [ $prune_num -gt 0 ]; then
33+
log_info Pruning $prune_num "failed snapshot(s) ..."
34+
echo "$prune_dirs" | sed 's@/incomplete$@@' | prune_snapshot
35+
fi
3136

32-
# Now prune all expired snapshots. Keep GHE_NUM_SNAPSHOTS around.
33-
snapshot_count=$(ls -1d "$GHE_DATA_DIR"/[0-9]* 2>/dev/null | wc -l)
37+
# Now prune all expired snapshots. Keep GHE_NUM_SNAPSHOTS around.
38+
snapshot_count=$(ls -1d "$GHE_DATA_DIR"/[0-9]* 2>/dev/null | wc -l)
3439

35-
if [ "$snapshot_count" -gt "$GHE_NUM_SNAPSHOTS" ]; then
36-
prune_dirs="$(ls -1d "$GHE_DATA_DIR"/[0-9]* | sort -r | awk "NR>$GHE_NUM_SNAPSHOTS")"
37-
prune_num=$(echo "$prune_dirs" | grep -v '^$' | wc -l)
38-
log_info Pruning $prune_num "expired snapshot(s) ..."
39-
echo "$prune_dirs" | prune_snapshot
40+
if [ "$snapshot_count" -gt "$GHE_NUM_SNAPSHOTS" ]; then
41+
prune_dirs="$(ls -1d "$GHE_DATA_DIR"/[0-9]* | sort -r | awk "NR>$GHE_NUM_SNAPSHOTS")"
42+
prune_num=$(echo "$prune_dirs" | grep -v '^$' | wc -l)
43+
log_info Pruning $prune_num "expired snapshot(s) ..."
44+
echo "$prune_dirs" | prune_snapshot
45+
fi
46+
elif [ "$CALLING_SCRIPT" != "ghe-backup" ] && [ -n "$inprogress_file" ]; then
47+
log_info "Detected a running backup/restore process, please wait until that process is complete to prune expired/incomplete snapshots." 1>&2
48+
# Escape the inner quotes so the path stays inside one quoted string (no word splitting or globbing) and the quotes appear in the message.
log_info "If no such process is running, please remove the \"$GHE_DATA_DIR/in-progress*\" file and retry again." 1>&2
4049
fi

test/test-ghe-prune-snapshots.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,23 @@ begin_test "ghe-prune-snapshots incomplete snapshot pruning"
8989
[ ! -d "$GHE_DATA_DIR/04" ]
9090
)
9191
end_test
92+
93+
begin_test "ghe-prune-snapshots scheduled snapshot pruning"
94+
(
95+
set -e
96+
# Create the backup data dir and fake remote repositories dirs
97+
mkdir -p "$GHE_DATA_DIR" "$GHE_REMOTE_DATA_USER_DIR"
98+
99+
setup_test_data $GHE_REMOTE_DATA_USER_DIR
100+
101+
generate_prune_files 5
102+
103+
pre_num_files=$(file_count_no_current)
104+
105+
GHE_NUM_SNAPSHOTS=3 GHE_PRUNING_SCHEDULED=yes ghe-backup
106+
107+
post_num_files=$(file_count_no_current)
108+
109+
[ "$((pre_num_files + 1))" = "$post_num_files" ]
110+
)
111+
end_test

0 commit comments

Comments
 (0)