Skip to content

Commit ea61b4e

Browse files
Adding a script to speed up cluster backups on enterprise
Only affects GHE cluster instances; there is no change for single-appliance GHE. GHE instances older than 2.6 are not impacted. You will need to upgrade to GHE 2.6.8 or 2.7.3 for this enhancement to be available. It benefits cluster setups with 3 or more nodes.

Some basic benchmarks. This test was run with roughly 1.2k repos of a total size of 14G.

Legacy script:
```
real 4m2.751s
user 1m11.998s
sys 0m44.079s
Repo sync with no pre-calculated routes took 241s
```

New script backing up the repositories:
```
real 2m3.509s
user 0m57.403s
sys 0m31.405s
* Transferring 297 repositories from ghe-test-dgit-fs1
* Transferring 304 repositories from ghe-test-dgit-fs2
* Transferring 268 repositories from ghe-test-dgit-fs3
* Transferring 280 repositories from ghe-test-dgit-fs4
Calculating sync routes took 4s
Repo sync with calculated routes took 118s
```
1 parent 09cb986 commit ea61b4e

File tree

3 files changed

+385
-22
lines changed

3 files changed

+385
-22
lines changed

bin/ghe-backup

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,18 @@ ghe-backup-es-hookshot ||
166166
failures="$failures hookshot"
167167

168168
echo "Backing up Git repositories ..."
169-
ghe-backup-repositories-${GHE_BACKUP_STRATEGY} ||
170-
failures="$failures repositories"
169+
if [ "$GHE_BACKUP_STRATEGY" = "cluster" ]; then
170+
if ghe-ssh "$GHE_HOSTNAME" test -f /data/github/current/bin/dgit-cluster-backup-routes ; then
171+
echo "* Using calculated routes method..."
172+
# Run the calculated-routes repository backup. On failure, append to the
# accumulated $failures list so that failures recorded by earlier backup steps
# are preserved rather than overwritten.
ghe-backup-repositories-cluster-ng || failures="$failures repositories"
173+
else
174+
echo "* Using legacy method. A faster backup method is available on enterprise 2.7 and up."
175+
ghe-backup-repositories-cluster || failures="$failures repositories"
176+
fi
177+
else
178+
ghe-backup-repositories-${GHE_BACKUP_STRATEGY} ||
179+
failures="$failures repositories"
180+
fi
171181

172182
echo "Backing up GitHub Pages ..."
173183
ghe-backup-pages-${GHE_BACKUP_STRATEGY} ||

share/github-backup-utils/ghe-backup-repositories-cluster

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ backup_current="$GHE_DATA_DIR/current/repositories"
5151

5252
# Verify rsync is available.
5353
if ! rsync --version 1>/dev/null 2>&1; then
54-
echo "Error: rsync not found." 1>&2
55-
exit 1
54+
echo "Error: rsync not found." 1>&2
55+
exit 1
5656
fi
5757

5858
# Perform a host-check and establish GHE_REMOTE_XXX variables.
@@ -108,28 +108,27 @@ done
108108
# If we have a previous increment, avoid transferring existing files via rsync's
109109
# --link-dest support. This also decreases physical space usage considerably.
110110
if [ -d "$backup_current" ]; then
111-
link_dest="--link-dest=../../current/repositories"
111+
link_dest="--link-dest=../../current/repositories"
112112
fi
113113

114114
# Transfer repository data from a GitHub instance to the current snapshot
115115
# directory, using a previous snapshot to avoid transferring files that have
116116
# already been transferred. A set of rsync filter rules are provided on stdin
117117
# for each invocation.
118118
rsync_repository_data () {
119-
port=$(ssh_port_part "$1")
120-
host=$(ssh_host_part "$1")
121-
122-
shift
123-
ghe-rsync -av \
124-
-e "ssh -q $opts -p $port -F $config_file -l $user" \
125-
$link_dest "$@" \
126-
--rsync-path='sudo -u git rsync' \
127-
--include-from=- --exclude=\* \
128-
"$host:$GHE_REMOTE_DATA_USER_DIR/repositories/" \
129-
"$backup_dir" 1>&3
119+
port=$(ssh_port_part "$1")
120+
host=$(ssh_host_part "$1")
121+
122+
shift
123+
ghe-rsync -av \
124+
-e "ssh -q $opts -p $port -F $config_file -l $user" \
125+
$link_dest "$@" \
126+
--rsync-path='sudo -u git rsync' \
127+
--include-from=- --exclude=\* \
128+
"$host:$GHE_REMOTE_DATA_USER_DIR/repositories/" \
129+
"$backup_dir" 1>&3
130130
}
131131

132-
133132
for hostname in $hostnames; do
134133
bm_start "$(basename $0) - $hostname"
135134
echo 1>&3
@@ -199,11 +198,11 @@ RULES
199198
+ /*/nw/??/??/??/*/*.git/packed-refs
200199
RULES
201200

202-
# Sync loose refs and reflogs. This must be performed before object data is
203-
# transferred to ensure that all referenced objects are included.
204-
echo 1>&3
205-
echo "* Transferring refs and reflogs ..." 1>&3
206-
rsync_repository_data $hostname:122 -z <<RULES
201+
# Sync loose refs and reflogs. This must be performed before object data is
202+
# transferred to ensure that all referenced objects are included.
203+
echo 1>&3
204+
echo "* Transferring refs and reflogs ..." 1>&3
205+
rsync_repository_data $hostname:122 -z <<RULES
207206
- /__*__/
208207
- /info/
209208

0 commit comments

Comments
 (0)