Skip to content

Commit bc8a0e0

Browse files
authored
Merge pull request #446 from github/pluehne/avoid-redundant-rsync-operations
Reduce time to restore repository data by avoiding unnecessary rsync operations
2 parents 6a9c877 + 1bb2d0e commit bc8a0e0

File tree

1 file changed

+53
-35
lines changed

1 file changed

+53
-35
lines changed

share/github-backup-utils/ghe-restore-repositories

Lines changed: 53 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -104,42 +104,60 @@ done > $tmp_list
104 104
IFS=$OLDIFS
105 105
bm_end "$(basename $0) - Building network list"
106 106

107-
# The server returns a list of routes:
108-
#
109-
# a/nw/a8/3f/02/100000855 dgit-node1 dgit-node2 dgit-node3
110-
# a/nw/a8/bc/8d/100000880 dgit-node1 dgit-node2 dgit-node4
111-
# a/nw/a5/06/81/100000659 dgit-node3 dgit-node2 dgit-node4
112-
# ...
113-
#
114-
# One route per line.
115-
#
116-
# NOTE: The route generation is performed on the appliance as it is considerably
117-
# more performant than performing over an SSH pipe.
118-
#
119-
bm_start "$(basename $0) - Transferring network list"
120-
cat $tmp_list | ghe-ssh "$GHE_HOSTNAME" -- sponge $remote_tmp_list
121-
cat $tmp_list | ghe_debug
122-
bm_end "$(basename $0) - Transferring network list"
123-
124-
bm_start "$(basename $0) - Generating routes"
125-
restore_routes_script="github-env ./bin/dgit-cluster-restore-routes"
126-
if ghe-ssh "$GHE_HOSTNAME" test -e /usr/local/share/enterprise/ghe-restore-network-routes; then
127-
restore_routes_script="/usr/local/share/enterprise/ghe-restore-network-routes"
107+
# In cluster environments, we need to ensure that all repository networks are replicated back to the
108+
# same Spokes nodes that they were present on when the backup was taken. For this, the list of
109+
# routes of each repository network is first obtained. Afterward, an rsync file list is created for
110+
# each Spokes node including only those repository networks for which there was a route to the
111+
# respective Spokes node.
112+
if $CLUSTER; then
113+
log_info "* Restoring repository networks to cluster nodes according to Spokes routes" 1>&3
114+
115+
# The server returns a list of routes:
116+
#
117+
# a/nw/a8/3f/02/100000855 dgit-node1 dgit-node2 dgit-node3
118+
# a/nw/a8/bc/8d/100000880 dgit-node1 dgit-node2 dgit-node4
119+
# a/nw/a5/06/81/100000659 dgit-node3 dgit-node2 dgit-node4
120+
# ...
121+
#
122+
# One route per line.
123+
#
124+
# NOTE: The route generation is performed on the appliance as it is considerably
125+
# more performant than performing over an SSH pipe.
126+
#
127+
bm_start "$(basename $0) - Transferring network list"
128+
cat $tmp_list | ghe-ssh "$GHE_HOSTNAME" -- sponge $remote_tmp_list
129+
cat $tmp_list | ghe_debug
130+
bm_end "$(basename $0) - Transferring network list"
131+
132+
bm_start "$(basename $0) - Generating routes"
133+
restore_routes_script="github-env ./bin/dgit-cluster-restore-routes"
134+
if ghe-ssh "$GHE_HOSTNAME" test -e /usr/local/share/enterprise/ghe-restore-network-routes; then
135+
restore_routes_script="/usr/local/share/enterprise/ghe-restore-network-routes"
136+
fi
137+
echo "cat $remote_tmp_list | $restore_routes_script | grep 'git-server-' > $remote_routes_list" | ghe-ssh "$GHE_HOSTNAME" -- /bin/bash
138+
ghe-ssh "$GHE_HOSTNAME" -- cat $remote_routes_list | ghe_debug
139+
bm_end "$(basename $0) - Generating routes"
140+
141+
bm_start "$(basename $0) - Fetching routes"
142+
ghe-ssh "$GHE_HOSTNAME" -- gzip -c $remote_routes_list | gzip -d > $routes_list
143+
cat $routes_list | ghe_debug
144+
bm_end "$(basename $0) - Fetching routes"
145+
146+
bm_start "$(basename $0) - Processing routes"
147+
148+
cat $routes_list | awk -v tempdir="$tempdir" '{ for(i=2;i<=NF;i++){ print $1 > (tempdir"/"$i".rsync") }}'
149+
cat $routes_list | awk '{ n = split($1, p, "/"); printf p[n] " /data/repositories/" $1; $1=""; print $0}' > $to_restore
150+
ghe_debug "\n$(find "$tempdir" -maxdepth 1 -name '*.rsync')"
151+
bm_end "$(basename $0) - Processing routes"
152+
# There is no need to collect routes and split them by Spokes server in noncluster setups because
153+
# we need to transfer all repository networks to the primary instance unconditionally, regardless of
154+
# the Spokes route list captured during the backup. As we already have the list of all repository
155+
# network paths, we can simply use that as the rsync file list in noncluster environments.
156+
else
157+
log_info "* Restoring all repository networks to target host unconditionally" 1>&3
158+
159+
cp "$tmp_list" "$tempdir/git-server-primary.rsync"
128 160
fi
129-
echo "cat $remote_tmp_list | $restore_routes_script | grep 'git-server-' > $remote_routes_list" | ghe-ssh "$GHE_HOSTNAME" -- /bin/bash
130-
ghe-ssh "$GHE_HOSTNAME" -- cat $remote_routes_list | ghe_debug
131-
bm_end "$(basename $0) - Generating routes"
132-
133-
bm_start "$(basename $0) - Fetching routes"
134-
ghe-ssh "$GHE_HOSTNAME" -- gzip -c $remote_routes_list | gzip -d > $routes_list
135-
cat $routes_list | ghe_debug
136-
bm_end "$(basename $0) - Fetching routes"
137-
138-
bm_start "$(basename $0) - Processing routes"
139-
cat $routes_list | awk -v tempdir="$tempdir" '{ for(i=2;i<=NF;i++){ print $1 > (tempdir"/"$i".rsync") }}'
140-
cat $routes_list | awk '{ n = split($1, p, "/"); printf p[n] " /data/repositories/" $1; $1=""; print $0}' > $to_restore
141-
ghe_debug "\n$(find "$tempdir" -maxdepth 1 -name '*.rsync')"
142-
bm_end "$(basename $0) - Processing routes"
143 161

144 162
if [ -z "$(find "$tempdir" -maxdepth 1 -name '*.rsync')" ]; then
145 163
log_warn "Warning: no routes found, skipping repositories restore ..."

0 commit comments

Comments
 (0)