Skip to content

Commit eb5d42c

Browse files
authored
Merge pull request #369 from github/lildude/optimise-cluster-pages-restore
Optimise route generation and finalisation during cluster restores of pages
2 parents 988b935 + 2095b4b commit eb5d42c

File tree

2 files changed

+135
-1
lines changed

2 files changed

+135
-1
lines changed

bin/ghe-restore

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,12 @@ fi
332332

333333
if $cluster; then
334334
echo "Restoring GitHub Pages into DPages..."
335-
ghe-restore-pages-dpages "$GHE_HOSTNAME" 1>&3
335+
if ghe-ssh "$GHE_HOSTNAME" test -f /data/github/current/script/dpages-cluster-restore-routes; then
336+
ghe_verbose "* Using ghe-restore-pages-dpages-ng to restore"
337+
ghe-restore-pages-dpages-ng "$GHE_HOSTNAME" 1>&3
338+
else
339+
ghe-restore-pages-dpages "$GHE_HOSTNAME" 1>&3
340+
fi
336341
else
337342
echo "Restoring GitHub Pages ..."
338343
ghe-restore-pages-${GHE_BACKUP_STRATEGY} "$GHE_HOSTNAME" 1>&3
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#!/usr/bin/env bash
2+
#/ Usage: ghe-restore-pages-dpages-ng <host>
3+
#/ Restore GitHub Pages data from an rsync snapshot to a GitHub cluster.
4+
#/
5+
#/ Note: This script typically isn't called directly. It's invoked by the
6+
#/ ghe-restore command when restoring into a cluster.
7+
set -e

# Bring in the backup configuration. The path is quoted so the script keeps
# working when it lives under a directory whose name contains whitespace.
. "$( dirname "${BASH_SOURCE[0]}" )/ghe-backup-config"

# Show usage and bail with no arguments
[ -z "$*" ] && print_usage

bm_start "$(basename "$0")"

# Grab host arg
GHE_HOSTNAME="$1"

# The snapshot to restore should be set by the ghe-restore command but this lets
# us run this script directly.
: "${GHE_RESTORE_SNAPSHOT:=current}"
23+
24+
# Find the pages to restore
25+
# Print every entry found exactly five levels below "pages" inside the
# snapshot directory given as $1, relative to "pages" (for example
# 5/d3/d9/44/10), one per line. The body runs in a subshell so the caller's
# working directory is left alone; a directory that cannot be entered makes
# the function fail.
list_pages_paths() (
  cd "$1" && find pages -mindepth 5 -maxdepth 5 | cut -d / -f2-
)

pages_paths=$(list_pages_paths "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/")
26+
27+
# Nothing to restore when the snapshot holds no pages: say so and stop
# successfully.
[ -n "$pages_paths" ] || {
  echo "Warning: Pages backup missing. Skipping ..."
  exit 0
}
32+
33+
# Perform a host-check and establish GHE_REMOTE_XXX variables.
ghe_remote_version_required "$GHE_HOSTNAME"

# Break the host:port argument into its two components.
host=$(ssh_host_part "$GHE_HOSTNAME")
port=$(ssh_port_part "$GHE_HOSTNAME")

# Login name for the -l option: whatever precedes the last "@" in the host
# part, or "admin" when the host part names no user.
case "$host" in
  *@*) user="${host%@*}" ;;
  *)   user="admin" ;;
esac
43+
44+
hostnames=$(ghe-cluster-nodes "$GHE_HOSTNAME" "pages-server")

# Scratch locations, filled in below. They start out empty so that cleanup can
# tell which of them were actually created.
tempdir=
ssh_config_file=

# Remove the scratch data this script creates: the local temp directory, the
# generated ssh config file, and the directory of the same name created on the
# remote host. Runs from the EXIT trap, so it always reports success.
cleanup() {
  if [ -n "$tempdir" ]; then
    rm -rf -- "$tempdir"
  fi
  if [ -n "$ssh_config_file" ]; then
    rm -f -- "$ssh_config_file"
  fi
  if [ -n "$tempdir" ]; then
    # Best effort: a failed remote cleanup must not change the exit status.
    ghe-ssh "$GHE_HOSTNAME" -- rm -rf "$tempdir" || true
  fi
  true
}

# Install the handler before anything is created so that a failure part-way
# through the setup below still cleans up what already exists.
trap 'cleanup' EXIT

tempdir=$(mktemp -d -t backup-utils-restore-XXXXXX)
ghe-ssh "$GHE_HOSTNAME" -- mkdir -p "$tempdir"
ssh_config_file=$(mktemp -t cluster-backup-restore-XXXXXX)
ghe-ssh-config "$GHE_HOSTNAME" "$hostnames" > "$ssh_config_file"
opts="$GHE_EXTRA_SSH_OPTS -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no"
tmp_list=$tempdir/tmp_list
routes_list=$tempdir/routes_list
61+
62+
# Build a list of pages paths to send to the server to calculate
# the restore routes, something like:
#
#   5/d3/d9/44/10
#   0/02/e7/4f/27
#   4/c1/6a/53/31
#   3/34/17/3c/30
#   6/6e/a9/ab/29
#   ...
#
# One pages path per line.
bm_start "$(basename "$0") - Building pages list"
# Split $pages_paths on newlines only, with pathname expansion switched off so
# that a path can never be rewritten by globbing on its way into the list.
OLDIFS=$IFS; IFS=$'\n'
set -f
for path in $pages_paths; do
  ghe_verbose "Adding path $path to the list of pages to send"
  # printf writes the path verbatim, whatever characters it starts with.
  printf '%s\n' "$path"
done > "$tmp_list"
set +f
IFS=$OLDIFS
bm_end "$(basename "$0") - Building pages list"
81+
82+
bm_start "$(basename "$0") - Transferring pages list"
# Stream the local list to sponge on the remote host, which stores it under the
# same path there.
ghe-ssh "$GHE_HOSTNAME" -- sponge "$tmp_list" < "$tmp_list"
bm_end "$(basename "$0") - Transferring pages list"

# The server returns a list of routes:
#
#   5/d3/d9/44/10 pages-server-1 pages-server-2 pages-server-3
#   0/02/e7/4f/27 pages-server-1 pages-server-3 pages-server-4
#   4/c1/6a/53/31 pages-server-2 pages-server-3 pages-server-4
#   3/34/17/3c/30 pages-server-4 pages-server-2 pages-server-1
#   6/6e/a9/ab/29 pages-server-3 pages-server-2 pages-server-1
#   ...
#
# One route per line.
bm_start "$(basename "$0") - Generating routes"
# The command line is handed to a remote bash on stdin, so both the pipe and
# the redirection into the routes file take place on the remote host.
echo "cat $tmp_list | github-env ./bin/dpages-cluster-restore-routes > $routes_list" | ghe-ssh "$GHE_HOSTNAME" -- /bin/bash
bm_end "$(basename "$0") - Generating routes"

bm_start "$(basename "$0") - Transferring routes"
# Copy the remote routes file to the same path on this machine.
ghe-ssh "$GHE_HOSTNAME" -- cat "$routes_list" > "$routes_list"
bm_end "$(basename "$0") - Transferring routes"
103+
104+
# Split a routes file into one rsync file list per server.
#   $1 - routes file; each line is "<pages path> <server> [<server> ...]"
#   $2 - directory that receives a <server>.rsync file for every server named
# Every <server>.rsync file lists, one per line, the pages paths routed to
# that server.
split_routes_by_server() {
  awk -v tempdir="$2" '{ for(i=2;i<=NF;i++){ print $1 > (tempdir"/"$i".rsync") }}' < "$1"
}

bm_start "$(basename "$0") - Processing routes"
split_routes_by_server "$routes_list" "$tempdir"
bm_end "$(basename "$0") - Processing routes"
107+
108+
bm_start "$(basename "$0") - Restoring pages"
# Each <server>.rsync file in the temp directory names the pages paths to send
# to one server; the file's base name is the server to rsync them to.
for file_list in "$tempdir"/*.rsync; do
  server=$(basename "$file_list" .rsync)
  ghe_verbose "* Transferring Pages to $server"
  ghe-rsync -avrHR --delete \
    -e "ssh -q $opts -p $port -F $ssh_config_file -l $user" \
    --rsync-path="sudo -u git rsync" \
    --files-from="$file_list" \
    "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/pages/./" \
    "$server:$GHE_REMOTE_DATA_USER_DIR/pages/" 1>&3
done
bm_end "$(basename "$0") - Restoring pages"
120+
121+
bm_start "$(basename "$0") - Finalizing routes"
# The here-document is run by a bash on the remote host. Plain $variables are
# expanded locally before it is sent; the \$-escaped ones are evaluated
# remotely. It cuts the routes file into 1000-line chunks and feeds each chunk
# to the finalize command through parallel.
ghe-ssh "$GHE_HOSTNAME" -- /bin/bash >&3 <<EOF
  split -l 1000 -d "$routes_list" "$tempdir/chunk"
  chunks=\$(find "$tempdir/" -name chunk\*)
  parallel -i /bin/sh -c "cat {} | github-env ./bin/dpages-cluster-restore-finalize" -- \$chunks
EOF
bm_end "$(basename "$0") - Finalizing routes"

bm_end "$(basename "$0")"

0 commit comments

Comments
 (0)