Skip to content

Commit 27aa9b5

Browse files
authored
Merge pull request #736 from github/enterprise-3.11-backport-725-ha-sync
Backport 725 for 3.11: Create sync file on HA replica from primary node [cluster]
2 parents a9e2a1d + 2b99d9f commit 27aa9b5

File tree

4 files changed

+72
-26
lines changed

4 files changed

+72
-26
lines changed

share/github-backup-utils/ghe-backup-config

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,3 +676,33 @@ increment-progress-total-count() {
676676
((PROGRESS_TOTAL += $1))
677677
echo "$PROGRESS_TOTAL" > /tmp/backup-utils-progress/total
678678
}
679+
680+
##
# Disable GC operations on the host this function runs on, then wait for any
# running git GC processes to finish.
#
# This function is used by ghe-gc-disable, ghe-backup-repositories, and
# ghe-backup-storage. Call it directly ONLY to disable and drain GC operations
# on an HA replica node (as done in ghe-backup-repositories and
# ghe-backup-storage). Otherwise use ghe-gc-disable, which calls this function
# with the correct parameters.
#
# Callers ship the output of `declare -f gc_disable` to the remote host as a
# script, in some cases wrapped in single quotes. The body must therefore stay
# self-contained (no helper functions) and must not contain single quotes.
# Because it runs as the whole remote script it uses `exit`, not `return`,
# and `set -e` applies to the shell that invokes it.
#
# Arguments:
#   $1 - path to sync-in-progress file ($SYNC_IN_PROGRESS_FILE)
#   $2 - git cooldown period in seconds ($GHE_GIT_COOLDOWN_PERIOD)
#
# Exit status:
#   0 - no GC processes are running
#   1 - the cooldown period is not a non-negative integer
#   7 - GC processes are still running after the cooldown period
#   other non-zero - creating the sync-in-progress file failed (via set -e)
##
gc_disable() {
  set -e
  local sync_in_progress="$1"
  local git_cooldown_period="$2"
  local waited=0

  # Touch the sync-in-progress file first, disabling new GC operations, so the
  # file exists even when the wait below ends in an error.
  sudo -u git touch "$sync_in_progress"

  # Reject an empty or non-numeric cooldown explicitly; otherwise the wait
  # loop below could not tell when to give up.
  case "$git_cooldown_period" in
    "" | *[!0-9]*)
      echo "gc_disable: invalid git cooldown period: $git_cooldown_period" 1>&2
      exit 1
      ;;
  esac

  # Wait for all active GC processes to finish. The check runs before every
  # sleep and once more after the last one, so a process that finishes during
  # the final second is not reported as still running.
  # note: the bracket synta[x] below is to prevent matches against the
  # grep process itself. Output goes to /dev/null rather than using grep -q
  # so grep reads all of the ps output before exiting.
  while ps axo args | grep -E -e "^git( -.*)? nw-repac[k]( |$)" -e "^git( -.*)? g[c]( |$)" >/dev/null; do
    if [ "$waited" -ge "$git_cooldown_period" ]; then
      exit 7
    fi
    sleep 1
    waited=$((waited + 1))
  done
  exit 0
}

share/github-backup-utils/ghe-backup-repositories

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,9 @@ if [ "$GHE_BACKUP_STRATEGY" = "cluster" ]; then
7878
ghe-ssh-config "$GHE_HOSTNAME" "$hostnames" > "$ssh_config_file"
7979
fi
8080

81-
# hostnames for HA
82-
if ghe-ssh "$GHE_HOSTNAME" -- \
83-
"[ -f '$GHE_REMOTE_ROOT_DIR/etc/github/cluster' ] && [ -f '$GHE_REMOTE_ROOT_DIR/etc/github/repl-state' ]"; then
84-
hostnames=$(ghe-ssh "$GHE_HOSTNAME" ghe-cluster-nodes -i | cut -f 2)
81+
# Replica hostnames for HA
82+
if ghe-ssh "$GHE_HOSTNAME" -- "[ -f '$GHE_REMOTE_ROOT_DIR/etc/github/repl-state' ]"; then
83+
ha_replica_hosts=$(ghe-ssh "$GHE_HOSTNAME" ghe-cluster-nodes --replica)
8584
fi
8685

8786
# Make sure root backup dir exists if this is the first run
@@ -96,11 +95,18 @@ cleanup() {
9695

9796
# Enable remote GC operations
9897
for hostname in $hostnames; do
99-
ghe-gc-enable $ssh_config_file_opt $hostname:$port || {
98+
ghe-gc-enable $ssh_config_file_opt $hostname:$port || {
10099
echo "Re-enable gc on $hostname failed, please manually delete $SYNC_IN_PROGRESS_FILE" 1>&2
101100
}
102101
done
103102

103+
# Enable remote GC operations for HA replica
104+
for replica_host in $ha_replica_hosts; do
105+
echo "set -o pipefail; ssh $replica_host -- 'sudo rm -f $SYNC_IN_PROGRESS_FILE'" | ghe-ssh "$host" /bin/bash || {
106+
echo "Re-enable gc on $replica_host failed, please manually delete $SYNC_IN_PROGRESS_FILE" 1>&2
107+
}
108+
done
109+
104110
ghe-ssh "$GHE_HOSTNAME" -- rm -rf $remote_tempdir
105111
rm -rf $tempdir
106112
}
@@ -111,6 +117,15 @@ for hostname in $hostnames; do
111117
ghe-gc-disable $ssh_config_file_opt $hostname:$port
112118
done
113119

120+
# Disable remote GC operations for HA replica
121+
# gc_disable is a function defined in ghe-backup-config
122+
# gc_disable is called on the replica node via the primary node because the replica node is not expected to be reachable from the backup host, whereas it is expected to be reachable from the primary node.
123+
for replica_host in $ha_replica_hosts; do
124+
echo "set -o pipefail; ssh $replica_host -- '$(declare -f gc_disable); gc_disable \"$SYNC_IN_PROGRESS_FILE\" \"$GHE_GIT_COOLDOWN_PERIOD\"'" | ghe-ssh "$host" /bin/bash || {
125+
echo "Disable gc on $replica_host failed" 1>&2
126+
}
127+
done
128+
114129
# If we have a previous increment, avoid transferring existing files via rsync's
115130
# --link-dest support. This also decreases physical space usage considerably.
116131
if [ -d "$backup_current" ]; then

share/github-backup-utils/ghe-backup-storage

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,9 @@ if [ "$GHE_BACKUP_STRATEGY" = "cluster" ]; then
5151
ghe-ssh-config "$GHE_HOSTNAME" "$hostnames" > "$ssh_config_file"
5252
fi
5353

54-
# hostnames for HA
55-
if ghe-ssh "$GHE_HOSTNAME" -- \
56-
"[ -f '$GHE_REMOTE_ROOT_DIR/etc/github/cluster' ] && [ -f '$GHE_REMOTE_ROOT_DIR/etc/github/repl-state' ]"; then
57-
hostnames=$(ghe-ssh "$GHE_HOSTNAME" ghe-cluster-nodes -i | cut -f 2)
54+
# Replica hostnames for HA
55+
if ghe-ssh "$GHE_HOSTNAME" -- "[ -f '$GHE_REMOTE_ROOT_DIR/etc/github/repl-state' ]"; then
56+
ha_replica_hosts=$(ghe-ssh "$GHE_HOSTNAME" ghe-cluster-nodes --replica)
5857
fi
5958

6059
# Make sure root backup dir exists if this is the first run
@@ -65,11 +64,18 @@ mkdir -p "$backup_dir"
6564
cleanup() {
6665
# Enable remote maintenance operations
6766
for hostname in $hostnames; do
68-
ghe-gc-enable $ssh_config_file_opt $hostname:$port || {
67+
ghe-gc-enable $ssh_config_file_opt $hostname:$port || {
6968
log_warn "Re-enable gc on $hostname failed, please manually delete $SYNC_IN_PROGRESS_FILE" 1>&2
7069
}
7170
done
7271

72+
# Enable remote GC operations for HA replica
73+
for replica_host in $ha_replica_hosts; do
74+
echo "set -o pipefail; ssh $replica_host -- 'sudo rm -f $SYNC_IN_PROGRESS_FILE'" | ghe-ssh "$host" /bin/bash || {
75+
echo "Re-enable gc on $replica_host failed, please manually delete $SYNC_IN_PROGRESS_FILE" 1>&2
76+
}
77+
done
78+
7379
ghe-ssh "$GHE_HOSTNAME" -- rm -rf $remote_tempdir
7480
rm -rf $tempdir
7581
}
@@ -80,6 +86,15 @@ for hostname in $hostnames; do
8086
ghe-gc-disable $ssh_config_file_opt $hostname:$port
8187
done
8288

89+
# Disable remote GC operations for HA replica
90+
# gc_disable is a function defined in ghe-backup-config
91+
# gc_disable is called on the replica node via the primary node because the replica node is not expected to be reachable from the backup host, whereas it is expected to be reachable from the primary node.
92+
for replica_host in $ha_replica_hosts; do
93+
echo "set -o pipefail; ssh $replica_host -- '$(declare -f gc_disable); gc_disable \"$SYNC_IN_PROGRESS_FILE\" \"$GHE_GIT_COOLDOWN_PERIOD\"'" | ghe-ssh "$host" /bin/bash || {
94+
echo "Disable gc on $replica_host failed" 1>&2
95+
}
96+
done
97+
8398
# If we have a previous increment and it is not empty, avoid transferring existing files via rsync's
8499
# --link-dest support. This also decreases physical space usage considerably.
85100
if [ -d "$GHE_DATA_DIR/current/storage" ] && [ "$(ls -A $GHE_DATA_DIR/current/storage)" ]; then

share/github-backup-utils/ghe-gc-disable

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,22 +32,8 @@ done
3232
# Exit early when testing
3333
[ -n "$GHE_TEST_REMOTE_VERSION" ] && exit 0
3434

35-
# Touch the sync-in-progress file, disabling GC operations, and wait for all
36-
# active GC processes to finish on the remote side.
37-
echo "
38-
set -e
39-
sudo -u git touch '$SYNC_IN_PROGRESS_FILE'
40-
for i in \$(seq $GHE_GIT_COOLDOWN_PERIOD); do
41-
# note: the bracket synta[x] below is to prevent matches against the
42-
# grep process itself.
43-
if ps axo args | grep -E -e '^git( -.*)? nw-repac[k]( |$)' -e '^git( -.*)? g[c]( |$)' >/dev/null; then
44-
sleep 1
45-
else
46-
exit 0
47-
fi
48-
done
49-
exit 7
50-
" | ghe-ssh $opts "$host" -- /bin/bash || {
35+
# gc_disable is a function defined in ghe-backup-config
36+
echo "set -o pipefail; $(declare -f gc_disable); gc_disable \"$SYNC_IN_PROGRESS_FILE\" \"$GHE_GIT_COOLDOWN_PERIOD\"" | ghe-ssh $opts "$host" -- /bin/bash || {
5137
res=$?
5238
if [ $res = 7 ]; then
5339
log_error "Error: Git GC processes remain after $GHE_GIT_COOLDOWN_PERIOD seconds. Aborting..." 1>&2

0 commit comments

Comments
 (0)