Skip to content

Commit 74f8ec8

Browse files
authored
Merge branch 'master' into taz/fix-restore-target
2 parents 6e6f873 + 3e01563 commit 74f8ec8

File tree

9 files changed

+705
-6
lines changed

9 files changed

+705
-6
lines changed

backup.config-example

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,10 @@ GHE_NUM_SNAPSHOTS=10
4848
#
4949
# WARNING: do not enable this, only useful for debugging/development
5050
#GHE_BACKUP_FSCK=no
51+
52+
# If set to 'no', Elasticsearch audit log indices will not be backed up.
53+
# Note that they will still be backed up from MySQL. This will reduce
54+
# the time and size of the backup process but it will take longer
55+
# for the audit log entries to be searchable as they need to be reindexed
56+
# in Elasticsearch.
57+
#GHE_BACKUP_ES_AUDIT_LOGS=no

bin/ghe-backup

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ echo "Backing up Redis database ..."
184184
ghe-backup-redis > redis.rdb || failures="$failures redis"
185185

186186
echo "Backing up audit log ..."
187-
ghe-backup-es-audit-log || failures="$failures audit-log"
187+
ghe-backup-audit-log || failures="$failures audit-log"
188188

189189
echo "Backing up hookshot logs ..."
190190
ghe-backup-es-hookshot || failures="$failures hookshot"

bin/ghe-restore

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,8 +301,8 @@ fi
301301
# Restore exported audit and hookshot logs to 2.12.9 and newer single nodes and
302302
# all releases of cluster
303303
if $CLUSTER || [ "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.12.9)" ]; then
304-
echo "Restoring Elasticsearch Audit logs ..."
305-
ghe-restore-es-audit-log "$GHE_HOSTNAME" 1>&3
304+
echo "Restoring Audit logs ..."
305+
ghe-restore-audit-log "$GHE_HOSTNAME" 1>&3
306306

307307
echo "Restoring hookshot logs ..."
308308
ghe-restore-es-hookshot "$GHE_HOSTNAME" 1>&3
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#!/usr/bin/env bash
2+
#/ Usage: ghe-backup-audit-log
3+
#/ Take a backup of audit logs.
4+
#/
5+
#/ Note: This command typically isn't called directly. It's invoked by
6+
#/ ghe-backup.
7+
set -e
8+
9+
base_path="$( dirname "${BASH_SOURCE[0]}" )"
10+
# Bring in the backup configuration
11+
# shellcheck source=share/github-backup-utils/ghe-backup-config
12+
. "${base_path}/ghe-backup-config"
13+
14+
# Prepare the environment for an audit log backup.
#
# Globals:
#   GHE_HOSTNAME      (read)    remote host to back up
#   GHE_SNAPSHOT_DIR  (read)    root of the snapshot being written
#   host              (written) remote host used by the helpers below
setup(){
  # Resolve the remote host first so the host-check below receives a real
  # value instead of an empty string.
  host="$GHE_HOSTNAME"

  # Perform a host-check and establish GHE_REMOTE_XXX variables.
  ghe_remote_version_required "$host"

  # Make sure root backup dir exists if this is the first run
  mkdir -p "$GHE_SNAPSHOT_DIR/audit-log"
}
25+
26+
# Tell whether audit logs should be backed up from MySQL.
#
# The decision is based on the presence of the audit-log-import directory
# on the remote host, which keeps the check backwards-compatible with old
# snapshots.
#
# Returns: the exit status of the remote `test -d`.
mysql_backup_enabled(){
  local import_dir="$GHE_REMOTE_DATA_USER_DIR/common/audit-log-import"

  ghe-ssh "$host" test -d "$import_dir"
}
32+
33+
# Tell whether the audit log import into MySQL has finished.
#
# The remote host signals completion with the marker
# <data user dir>/common/audit-log-import/complete.
#
# Returns: the exit status of the remote `test -e`.
is_import_complete(){
  local marker="$GHE_REMOTE_DATA_USER_DIR/common/audit-log-import/complete"

  ghe-ssh "$host" test -e "$marker"
}
38+
39+
# Tell whether the audit log import into MySQL has been disabled.
#
# The remote host signals this with the marker
# <data user dir>/common/audit-log-import/skip.
#
# Returns: the exit status of the remote `test -e`.
is_import_disabled(){
  local marker="$GHE_REMOTE_DATA_USER_DIR/common/audit-log-import/skip"

  ghe-ssh "$host" test -e "$marker"
}
44+
45+
# Tell whether the remote instance ships the audit log reconciler.
#
# Without the reconciler the audit_entries data cannot be dumped, only
# the table schema.
#
# Returns: the exit status of the remote `test -e`.
is_reconciler_available(){
  local reconciler="/usr/local/share/enterprise/ghe-auditlog-repair"

  ghe-ssh "$GHE_HOSTNAME" -- "test -e $reconciler"
}
50+
51+
# Check whether we only need to back up the audit_entries schema and
# ignore the actual data.
#
# This is the case when:
# - The import to MySQL is not complete
# - The import is disabled
# - The reconciler tool is not available
#
# Returns: 0 when the entries must be skipped (schema only), 1 otherwise.
# The status is always explicit so that it never depends on the exit
# status of the logging helper.
skip_mysql_entries(){
  if ! is_import_complete; then
    ghe_verbose "audit log import is not complete"
    return 0
  fi

  if is_import_disabled; then
    ghe_verbose "audit log import is disabled"
    return 0
  fi

  if ! is_reconciler_available; then
    ghe_verbose "audit log reconciler is not available"
    return 0
  fi

  return 1
}
76+
77+
# Back up audit logs from MySQL through ghe-backup-mysql-audit-log.
#
# When the entries can be dumped, a full dump is taken and the
# mysql-import-complete flag is added to the snapshot. Otherwise only
# the table schema is dumped.
backup_mysql(){
  local dumper="${base_path}/ghe-backup-mysql-audit-log"

  if ! skip_mysql_entries; then
    "$dumper"
    touch "$GHE_SNAPSHOT_DIR/audit-log/mysql-import-complete"
    return
  fi

  ghe_verbose "only backing up audit log table schema"
  "$dumper" --schema-only
}
89+
90+
# Decide whether the Elasticsearch audit log indices must be backed up.
#
# They are backed up when any of these holds:
#
# - Import is not complete
# - Import is disabled
# - Reconciler is not available
# - GHE_BACKUP_ES_AUDIT_LOGS is not set to 'no'
#
# Returns: 0 when the indices must be backed up, 1 otherwise.
es_backup_enabled(){
  if skip_mysql_entries; then
    return 0
  fi

  case "$GHE_BACKUP_ES_AUDIT_LOGS" in
    no) return 1 ;;
    *)  return 0 ;;
  esac
}
103+
104+
# Back up the Elasticsearch audit log indices by delegating to the
# ghe-backup-es-audit-log script that lives next to this one.
backup_es(){
  local es_script="${base_path}/ghe-backup-es-audit-log"

  "$es_script"
}
108+
109+
# Run the audit log backup: first the MySQL part, then the Elasticsearch
# part. Each part is only executed when its corresponding check says it
# is enabled; the outcome of each check is reported in verbose mode.
backup(){
  if ! mysql_backup_enabled; then
    ghe_verbose "MySQL audit logs backup is disabled"
  else
    ghe_verbose "MySQL audit logs backup is enabled"
    backup_mysql
  fi

  if ! es_backup_enabled; then
    ghe_verbose "Elasticsearch audit logs backup is disabled"
  else
    ghe_verbose "Elasticsearch audit logs backup is enabled"
    backup_es
  fi
}
124+
125+
# Entry point: run setup and backup, wrapped in benchmark markers named
# after this script.
main(){
  local script_name
  script_name="$(basename "$0")"

  bm_start "$script_name"
  setup
  backup
  bm_end "$script_name"
}
131+
132+
main

share/github-backup-utils/ghe-backup-es-rsync

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,17 @@ fi
2727
# Make sure root backup dir exists if this is the first run
2828
mkdir -p "$GHE_SNAPSHOT_DIR/elasticsearch"
2929

30+
# Create the rsync exclude file. It is a local temporary file that always
# lists elasticsearch.yml and is later passed to rsync via --exclude-from.
exclude_file="$(mktemp)"
echo elasticsearch.yml >"$exclude_file"

# Exclude audit log indices when configuration says so and import to MySQL is complete
# as those indices will be rebuilt from MySQL during a restore.
# The completion marker is looked up under GHE_REMOTE_DATA_USER_DIR, like the
# Elasticsearch data directory check that follows.
if [ "$GHE_BACKUP_ES_AUDIT_LOGS" = "no" ] && ghe-ssh "$host" test -e "$GHE_REMOTE_DATA_USER_DIR/common/audit-log-import/complete"; then
  ghe_verbose "* Excluding Audit Log indices"
  # The index UUIDs reported by the remote host are appended as exclude patterns.
  ghe-ssh "$host" curl -s 'http://localhost:9201/_cat/indices/audit_log?h=uuid' >>"$exclude_file" 2>&3
fi
40+
3041
# Verify that the /data/elasticsearch directory exists.
3142
if ! ghe-ssh "$host" -- "[ -d '$GHE_REMOTE_DATA_USER_DIR/elasticsearch' ]"; then
3243
ghe_verbose "* The '$GHE_REMOTE_DATA_USER_DIR/elasticsearch' directory doesn't exist."
@@ -47,15 +58,16 @@ ghe-rsync -avz \
4758
-e "ghe-ssh -p $(ssh_port_part "$host")" \
4859
--rsync-path="sudo -u elasticsearch rsync" \
4960
$link_dest \
50-
--exclude='elasticsearch.yml' \
61+
--exclude-from="$exclude_file" \
5162
"$(ssh_host_part "$host"):$GHE_REMOTE_DATA_USER_DIR/elasticsearch/" \
5263
"$GHE_SNAPSHOT_DIR/elasticsearch" 1>&3
5364

54-
# Set up a trap to re-enable flushing on exit
65+
# Set up a trap to re-enable flushing on exit and remove temp file
5566
# Exit handler: remove the temporary exclude file and re-enable
# Elasticsearch index flushing on the remote host.
#
# Globals:
#   exclude_file (read) local temp file created with mktemp
#   host         (read) remote host
cleanup () {
  # The exclude file lives on this machine (it was created by a local
  # mktemp), so it must be removed locally, not through ghe-ssh. Do it first
  # so it is cleaned up even if the remote call below fails.
  if [ -n "${exclude_file:-}" ]; then
    rm -f -- "$exclude_file"
  fi

  ghe_verbose "* Enabling ES index flushing ..."
  echo '{"index":{"translog.disable_flush":false}}' |
  ghe-ssh "$host" -- curl -s -XPUT "localhost:9200/_settings" -d @- >/dev/null
}
6072
trap 'cleanup' EXIT
6173
trap 'exit $?' INT # ^C always terminate
@@ -72,7 +84,7 @@ ghe-rsync -avz \
7284
-e "ghe-ssh -p $(ssh_port_part "$host")" \
7385
--rsync-path="sudo -u elasticsearch rsync" \
7486
$link_dest \
75-
--exclude='elasticsearch.yml' \
87+
--exclude-from="$exclude_file" \
7688
"$(ssh_host_part "$host"):$GHE_REMOTE_DATA_USER_DIR/elasticsearch/" \
7789
"$GHE_SNAPSHOT_DIR/elasticsearch" 1>&3
7890

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
#!/usr/bin/env bash
2+
#/ Usage: ghe-backup-mysql-audit-log
3+
#/ Take a backup of audit logs in MySQL.
4+
#/
5+
#/ Args:
6+
#/ --schema-only (optional: only dump the table schema)
7+
#/
8+
#/ Note: This command typically isn't called directly. It's invoked by
9+
#/ ghe-backup-audit-log.
10+
set -e
11+
12+
# Bring in the backup configuration
13+
# shellcheck source=share/github-backup-utils/ghe-backup-config
14+
. "$( dirname "${BASH_SOURCE[0]}" )/ghe-backup-config"
15+
16+
# Whether we just need to dump the table schema and no data
17+
only_schema="$1"
18+
19+
# Setup GHE_REMOTE_XXX variables and other global variables.
#
# Globals:
#   GHE_HOSTNAME, GHE_SNAPSHOT_DIR, GHE_DATA_DIR (read)
#   host, snapshot_dir, current_dir, force_full_backup (written)
setup(){
  # Resolve the remote host first so the host-check below receives a real
  # value instead of an empty string.
  host="$GHE_HOSTNAME"

  # Perform a host-check and establish GHE_REMOTE_XXX variables.
  ghe_remote_version_required "$host"

  # Where the new MySQL dumps go
  snapshot_dir="$GHE_SNAPSHOT_DIR/audit-log-mysql"

  # Where the current MySQL dumps live
  current_dir="$GHE_DATA_DIR/current/audit-log-mysql"

  # Whether we need a full backup and not incremental
  force_full_backup=false

  # Make sure root backup dir exists if this is the first run
  mkdir -p "$snapshot_dir"
}
39+
40+
# Use ghe-export-audit-logs to fetch the current metadata for all stored
# months in MySQL. For each month: number of entries, minimum ID, maximum ID.
#
# Outputs: one metadata line per month on stdout (lines containing NULL
#          are dropped).
# Returns: 1 when there is no metadata to report.
# Exits:   1 when the remote command fails. Callers are expected to invoke
#          this function inside a command substitution.
fetch_current_meta(){
  local raw meta

  # Check the status of the remote command itself. Filtering happens in a
  # separate step so that grep's exit status (1 when nothing is selected)
  # cannot be mistaken for a retrieval failure, and a real ghe-ssh failure
  # cannot be hidden by a successful grep.
  if ! raw=$(ghe-ssh "$host" "sudo ghe-export-audit-logs months" 2>&3); then
    ghe_verbose "Error: failed to retrieve audit log metadata"
    exit 1
  fi

  # grep returning 1 here only means that every line was filtered out.
  meta=$(printf '%s\n' "$raw" | grep -v NULL) || true

  if [ -z "$meta" ]; then
    return 1
  fi

  echo "$meta"
}
53+
54+
# Tell whether the current snapshot already holds an up-to-date dump of a
# month. The month is assumed unchanged when both its dump and its
# metadata file exist and the stored metadata (size, minimum ID and
# maximum ID) equals the metadata passed in.
#
# Arguments:
#   $1 - metadata line expected for the month
#   $2 - month name (base name of the .gz and .meta files)
# Returns: 0 when in sync, 1 otherwise.
is_month_synced(){
  local expected_meta="$1"
  local month_id=$2
  local dump_file="${current_dir}/${month_id}.gz"
  local meta_file="${current_dir}/${month_id}.meta"

  if [ ! -f "$dump_file" ] || [ ! -f "$meta_file" ]; then
    return 1
  fi

  [ "$(cat "$meta_file")" = "$expected_meta" ]
}
66+
67+
# Reduce a schema dump to its SQL statements so two dumps can be compared:
# drops lines starting with "--" or "/*", drops blank lines and strips
# the AUTO_INCREMENT=XXXX value.
#
# Arguments: $1 - schema dump text
# Outputs:   the filtered schema on stdout
filter_schema(){
  local raw_schema="$1"

  echo "$raw_schema" \
    | grep -v -e "^--" -e "^/\\*" \
    | grep . \
    | sed 's/ AUTO_INCREMENT=[0-9]*\b//'
}
79+
80+
# Dump the table schema into the new snapshot and compare it with the
# schema stored in the current snapshot, if there is one.
#
# A schema change rules out an incremental backup, so force_full_backup
# is set to true and all data gets dumped in the new snapshot.
#
# Globals:
#   host, snapshot_dir, current_dir (read)
#   force_full_backup               (written when the schemas differ)
dump_schema(){
  local live_schema stored_schema
  local stored_file="${current_dir}/schema.gz"

  ghe_verbose "dumping table schema..."

  live_schema=$(ghe-ssh "$host" "ghe-export-audit-logs dump --schema-only" 2>&3)
  echo "$live_schema" | gzip >"${snapshot_dir}/schema.gz"

  # Nothing to compare against on a first run
  test -e "$stored_file" || return 0

  stored_schema=$(gunzip -c "$stored_file")

  if diff -Naur <(filter_schema "$live_schema") <(filter_schema "$stored_schema") 1>&3 2>&3; then
    ghe_verbose "Current and previous schemas match"
  else
    ghe_verbose "Current and previous schema don't match, forcing full backup"
    force_full_backup=true
  fi
}
107+
108+
# Dump one month of audit entries from MySQL into the new snapshot.
#
# Writes $name.gz with the dump produced by the remote export tool and
# $name.meta with the metadata line (number of entries, minimum ID and
# maximum ID).
#
# Arguments:
#   $1 - metadata line for the month
#   $2 - month name
dump_month(){
  local month_meta="$1"
  local month_id=$2
  local target="${snapshot_dir}/${month_id}"

  ghe_verbose "dumping ${month_meta}..."

  ghe-ssh "$host" "ghe-export-audit-logs dump --use-gzip=true $month_id" >"${target}.gz" 2>&3
  echo "$month_meta" > "${target}.meta"
}
120+
121+
# Tell whether the remote version ships the ghe-export-audit-logs tool.
#
# Returns: the exit status of the remote `test -e`.
export_tool_available(){
  local export_tool="/usr/local/bin/ghe-export-audit-logs"

  ghe-ssh "$host" "test -e $export_tool"
}
125+
126+
# Backup audit log entries:
#
# 1. Fetch metadata about the existing audit log entries in MySQL per month
#    (month, number of entries, minimum ID, maximum ID)
# 2. If any month is up to date in the current snapshot, hardlink it
# 3. Otherwise, dump those month entries from MySQL
#
# The table schema is always dumped first; when only_schema is non-empty
# nothing else is done.
#
# Globals:
#   only_schema, force_full_backup, current_dir, snapshot_dir (read)
backup(){
  if ! export_tool_available; then
    ghe_verbose "ghe-export-audit-logs is not available"
    return 0
  fi

  dump_schema

  if [ -n "$only_schema" ]; then
    ghe_verbose "only table schema was dumped"
    return 0
  fi

  local meta
  if ! meta=$(fetch_current_meta); then
    ghe_verbose "there are no current audit log entries"
    return 0
  fi

  # Split the metadata on newlines only. IFS is made local so the change
  # does not leak into the rest of the script once this function returns.
  local IFS=$'\n'
  local month month_name
  for month in $meta; do
    # The month name is the first whitespace-separated field of the line
    month_name=$(echo "$month" | awk '{print $1}')

    if ! $force_full_backup && is_month_synced "$month" "$month_name"; then
      # Month is in-sync with current data, create hardlink to it
      ghe_verbose "$month_name is in sync, hardlinking to it.."
      ln "${current_dir}/${month_name}.gz" "${snapshot_dir}/${month_name}.gz"
      ln "${current_dir}/${month_name}.meta" "${snapshot_dir}/${month_name}.meta"
      continue
    fi

    dump_month "$month" "$month_name"
  done
}
167+
168+
# Entry point: run setup and backup, wrapped in benchmark markers named
# after this script.
main(){
  local script_name
  script_name="$(basename "$0")"

  bm_start "$script_name"
  setup
  backup
  bm_end "$script_name"
}
174+
175+
main

0 commit comments

Comments
 (0)