Skip to content

Commit 65c23c1

Browse files
authored
Merge pull request #517 from github/juruen/audit-log-mysql-drop
Prepare for audit log MySQL backend deprecation
2 parents 17a846e + 0d129ca commit 65c23c1

File tree

7 files changed

+95
-184
lines changed

7 files changed

+95
-184
lines changed

backup.config-example

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,3 @@ GHE_NUM_SNAPSHOTS=10
4848
#
4949
# WARNING: do not enable this, only useful for debugging/development
5050
#GHE_BACKUP_FSCK=no
51-
52-
# If set to 'no', Elasticsearch audit log indices will not be backed up.
53-
# Note that they will still be backed up from MySQL. This will reduce
54-
# the time and size of the backup process but it will take longer
55-
# for the audit log entries to be searchable as they need to be reindexed
56-
# in Elasticsearch.
57-
#GHE_BACKUP_ES_AUDIT_LOGS=no

share/github-backup-utils/ghe-backup-audit-log

Lines changed: 9 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -23,82 +23,20 @@ setup(){
2323
mkdir -p "$GHE_SNAPSHOT_DIR/audit-log"
2424
}
2525

26-
# Check whether the MySQL backup should be enabled
27-
# by checking if the audit-log-import directory exists,
28-
# this makes it backwards-compatible with old snapshots
29-
mysql_backup_enabled(){
30-
ghe-ssh "$host" test -d "$GHE_REMOTE_DATA_USER_DIR/common/audit-log-import"
26+
# Check whether the flag to skip the MySQL audit_entries table truncation
27+
# exists.
28+
is_skip_truncate_enabled(){
29+
ghe-ssh "$host" test -e "$GHE_REMOTE_DATA_USER_DIR/common/audit-log-import/skip_truncate"
3130
}
3231

33-
# Check whether the MySQL import is complete by checking if
34-
# /data/user/common/audit-log-import/complete exists
35-
is_import_complete(){
36-
ghe-ssh "$host" test -e "$GHE_REMOTE_DATA_USER_DIR/common/audit-log-import/complete"
37-
}
38-
39-
# Check whether the MySQL import is disabled by verifying if
40-
# /data/user/common/audit-log-import/skip exists
41-
is_import_disabled(){
42-
ghe-ssh "$host" test -e "$GHE_REMOTE_DATA_USER_DIR/common/audit-log-import/skip"
43-
}
44-
45-
# Check whether the instance ships an audit log reconciler, if it doesn't
46-
# we can't dump audit_entries data, only the schema
47-
is_reconciler_available(){
48-
ghe-ssh "$GHE_HOSTNAME" -- "test -e /usr/local/share/enterprise/ghe-auditlog-repair"
49-
}
50-
51-
# Check whether we only need to back up the audit_entries schema and
52-
# ignore the actual data.
53-
#
54-
# This is the case when:
55-
# - The import to MySQL is not complete
56-
# - The import is disabled
57-
# - The reconciler tool is not available
58-
skip_mysql_entries(){
59-
if ! is_import_complete; then
60-
ghe_verbose "audit log import is not complete"
61-
return
62-
fi
63-
64-
if is_import_disabled; then
65-
ghe_verbose "audit log import is disabled"
66-
return
67-
fi
68-
69-
if ! is_reconciler_available; then
70-
ghe_verbose "audit log reconciler is not available"
71-
return
72-
fi
73-
74-
return 1
75-
}
76-
77-
# If the import to MySQL is complete, add a flag in the snapshot to indicate so.
78-
# And also use `ghe-backup-mysql-audit-log` to dump the audit entries.
7932
backup_mysql(){
80-
if skip_mysql_entries; then
81-
ghe_verbose "only backing up audit log table schema"
33+
if is_skip_truncate_enabled; then
34+
# As skip_truncate exists, we need to also backup the audit entries
35+
# in MySQL because Elasticsearch may not be fully synced.
36+
"${base_path}/ghe-backup-mysql-audit-log"
37+
else
8238
"${base_path}/ghe-backup-mysql-audit-log" --schema-only
83-
return
84-
fi
85-
86-
"${base_path}/ghe-backup-mysql-audit-log"
87-
touch "$GHE_SNAPSHOT_DIR/audit-log/mysql-import-complete"
88-
}
89-
90-
# Audit log indices in Elasticsearch are backed up when:
91-
#
92-
# - Import is not complete
93-
# - Import is disabled
94-
# - Reconciler is not available
95-
# - GHE_BACKUP_ES_AUDIT_LOGS is not set to 'no'
96-
es_backup_enabled(){
97-
if skip_mysql_entries; then
98-
return
9939
fi
100-
101-
[ -z "$GHE_BACKUP_ES_AUDIT_LOGS" ] || [ "$GHE_BACKUP_ES_AUDIT_LOGS" != "no" ]
10240
}
10341

10442
# Use ghe-backup-es-audit-log to back up Elasticsearch indices
@@ -107,19 +45,8 @@ backup_es(){
10745
}
10846

10947
backup(){
110-
if mysql_backup_enabled; then
111-
ghe_verbose "MySQL audit logs backup is enabled"
11248
backup_mysql
113-
else
114-
ghe_verbose "MySQL audit logs backup is disabled"
115-
fi
116-
117-
if es_backup_enabled; then
118-
ghe_verbose "Elasticsearch audit logs backup is enabled"
11949
backup_es
120-
else
121-
ghe_verbose "Elasticsearch audit logs backup is disabled"
122-
fi
12350
}
12451

12552
main(){

share/github-backup-utils/ghe-backup-es-rsync

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,6 @@ fi
2727
# Make sure root backup dir exists if this is the first run
2828
mkdir -p "$GHE_SNAPSHOT_DIR/elasticsearch"
2929

30-
# Create exclude file
31-
exclude_file="$(mktemp)"
32-
echo elasticsearch.yml >"$exclude_file"
33-
34-
# Exclude audit log indices when configuration says so and import to MySQL is complete
35-
# as those indices will be rebuilt from MySQL during a restore
36-
if [ "$GHE_BACKUP_ES_AUDIT_LOGS" = "no" ] && ghe-ssh "$host" test -e "/data/user/common/audit-log-import/complete"; then
37-
ghe_verbose "* Excluding Audit Log indices"
38-
ghe-ssh "$host" curl -s 'http://localhost:9201/_cat/indices/audit_log?h=uuid' >>$exclude_file 2>&3
39-
fi
40-
4130
# Verify that the /data/elasticsearch directory exists.
4231
if ! ghe-ssh "$host" -- "[ -d '$GHE_REMOTE_DATA_USER_DIR/elasticsearch' ]"; then
4332
ghe_verbose "* The '$GHE_REMOTE_DATA_USER_DIR/elasticsearch' directory doesn't exist."
@@ -58,7 +47,6 @@ ghe-rsync -avz \
5847
-e "ghe-ssh -p $(ssh_port_part "$host")" \
5948
--rsync-path="sudo -u elasticsearch rsync" \
6049
$link_dest \
61-
--exclude-from="$exclude_file" \
6250
"$(ssh_host_part "$host"):$GHE_REMOTE_DATA_USER_DIR/elasticsearch/" \
6351
"$GHE_SNAPSHOT_DIR/elasticsearch" 1>&3
6452

@@ -67,7 +55,6 @@ cleanup () {
6755
ghe_verbose "* Enabling ES index flushing ..."
6856
echo '{"index":{"translog.disable_flush":false}}' |
6957
ghe-ssh "$host" -- curl -s -XPUT "localhost:9200/_settings" -d @- >/dev/null
70-
rm -rf "$exclude_file"
7158
}
7259
trap 'cleanup' EXIT
7360
trap 'exit $?' INT # ^C always terminate
@@ -84,7 +71,6 @@ ghe-rsync -avz \
8471
-e "ghe-ssh -p $(ssh_port_part "$host")" \
8572
--rsync-path="sudo -u elasticsearch rsync" \
8673
$link_dest \
87-
--exclude-from="$exclude_file" \
8874
"$(ssh_host_part "$host"):$GHE_REMOTE_DATA_USER_DIR/elasticsearch/" \
8975
"$GHE_SNAPSHOT_DIR/elasticsearch" 1>&3
9076

share/github-backup-utils/ghe-restore-audit-log

Lines changed: 77 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -22,55 +22,70 @@ setup(){
2222
ghe_remote_version_required "$GHE_HOSTNAME"
2323
}
2424

25-
# Check whether the snapshot comes from an instance
26-
# where the MySQL import was complete
27-
is_import_complete(){
28-
test -e "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/audit-log/mysql-import-complete"
25+
# Check whether the snapshot contains audit logs that
26+
# were taken from Elasticsearch
27+
es_data_available(){
28+
ls -A "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT"/audit-log/*.size >/dev/null 2>&1
2929
}
3030

31-
# Check whether the snapshot was taken on an instance
32-
# where MySQL audit logs were enabled
33-
mysql_restored_enabled(){
34-
test -e "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/audit-log-mysql"
31+
# Check whether the snapshot contains audit logs that
32+
# were taken from MySQL
33+
mysql_dump_available(){
34+
ls -A "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT"/audit-log-mysql/20*.gz >/dev/null 2>&1
3535
}
3636

37-
remove_complete_flag(){
38-
ghe_verbose "Setting instance(s) as pending for audit log import to MySQL"
39-
ghe-ssh "$GHE_HOSTNAME" -- "sudo rm -rf $GHE_REMOTE_ROOT_DIR/data/user/common/audit-log-import/complete" 1>&3 2>&3
37+
# Check whether the snapshot contains the audit log table schema
38+
mysql_table_schema_available(){
39+
ls -A "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT"/audit-log-mysql/schema.gz >/dev/null 2>&1
40+
}
4041

41-
if $CLUSTER; then
42-
if ! ghe-ssh "$GHE_HOSTNAME" -- "ghe-cluster-each -- sudo rm -rf /data/user/common/audit-log-import/complete" 1>&3 2>&3; then
43-
ghe_verbose "Failed to set as pending for audit log import to MySQL all instances in cluster"
44-
fi
45-
fi
42+
# Check whether the remote host is running a version where the MySQL backend
43+
# is supported, i.e. < 2.19
44+
is_mysql_supported(){
45+
[ "$(version "$GHE_REMOTE_VERSION")" -lt "$(version 2.19.0)" ]
4646
}
4747

48-
# Use `ghe-backup-mysql-audit-log` to dump the audit entries.
49-
# If the import to MySQL is complete, add a flag in the snapshot to indicate so.
50-
restore_mysql(){
51-
ghe_verbose "Restoring MySQL audit logs ..."
48+
# Helper function to set remote flags in `/data/user/common/audit-log-import`
49+
# if it's supported, i.e. the directory exists.
50+
set_remote_flag(){
51+
local flag=$1
52+
local msg=$2
5253

53-
"${base_path}/ghe-restore-mysql-audit-log" "$GHE_HOSTNAME"
54+
local dir="/data/user/common/audit-log-import"
5455

55-
if ! is_import_complete; then
56-
remove_complete_flag
56+
if ! ghe-ssh "$GHE_HOSTNAME" -- "sudo test -d $GHE_REMOTE_ROOT_DIR/$dir" 1>&3 2>&3; then
57+
ghe_verbose "Remote version doesn't support audit log import, skipping '$msg'"
5758
return
5859
fi
5960

60-
ghe_verbose "Audit log import to MySQL is complete"
61-
ghe-ssh "$GHE_HOSTNAME" -- "sudo touch $GHE_REMOTE_ROOT_DIR/data/user/common/audit-log-import/complete"
62-
}
61+
ghe_verbose "$msg"
62+
ghe-ssh "$GHE_HOSTNAME" -- "sudo touch $GHE_REMOTE_ROOT_DIR/$dir/$flag" 1>&3 2>&3
6363

64-
# Audit log indices in Elasticsearch are restored when:
65-
#
66-
# - import to MySQL is not complete
67-
# - GHE_BACKUP_ES_AUDIT_LOGS is not set to 'no'
68-
es_restore_enabled(){
69-
if ! is_import_complete; then
70-
return
64+
if $CLUSTER; then
65+
if ! ghe-ssh "$GHE_HOSTNAME" -- "ghe-cluster-each -- sudo touch $dir/$flag" 1>&3 2>&3; then
66+
ghe_verbose "Failed to $msg in all instances in cluster"
67+
fi
7168
fi
69+
}
70+
71+
# Add flag to not trigger transitions from MySQL to Elasticsearch
72+
set_skip_transition_flag(){
73+
set_remote_flag "skip" "Add flag to skip audit log import to MySQL"
74+
}
7275

73-
[ -z "$GHE_BACKUP_ES_AUDIT_LOGS" ] || [ "$GHE_BACKUP_ES_AUDIT_LOGS" != "no" ]
76+
# Add flag to not trigger the truncation of the MySQL audit log table
77+
set_skip_truncate_flag(){
78+
set_remote_flag "skip_truncate" "Add flag to skip truncating audit log table in MySQL"
79+
}
80+
81+
# Use `ghe-restore-mysql-audit-log` to restore the audit entries.
82+
# If a non-empty first argument (e.g. `--only-schema`) is given, only the table schema is restored.
83+
restore_mysql(){
84+
local only_schema=$1
85+
86+
ghe_verbose "Restoring MySQL audit logs ..."
87+
88+
"${base_path}/ghe-restore-mysql-audit-log" "$GHE_HOSTNAME" "$only_schema"
7489
}
7590

7691
# Use ghe-restore-es-audit-log to restore Elasticsearch indices
@@ -80,37 +95,44 @@ restore_es(){
8095
"${base_path}/ghe-restore-es-audit-log" "$GHE_HOSTNAME"
8196
}
8297

83-
# Whether or not we should trigger a reindex from MySQL into Elasticsearch
84-
should_start_reindex(){
85-
if [ -z "$GHE_BACKUP_ES_AUDIT_LOGS" ] || [ "$GHE_BACKUP_ES_AUDIT_LOGS" != "no" ]; then
86-
ghe_verbose "GHE_BACKUP_ES_AUDIT_LOGS is not set to 'no'"
87-
return 1
88-
fi
89-
90-
if ! ghe-ssh "$GHE_HOSTNAME" -- "test -e /usr/local/share/enterprise/ghe-auditlog-repair"; then
91-
ghe_verbose "ghe-auditlog-repiar doesn't exist"
92-
return 1
93-
fi
94-
}
95-
9698
do_restore(){
97-
if mysql_restored_enabled; then
98-
restore_mysql
99-
else
100-
ghe_verbose "MySQL audit log restore is not enabled"
101-
remove_complete_flag
99+
if is_mysql_supported; then
100+
set_skip_transition_flag
102101
fi
103102

104-
if es_restore_enabled; then
103+
# ES data is available, restore it along
104+
# with the table schema
105+
if es_data_available; then
106+
ghe_verbose "Elasticsearch data is available"
107+
105108
restore_es
109+
restore_mysql --only-schema
106110
return
107111
fi
108112

109-
ghe_verbose "Elasticsearch audit log restore is not enabled"
113+
# Only MySQL data is available, restore it
114+
# and trigger a reindex
115+
if mysql_dump_available; then
116+
ghe_verbose "Only MySQL data is available"
117+
118+
restore_mysql
119+
120+
if ! is_mysql_supported; then
121+
ghe_verbose "Add flag to skip MySQL audit log table truncation"
122+
set_skip_truncate_flag
123+
fi
110124

111-
if should_start_reindex; then
112125
ghe_verbose "Starting audit log reindex from MySQL to Elasticsearch"
113126
ghe-ssh "$GHE_HOSTNAME" -- "sudo systemctl --no-block restart auditlog-repair";
127+
return
128+
fi
129+
130+
if mysql_table_schema_available; then
131+
# Only the table schema is available, restore it
132+
ghe_verbose "Only audit_entries schema is available"
133+
restore_mysql --only-schema
134+
else
135+
ghe_verbose "MySQL table schema is not available"
114136
fi
115137
}
116138

share/github-backup-utils/ghe-restore-mysql-audit-log

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ base_path="$( dirname "${BASH_SOURCE[0]}" )"
1616

1717
GHE_HOSTNAME="$1"
1818

19+
# Whether we just need to import the table schema and no data
20+
only_schema="$2"
21+
1922
# Setup GHE_REMOTE_XXX variables, snapshot_dir,
2023
# remote_dir, remote_dump and skip_prepare
2124
setup(){
@@ -202,6 +205,10 @@ restore(){
202205
fi
203206

204207
restore_schema
208+
if [ -n "$only_schema" ]; then
209+
ghe_verbose "only table schema was imported"
210+
return
211+
fi
205212

206213
IFS=$'\n'
207214
for month in $(notsynced_meta); do

test/test-ghe-restore.sh

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -280,32 +280,6 @@ begin_test "ghe-restore with no pages backup"
280280
)
281281
end_test
282282

283-
begin_test "ghe-restore removes audit log import to MySQL flag when is a < 2.17 snapshot"
284-
(
285-
set -e
286-
287-
rm -rf "$GHE_REMOTE_ROOT_DIR"
288-
setup_remote_metadata
289-
290-
# set as configured, enable maintenance mode and create required directories
291-
setup_maintenance_mode "configured"
292-
293-
flag="$GHE_REMOTE_ROOT_DIR/data/user/common/audit-log-import/complete"
294-
mkdir -p "$(dirname $flag)"
295-
touch "$flag"
296-
297-
if ! output=$(ghe-restore -v -f localhost 2>&1); then
298-
echo "Error: failed to restore $output" >&2
299-
exit 1
300-
fi
301-
302-
! test -e "$flag" || {
303-
echo "Error: the restore process should've removed $flag" >&2
304-
exit 1
305-
}
306-
)
307-
end_test
308-
309283
begin_test "ghe-restore cluster backup to non-cluster appliance"
310284
(
311285
set -e

test/testlib.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,9 @@ setup_test_data () {
253253
mkdir -p "$loc/audit-log/"
254254
cd "$loc/audit-log/"
255255
echo "fake audit log last yr last mth" | gzip > audit_log-1-$last_yr-$last_mth-1.gz
256+
echo "1" > audit_log-1-$last_yr-$last_mth-1.size
256257
echo "fake audit log this yr this mth" | gzip > audit_log-1-$this_yr-$this_mth-1.gz
258+
echo "1" > audit_log-1-$this_yr-$this_mth-1.size
257259

258260
# Create hookshot logs
259261
mkdir -p "$loc/hookshot/"

0 commit comments

Comments
 (0)