Skip to content

Commit c5c4ddd

Browse files
author
Evgenii Khramkov
authored
Merge pull request #541 from github/fix-backup-timing
Fix `ghe-backup-repositories` performance for large instances
2 parents 2c47acb + 5206c13 commit c5c4ddd

File tree

5 files changed

+99
-22
lines changed

5 files changed

+99
-22
lines changed

.travis.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ matrix:
99
- brew install moreutils
1010
- brew install shellcheck
1111
- brew install jq
12+
- brew install coreutils
1213
script: make test
1314
- os: linux
1415
dist: trusty
@@ -24,4 +25,5 @@ matrix:
2425
- moreutils
2526
- fakeroot
2627
- jq
28+
- coreutils
2729
script: debuild -uc -us

share/github-backup-utils/ghe-backup-config

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,3 +304,29 @@ ghe_debug() {
304304
# Convert a dotted version string into a zero-padded integer that can be
# compared numerically. A leading "v" on any argument is dropped, the text is
# split on "." and the first four fields are printed as %d%03d%03d%03d
# (for example 2.16.23 -> 2016023000).
version() {
  local awk_prog='{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }'

  echo "${@#v}" \
    | awk -F. "$awk_prog"
}
307+
308+
# The list of gists returned by the source changed in 2.16.23, 2.17.14,
309+
# 2.18.8, and 2.19.3. We need to account for this difference here.
310+
# In older versions, all paths need to be truncated with `dirname`.
311+
# In newer versions, gist paths are unmodified, and only other repo types
312+
# are truncated with `dirname`.
313+
fix_paths_for_ghe_version() {
314+
if [[ "$GHE_REMOTE_VERSION" =~ 2.16. && "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.16.23)" ]] || \
315+
[[ "$GHE_REMOTE_VERSION" =~ 2.17. && "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.17.14)" ]] || \
316+
[[ "$GHE_REMOTE_VERSION" =~ 2.18. && "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.18.8)" ]] || \
317+
[[ "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.19.3)" ]]; then
318+
GIST_FILTER="-e /gist/b"
319+
else
320+
unset GIST_FILTER
321+
fi
322+
323+
# This sed expression is equivalent to running `dirname` on each line,
324+
# but without all the fork+exec overhead of calling `dirname` that many
325+
# times:
326+
# 1. strip off trailing slashes
327+
# 2. if the result has no slashes in it, the dirname is "."
328+
# 3. truncate from the final slash (if any) to the end
329+
# If the GIST_FILTER was set above (because we're on a modern version of
330+
# GHES), then don't modify lines with "gist" in them.
331+
sed $GIST_FILTER -e 's/\/$//; s/^[^\/]*$/./; s/\/[^\/]*$//'
332+
}

share/github-backup-utils/ghe-backup-repositories

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -366,25 +366,8 @@ bm_end "$(basename $0) - Special Data Directories Sync"
366366

367367
if [ -z "$GHE_SKIP_ROUTE_VERIFICATION" ]; then
368368
bm_start "$(basename $0) - Verifying Routes"
369-
370-
# The list of gists returned by the source changed in 2.16.23, 2.17.14, 2.18.8 & 2.19.3
371-
# so we need to account for this difference here.
372-
parse_paths() {
373-
while read -r line; do
374-
if [[ "$GHE_REMOTE_VERSION" =~ 2.16 && "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.16.23)" ]] || \
375-
[[ "$GHE_REMOTE_VERSION" =~ 2.17 && "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.17.14)" ]] || \
376-
[[ "$GHE_REMOTE_VERSION" =~ 2.18 && "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.18.8)" ]] || \
377-
[[ "$GHE_REMOTE_VERSION" =~ 2.19 && "$(version $GHE_REMOTE_VERSION)" -ge "$(version 2.19.3)" ]] && \
378-
(echo "$line" | grep -q "gist"); then
379-
echo "$line"
380-
else
381-
dirname "$line"
382-
fi
383-
done
384-
}
385-
386-
cat $tempdir/*.rsync | sort | uniq > $tempdir/source_routes
387-
(cd $backup_dir/ && find * -mindepth 5 -maxdepth 6 -type d -name \*.git | parse_paths | sort | uniq) > $tempdir/destination_routes
369+
cat $tempdir/*.rsync | uniq | sort | uniq > $tempdir/source_routes
370+
(cd $backup_dir/ && find * -mindepth 5 -maxdepth 6 -type d -name \*.git | fix_paths_for_ghe_version | uniq | sort | uniq) > $tempdir/destination_routes
388371

389372
git --no-pager diff --unified=0 --no-prefix -- $tempdir/source_routes $tempdir/destination_routes || echo "Warning: One or more repository networks and/or gists were not found on the source appliance. Please contact GitHub Enterprise Support for assistance."
390373

share/github-backup-utils/ghe-backup-storage

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,8 @@ bm_end "$(basename $0) - Storage object sync"
141141
if [ -z "$GHE_SKIP_ROUTE_VERIFICATION" ]; then
142142
bm_start "$(basename $0) - Verifying Routes"
143143

144-
cat $tempdir/*.rsync | sort | uniq > $tempdir/source_routes
145-
(cd $backup_dir/ && find * -mindepth 3 -maxdepth 3 -type f -print | sort | uniq) > $tempdir/destination_routes
144+
cat $tempdir/*.rsync | uniq | sort | uniq > $tempdir/source_routes
145+
(cd $backup_dir/ && find * -mindepth 3 -maxdepth 3 -type f -print | uniq | sort | uniq) > $tempdir/destination_routes
146146

147147
git --no-pager diff --unified=0 --no-prefix -- $tempdir/source_routes $tempdir/destination_routes || echo "Warning: One or more storage objects were not found on the source appliance. Please contact GitHub Enterprise Support for assistance."
148148

test/test-ghe-backup.sh

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
#!/usr/bin/env bash
22
# ghe-backup command tests
33

4+
TESTS_DIR="$PWD/$(dirname "$0")"
45
# Bring in testlib
56
# shellcheck source=test/testlib.sh
6-
. "$(dirname "$0")/testlib.sh"
7+
. "$TESTS_DIR/testlib.sh"
78

89
# Create the backup data dir and fake remote repositories dirs
910
mkdir -p "$GHE_DATA_DIR" "$GHE_REMOTE_DATA_USER_DIR"
@@ -344,3 +345,68 @@ begin_test "ghe-backup missing directories or files on source appliance"
344345
verify_all_backedup_data
345346
)
346347
end_test
348+
349+
# acceptance criterion is less than 2 seconds for 100,000 lines
350+
begin_test "ghe-backup fix_paths_for_ghe_version performance tests - gists"
351+
(
352+
set -e
353+
timeout 2 bash -c "
354+
source '$TESTS_DIR/../share/github-backup-utils/ghe-backup-config'
355+
GHE_REMOTE_VERSION=2.16.23
356+
seq 1 100000 | sed -e 's/$/ gist/' | fix_paths_for_ghe_version | grep -c gist
357+
"
358+
)
359+
end_test
360+
361+
# acceptance criterion is less than 2 seconds for 100,000 lines
362+
begin_test "ghe-backup fix_paths_for_ghe_version performance tests - wikis"
363+
(
364+
set -e
365+
timeout 2 bash -c "
366+
source '$TESTS_DIR/../share/github-backup-utils/ghe-backup-config'
367+
GHE_REMOTE_VERSION=2.16.23
368+
seq 1 100000 | sed -e 's/$/ wiki/' | fix_paths_for_ghe_version | grep -c '^\.$'
369+
"
370+
)
371+
end_test
372+
373+
# check fix_paths_for_ghe_version version thresholds
374+
begin_test "ghe-backup fix_paths_for_ghe_version newer/older"
375+
(
376+
set -e
377+
378+
# modern versions keep foo/gist as foo/gist
379+
for ver in 2.16.23 v2.16.23 v2.17.14 v2.18.8 v2.19.3 v2.20.0 v3.0.0; do
380+
echo "## $ver, not gist"
381+
[ "$(bash -c "
382+
source '$TESTS_DIR/../share/github-backup-utils/ghe-backup-config'
383+
GHE_REMOTE_VERSION=$ver
384+
echo foo/bar | fix_paths_for_ghe_version
385+
")" == "foo" ]
386+
387+
echo "## $ver, gist"
388+
[ "$(bash -c "
389+
source '$TESTS_DIR/../share/github-backup-utils/ghe-backup-config'
390+
GHE_REMOTE_VERSION=$ver
391+
echo foo/gist | fix_paths_for_ghe_version
392+
")" == "foo/gist" ]
393+
done
394+
395+
# old versions change foo/gist to foo
396+
for ver in 1.0.0 bob a.b.c "" 1.2.16 2.0.0 v2.0.0 v2.15.123 v2.16.22 v2.17.13 v2.18.7 v2.19.2; do
397+
echo "## $ver, not gist"
398+
[ "$(bash -c "
399+
source '$TESTS_DIR/../share/github-backup-utils/ghe-backup-config'
400+
GHE_REMOTE_VERSION=$ver
401+
echo foo/bar | fix_paths_for_ghe_version
402+
")" == "foo" ]
403+
404+
echo "## $ver, gist"
405+
[ "$(bash -c "
406+
source '$TESTS_DIR/../share/github-backup-utils/ghe-backup-config'
407+
GHE_REMOTE_VERSION=$ver
408+
echo foo/gist | fix_paths_for_ghe_version
409+
")" == "foo" ]
410+
done
411+
)
412+
end_test

0 commit comments

Comments
 (0)