Skip to content

Commit 3571ac6

Browse files
Merge pull request #267 from github/ls/lfsboot
add bootstrap script to download small LFS files efficiently
2 parents 6596df8 + ab319f3 commit 3571ac6

File tree

3 files changed

+331
-0
lines changed

3 files changed

+331
-0
lines changed

scripts/boostrap/boot

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
#!/usr/bin/perl
2+
#
3+
# Bootstrap a repository. See here for more info:
4+
# https://github.com/github/platform-samples/tree/master/scripts/bootstrap/create-bootstrap
5+
#
6+
7+
use 5.010;
8+
use strict;
9+
use warnings;
10+
use File::Basename;
11+
use MIME::Base64;
12+
13+
# Minimum tool versions required by this script. The values are quoted so that
# they are ordinary strings instead of bare v-string literals (a bare 2.16.0
# is stored as the characters chr(2), chr(16), chr(0)).
my $min_git_version     = '2.16.0';
my $min_git_lfs_version = '2.3.4';
15+
16+
# Report a fatal problem and terminate the script.
#
# Takes an optional message; when it is missing or false a generic
# bootstrap-failure message is used instead. The message is written to
# STDERR with an "ERROR: " prefix and the process exits with status 1.
sub error_exit {
    my ($reason) = @_;
    $reason ||= 'Bootstrapping repository failed.';
    print STDERR 'ERROR: ', $reason, "\n";
    exit 1;
}
22+
23+
# Execute a command line via system() and abort the script if it fails.
#
#   $cmd     - command line handed to system()
#   $err_msg - optional message passed on to error_exit() on failure
sub run {
    my ($cmd, $err_msg) = @_;
    my $status = system($cmd);
    error_exit($err_msg) if $status != 0;
    return 1;
}
27+
28+
# Set a local config for the repository
29+
sub config {
30+
my($keyvalue) = shift;
31+
run('git config --local ' . $keyvalue);
32+
}
33+
34+
# Print a section title framed by two rulers of '#' characters.
sub header {
    my ($title) = @_;
    my $ruler = '##############################################################';
    print "\n$ruler\n";
    print " $title";
    print "\n$ruler\n";
}
40+
41+
# Remember when we started so the total bootstrap time can be reported at the end.
my $start = time;

header('Checking Git and Git LFS...');

#
# Upgrade Git
#
# TODO: Currently we upgrade Git only on Windows. In the future we could check
#       if Git is installed via Homebrew on MacOS and upgrade it there too.
system('git update-git-for-windows --gui') if $^O eq 'MSWin32';
53+
54+
#
55+
# Check versions
56+
#
57+
# Determine the installed Git version. The regex picks the first dotted number
# from the output, e.g. "2.16.0" from "git version 2.16.0.windows.1".
my ($git_version) = (`git --version` // '') =~ /([0-9]+(?:[.][0-9]+)+)/;
if (!$git_version) {
    # Without this guard an undefined value would be handed to version->parse.
    error_exit("Git seems not to be installed on this system.");
}
if (version->parse($git_version) < version->parse($min_git_version)) {
    error_exit("Git version $git_version on this system is outdated. Please upgrade to the latest version!");
}
print "Git version: $git_version\n";

# Same procedure for Git LFS.
my ($git_lfs_version) = (`git lfs version` // '') =~ /([0-9]+(?:[.][0-9]+)+)/;
if (!$git_lfs_version) {
    error_exit("Git LFS seems not to be installed on this system.\nPlease follow install instructions on https://git-lfs.github.com/");
}
if (version->parse($git_lfs_version) < version->parse($min_git_lfs_version)) {
    # Report the Git LFS version here (not the Git version).
    error_exit("Git LFS version $git_lfs_version on this system is outdated. Please upgrade to the latest version!");
}
print "Git LFS version: $git_lfs_version\n";
71+
72+
# Make sure the user's identity is configured in Git. Answers are handed to
# Git in list form, which bypasses the shell: values containing spaces or
# shell metacharacters (e.g. "Jane Doe") are stored verbatim instead of being
# split into several arguments or interpreted by the shell.
if (system('git config user.name >/dev/null') != 0) {
    print "\nIt looks like your name was not configured in Git yet.\n";
    print "Please enter your name: ";
    # <STDIN> yields undef on end-of-file; treat that like an empty answer.
    chomp(my $username = <STDIN> // '');
    system('git', 'config', '--global', 'user.name', $username) if length $username;
}
if (system('git config user.email >/dev/null') != 0) {
    # TODO: We could check for the correct email format here
    print "\nIt looks like your email was not configured in Git yet.\n";
    print "Please enter your email address: ";
    chomp(my $email = <STDIN> // '');
    system('git', 'config', '--global', 'user.email', $email) if length $email;
} else {
    print "\nGit user: " . `git config --null user.name` . "\n";
    print "Git email: " . `git config --null user.email` . "\n";
}
88+
89+
header('Bootstrapping repository...');

#
# Configure the repo
#
# All following Git commands operate on the current directory, so switch into
# the directory that contains this script first. Abort if that fails: the
# destructive reset/clean below must never run in an unrelated directory.
my $script_dir = dirname(__FILE__);
chdir $script_dir
    or error_exit("Cannot change into directory '$script_dir': $!");

if (`git rev-parse --abbrev-ref HEAD` !~ /bootstrap/) {
    error_exit("Please run '$0' from the bootstrap branch");
}

# Ensure we are starting from a clean state in case the script failed
# in a previous run.
run('git reset --hard HEAD --quiet');
run('git clean --force -fdx');

# Ensure Git LFS is initialized in the repo
run('git lfs install --local >/dev/null', 'Initializing Git LFS failed.');
107+
108+
# Enable file system cache on Windows (no effect on OS X/Linux)
# see https://groups.google.com/forum/#!topic/git-for-windows/9WrSosaa4A8
config('core.fscache true');

# If the Git LFS locking feature is used, then Git LFS will set lockable files
# to "readonly" by default. This is implemented with a Git LFS "post-checkout"
# hook. Git LFS can skip this hook if no file is locked. However, Git LFS needs
# to traverse the entire tree to find all ".gitattributes" and check for locked
# files. In a large tree (e.g. >20k directories, >300k files) this can take a
# while. Instruct Git LFS to not set lockable files to "readonly". This skips
# the "post-checkout" entirely and speeds up Git LFS for large repositories.
config('lfs.setlockablereadonly false');

# Enable long path support for Windows (no effect on OS X/Linux)
# Git uses the proper API to create long paths on Windows. However, many
# Windows applications use an outdated API that only support paths up to a
# length of 260 characters. As a result these applications would not be able to
# work with the longer paths properly. Keep that in mind if you run into path
# trouble!
# see https://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx
config('core.longpaths true');

# `git config <key>` exits with status 1 when the key is not set. system()
# returns the raw wait status, in which the exit code occupies the high byte,
# so the value has to be shifted before it can be compared with 1. (Comparing
# the raw status with 1 would practically never match.)
my $untracked_cache_unset =
    (system('git config core.untrackedCache >/dev/null 2>&1') >> 8) == 1;
if ($untracked_cache_unset &&
    system('git update-index --test-untracked-cache') == 0) {
    # Enable untracked cache if the file system supports it
    # see https://news.ycombinator.com/item?id=11388479
    config('core.untrackedCache true');
    config('feature.manyFiles true');
}

config('protocol.version 2');

# Download Submodule content in parallel
# see https://git-scm.com/docs/git-config#Documentation/git-config.txt-submodulefetchJobs
config('submodule.fetchJobs 0');

# Speed up "git status" by suppressing unnecessary terminal output
# see https://github.com/git/git/commit/fd9b544a2991ad74d73ad1bc0af4d24f91a6802b
config('status.aheadBehind false');
147+
148+
#
149+
# Prepare the repo
150+
#
151+
152+
# If this branch ships Git LFS pack files, download and unpack them.
if (-e 'pack/lfs-objects-1.tar.gz') {
    # Get the LFS "pack files"
    run('git lfs pull --include="pack/lfs-objects-*.tar.gz"', 'Downloading Git LFS pack files failed.');
    print "\n";

    my $error_lfs = 'Extracting Git LFS pack files failed.';
    my $progress = 0;

    # Extract every archive with its own tar call. Handing several archives
    # to a single "tar -f" would use the first one as the archive and treat
    # the others as names of members to extract from it.
    for my $archive (sort glob('pack/lfs-objects-*.tar.gz')) {
        # Each line of tar's verbose listing advances the progress counter;
        # stderr is merged into the pipe so the listing is captured no matter
        # which stream tar writes it to.
        open(my $pipe, '-|', "tar -xzvf '$archive' 2>&1") or error_exit($error_lfs);
        while (my $line = <$pipe>) {
            $progress++;
            print "\rExtracting LFS objects: $progress/lfs_pack_count";
        }
        # A non-zero exit status of tar surfaces here.
        close($pipe) or error_exit($error_lfs);
    }
    print "\n";
}
167+
168+
# Check out the default branch (the branch name below is a placeholder that is
# filled in when the bootstrap branch is generated).
run('git checkout --force default_branch');

# Fetch submodules, if the repository has any
run('git submodule update --init --recursive --reference .git') if -e '.gitmodules';

# Cleanup now obsolete Git LFS pack files
my @prune_settings = (
    'lfs.fetchrecentcommitsdays=0',
    'lfs.fetchrecentrefsdays=0',
    'lfs.fetchrecentremoterefs=false',
    'lfs.pruneoffsetdays=0',
);
run('git ' . join(' ', map { "-c $_" } @prune_settings) . ' lfs prune >/dev/null');

header('Hurray! Your Git repository is ready for you!');
printf "Bootstrap time: %d s\n", time - $start;

scripts/boostrap/boot.bat

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
@echo off
rem Windows entry point: runs the "boot" script located next to this file
rem with the sh.exe found in the Git installation under %ProgramFiles%.
setlocal

rem Work from this file's directory. The path is quoted so that directories
rem containing spaces or characters such as "&" are handled correctly.
pushd "%~dp0"
"%ProgramFiles%\Git\bin\sh.exe" -c "./boot"
set "boot_status=%ERRORLEVEL%"
popd

rem Hand the exit status of the boot script back to the caller.
exit /b %boot_status%

scripts/boostrap/create-bootstrap

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#!/usr/bin/env bash
2+
#
3+
# The `create-bootstrap` script searches a repository for smallish LFS files,
4+
# combines them into larger LFS files, and adds them to a new orphan branch
5+
# called `bootstrap`. In addition, the script adds a `boot` script to the
6+
# orphan branch which splits the larger LFS files up, again.
7+
#
8+
# In order to leverage the Git LFS pack files, the Git user needs to get the
9+
# `bootstrap` branch and run the `boot` script.
10+
#
11+
# Usage:
12+
# 1. Clone your repository with the smallish LFS files
13+
# 2. `cd` into the repository
14+
# 3. Run this script
15+
#
16+
# Abort as soon as any command fails.
set -e

# Absolute path of the directory containing this script; the `boot` and
# `boot.bat` templates are copied from there. `dirname` is used so that this
# also works when the script is started without a path component in $0
# (e.g. `bash create-bootstrap`), where "${0%/*}" would be the file name.
base_dir=$(cd "$(dirname "$0")" && pwd)

# Uncomment to skip the check for an already existing 'bootstrap' branch.
# force=1;
20+
21+
# Print a section title framed by two rulers, preceded by a blank line.
#   $1 - title text
header() {
    local ruler="##############################################################"
    printf '\n%s\n %s\n%s\n' "$ruler" "$1" "$ruler"
}
27+
28+
# Print an error message and abort the script with exit status 1.
#   $1 - message text
# The message goes to stderr so that it is not mixed into captured or piped
# standard output.
function error {
    echo "ERROR: $1" >&2
    exit 1
}
32+
33+
# This script has to be started from the top-level directory of a clone.
[ -d .git ] || error "Looks like you are not in the root directory of a Git repository."

# Refuse to continue if the remote already has a bootstrap branch, unless
# $force is set.
if [ -z "$force" ]; then
    if git rev-parse --verify origin/bootstrap >/dev/null 2>&1; then
        error "Branch 'bootstrap' exists already. Please delete it!"
    fi
fi

# The branch that is checked out right now is recorded as the default branch.
default_branch=$(git rev-parse --abbrev-ref HEAD)

# Repository name: last path component of the origin URL without ".git".
remote_url=$(git config --get remote.origin.url)
repo_name=${remote_url##*/}
repo_name=${repo_name%.git}
45+
46+
header "Ensure relevant Git LFS objects are present..."
git pull
git lfs pull
git submodule foreach --recursive git lfs pull

# Prune the local LFS storage of the repository and of all submodules, with
# every "recent" window and the prune offset set to zero.
prune_opts=(
    -c lfs.fetchrecentcommitsdays=0
    -c lfs.fetchrecentrefsdays=0
    -c lfs.fetchrecentremoterefs=false
    -c lfs.pruneoffsetdays=0
)
git "${prune_opts[@]}" lfs prune
git submodule foreach --recursive git "${prune_opts[@]}" lfs prune

header "1/4 Creating 'bootstrap' branch..."
# Start a branch without history, then empty the index and the working tree.
git checkout --orphan bootstrap
git reset
git clean -fdx --force --quiet
67+
68+
header "2/4 Creating Git LFS pack files..."

# Copy LFS files of the submodule into the parent repo to make them
# part of the LFS packfile
if [ -e ./.git/modules ]; then
    find ./.git/modules -type d -path '*/lfs' -exec cp -rf {} .git/ \;
fi

# Find all LFS files smaller than 256MB and put them into tar files no
# larger than 256MB. Finally, print the number of total files added to
# the archives (0 if there are none).
rm -rf pack
mkdir pack
lfs_pack_count=$(
    find ./.git/lfs/objects -type f |
    perl -ne '
        BEGIN { $limit = 256*1024*1024; $batch_id = 0; $batch_size = 0; $total_count = 0; }
        # Printed once after all input was read, also when there was none.
        END   { print $total_count; }

        chomp(my $path = $_);
        my $size = (-s $path) || 0;

        # Skip oversized files before touching the batch state, so that they
        # can neither open an empty batch nor leave a gap in the archive
        # numbering (the boot script looks for archive number 1).
        next if !$path || $size >= $limit;

        # Start a new archive for the very first file and whenever the
        # current one would grow beyond the limit.
        if (!$batch_id || $batch_size + $size > $limit) {
            $batch_id++;
            $batch_size = 0;
        }

        $total_count++;
        $batch_size += $size;
        # List form: the path is passed to tar without going through a shell.
        system("tar", "-rf", "pack/lfs-objects-$batch_id.tar", $path) == 0
            or die "Adding $path to pack/lfs-objects-$batch_id.tar failed\n";
    '
)
# Compress those tar files (there are none if no LFS object qualified)
if compgen -G 'pack/lfs-objects-*.tar' >/dev/null; then
    gzip pack/lfs-objects-*.tar
fi
git lfs track 'pack/lfs-objects-*.tar.gz'
git add pack/lfs-objects-*.tar.gz 2>/dev/null || true
104+
105+
# Boot entry point for Linux/MacOS (bash)
# The placeholders in the template are filled in through the environment
# instead of pasting the values into the Perl code. That way a branch name
# containing "/" or other special characters (e.g. "release/1.0") can neither
# break nor alter the substitution expression.
cp "$base_dir/boot" boot
BOOT_DEFAULT_BRANCH="$default_branch" \
    perl -pi -e 's/default_branch/$ENV{BOOT_DEFAULT_BRANCH}/' boot
BOOT_LFS_PACK_COUNT="$lfs_pack_count" \
    perl -pi -e 's/lfs_pack_count/$ENV{BOOT_LFS_PACK_COUNT}/' boot

# Boot entry point for Windows (cmd.exe)
cp "$base_dir/boot.bat" boot.bat
112+
113+
# Generate the README for the bootstrap branch. The here-document delimiter is
# unquoted on purpose: $remote_url and $repo_name are expanded, while the
# escaped backticks and backslashes end up literally in the file.
cat > README.md << EOF

## Bootstrap Branch

This branch is not related to the rest of the repository content.
The purpose of this branch is to bootstrap the repository quickly
using Git LFS pack files and setting useful defaults.

Bootstrap the repository with the following commands.

### Windows (cmd.exe)
\`\`\`
$ git clone $remote_url --branch bootstrap && $repo_name\\boot.bat
\`\`\`

### Linux/MacOS (bash):
\`\`\`
$ git clone $remote_url --branch bootstrap && ./$repo_name/boot
\`\`\`

EOF

# Note: We intentionally do not add the `.gitattributes` file here.
#       This ensures the Git LFS pack files are not downloaded during
#       the initial clone and only with the `boot` script.
git add README.md boot boot.bat
139+
140+
header "3/4 Uploading 'bootstrap' branch..."

# Commit with an identity given on the command line, so the commit does not
# depend on the Git identity configured for whoever runs this script.
# NOTE(review): the email value below looks like an obfuscation artifact;
# confirm the intended address.
git \
    -c user.name="Bootstrap Creator" \
    -c user.email="[email protected]" \
    commit --message="Initial commit" --quiet

# Publish the branch, replacing any previous version of it on the remote.
git push --set-upstream --force origin bootstrap

header "4/4 Done"
cat README.md

0 commit comments

Comments
 (0)