Commit 18e9d2f

Merge pull request #511 from betatim/cached-builds-take2
[MRG] Add caching of already built repositories
2 parents bdf391c + e7018d7 commit 18e9d2f

10 files changed: +226 -60 lines

repo2docker/__main__.py

Lines changed: 4 additions & 3 deletions
@@ -196,7 +196,6 @@ def get_argparser():
         '--cache-from',
         action='append',
         default=[],
-        #help=self.traits()['cache_from'].help
     )

     return argparser
@@ -208,7 +207,6 @@ def make_r2d(argv=None):
     if argv is None:
         argv = sys.argv[1:]

-
     # version must be checked before parse, as repo/cmd are required and
     # will spit out an error if allowed to be parsed first.
     if '--version' in argv:
@@ -244,9 +242,11 @@ def make_r2d(argv=None):
                      extra=dict(phase='failed'))
        sys.exit(1)

-
    if args.image_name:
        r2d.output_image_spec = args.image_name
+    else:
+        # we will pick a name after fetching the repository
+        r2d.output_image_spec = ""

    r2d.json_logs = args.json_logs

@@ -343,5 +343,6 @@ def main():
        r2d.log.exception(e)
        sys.exit(1)

+
if __name__ == '__main__':
    main()
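
The practical effect: when no --image-name is given, repo2docker no longer invents a timestamp-based name up front; output_image_spec stays empty until fetch() has run and a content-aware name can be chosen (see repo2docker/app.py below). A hedged sketch of driving the same logic from Python, using only the trait and method names visible in this diff; the exact call sequence used by the CLI wrapper is not reproduced here, and the repository URL is just a placeholder:

import logging
from repo2docker.app import Repo2Docker

logging.basicConfig(level=logging.INFO)
r2d = Repo2Docker()
r2d.repo = "https://github.com/some-org/some-repo"  # placeholder repository
r2d.output_image_spec = ""  # empty -> fetch() derives a name from the content_id
r2d.initialize()
r2d.build()  # a second run with unchanged content should reuse the cached image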

repo2docker/app.py

Lines changed: 47 additions & 18 deletions
@@ -13,7 +13,6 @@
 import logging
 import os
 import pwd
-import subprocess
 import shutil
 import tempfile
 import time
@@ -31,8 +30,7 @@
 from . import __version__
 from .buildpacks import (
     PythonBuildPack, DockerBuildPack, LegacyBinderDockerBuildPack,
-    CondaBuildPack, JuliaBuildPack, BaseImage,
-    RBuildPack, NixBuildPack
+    CondaBuildPack, JuliaBuildPack, RBuildPack, NixBuildPack
 )
 from . import contentproviders
 from .utils import (
@@ -364,7 +362,21 @@ def fetch(self, url, ref, checkout_path):
                 spec, checkout_path, yield_output=self.json_logs):
             self.log.info(log_line, extra=dict(phase='fetching'))

-
+        if not self.output_image_spec:
+            self.output_image_spec = (
+                'r2d' + escapism.escape(self.repo, escape_char='-').lower()
+            )
+            # if we are building from a subdirectory include that in the
+            # image name so we can tell builds from different sub-directories
+            # apart.
+            if self.subdir:
+                self.output_image_spec += (
+                    escapism.escape(self.subdir, escape_char='-').lower()
+                )
+            if picked_content_provider.content_id is not None:
+                self.output_image_spec += picked_content_provider.content_id
+            else:
+                self.output_image_spec += str(int(time.time()))

     def json_excepthook(self, etype, evalue, traceback):
         """Called on an uncaught exception when using json logging
@@ -399,15 +411,6 @@ def initialize(self):
            fmt='%(message)s'
        )

-        if self.output_image_spec == "":
-            # Attempt to set a sane default!
-            # HACK: Provide something more descriptive?
-            self.output_image_spec = (
-                'r2d' +
-                escapism.escape(self.repo, escape_char='-').lower() +
-                str(int(time.time()))
-            )
-
        if self.dry_run and (self.run or self.push):
            raise ValueError("Cannot push or run image if we are not building it")

@@ -546,15 +549,29 @@ def _get_free_port(self):
        s.close()
        return port

+    def find_image(self):
+        # if this is a dry run it is Ok for dockerd to be unreachable so we
+        # always return False for dry runs.
+        if self.dry_run:
+            return False
+        # check if we already have an image for this content
+        client = docker.APIClient(version='auto', **kwargs_from_env())
+        for image in client.images():
+            if image['RepoTags'] is not None:
+                for tag in image['RepoTags']:
+                    if tag == self.output_image_spec + ":latest":
+                        return True
+        return False
+
    def build(self):
        """
        Build docker image
        """
        # Check if r2d can connect to docker daemon
        if not self.dry_run:
            try:
-                api_client = docker.APIClient(version='auto',
-                                              **kwargs_from_env())
+                docker_client = docker.APIClient(version='auto',
+                                                 **kwargs_from_env())
            except DockerException as e:
                self.log.exception(e)
                raise
@@ -574,6 +591,14 @@ def build(self):
        try:
            self.fetch(self.repo, self.ref, checkout_path)

+            if self.find_image():
+                self.log.info("Reusing existing image ({}), not "
+                              "building.".format(self.output_image_spec))
+                # no need to build, so skip to the end by `return`ing here
+                # this will still execute the finally clause and let's us
+                # avoid having to indent the build code by an extra level
+                return
+
            if self.subdir:
                checkout_path = os.path.join(checkout_path, self.subdir)
                if not os.path.isdir(checkout_path):
@@ -610,8 +635,11 @@ def build(self):
            self.log.info('Using %s builder\n', bp.__class__.__name__,
                          extra=dict(phase='building'))

-            for l in picked_buildpack.build(api_client, self.output_image_spec,
-                self.build_memory_limit, build_args, self.cache_from):
+            for l in picked_buildpack.build(docker_client,
+                                            self.output_image_spec,
+                                            self.build_memory_limit,
+                                            build_args,
+                                            self.cache_from):
                if 'stream' in l:
                    self.log.info(l['stream'],
                                  extra=dict(phase='building'))
@@ -624,8 +652,9 @@ def build(self):
                else:
                    self.log.info(json.dumps(l),
                                  extra=dict(phase='building'))
+
        finally:
-            # Cheanup checkout if necessary
+            # Cleanup checkout if necessary
            if self.cleanup_checkout:
                shutil.rmtree(checkout_path, ignore_errors=True)
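
Taken together, the two changes above are the core of the caching feature: fetch() now derives a deterministic image name from the repository, an optional subdirectory, and the content provider's content_id, and find_image() asks the Docker daemon whether that image already exists before anything is built. A standalone sketch of the same idea, using docker-py's APIClient and escapism as the diff does; the helper names make_image_spec and image_exists are illustrative only and are not part of repo2docker:

import time

import docker
import escapism
from docker.utils import kwargs_from_env


def make_image_spec(repo, subdir="", content_id=None):
    """Deterministic image name: same content -> same name -> cache hit."""
    spec = "r2d" + escapism.escape(repo, escape_char="-").lower()
    if subdir:
        # distinguish builds of different sub-directories of the same repo
        spec += escapism.escape(subdir, escape_char="-").lower()
    # fall back to a timestamp when the provider cannot identify its content
    spec += content_id if content_id is not None else str(int(time.time()))
    return spec


def image_exists(image_spec):
    """Return True if the local Docker daemon already has image_spec:latest."""
    client = docker.APIClient(version="auto", **kwargs_from_env())
    for image in client.images():
        for tag in image["RepoTags"] or []:
            if tag == image_spec + ":latest":
                return True
    return False


# e.g. a Git provider would pass the short commit SHA as content_id
spec = make_image_spec("https://github.com/some-org/some-repo", content_id="1a2b3c4")
if image_exists(spec):
    print("Reusing existing image", spec)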

repo2docker/contentproviders/base.py

Lines changed: 14 additions & 0 deletions
@@ -17,6 +17,20 @@ class ContentProvider:
     def __init__(self):
         self.log = logging.getLogger("repo2docker")

+    @property
+    def content_id(self):
+        """A unique ID to represent the version of the content.
+        This ID is used to name the built images. If the ID is the same between
+        two runs of repo2docker we will reuse an existing image (if it exists).
+        By providing an ID that summarizes the content we can reuse existing
+        images and speed up build times. A good ID is the revision of a Git
+        repository or a hash computed from all the content.
+        The type content ID can be any string.
+        To disable this behaviour set this property to `None` in which case
+        a fresh image will always be built.
+        """
+        return None
+
     def detect(self, repo, ref=None, extra_args=None):
         """Determine compatibility between source and this provider.
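
To illustrate the contract this property defines, here is a hypothetical provider that implements "a hash computed from all the content". The class name and its fetch() behaviour are invented for the example; only the ContentProvider base class and the content_id property come from this diff:

import hashlib
import os

from repo2docker.contentproviders.base import ContentProvider


class DirHashProvider(ContentProvider):
    """Hypothetical provider that identifies content by hashing every file."""

    def fetch(self, spec, output_dir, yield_output=False):
        # a real provider would copy or download content into output_dir here;
        # for this sketch we assume spec['path'] already is the content
        self._path = spec['path']
        yield "Using {} as-is\n".format(self._path)

    @property
    def content_id(self):
        # hash file names and contents so identical trees map to the same ID
        digest = hashlib.sha256()
        for root, _, files in sorted(os.walk(self._path)):
            for name in sorted(files):
                digest.update(name.encode())
                with open(os.path.join(root, name), 'rb') as f:
                    digest.update(f.read())
        return digest.hexdigest()[:7]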

repo2docker/contentproviders/git.py

Lines changed: 11 additions & 0 deletions
@@ -44,3 +44,14 @@ def fetch(self, spec, output_dir, yield_output=False):
                          cwd=output_dir,
                          capture=yield_output):
             yield line
+
+        cmd = ['git', 'rev-parse', 'HEAD']
+        sha1 = subprocess.Popen(cmd, stdout=subprocess.PIPE, cwd=output_dir)
+        self._sha1 = sha1.stdout.read().decode().strip()
+
+    @property
+    def content_id(self):
+        """A unique ID to represent the version of the content.
+        Uses the first seven characters of the git commit ID of the repository.
+        """
+        return self._sha1[:7]
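
The diff obtains the SHA-1 with subprocess.Popen and a manual read of stdout; an equivalent sketch of the same lookup with subprocess.check_output, shown only for clarity and not part of the change (the checkout path is a placeholder):

import subprocess

def head_sha1(repo_dir):
    """Full SHA-1 of HEAD in repo_dir; the provider keeps only the first 7 chars."""
    return subprocess.check_output(
        ['git', 'rev-parse', 'HEAD'], cwd=repo_dir
    ).decode().strip()

short_id = head_sha1('/path/to/checkout')[:7]  # e.g. '1a2b3c4'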

tests/conftest.py

Lines changed: 32 additions & 0 deletions
@@ -12,8 +12,11 @@
 import pipes
 import shlex
 import requests
+import subprocess
 import time

+from tempfile import TemporaryDirectory
+
 import pytest
 import yaml

@@ -77,6 +80,35 @@ def run_test(args):
         return run_test


+@pytest.fixture()
+def git_repo():
+    """
+    Make a dummy git repo in which user can perform git operations
+
+    Should be used as a contextmanager, it will delete directory when done
+    """
+    with TemporaryDirectory() as gitdir:
+        subprocess.check_call(['git', 'init'], cwd=gitdir)
+        yield gitdir
+
+
+@pytest.fixture()
+def repo_with_content(git_repo):
+    """Create a git repository with content"""
+    with open(os.path.join(git_repo, 'test'), 'w') as f:
+        f.write("Hello")
+
+    subprocess.check_call(['git', 'add', 'test'], cwd=git_repo)
+    subprocess.check_call(['git', 'commit', '-m', 'Test commit'],
+                          cwd=git_repo)
+    # get the commit's SHA1
+    sha1 = subprocess.Popen(['git', 'rev-parse', 'HEAD'],
+                            stdout=subprocess.PIPE, cwd=git_repo)
+    sha1 = sha1.stdout.read().decode().strip()
+
+    yield git_repo, sha1
+
+
 class Repo2DockerTest(pytest.Function):
     """A pytest.Item for running repo2docker"""
     def __init__(self, name, parent, args):
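
For readers less familiar with pytest fixtures: a test receives repo_with_content simply by naming it as an argument, and pytest builds the git_repo -> repo_with_content chain on demand and cleans up the temporary directory afterwards. A minimal hypothetical test, invented for illustration only (the real uses are in tests/unit/contentproviders/test_git.py below):

def test_repo_with_content_yields_sha1(repo_with_content):
    # the fixture yields (path to the repo, SHA-1 of its single commit)
    repo_dir, sha1 = repo_with_content
    assert len(sha1) == 40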

tests/unit/contentproviders/test_git.py

Lines changed: 18 additions & 34 deletions
@@ -1,50 +1,34 @@
-from contextlib import contextmanager
 import os
-import subprocess
 import pytest
 from tempfile import TemporaryDirectory
 from repo2docker.contentproviders import Git


-@contextmanager
-def git_repo():
-    """
-    Makes a dummy git repo in which user can perform git operations
-
-    Should be used as a contextmanager, it will delete directory when done
-    """
-
-    with TemporaryDirectory() as gitdir:
-        subprocess.check_call(['git', 'init'], cwd=gitdir)
-        yield gitdir
-
-
-def test_clone():
+def test_clone(repo_with_content):
     """Test simple git clone to a target dir"""
-    with git_repo() as upstream:
-        with open(os.path.join(upstream, 'test'), 'w') as f:
-            f.write("Hello")
+    upstream, sha1 = repo_with_content

-        subprocess.check_call(['git', 'add', 'test'], cwd=upstream)
-        subprocess.check_call(['git', 'commit', '-m', 'Test commit'],
-                              cwd=upstream)
+    with TemporaryDirectory() as clone_dir:
+        spec = {'repo': upstream}
+        git_content = Git()
+        for _ in git_content.fetch(spec, clone_dir):
+            pass
+        assert os.path.exists(os.path.join(clone_dir, 'test'))
+
+        assert git_content.content_id == sha1[:7]

-        with TemporaryDirectory() as clone_dir:
-            spec = {'repo': upstream}
-            for _ in Git().fetch(spec, clone_dir):
-                pass
-            assert os.path.exists(os.path.join(clone_dir, 'test'))


-def test_bad_ref():
+def test_bad_ref(repo_with_content):
     """
     Test trying to checkout a ref that doesn't exist
     """
-    with git_repo() as upstream:
-        with TemporaryDirectory() as clone_dir:
-            spec = {'repo': upstream, 'ref': 'does-not-exist'}
-            with pytest.raises(ValueError):
-                for _ in Git().fetch(spec, clone_dir):
-                    pass
+    upstream, sha1 = repo_with_content
+    with TemporaryDirectory() as clone_dir:
+        spec = {'repo': upstream, 'ref': 'does-not-exist'}
+        with pytest.raises(ValueError):
+            for _ in Git().fetch(spec, clone_dir):
+                pass
+

 def test_always_accept():
     # The git content provider should always accept a spec

tests/unit/contentproviders/test_local.py

Lines changed: 12 additions & 1 deletion
@@ -24,14 +24,25 @@ def test_not_detect_local_file():
     assert spec is None, spec


+def test_content_id_is_None():
+    # content_id property should always be None for local content provider
+    # as we rely on the caching done by docker
+    local = Local()
+    assert local.content_id is None
+
+
 def test_content_available():
     # create a directory with files, check they are available in the output
     # directory
     with TemporaryDirectory() as d:
         with open(os.path.join(d, 'test'), 'w') as f:
             f.write("Hello")

+        local = Local()
         spec = {'path': d}
-        for _ in Local().fetch(spec, d):
+        for _ in local.fetch(spec, d):
             pass
         assert os.path.exists(os.path.join(d, 'test'))
+        # content_id property should always be None for local content provider
+        # as we rely on the caching done by docker
+        assert local.content_id is None
