Skip to content

Commit 5ef42fc

Browse files
authored
Merge pull request #718 from betatim/early-r
[MRG] Install R packages before copying repo contents
2 parents 399b339 + 09b8481 commit 5ef42fc

File tree

2 files changed

+69
-11
lines changed

2 files changed

+69
-11
lines changed

repo2docker/buildpacks/base.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@
9090
9191
{% if build_script_files -%}
9292
# If scripts required during build are present, copy them
93-
{% for src, dst in build_script_files.items() %}
93+
{% for src, dst in build_script_files|dictsort %}
9494
COPY {{ src }} {{ dst }}
9595
{% endfor -%}
9696
{% endif -%}
@@ -125,7 +125,14 @@
125125
# of the repository but don't access any files in the repository. By executing
126126
# them before copying the repository itself we can cache these steps. For
127127
# example installing APT packages.
128-
{% for sd in pre_assemble_script_directives -%}
128+
{% if preassemble_script_files -%}
129+
# If files required during pre-assembly are present, copy them
130+
{% for src, dst in preassemble_script_files|dictsort %}
131+
COPY src/{{ src }} ${REPO_DIR}/{{ dst }}
132+
{% endfor -%}
133+
{% endif -%}
134+
135+
{% for sd in preassemble_script_directives -%}
129136
{{ sd }}
130137
{% endfor %}
131138
@@ -144,7 +151,7 @@
144151
# Container image Labels!
145152
# Put these at the end, since we don't want to rebuild everything
146153
# when these change! Did I mention I hate Dockerfile cache semantics?
147-
{% for k, v in labels.items() %}
154+
{% for k, v in labels|dictsort %}
148155
LABEL {{k}}="{{v}}"
149156
{%- endfor %}
150157
@@ -379,6 +386,19 @@ def get_build_scripts(self):
379386

380387
return []
381388

389+
def get_preassemble_script_files(self):
390+
"""
391+
Dict of files to be copied to the container image for use in preassembly.
392+
393+
This is copied before the `build_scripts`, `preassemble_scripts` and
394+
`assemble_scripts` are run, so it can be executed from any of them.
395+
396+
It's a dictionary where the key is the source file path in the
397+
repository and the value is the destination file path inside the
398+
repository in the container.
399+
"""
400+
return {}
401+
382402
def get_preassemble_scripts(self):
383403
"""
384404
Ordered list of shell snippets to build an image for this repository.
@@ -499,13 +519,13 @@ def render(self):
499519
"RUN {}".format(textwrap.dedent(script.strip("\n")))
500520
)
501521

502-
pre_assemble_script_directives = []
522+
preassemble_script_directives = []
503523
last_user = "root"
504524
for user, script in self.get_preassemble_scripts():
505525
if last_user != user:
506-
pre_assemble_script_directives.append("USER {}".format(user))
526+
preassemble_script_directives.append("USER {}".format(user))
507527
last_user = user
508-
pre_assemble_script_directives.append(
528+
preassemble_script_directives.append(
509529
"RUN {}".format(textwrap.dedent(script.strip("\n")))
510530
)
511531

@@ -516,7 +536,8 @@ def render(self):
516536
env=self.get_env(),
517537
labels=self.get_labels(),
518538
build_script_directives=build_script_directives,
519-
pre_assemble_script_directives=pre_assemble_script_directives,
539+
preassemble_script_files=self.get_preassemble_script_files(),
540+
preassemble_script_directives=preassemble_script_directives,
520541
assemble_script_directives=assemble_script_directives,
521542
build_script_files=self.get_build_script_files(),
522543
base_packages=sorted(self.get_base_packages()),

repo2docker/buildpacks/r.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -291,15 +291,52 @@ def get_build_scripts(self):
291291

292292
return super().get_build_scripts() + scripts
293293

294-
def get_assemble_scripts(self):
295-
"""
296-
Return series of build-steps specific to this repository.
294+
def get_preassemble_script_files(self):
295+
files = {}
296+
installR_path = self.binder_path("install.R")
297+
if os.path.exists(installR_path):
298+
files[installR_path] = installR_path
299+
300+
return files
301+
302+
def get_preassemble_scripts(self):
303+
"""Install contents of install.R
304+
305+
Attempt to execute `install.R` before copying the contents of the
306+
repository. We speculate that install.R usually does not need access to them.
307+
In case this fails we re-run it after copying the repository contents.
308+
309+
The advantage of executing it before copying is that minor edits to the
310+
repository content will not trigger a re-install making things faster.
297311
"""
312+
scripts = []
313+
314+
installR_path = self.binder_path("install.R")
315+
if os.path.exists(installR_path):
316+
scripts += [
317+
(
318+
"${NB_USER}",
319+
"Rscript %s && touch /tmp/.preassembled || true" % installR_path,
320+
)
321+
]
322+
323+
return super().get_preassemble_scripts() + scripts
324+
325+
def get_assemble_scripts(self):
326+
"""Install the repository's dependencies, or the repository itself"""
298327
assemble_scripts = super().get_assemble_scripts()
299328

300329
installR_path = self.binder_path("install.R")
301330
if os.path.exists(installR_path):
302-
assemble_scripts += [("${NB_USER}", "Rscript %s" % installR_path)]
331+
assemble_scripts += [
332+
(
333+
"${NB_USER}",
334+
# only run install.R if the pre-assembly failed
335+
"if [ ! -f /tmp/.preassembled ]; then Rscript {}; fi".format(
336+
installR_path
337+
),
338+
)
339+
]
303340

304341
description_R = "DESCRIPTION"
305342
if not self.binder_dir and os.path.exists(description_R):

0 commit comments

Comments
 (0)