Skip to content

Commit 6c5c09b

Browse files
authored
Merge pull request #743 from minrk/conda-env-first-again-again
[MRG] preassembly for conda/python
2 parents 74af2ad + 930e907 commit 6c5c09b

File tree

6 files changed

+218
-49
lines changed

6 files changed

+218
-49
lines changed

repo2docker/buildpacks/base.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,11 @@
132132
{% endfor -%}
133133
{% endif -%}
134134
135+
{% if preassemble_script_directives -%}
136+
USER root
137+
RUN chown -R ${NB_USER}:${NB_USER} ${REPO_DIR}
138+
{% endif -%}
139+
135140
{% for sd in preassemble_script_directives -%}
136141
{{ sd }}
137142
{% endfor %}
@@ -711,12 +716,8 @@ def get_preassemble_scripts(self):
711716
except FileNotFoundError:
712717
pass
713718

714-
return scripts
715-
716-
def get_assemble_scripts(self):
717-
assemble_scripts = []
718719
if "py" in self.stencila_contexts:
719-
assemble_scripts.extend(
720+
scripts.extend(
720721
[
721722
(
722723
"${NB_USER}",
@@ -728,7 +729,7 @@ def get_assemble_scripts(self):
728729
]
729730
)
730731
if self.stencila_manifest_dir:
731-
assemble_scripts.extend(
732+
scripts.extend(
732733
[
733734
(
734735
"${NB_USER}",
@@ -741,7 +742,11 @@ def get_assemble_scripts(self):
741742
)
742743
]
743744
)
744-
return assemble_scripts
745+
return scripts
746+
747+
def get_assemble_scripts(self):
748+
"""Return directives to run after the entire repository has been added to the image"""
749+
return []
745750

746751
def get_post_build_scripts(self):
747752
post_build = self.binder_path("postBuild")

repo2docker/buildpacks/conda/__init__.py

Lines changed: 83 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from ruamel.yaml import YAML
77

88
from ..base import BaseImage
9+
from ...utils import is_local_pip_requirement
910

1011
# pattern for parsing conda dependency line
1112
PYTHON_REGEX = re.compile(r"python\s*=+\s*([\d\.]*)")
@@ -127,6 +128,50 @@ def get_build_script_files(self):
127128
files.update(super().get_build_script_files())
128129
return files
129130

131+
_environment_yaml = None
132+
133+
@property
134+
def environment_yaml(self):
135+
if self._environment_yaml is not None:
136+
return self._environment_yaml
137+
138+
environment_yml = self.binder_path("environment.yml")
139+
if not os.path.exists(environment_yml):
140+
self._environment_yaml = {}
141+
return self._environment_yaml
142+
143+
with open(environment_yml) as f:
144+
env = YAML().load(f)
145+
# check if the env file is empty, if so instantiate an empty dictionary.
146+
if env is None:
147+
env = {}
148+
# check if the env file provided a dict-like thing not a list or other data structure.
149+
if not isinstance(env, Mapping):
150+
raise TypeError(
151+
"environment.yml should contain a dictionary. Got %r" % type(env)
152+
)
153+
self._environment_yaml = env
154+
155+
return self._environment_yaml
156+
157+
@property
158+
def _should_preassemble_env(self):
159+
"""Check for local pip requirements in environment.yaml
160+
161+
If there are any local references, e.g. `-e .`,
162+
stage the whole repo prior to installation.
163+
"""
164+
dependencies = self.environment_yaml.get("dependencies", [])
165+
pip_requirements = None
166+
for dep in dependencies:
167+
if isinstance(dep, dict) and dep.get("pip"):
168+
pip_requirements = dep["pip"]
169+
if isinstance(pip_requirements, list):
170+
for line in pip_requirements:
171+
if is_local_pip_requirement(line):
172+
return False
173+
return True
174+
130175
@property
131176
def python_version(self):
132177
"""Detect the Python version for a given `environment.yml`
@@ -135,31 +180,17 @@ def python_version(self):
135180
or a Falsy empty string '' if not found.
136181
137182
"""
138-
environment_yml = self.binder_path("environment.yml")
139-
if not os.path.exists(environment_yml):
140-
return ""
141-
142183
if not hasattr(self, "_python_version"):
143184
py_version = None
144-
with open(environment_yml) as f:
145-
env = YAML().load(f)
146-
# check if the env file is empty, if so instantiate an empty dictionary.
147-
if env is None:
148-
env = {}
149-
# check if the env file provided a dick-like thing not a list or other data structure.
150-
if not isinstance(env, Mapping):
151-
raise TypeError(
152-
"environment.yml should contain a dictionary. Got %r"
153-
% type(env)
154-
)
155-
for dep in env.get("dependencies", []):
156-
if not isinstance(dep, str):
157-
continue
158-
match = PYTHON_REGEX.match(dep)
159-
if not match:
160-
continue
161-
py_version = match.group(1)
162-
break
185+
env = self.environment_yaml
186+
for dep in env.get("dependencies", []):
187+
if not isinstance(dep, str):
188+
continue
189+
match = PYTHON_REGEX.match(dep)
190+
if not match:
191+
continue
192+
py_version = match.group(1)
193+
break
163194

164195
# extract major.minor
165196
if py_version:
@@ -178,14 +209,27 @@ def py2(self):
178209
"""Am I building a Python 2 kernel environment?"""
179210
return self.python_version and self.python_version.split(".")[0] == "2"
180211

181-
def get_assemble_scripts(self):
212+
def get_preassemble_script_files(self):
213+
"""preassembly only requires environment.yml
214+
215+
enables caching assembly result even when
216+
repo contents change
217+
"""
218+
assemble_files = super().get_preassemble_script_files()
219+
if self._should_preassemble_env:
220+
environment_yml = self.binder_path("environment.yml")
221+
if os.path.exists(environment_yml):
222+
assemble_files[environment_yml] = environment_yml
223+
return assemble_files
224+
225+
def get_env_scripts(self):
182226
"""Return series of build-steps specific to this source repository.
183227
"""
184-
assembly_scripts = []
228+
scripts = []
185229
environment_yml = self.binder_path("environment.yml")
186230
env_prefix = "${KERNEL_PYTHON_PREFIX}" if self.py2 else "${NB_PYTHON_PREFIX}"
187231
if os.path.exists(environment_yml):
188-
assembly_scripts.append(
232+
scripts.append(
189233
(
190234
"${NB_USER}",
191235
r"""
@@ -197,7 +241,19 @@ def get_assemble_scripts(self):
197241
),
198242
)
199243
)
200-
return super().get_assemble_scripts() + assembly_scripts
244+
return scripts
245+
246+
def get_preassemble_scripts(self):
247+
scripts = super().get_preassemble_scripts()
248+
if self._should_preassemble_env:
249+
scripts.extend(self.get_env_scripts())
250+
return scripts
251+
252+
def get_assemble_scripts(self):
253+
scripts = super().get_assemble_scripts()
254+
if not self._should_preassemble_env:
255+
scripts.extend(self.get_env_scripts())
256+
return scripts
201257

202258
def detect(self):
203259
"""Check if current repo should be built with the Conda BuildPack.

repo2docker/buildpacks/pipfile/__init__.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,24 @@ def python_version(self):
7474
self._python_version = self.major_pythons["3"]
7575
return self._python_version
7676

77+
def get_preassemble_script_files(self):
78+
"""Return files needed for preassembly"""
79+
files = super().get_preassemble_script_files()
80+
for name in ("requirements3.txt", "Pipfile", "Pipfile.lock"):
81+
path = self.binder_path(name)
82+
if os.path.exists(path):
83+
files[path] = path
84+
return files
85+
86+
def get_preassemble_scripts(self):
87+
"""scripts to run prior to staging the repo contents"""
88+
scripts = super().get_preassemble_scripts()
89+
# install pipenv to install dependencies within Pipfile.lock or Pipfile
90+
scripts.append(
91+
("${NB_USER}", "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26")
92+
)
93+
return scripts
94+
7795
def get_assemble_scripts(self):
7896
"""Return series of build-steps specific to this repository.
7997
"""
@@ -113,11 +131,6 @@ def get_assemble_scripts(self):
113131
# my_package_example = {path=".", editable=true}
114132
working_directory = self.binder_dir or "."
115133

116-
# install pipenv to install dependencies within Pipfile.lock or Pipfile
117-
assemble_scripts.append(
118-
("${NB_USER}", "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26")
119-
)
120-
121134
# NOTES:
122135
# - Without prioritizing the PATH to KERNEL_PYTHON_PREFIX over
123136
# NB_SERVER_PYTHON_PREFIX, 'pipenv' draws the wrong conclusion about

repo2docker/buildpacks/python/__init__.py

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33

44
from ..conda import CondaBuildPack
5+
from ...utils import is_local_pip_requirement
56

67

78
class PythonBuildPack(CondaBuildPack):
@@ -34,24 +35,22 @@ def python_version(self):
3435
self._python_version = py_version
3536
return self._python_version
3637

37-
def get_assemble_scripts(self):
38-
"""Return series of build-steps specific to this repository.
38+
def _get_pip_scripts(self):
39+
"""Get pip install scripts
40+
41+
added to preassemble unless local references are found,
42+
in which case this happens in assemble.
3943
"""
40-
# If we have a runtime.txt & that's set to python-2.7,
41-
# requirements.txt will be installed in the *kernel* env
42-
# and requirements3.txt (if it exists)
43-
# will be installed in the python 3 notebook server env.
44-
assemble_scripts = super().get_assemble_scripts()
45-
setup_py = "setup.py"
4644
# KERNEL_PYTHON_PREFIX is the env with the kernel,
4745
# whether it's distinct from the notebook or the same.
4846
pip = "${KERNEL_PYTHON_PREFIX}/bin/pip"
47+
scripts = []
4948
if self.py2:
5049
# using python 2 kernel,
5150
# requirements3.txt allows installation in the notebook server env
5251
nb_requirements_file = self.binder_path("requirements3.txt")
5352
if os.path.exists(nb_requirements_file):
54-
assemble_scripts.append(
53+
scripts.append(
5554
(
5655
"${NB_USER}",
5756
# want the $NB_PYHTON_PREFIX environment variable, not for
@@ -65,12 +64,65 @@ def get_assemble_scripts(self):
6564
# install requirements.txt in the kernel env
6665
requirements_file = self.binder_path("requirements.txt")
6766
if os.path.exists(requirements_file):
68-
assemble_scripts.append(
67+
scripts.append(
6968
(
7069
"${NB_USER}",
7170
'{} install --no-cache-dir -r "{}"'.format(pip, requirements_file),
7271
)
7372
)
73+
return scripts
74+
75+
@property
76+
def _should_preassemble_pip(self):
77+
"""Peek in requirements.txt to determine if we can assemble from only env files
78+
79+
If there are any local references, e.g. `-e .`,
80+
stage the whole repo prior to installation.
81+
"""
82+
if not os.path.exists("binder") and os.path.exists("setup.py"):
83+
# can't install from subset if we're using setup.py
84+
return False
85+
for name in ("requirements.txt", "requirements3.txt"):
86+
requirements_txt = self.binder_path(name)
87+
if not os.path.exists(requirements_txt):
88+
continue
89+
with open(requirements_txt) as f:
90+
for line in f:
91+
if is_local_pip_requirement(line):
92+
return False
93+
94+
# didn't find any local references,
95+
# allow assembly from subset
96+
return True
97+
98+
def get_preassemble_script_files(self):
99+
assemble_files = super().get_preassemble_script_files()
100+
for name in ("requirements.txt", "requirements3.txt"):
101+
requirements_txt = self.binder_path(name)
102+
if os.path.exists(requirements_txt):
103+
assemble_files[requirements_txt] = requirements_txt
104+
return assemble_files
105+
106+
def get_preassemble_scripts(self):
107+
"""Return scripts to run before adding the full repository"""
108+
scripts = super().get_preassemble_scripts()
109+
if self._should_preassemble_pip:
110+
scripts.extend(self._get_pip_scripts())
111+
return scripts
112+
113+
def get_assemble_scripts(self):
114+
"""Return series of build steps that require the full repository"""
115+
# If we have a runtime.txt & that's set to python-2.7,
116+
# requirements.txt will be installed in the *kernel* env
117+
# and requirements3.txt (if it exists)
118+
# will be installed in the python 3 notebook server env.
119+
assemble_scripts = super().get_assemble_scripts()
120+
setup_py = "setup.py"
121+
# KERNEL_PYTHON_PREFIX is the env with the kernel,
122+
# whether it's distinct from the notebook or the same.
123+
pip = "${KERNEL_PYTHON_PREFIX}/bin/pip"
124+
if not self._should_preassemble_pip:
125+
assemble_scripts.extend(self._get_pip_scripts())
74126

75127
# setup.py exists *and* binder dir is not used
76128
if not self.binder_dir and os.path.exists(setup_py):

repo2docker/utils.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,3 +431,29 @@ def normalize_doi(val):
431431
(e.g. https://doi.org/10.1234/jshd123)"""
432432
m = doi_regexp.match(val)
433433
return m.group(2)
434+
435+
436+
def is_local_pip_requirement(line):
437+
"""Return whether a pip requirement (e.g. in requirements.txt file) references a local file"""
438+
# trim comments and skip empty lines
439+
line = line.split("#", 1)[0].strip()
440+
if not line:
441+
return False
442+
if line.startswith(("-r", "-c")):
443+
# local -r or -c references break isolation
444+
return True
445+
# strip off `-e, etc.`
446+
if line.startswith("-"):
447+
line = line.split(None, 1)[1]
448+
if "file://" in line:
449+
# file references break isolation
450+
return True
451+
if "://" in line:
452+
# handle git://../local/file
453+
path = line.split("://", 1)[1]
454+
else:
455+
path = line
456+
if path.startswith("."):
457+
# references a local file
458+
return True
459+
return False

tests/unit/test_utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,20 @@ def test_normalize_doi():
110110
assert utils.normalize_doi("http://doi.org/10.1234/jshd123") == "10.1234/jshd123"
111111
assert utils.normalize_doi("https://doi.org/10.1234/jshd123") == "10.1234/jshd123"
112112
assert utils.normalize_doi("http://dx.doi.org/10.1234/jshd123") == "10.1234/jshd123"
113+
114+
115+
@pytest.mark.parametrize(
116+
"req, is_local",
117+
[
118+
("-r requirements.txt", True),
119+
("-e .", True),
120+
("file://subdir", True),
121+
("file://./subdir", True),
122+
("git://github.com/jupyter/repo2docker", False),
123+
("git+https://github.com/jupyter/repo2docker", False),
124+
("numpy", False),
125+
("# -e .", False),
126+
],
127+
)
128+
def test_local_pip_requirement(req, is_local):
129+
assert utils.is_local_pip_requirement(req) == is_local

0 commit comments

Comments
 (0)