Skip to content

Commit 85edcfe

Browse files
authored
Handle installing libraries multiple times in PipResolver (#3024)
## Changes Handle installing libraries multiple times in `PipResolver`: - ### Linked issues Resolves #3022 Resolves #3023 ### Functionality - [x] modified code linting parts ### Tests - [x] added integration tests
1 parent 5a5e825 commit 85edcfe

File tree

3 files changed

+61
-6
lines changed

3 files changed

+61
-6
lines changed

src/databricks/labs/ucx/source_code/python_libraries.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323

2424
class PythonLibraryResolver(LibraryResolver):
25-
# TODO: https://github.com/databrickslabs/ucx/issues/1640
25+
"""Resolve python libraries by registering and installing Python libraries."""
2626

2727
def __init__(
2828
self,
@@ -61,13 +61,22 @@ def _install_library(self, path_lookup: PathLookup, *libraries: str) -> list[Dep
6161
return self._install_egg(*resolved_libraries)
6262
return self._install_pip(*resolved_libraries)
6363

64-
@staticmethod
65-
def _resolve_libraries(path_lookup: PathLookup, *libraries: str) -> list[str]:
66-
# Resolve relative pip installs from notebooks: %pip install ../../foo.whl
64+
def _resolve_libraries(self, path_lookup: PathLookup, *libraries: str) -> list[str]:
65+
"""Resolve pip installs as library (pip install databricks-labs-ucx) or as path (pip install ../../ucx.whl).
66+
67+
A library is defined as library, i.e. *not* as path, when the library:
68+
1. Is not found in the path lookup.
69+
2. Is not located in the temporary virtual environment where this resolver install libraries. If this is the
70+
case, it signals the library is installed for a second time.
71+
72+
Otherwise, we consider the library to be a path.
73+
74+
Note: The above works given our design choice to *only* build dependency graphs when all dependencies are found.
75+
"""
6776
libs = []
6877
for library in libraries:
6978
maybe_library = path_lookup.resolve(Path(library))
70-
if maybe_library is None:
79+
if maybe_library is None or maybe_library.is_relative_to(self._temporary_virtual_environment):
7180
libs.append(library)
7281
else:
7382
libs.append(maybe_library.as_posix())
@@ -81,9 +90,11 @@ def _install_pip(self, *libraries: str) -> list[DependencyProblem]:
8190
*libraries,
8291
"-t",
8392
str(self._temporary_virtual_environment),
93+
"--upgrade", # Upgrades when library already installed
8494
]
8595
return_code, stdout, stderr = self._runner(args)
86-
logger.debug(f"pip output:\n{stdout}\n{stderr}")
96+
if stdout or stderr:
97+
logger.debug(f"pip output:\n{stdout}\n{stderr}")
8798
if return_code != 0:
8899
command = " ".join(args)
89100
problem = DependencyProblem("library-install-failed", f"'{command}' failed with '{stderr}'")

tests/integration/source_code/test_libraries.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
because it uses the context and the time it takes to run the test.
44
"""
55

6+
import logging
7+
import re
68
from pathlib import Path
79

810
import pytest
@@ -56,3 +58,32 @@ def test_build_notebook_dependency_graphs_fails_installing_when_spaces(simple_ct
5658

5759
assert not maybe.problems
5860
assert maybe.graph.all_relative_names() == {f"{notebook}.py", "thingy/__init__.py"}
61+
62+
63+
def test_build_notebook_dependency_graphs_when_installing_pytest_twice(caplog, simple_ctx) -> None:
64+
pip_already_exists_warning = re.compile(
65+
r".*WARNING: Target directory .+ already exists\. Specify --upgrade to force replacement.*"
66+
)
67+
ctx = simple_ctx.replace(path_lookup=MockPathLookup())
68+
with caplog.at_level(logging.DEBUG, logger="databricks.labs.ucx.source_code.python_libraries"):
69+
maybe = ctx.dependency_resolver.build_notebook_dependency_graph(
70+
Path("pip_install_pytest_twice"), CurrentSessionState()
71+
)
72+
assert not maybe.problems
73+
assert maybe.graph.path_lookup.resolve(Path("pytest"))
74+
assert not pip_already_exists_warning.match(caplog.text.replace("\n", " ")), "Pip already exists warning detected"
75+
76+
77+
@pytest.mark.parametrize(
78+
"notebook",
79+
(
80+
"pip_install_demo_wheel",
81+
"pip_install_multiple_packages",
82+
"pip_install_pytest_with_index_url",
83+
),
84+
)
85+
def test_build_notebook_dependency_graphs_when_installing_notebooks_twice(caplog, simple_ctx, notebook) -> None:
86+
ctx = simple_ctx.replace(path_lookup=MockPathLookup())
87+
for _ in range(2):
88+
maybe = ctx.dependency_resolver.build_notebook_dependency_graph(Path(notebook), CurrentSessionState())
89+
assert not maybe.problems
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Databricks notebook source
2+
3+
# COMMAND ----------
4+
5+
# MAGIC %pip install pytest --quiet
6+
7+
# COMMAND ----------
8+
9+
# MAGIC %pip install pytest --quiet
10+
11+
# COMMAND ----------
12+
13+
import pytest

0 commit comments

Comments
 (0)