Skip to content

Commit d7a295f

Browse files
authored
Improve make known devex (#1820)
## Changes

Improve the developer experience (devex) around `known.json`:

- Improve logging so that it's clearer what `make known` is doing, in particular when there aren't any new distributions to scan.
- Handle building from scratch when `known.json` has been removed.
- Only rewrite the `known.json` file if it has changed.
- Use `importlib` instead of `pkgutil` when loading `known.json` (the latter is deprecated from Python 3.12).

### Functionality

- [ ] added relevant user documentation
- [ ] added new CLI command
- [ ] modified existing command: `databricks labs ucx ...`
- [ ] added a new workflow
- [ ] modified existing workflow: `...`
- [ ] added a new table
- [ ] modified existing table: `...`

### Tests

<!-- How is this tested? Please see the checklist below and also describe any other relevant tests -->

- [X] manually tested
- [X] added unit tests
- [ ] added integration tests
- [ ] verified on staging environment (screenshot attached)
1 parent 2193127 commit d7a295f

File tree

2 files changed

+33
-5
lines changed

2 files changed

+33
-5
lines changed

src/databricks/labs/ucx/source_code/known.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,24 @@ def _cleanup_name(name):
103103
def rebuild(cls, root: Path):
104104
"""rebuild the known.json file by analyzing the source code of installed libraries. Invoked by `make known`."""
105105
path_lookup = PathLookup.from_sys_path(root)
106-
known_distributions = cls._get_known()
106+
try:
107+
known_distributions = cls._get_known()
108+
logger.info("Scanning for newly installed distributions...")
109+
except FileNotFoundError:
110+
logger.info("No known distributions found; scanning all distributions...")
111+
known_distributions = {}
112+
updated_distributions = known_distributions.copy()
107113
for library_root in path_lookup.library_roots:
108114
for dist_info_folder in library_root.glob("*.dist-info"):
109-
cls._analyze_dist_info(dist_info_folder, known_distributions, library_root)
110-
known_json = Path(__file__).parent / "known.json"
111-
with known_json.open('w') as f:
112-
json.dump(dict(sorted(known_distributions.items())), f, indent=2)
115+
cls._analyze_dist_info(dist_info_folder, updated_distributions, library_root)
116+
updated_distributions = dict(sorted(updated_distributions.items()))
117+
if known_distributions == updated_distributions:
118+
logger.info("No new distributions found.")
119+
else:
120+
known_json = Path(__file__).parent / "known.json"
121+
with known_json.open('w') as f:
122+
json.dump(updated_distributions, f, indent=2)
123+
logger.info(f"Updated known distributions: {known_json.relative_to(Path.cwd())}")
113124

114125
@classmethod
115126
def _analyze_dist_info(cls, dist_info_folder, known_distributions, library_root):

tests/unit/source_code/test_known.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
from pathlib import Path
2+
from unittest import mock
3+
4+
import pytest
25

36
from databricks.labs.ucx.source_code.known import Whitelist
47

@@ -32,5 +35,19 @@ def test_checks_library_compatibility():
3235

3336

3437
def test_loads_known_json():
38+
known_json = Whitelist._get_known() # pylint: disable=protected-access
39+
assert known_json is not None and len(known_json) > 0
40+
41+
42+
def test_error_on_missing_known_json():
43+
with (
44+
mock.patch("pkgutil.get_data", side_effect=FileNotFoundError("simulate missing file")),
45+
pytest.raises(FileNotFoundError),
46+
):
47+
Whitelist._get_known() # pylint: disable=protected-access
48+
49+
50+
def test_rebuild_trivial():
51+
# No-op: the known.json file is already up-to-date
3552
cwd = Path.cwd()
3653
Whitelist.rebuild(cwd)

0 commit comments

Comments
 (0)