Skip to content

Commit 27500c2

Browse files
authored
Added initial version of databricks labs ucx migrate-local-code command (#1067)
The `databricks labs ucx migrate-local-code` command has been added to facilitate migration of local code to a Databricks environment. This initial version of the command is highly experimental, with support for migrating Python and SQL files only. The `.gitignore` file has been updated to exclude output files and specific configuration files from being committed to the repository. This command aims to help users and administrators manage code migration and maintain consistency across workspaces, while also enhancing the compatibility of local code with the Unity Catalog, a part of Databricks' offerings for data and AI.
1 parent 40b454c commit 27500c2

23 files changed

+1103
-5
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,4 +151,5 @@ dev/cleanup.py
151151

152152
.python-version
153153
.databricks-login.json
154-
*.out
154+
*.out
155+
foo

README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ See [contributing instructions](CONTRIBUTING.md) to help improve this project.
4848
* [`create-catalogs-schemas` command](#create-catalogs-schemas-command)
4949
* [`move` command](#move-command)
5050
* [`alias` command](#alias-command)
51+
* [Code migration commands](#code-migration-commands)
52+
* [`migrate-local-code` command](#migrate-local-code-command)
5153
* [Cross-workspace installations](#cross-workspace-installations)
5254
* [`sync-workspace-info` command](#sync-workspace-info-command)
5355
* [`manual-workspace-info` command](#manual-workspace-info-command)
@@ -625,6 +627,23 @@ It can also be used to debug issues related to table aliasing.
625627

626628
[[back to top](#databricks-labs-ucx)]
627629

630+
# Code migration commands
631+
632+
[[back to top](#databricks-labs-ucx)]
633+
634+
## `migrate-local-code` command
635+
636+
```text
637+
databricks labs ucx migrate-local-code
638+
```
639+
640+
**(Experimental)** Once [table migration](#table-migration-commands) is complete, you can run this command to
641+
migrate all Python and SQL files in the current working directory. This command is highly experimental and
642+
at the moment only supports Python and SQL files and discards code comments and formatting during
643+
the automated transformation process.
644+
645+
[[back to top](#databricks-labs-ucx)]
646+
628647
# Cross-workspace installations
629648

630649
When installing UCX across multiple workspaces, administrators need to keep UCX configurations in sync.

labs.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,3 +149,6 @@ commands:
149149

150150
- name: revert-cluster-remap
151151
description: Reverting the Re-mapping of the cluster from UC
152+
153+
- name: migrate-local-code
154+
description: (Experimental) Migrate files in the current directory to be more compatible with Unity Catalog.

pyproject.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,12 @@ branch = true
154154
parallel = true
155155

156156
[tool.coverage.report]
157-
omit = ["src/databricks/labs/ucx/mixins/*", "*/working-copy/*", "*/fresh_wheel_file/*"]
157+
omit = [
158+
"src/databricks/labs/ucx/mixins/*",
159+
"src/databricks/labs/ucx/code/lsp.py",
160+
"*/working-copy/*",
161+
"*/fresh_wheel_file/*"
162+
]
158163
exclude_lines = [
159164
"no cov",
160165
"if __name__ == .__main__.:",

src/databricks/labs/ucx/cli.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import shutil
44
import webbrowser
55
from collections.abc import Callable
6+
from pathlib import Path
67

78
from databricks.labs.blueprint.cli import App
89
from databricks.labs.blueprint.entrypoint import get_logger
@@ -19,6 +20,7 @@
1920
from databricks.labs.ucx.azure.access import AzureResourcePermissions
2021
from databricks.labs.ucx.azure.credentials import ServicePrincipalMigration
2122
from databricks.labs.ucx.azure.locations import ExternalLocationsMigration
23+
from databricks.labs.ucx.code.files import Files
2224
from databricks.labs.ucx.config import WorkspaceConfig
2325
from databricks.labs.ucx.hive_metastore import ExternalLocations, TablesCrawler
2426
from databricks.labs.ucx.hive_metastore.catalog_schema import CatalogSchema
@@ -547,5 +549,15 @@ def revert_cluster_remap(w: WorkspaceClient, prompts: Prompts):
547549
cluster_details.revert_cluster_remap(cluster_list, cluster_ids)
548550

549551

552+
@ucx.command
def migrate_local_code(w: WorkspaceClient, prompts: Prompts):
    """(Experimental) Apply Unity Catalog code fixes to the files under the current working directory.

    The user is asked to confirm before anything else happens. Only after confirmation is the
    `Files` helper built from the workspace client and applied to the current directory, so a
    declined prompt costs no workspace calls.
    """
    working_directory = Path.cwd()
    if not prompts.confirm("Do you want to apply UC migration to all files in the current directory?"):
        return
    # Built after the confirmation on purpose: constructing it is unnecessary when the user declines.
    files = Files.for_cli(w)
    files.apply(working_directory)
560+
561+
550562
if __name__ == "__main__":
551563
ucx()

src/databricks/labs/ucx/code/__init__.py

Whitespace-only changes.
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from abc import abstractmethod
2+
from collections.abc import Iterable
3+
from dataclasses import dataclass
4+
5+
# Code mapping between LSP, PyLint, and our own diagnostics:
6+
# | LSP | PyLint | Our |
7+
# |---------------------------|------------|----------------|
8+
# | Severity.ERROR | Error | Failure() |
9+
# | Severity.WARN | Warning | Advisory() |
10+
# | DiagnosticTag.DEPRECATED | Warning | Deprecation() |
11+
# | Severity.INFO | Info | Advice() |
12+
# | Severity.HINT | Convention | Convention() |
13+
# | DiagnosticTag.UNNECESSARY | Refactor | Convention() |
14+
15+
16+
@dataclass
17+
class Advice:
18+
code: str
19+
message: str
20+
start_line: int
21+
start_col: int
22+
end_line: int
23+
end_col: int
24+
25+
def replace(
26+
self,
27+
code: str | None = None,
28+
message: str | None = None,
29+
start_line: int | None = None,
30+
start_col: int | None = None,
31+
end_line: int | None = None,
32+
end_col: int | None = None,
33+
) -> 'Advice':
34+
return self.__class__(
35+
code=code if code is not None else self.code,
36+
message=message if message is not None else self.message,
37+
start_line=start_line if start_line is not None else self.start_line,
38+
start_col=start_col if start_col is not None else self.start_col,
39+
end_line=end_line if end_line is not None else self.end_line,
40+
end_col=end_col if end_col is not None else self.end_col,
41+
)
42+
43+
def as_advisory(self) -> 'Advisory':
44+
return Advisory(**self.__dict__)
45+
46+
def as_failure(self) -> 'Failure':
47+
return Failure(**self.__dict__)
48+
49+
def as_deprecation(self) -> 'Deprecation':
50+
return Deprecation(**self.__dict__)
51+
52+
def as_convention(self) -> 'Convention':
53+
return Convention(**self.__dict__)
54+
55+
56+
class Advisory(Advice):
    """Warning-level advice: the flagged code can still run."""
58+
59+
60+
class Failure(Advisory):
    """Error-level advisory: the flagged code is prevented from running."""
62+
63+
64+
class Deprecation(Advisory):
    """Advisory recommending that the code be replaced with a newer version."""
66+
67+
68+
class Convention(Advice):
    """Advice proposing a better way to write the code."""
70+
71+
72+
class Linter:
    """Interface for objects that inspect code and report `Advice`.

    NOTE(review): the class does not use ABCMeta, so `@abstractmethod` is not enforced
    when instantiating - confirm whether that is intended.
    """

    @abstractmethod
    def lint(self, code: str) -> Iterable[Advice]:
        """Return the advice found in `code`."""
75+
76+
77+
class Fixer:
    """Interface for objects that rewrite code.

    NOTE(review): the class does not use ABCMeta, so `@abstractmethod` is not enforced
    when instantiating - confirm whether that is intended.
    """

    @abstractmethod
    def name(self) -> str:
        """Return the name identifying this fixer."""

    @abstractmethod
    def apply(self, code: str) -> str:
        """Return the rewritten version of `code`."""
83+
84+
85+
class SequentialLinter(Linter):
    """Linter that runs a list of linters, one after another, over the same code."""

    def __init__(self, linters: list[Linter]):
        self._linters = linters

    def lint(self, code: str) -> Iterable[Advice]:
        """Yield the advice of every wrapped linter, in the order the linters were given."""
        for delegate in self._linters:
            yield from delegate.lint(code)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import logging
2+
from pathlib import Path
3+
4+
from databricks.sdk import WorkspaceClient
5+
from databricks.sdk.service.workspace import Language
6+
7+
from databricks.labs.ucx.code.languages import Languages
8+
from databricks.labs.ucx.hive_metastore.table_migrate import TablesMigrate
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
class Files:
    """Fixes local code files in place, choosing the linter and fixers by file extension."""

    def __init__(self, languages: Languages):
        self._languages = languages
        # Files with any other suffix are skipped by _apply_file_fix.
        self._extensions = {".py": Language.PYTHON, ".sql": Language.SQL}

    @classmethod
    def for_cli(cls, ws: WorkspaceClient):
        """Build an instance from a workspace client, using the table migration index."""
        tables_migrate = TablesMigrate.for_cli(ws)
        index = tables_migrate.index()
        languages = Languages(index)
        return cls(languages)

    def apply(self, path: Path) -> bool:
        """Fix a single file, or recurse into a directory and fix every entry below it.

        Returns True for a directory, regardless of whether any file in it changed.
        For a file, returns True only when the file was rewritten.
        """
        if path.is_dir():
            for child in path.iterdir():
                self.apply(child)
            return True
        return self._apply_file_fix(path)

    def _apply_file_fix(self, path: Path) -> bool:
        """Lint one file, apply every available fix, and overwrite the file if any fix ran.

        Returns False when the suffix is not supported or when no fixer matched any
        advice; in both cases the file is left untouched.
        """
        language = self._extensions.get(path.suffix)
        if language is None:
            return False
        logger.info(f"Analysing {path}")
        linter = self._languages.linter(language)
        # Explicit encoding: the platform default is locale-dependent and may not round-trip sources.
        with path.open("r", encoding="utf-8") as f:
            code = f.read()
        applied = False
        for advice in linter.lint(code):
            logger.info(f"Found: {advice}")
            fixer = self._languages.fixer(language, advice.code)
            if not fixer:
                continue
            logger.info(f"Applying fix for {advice}")
            code = fixer.apply(code)
            applied = True
        if not applied:
            return False
        with path.open("w", encoding="utf-8") as f:
            logger.info(f"Overwriting {path}")
            f.write(code)
        return True
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from databricks.sdk.service.workspace import Language
2+
3+
from databricks.labs.ucx.code.base import Fixer, Linter, SequentialLinter
4+
from databricks.labs.ucx.code.pyspark import SparkSql
5+
from databricks.labs.ucx.code.queries import FromTable
6+
from databricks.labs.ucx.hive_metastore.table_migrate import Index
7+
8+
9+
class Languages:
    """Registry of the linters and fixers available for each supported language."""

    def __init__(self, index: Index):
        self._index = index
        from_table = FromTable(index)
        self._linters = {
            Language.PYTHON: SequentialLinter([SparkSql(from_table)]),
            Language.SQL: SequentialLinter([from_table]),
        }
        self._fixers: dict[Language, list[Fixer]] = {
            Language.PYTHON: [SparkSql(from_table)],
            Language.SQL: [from_table],
        }

    def is_supported(self, language: Language) -> bool:
        """Return True when both a linter and fixers are registered for `language`."""
        return all(language in registry for registry in (self._linters, self._fixers))

    def linter(self, language: Language) -> Linter:
        """Return the linter registered for `language`.

        Raises:
            ValueError: if no linter is registered for `language`.
        """
        registered = self._linters.get(language)
        if registered is None:
            raise ValueError(f"Unsupported language: {language}")
        return registered

    def fixer(self, language: Language, diagnostic_code: str) -> Fixer | None:
        """Return the first fixer for `language` whose name equals `diagnostic_code`, or None."""
        candidates = self._fixers.get(language, [])
        return next((candidate for candidate in candidates if candidate.name() == diagnostic_code), None)

    def apply_fixes(self, language: Language, code: str) -> str:
        """Lint `code` and apply the matching fixer for each advice, returning the resulting code."""
        for advice in self.linter(language).lint(code):
            fixer = self.fixer(language, advice.code)
            if not fixer:
                continue
            code = fixer.apply(code)
        return code

0 commit comments

Comments
 (0)