Skip to content

Commit f346f0a

Browse files
authored
Migrate Python linters from ast (standard library) to astroid package (#1835)
## Changes

Migrate Python linters from ast to astroid

Implement minimal inference

### Linked issues

Progresses #1205

### Functionality

- [ ] added relevant user documentation
- [ ] added new CLI command
- [ ] modified existing command: `databricks labs ucx ...`
- [ ] added a new workflow
- [ ] modified existing workflow: `...`
- [ ] added a new table
- [ ] modified existing table: `...`

### Tests

- [ ] manually tested
- [x] added unit tests
- [ ] added integration tests
- [ ] verified on staging environment (screenshot attached)

---------

Co-authored-by: Eric Vergnaud <[email protected]>
1 parent 20474c3 commit f346f0a

22 files changed

+526
-449
lines changed

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,8 @@ dependencies = ["databricks-sdk>=0.27,<0.29",
4848
"databricks-labs-lsql~=0.4.0",
4949
"databricks-labs-blueprint>=0.6.0",
5050
"PyYAML>=6.0.0,<7.0.0",
51-
"sqlglot>=23.9,<24.2"]
51+
"sqlglot>=23.9,<24.2",
52+
"astroid>=3.2.2"]
5253

5354
[project.entry-points.databricks]
5455
runtime = "databricks.labs.ucx.runtime:main"
@@ -65,7 +66,7 @@ dependencies = [
6566
"black~=24.3.0",
6667
"coverage[toml]~=7.4.4",
6768
"mypy~=1.9.0",
68-
"pylint~=3.1.0",
69+
"pylint~=3.2.2",
6970
"pylint-pytest==2.0.0a0",
7071
"databricks-labs-pylint~=0.4.0",
7172
"pytest~=8.1.0",

src/databricks/labs/ucx/source_code/graph.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,10 @@
1111
from databricks.labs.ucx.source_code.linters.imports import (
1212
ASTLinter,
1313
DbutilsLinter,
14-
SysPathChange,
15-
NotebookRunCall,
1614
ImportSource,
1715
NodeBase,
16+
NotebookRunCall,
17+
SysPathChange,
1818
)
1919
from databricks.labs.ucx.source_code.path_lookup import PathLookup
2020

@@ -186,7 +186,7 @@ def _process_node(self, base_node: NodeBase):
186186
if isinstance(base_node, SysPathChange):
187187
self._mutate_path_lookup(base_node)
188188
if isinstance(base_node, NotebookRunCall):
189-
strpath = base_node.get_constant_path()
189+
strpath = base_node.get_notebook_path()
190190
if strpath is None:
191191
yield DependencyProblem('dependency-not-constant', "Can't check dependency not provided as a constant")
192192
else:
Lines changed: 31 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,39 @@
1-
import ast
1+
import logging
22

3+
from astroid import Attribute, Call, Name # type: ignore
34

4-
class AstHelper:
5-
@staticmethod
6-
def get_full_attribute_name(node: ast.Attribute) -> str:
7-
return AstHelper._get_value(node)
5+
logger = logging.getLogger(__file__)
86

9-
@staticmethod
10-
def get_full_function_name(node: ast.Call) -> str | None:
11-
if isinstance(node.func, ast.Attribute):
12-
return AstHelper._get_value(node.func)
7+
missing_handlers: set[str] = set()
138

14-
if isinstance(node.func, ast.Name):
15-
return node.func.id
169

10+
class AstHelper:
11+
@classmethod
12+
def get_full_attribute_name(cls, node: Attribute) -> str:
13+
return cls._get_attribute_value(node)
14+
15+
@classmethod
16+
def get_full_function_name(cls, node: Call) -> str | None:
17+
if not isinstance(node, Call):
18+
return None
19+
if isinstance(node.func, Attribute):
20+
return cls._get_attribute_value(node.func)
21+
if isinstance(node.func, Name):
22+
return node.func.name
1723
return None
1824

19-
@staticmethod
20-
def _get_value(node: ast.Attribute):
21-
if isinstance(node.value, ast.Name):
22-
return node.value.id + '.' + node.attr
23-
24-
if isinstance(node.value, ast.Attribute):
25-
value = AstHelper._get_value(node.value)
26-
if not value:
27-
return None
28-
return value + '.' + node.attr
29-
25+
@classmethod
26+
def _get_attribute_value(cls, node: Attribute):
27+
if isinstance(node.expr, Name):
28+
return node.expr.name + '.' + node.attrname
29+
if isinstance(node.expr, Attribute):
30+
parent = cls._get_attribute_value(node.expr)
31+
return node.attrname if parent is None else parent + '.' + node.attrname
32+
if isinstance(node.expr, Call):
33+
name = cls.get_full_function_name(node.expr)
34+
return node.attrname if name is None else name + '.' + node.attrname
35+
name = type(node.expr).__name__
36+
if name not in missing_handlers:
37+
missing_handlers.add(name)
38+
logger.debug(f"Missing handler for {name}")
3039
return None

src/databricks/labs/ucx/source_code/linters/context.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,14 @@
11
from databricks.sdk.service.workspace import Language
22

33
from databricks.labs.ucx.hive_metastore.migration_status import MigrationIndex
4-
from databricks.labs.ucx.source_code.base import CurrentSessionState, SequentialLinter, Fixer, Linter
4+
from databricks.labs.ucx.source_code.base import Fixer, Linter, SequentialLinter, CurrentSessionState
55
from databricks.labs.ucx.source_code.linters.dbfs import FromDbfsFolder, DBFSUsageLinter
66
from databricks.labs.ucx.source_code.linters.imports import DbutilsLinter
7+
78
from databricks.labs.ucx.source_code.linters.pyspark import SparkSql
8-
from databricks.labs.ucx.source_code.queries import FromTable
99
from databricks.labs.ucx.source_code.linters.spark_connect import SparkConnectLinter
1010
from databricks.labs.ucx.source_code.linters.table_creation import DBRv8d0Linter
11+
from databricks.labs.ucx.source_code.queries import FromTable
1112

1213

1314
class LinterContext:

src/databricks/labs/ucx/source_code/linters/dbfs.py

Lines changed: 17 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,14 @@
1-
import ast
21
from collections.abc import Iterable
32

3+
from astroid import Call, Const # type: ignore
44
import sqlglot
55
from sqlglot.expressions import Table
66

77
from databricks.labs.ucx.source_code.base import Advice, Linter, Advisory, Deprecation
8+
from databricks.labs.ucx.source_code.linters.imports import Visitor, ASTLinter
89

910

10-
class DetectDbfsVisitor(ast.NodeVisitor):
11+
class DetectDbfsVisitor(Visitor):
1112
"""
1213
Visitor that detects file system paths in Python code and checks them
1314
against a list of known deprecated paths.
@@ -18,44 +19,44 @@ def __init__(self):
1819
self._fs_prefixes = ["/dbfs/mnt", "dbfs:/", "/mnt/"]
1920
self._reported_locations = set() # Set to store reported locations
2021

21-
def visit_Call(self, node):
22+
def visit_call(self, node: Call):
2223
for arg in node.args:
23-
if isinstance(arg, (ast.Str, ast.Constant)) and isinstance(arg.s, str):
24-
if any(arg.s.startswith(prefix) for prefix in self._fs_prefixes):
24+
if isinstance(arg, Const) and isinstance(arg.value, str):
25+
value = arg.value
26+
if any(value.startswith(prefix) for prefix in self._fs_prefixes):
2527
self._advices.append(
2628
Deprecation(
2729
code='dbfs-usage',
28-
message=f"Deprecated file system path in call to: {arg.s}",
30+
message=f"Deprecated file system path in call to: {value}",
2931
start_line=arg.lineno,
3032
start_col=arg.col_offset,
3133
end_line=arg.lineno,
32-
end_col=arg.col_offset + len(arg.s),
34+
end_col=arg.col_offset + len(value),
3335
)
3436
)
3537
# Record the location of the reported constant, so we do not double report
3638
self._reported_locations.add((arg.lineno, arg.col_offset))
37-
self.generic_visit(node)
3839

39-
def visit_Constant(self, node):
40+
def visit_const(self, node: Const):
4041
# Constant strings yield Advisories
4142
if isinstance(node.value, str):
4243
self._check_str_constant(node)
4344

44-
def _check_str_constant(self, node):
45+
def _check_str_constant(self, node: Const):
4546
# Check if the location has been reported before
4647
if (node.lineno, node.col_offset) not in self._reported_locations:
47-
if any(node.s.startswith(prefix) for prefix in self._fs_prefixes):
48+
value = node.value
49+
if any(value.startswith(prefix) for prefix in self._fs_prefixes):
4850
self._advices.append(
4951
Advisory(
5052
code='dbfs-usage',
51-
message=f"Possible deprecated file system path: {node.s}",
53+
message=f"Possible deprecated file system path: {value}",
5254
start_line=node.lineno,
5355
start_col=node.col_offset,
5456
end_line=node.lineno,
55-
end_col=node.col_offset + len(node.s),
57+
end_col=node.col_offset + len(value),
5658
)
5759
)
58-
self.generic_visit(node)
5960

6061
def get_advices(self) -> Iterable[Advice]:
6162
yield from self._advices
@@ -76,9 +77,9 @@ def lint(self, code: str) -> Iterable[Advice]:
7677
"""
7778
Lints the code looking for file system paths that are deprecated
7879
"""
79-
tree = ast.parse(code)
80+
linter = ASTLinter.parse(code)
8081
visitor = DetectDbfsVisitor()
81-
visitor.visit(tree)
82+
visitor.visit(linter.root)
8283
yield from visitor.get_advices()
8384

8485

0 commit comments

Comments
 (0)