Skip to content

Commit 2389444

Browse files
authored
Consistent 0-based line tracking for linters (#1855)
1 parent a867412 commit 2389444

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+639
-435
lines changed

src/databricks/labs/ucx/source_code/base.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
from dataclasses import dataclass
66
from pathlib import Path
77

8+
from astroid import NodeNG # type: ignore
9+
from typing_extensions import Self
10+
811

912
# Code mapping between LSP, PyLint, and our own diagnostics:
1013
# | LSP | PyLint | Our |
@@ -21,21 +24,22 @@
2124
class Advice:
2225
code: str
2326
message: str
27+
# Lines and columns are both 0-based: the first line is line 0.
2428
start_line: int
2529
start_col: int
2630
end_line: int
2731
end_col: int
2832

2933
def replace(
30-
self,
34+
self: Self,
3135
code: str | None = None,
3236
message: str | None = None,
3337
start_line: int | None = None,
3438
start_col: int | None = None,
3539
end_line: int | None = None,
3640
end_col: int | None = None,
37-
) -> 'Advice':
38-
return self.__class__(
41+
) -> Self:
42+
return type(self)(
3943
code=code if code is not None else self.code,
4044
message=message if message is not None else self.message,
4145
start_line=start_line if start_line is not None else self.start_line,
@@ -59,6 +63,27 @@ def as_convention(self) -> 'Convention':
5963
def for_path(self, path: Path) -> LocatedAdvice:
6064
return LocatedAdvice(self, path)
6165

66+
@classmethod
67+
def from_node(cls, code: str, message: str, node: NodeNG) -> Self:
68+
# Astroid lines are 1-based.
69+
return cls(
70+
code=code,
71+
message=message,
72+
start_line=(node.lineno or 1) - 1,
73+
start_col=node.col_offset or 0,
74+
end_line=(node.end_lineno or 1) - 1,
75+
end_col=node.end_col_offset,
76+
)
77+
78+
def replace_from_node(self, node: NodeNG) -> Self:
79+
# Astroid lines are 1-based.
80+
return self.replace(
81+
start_line=(node.lineno or 1) - 1,
82+
start_col=node.col_offset,
83+
end_line=(node.end_lineno or 1) - 1,
84+
end_col=node.end_col_offset,
85+
)
86+
6287

6388
@dataclass
6489
class LocatedAdvice:

src/databricks/labs/ucx/source_code/graph.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
from dataclasses import dataclass
55
from pathlib import Path
66
from collections.abc import Callable
7-
from typing import cast
8-
from astroid import ImportFrom # type: ignore
7+
8+
from astroid import ImportFrom, NodeNG # type: ignore
99

1010
from databricks.labs.ucx.source_code.base import Advisory
1111
from databricks.labs.ucx.source_code.linters.imports import (
@@ -161,20 +161,28 @@ def visit(self, visit_node: Callable[[DependencyGraph], bool | None], visited: s
161161
return False
162162

163163
def build_graph_from_python_source(self, python_code: str) -> list[DependencyProblem]:
164+
"""Check python code for dependency-related problems.
165+
166+
Returns:
167+
A list of dependency problems; position information is relative to the python code itself.
168+
"""
164169
problems: list[DependencyProblem] = []
165170
tree = Tree.parse(python_code)
166171
syspath_changes = SysPathChange.extract_from_tree(tree)
167172
run_calls = DbutilsLinter.list_dbutils_notebook_run_calls(tree)
168-
import_sources, import_problems = ImportSource.extract_from_tree(tree, DependencyProblem)
169-
problems.extend(cast(list[DependencyProblem], import_problems))
173+
import_sources: list[ImportSource]
174+
import_problems: list[DependencyProblem]
175+
import_sources, import_problems = ImportSource.extract_from_tree(tree, DependencyProblem.from_node)
176+
problems.extend(import_problems)
170177
nodes = syspath_changes + run_calls + import_sources
171178
# need to execute things in intertwined sequence so concat and sort
172-
for base_node in sorted(nodes, key=lambda node: node.node.lineno * 10000 + node.node.col_offset):
179+
for base_node in sorted(nodes, key=lambda node: (node.node.lineno, node.node.col_offset)):
173180
for problem in self._process_node(base_node):
181+
# Astroid line numbers are 1-based.
174182
problem = problem.replace(
175-
start_line=base_node.node.lineno,
183+
start_line=base_node.node.lineno - 1,
176184
start_col=base_node.node.col_offset,
177-
end_line=base_node.node.end_lineno or 0,
185+
end_line=(base_node.node.end_lineno or 1) - 1,
178186
end_col=base_node.node.end_col_offset or 0,
179187
)
180188
problems.append(problem)
@@ -386,6 +394,7 @@ class DependencyProblem:
386394
code: str
387395
message: str
388396
source_path: Path = Path(MISSING_SOURCE_PATH)
397+
# Lines and colums are both 0-based: the first line is line 0.
389398
start_line: int = -1
390399
start_col: int = -1
391400
end_line: int = -1
@@ -424,6 +433,18 @@ def as_advisory(self) -> 'Advisory':
424433
end_col=self.end_col,
425434
)
426435

436+
@staticmethod
437+
def from_node(code: str, message: str, node: NodeNG) -> DependencyProblem:
438+
# Astroid line numbers are 1-based.
439+
return DependencyProblem(
440+
code=code,
441+
message=message,
442+
start_line=(node.lineno or 1) - 1,
443+
start_col=node.col_offset,
444+
end_line=(node.end_lineno or 1) - 1,
445+
end_col=(node.end_col_offset),
446+
)
447+
427448

428449
@dataclass
429450
class MaybeGraph:

src/databricks/labs/ucx/source_code/linters/dbfs.py

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,17 @@ class DetectDbfsVisitor(TreeVisitor):
1717
def __init__(self):
1818
self._advices: list[Advice] = []
1919
self._fs_prefixes = ["/dbfs/mnt", "dbfs:/", "/mnt/"]
20-
self._reported_locations = set() # Set to store reported locations
20+
self._reported_locations = set() # Set to store reported locations; astroid coordinates!
2121

2222
def visit_call(self, node: Call):
2323
for arg in node.args:
2424
if isinstance(arg, Const) and isinstance(arg.value, str):
2525
value = arg.value
2626
if any(value.startswith(prefix) for prefix in self._fs_prefixes):
27-
self._advices.append(
28-
Deprecation(
29-
code='dbfs-usage',
30-
message=f"Deprecated file system path in call to: {value}",
31-
start_line=arg.lineno,
32-
start_col=arg.col_offset,
33-
end_line=arg.lineno,
34-
end_col=arg.col_offset + len(value),
35-
)
27+
deprecation = Deprecation.from_node(
28+
code='dbfs-usage', message=f"Deprecated file system path in call to: {value}", node=arg
3629
)
30+
self._advices.append(deprecation.replace(end_col=arg.col_offset + len(value)))
3731
# Record the location of the reported constant, so we do not double report
3832
self._reported_locations.add((arg.lineno, arg.col_offset))
3933

@@ -47,16 +41,10 @@ def _check_str_constant(self, node: Const):
4741
if (node.lineno, node.col_offset) not in self._reported_locations:
4842
value = node.value
4943
if any(value.startswith(prefix) for prefix in self._fs_prefixes):
50-
self._advices.append(
51-
Advisory(
52-
code='dbfs-usage',
53-
message=f"Possible deprecated file system path: {value}",
54-
start_line=node.lineno,
55-
start_col=node.col_offset,
56-
end_line=node.lineno,
57-
end_col=node.col_offset + len(value),
58-
)
44+
advisory = Advisory.from_node(
45+
code='dbfs-usage', message=f"Possible deprecated file system path: {value}", node=node
5946
)
47+
self._advices.append(advisory.replace(end_col=node.col_offset + len(value)))
6048

6149
def get_advices(self) -> Iterable[Advice]:
6250
yield from self._advices

src/databricks/labs/ucx/source_code/linters/imports.py

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import abc
44
import logging
5-
from collections.abc import Iterable, Callable
5+
from collections.abc import Callable, Iterable
66
from typing import TypeVar, cast
77

88
from astroid import ( # type: ignore
@@ -20,12 +20,15 @@
2020

2121
logger = logging.getLogger(__name__)
2222

23+
P = TypeVar("P")
24+
ProblemFactory = Callable[[str, str, NodeNG], P]
25+
2326

2427
class ImportSource(NodeBase):
2528

2629
@classmethod
27-
def extract_from_tree(cls, tree: Tree, problem_type: T) -> tuple[list[ImportSource], list[T]]:
28-
problems: list[T] = []
30+
def extract_from_tree(cls, tree: Tree, problem_factory: ProblemFactory) -> tuple[list[ImportSource], list[P]]:
31+
problems: list[P] = []
2932
sources: list[ImportSource] = []
3033
try: # pylint: disable=too-many-try-statements
3134
nodes = tree.locate(Import, [])
@@ -36,11 +39,11 @@ def extract_from_tree(cls, tree: Tree, problem_type: T) -> tuple[list[ImportSour
3639
sources.append(source)
3740
nodes = tree.locate(Call, [("import_module", Attribute), ("importlib", Name)])
3841
nodes.extend(tree.locate(Call, [("__import__", Attribute), ("importlib", Name)]))
39-
for source in cls._make_sources_for_import_call_nodes(nodes, problem_type, problems):
42+
for source in cls._make_sources_for_import_call_nodes(nodes, problem_factory, problems):
4043
sources.append(source)
4144
return sources, problems
4245
except Exception as e: # pylint: disable=broad-except
43-
problem = problem_type('internal-error', f"While checking imports: {e}")
46+
problem = problem_factory('internal-error', f"While checking imports: {e}", tree.root)
4447
problems.append(problem)
4548
return [], problems
4649

@@ -57,19 +60,14 @@ def _make_sources_for_import_from_nodes(cls, nodes: list[ImportFrom]) -> Iterabl
5760
yield ImportSource(node, node.modname)
5861

5962
@classmethod
60-
def _make_sources_for_import_call_nodes(cls, nodes: list[Call], problem_type: T, problems: list[T]):
63+
def _make_sources_for_import_call_nodes(cls, nodes: list[Call], problem_factory: ProblemFactory, problems: list[P]):
6164
for node in nodes:
6265
arg = node.args[0]
6366
if isinstance(arg, Const):
6467
yield ImportSource(node, arg.value)
6568
continue
66-
problem = problem_type(
67-
'dependency-not-constant',
68-
"Can't check dependency not provided as a constant",
69-
start_line=node.lineno,
70-
start_col=node.col_offset,
71-
end_line=node.end_lineno or 0,
72-
end_col=node.end_col_offset or 0,
69+
problem = problem_factory(
70+
'dependency-not-constant', "Can't check dependency not provided as a constant", node
7371
)
7472
problems.append(problem)
7573

@@ -91,9 +89,6 @@ def get_notebook_path(self) -> str | None:
9189
return None
9290

9391

94-
T = TypeVar("T", bound=Callable)
95-
96-
9792
class DbutilsLinter(Linter):
9893

9994
def lint(self, code: str) -> Iterable[Advice]:
@@ -106,21 +101,15 @@ def _convert_dbutils_notebook_run_to_advice(cls, node: NodeNG) -> Advisory:
106101
assert isinstance(node, Call)
107102
path = cls.get_dbutils_notebook_run_path_arg(node)
108103
if isinstance(path, Const):
109-
return Advisory(
104+
return Advisory.from_node(
110105
'dbutils-notebook-run-literal',
111106
"Call to 'dbutils.notebook.run' will be migrated automatically",
112-
node.lineno,
113-
node.col_offset,
114-
node.end_lineno or 0,
115-
node.end_col_offset or 0,
107+
node=node,
116108
)
117-
return Advisory(
109+
return Advisory.from_node(
118110
'dbutils-notebook-run-dynamic',
119111
"Path for 'dbutils.notebook.run' is not a constant and requires adjusting the notebook path",
120-
node.lineno,
121-
node.col_offset,
122-
node.end_lineno or 0,
123-
node.end_col_offset or 0,
112+
node=node,
124113
)
125114

126115
@staticmethod

src/databricks/labs/ucx/source_code/linters/pyspark.py

Lines changed: 13 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -71,20 +71,12 @@ def lint(self, from_table: FromTable, index: MigrationIndex, node: Call) -> Iter
7171
table_arg = self._get_table_arg(node)
7272
if isinstance(table_arg, Const):
7373
for advice in from_table.lint(table_arg.value):
74-
yield advice.replace(
75-
start_line=node.lineno,
76-
start_col=node.col_offset,
77-
end_line=node.end_lineno,
78-
end_col=node.end_col_offset,
79-
)
74+
yield advice.replace_from_node(node)
8075
else:
81-
yield Advisory(
76+
yield Advisory.from_node(
8277
code='table-migrate',
8378
message=f"Can't migrate '{node}' because its table name argument is not a constant",
84-
start_line=node.lineno,
85-
start_col=node.col_offset,
86-
end_line=node.end_lineno or 0,
87-
end_col=node.end_col_offset or 0,
79+
node=node,
8880
)
8981

9082
def apply(self, from_table: FromTable, index: MigrationIndex, node: Call) -> None:
@@ -102,28 +94,22 @@ def lint(self, from_table: FromTable, index: MigrationIndex, node: Call) -> Iter
10294

10395
if not isinstance(table_arg, Const):
10496
assert isinstance(node.func, Attribute) # always true, avoids a pylint warning
105-
yield Advisory(
97+
yield Advisory.from_node(
10698
code='table-migrate',
10799
message=f"Can't migrate '{node.func.attrname}' because its table name argument is not a constant",
108-
start_line=node.lineno,
109-
start_col=node.col_offset,
110-
end_line=node.end_lineno or 0,
111-
end_col=node.end_col_offset or 0,
100+
node=node,
112101
)
113102
return
114103

115104
dst = self._find_dest(index, table_arg.value, from_table.schema)
116105
if dst is None:
117106
return
118107

119-
yield Deprecation(
108+
yield Deprecation.from_node(
120109
code='table-migrate',
121110
message=f"Table {table_arg.value} is migrated to {dst.destination()} in Unity Catalog",
122111
# SQLGlot does not propagate tokens yet. See https://github.com/tobymao/sqlglot/issues/3159
123-
start_line=node.lineno,
124-
start_col=node.col_offset,
125-
end_line=node.end_lineno or 0,
126-
end_col=node.end_col_offset or 0,
112+
node=node,
127113
)
128114

129115
def apply(self, from_table: FromTable, index: MigrationIndex, node: Call) -> None:
@@ -150,13 +136,10 @@ def matches(self, node: NodeNG):
150136

151137
def lint(self, from_table: FromTable, index: MigrationIndex, node: Call) -> Iterator[Advice]:
152138
assert isinstance(node.func, Attribute) # always true, avoids a pylint warning
153-
yield Advisory(
139+
yield Advisory.from_node(
154140
code='table-migrate',
155141
message=f"Call to '{node.func.attrname}' will return a list of <catalog>.<database>.<table> instead of <database>.<table>.",
156-
start_line=node.lineno,
157-
start_col=node.col_offset,
158-
end_line=node.end_lineno or 0,
159-
end_col=node.end_col_offset or 0,
142+
node=node,
160143
)
161144

162145
def apply(self, from_table: FromTable, index: MigrationIndex, node: Call) -> None:
@@ -191,23 +174,17 @@ def lint(self, from_table: FromTable, index: MigrationIndex, node: NodeNG) -> It
191174
if not isinstance(table_arg.value, str):
192175
return
193176
if any(table_arg.value.startswith(prefix) for prefix in self._DIRECT_FS_REFS):
194-
yield Deprecation(
177+
yield Deprecation.from_node(
195178
code='direct-filesystem-access',
196179
message=f"The use of direct filesystem references is deprecated: {table_arg.value}",
197-
start_line=node.lineno,
198-
start_col=node.col_offset,
199-
end_line=node.end_lineno or 0,
200-
end_col=node.end_col_offset or 0,
180+
node=node,
201181
)
202182
return
203183
if table_arg.value.startswith("/") and self._check_call_context(node):
204-
yield Deprecation(
184+
yield Deprecation.from_node(
205185
code='direct-filesystem-access',
206186
message=f"The use of default dbfs: references is deprecated: {table_arg.value}",
207-
start_line=node.lineno,
208-
start_col=node.col_offset,
209-
end_line=node.end_lineno or 0,
210-
end_col=node.end_col_offset or 0,
187+
node=node,
211188
)
212189

213190
def apply(self, from_table: FromTable, index: MigrationIndex, node: Call) -> None:

0 commit comments

Comments
 (0)