|
5 | 5 | import shlex |
6 | 6 | from abc import ABC, abstractmethod |
7 | 7 | from ast import parse as parse_python |
8 | | -from collections.abc import Callable, Iterable |
9 | 8 | from enum import Enum |
10 | 9 | from pathlib import Path |
11 | | -from typing import TypeVar, cast |
12 | 10 |
|
13 | | -from astroid import Call, Const, ImportFrom, Name, NodeNG, Try # type: ignore |
14 | | -from astroid.exceptions import AstroidSyntaxError # type: ignore |
| 11 | +from astroid import NodeNG # type: ignore |
15 | 12 | from sqlglot import parse as parse_sql, ParseError as SQLParseError |
16 | 13 |
|
17 | 14 | from databricks.sdk.service.workspace import Language |
|
23 | 20 | DependencyGraphContext, |
24 | 21 | InheritedContext, |
25 | 22 | ) |
26 | | -from databricks.labs.ucx.source_code.linters.imports import ( |
27 | | - SysPathChange, |
28 | | - DbutilsPyLinter, |
29 | | - ImportSource, |
30 | | - NotebookRunCall, |
31 | | - UnresolvedPath, |
32 | | -) |
33 | | -from databricks.labs.ucx.source_code.linters.python_ast import Tree, NodeBase |
| 23 | +from databricks.labs.ucx.source_code.python.python_analyzer import PythonCodeAnalyzer |
| 24 | +from databricks.labs.ucx.source_code.notebooks.magic import MagicNode, MagicCommand, magic_command_factory |
34 | 25 |
|
35 | 26 | # use a specific logger for sqlglot warnings so we can disable them selectively |
36 | 27 | sqlglot_logger = logging.getLogger(f"{__name__}.sqlglot") |
@@ -403,242 +394,15 @@ def wrap_with_magic(self, code: str, cell_language: CellLanguage) -> str: |
403 | 394 | return "\n".join(lines) |
404 | 395 |
|
405 | 396 |
|
406 | | -class PythonCodeAnalyzer: |
407 | | - |
408 | | - def __init__(self, context: DependencyGraphContext, python_code: str): |
409 | | - self._context = context |
410 | | - self._python_code = python_code |
411 | | - |
412 | | - def build_graph(self) -> list[DependencyProblem]: |
413 | | - """Check python code for dependency-related problems. |
414 | | -
|
415 | | - Returns: |
416 | | - A list of dependency problems; position information is relative to the python code itself. |
417 | | - """ |
418 | | - problems: list[DependencyProblem] = [] |
419 | | - try: |
420 | | - _, nodes, parse_problems = self._parse_and_extract_nodes() |
421 | | - problems.extend(parse_problems) |
422 | | - except AstroidSyntaxError as e: |
423 | | - logger.debug(f"Could not parse Python code: {self._python_code}", exc_info=True) |
424 | | - problems.append(DependencyProblem('parse-error', f"Could not parse Python code: {e}")) |
425 | | - return problems |
426 | | - for base_node in nodes: |
427 | | - for problem in self._build_graph_from_node(base_node): |
428 | | - # Astroid line numbers are 1-based. |
429 | | - problem = problem.replace( |
430 | | - start_line=base_node.node.lineno - 1, |
431 | | - start_col=base_node.node.col_offset, |
432 | | - end_line=(base_node.node.end_lineno or 1) - 1, |
433 | | - end_col=base_node.node.end_col_offset or 0, |
434 | | - ) |
435 | | - problems.append(problem) |
436 | | - return problems |
437 | | - |
438 | | - def build_inherited_context(self, child_path: Path) -> InheritedContext: |
439 | | - try: |
440 | | - tree, nodes, _ = self._parse_and_extract_nodes() |
441 | | - except AstroidSyntaxError: |
442 | | - logger.debug(f"Could not parse Python code: {self._python_code}", exc_info=True) |
443 | | - return InheritedContext(None, False) |
444 | | - if len(nodes) == 0: |
445 | | - return InheritedContext(tree, False) |
446 | | - context = InheritedContext(Tree.new_module(), False) |
447 | | - last_line = -1 |
448 | | - for base_node in nodes: |
449 | | - # append nodes |
450 | | - node_line = base_node.node.lineno |
451 | | - nodes = tree.nodes_between(last_line + 1, node_line - 1) |
452 | | - context.tree.append_nodes(nodes) |
453 | | - globs = tree.globals_between(last_line + 1, node_line - 1) |
454 | | - context.tree.append_globals(globs) |
455 | | - last_line = node_line |
456 | | - # process node |
457 | | - child_context = self._build_inherited_context_from_node(base_node, child_path) |
458 | | - context = context.append(child_context, True) |
459 | | - if context.found: |
460 | | - return context |
461 | | - line_count = tree.line_count() |
462 | | - if last_line < line_count: |
463 | | - nodes = tree.nodes_between(last_line + 1, line_count) |
464 | | - context.tree.append_nodes(nodes) |
465 | | - globs = tree.globals_between(last_line + 1, line_count) |
466 | | - context.tree.append_globals(globs) |
467 | | - return context |
468 | | - |
469 | | - def _parse_and_extract_nodes(self) -> tuple[Tree, list[NodeBase], Iterable[DependencyProblem]]: |
470 | | - problems: list[DependencyProblem] = [] |
471 | | - tree = Tree.normalize_and_parse(self._python_code) |
472 | | - syspath_changes = SysPathChange.extract_from_tree(self._context.session_state, tree) |
473 | | - run_calls = DbutilsPyLinter.list_dbutils_notebook_run_calls(tree) |
474 | | - import_sources: list[ImportSource] |
475 | | - import_problems: list[DependencyProblem] |
476 | | - import_sources, import_problems = ImportSource.extract_from_tree(tree, DependencyProblem.from_node) |
477 | | - problems.extend(import_problems) |
478 | | - magic_lines, command_problems = MagicLine.extract_from_tree(tree, DependencyProblem.from_node) |
479 | | - problems.extend(command_problems) |
480 | | - # need to evaluate things in intertwined sequence so concat and sort them |
481 | | - nodes: list[NodeBase] = cast(list[NodeBase], syspath_changes + run_calls + import_sources + magic_lines) |
482 | | - nodes = sorted(nodes, key=lambda node: (node.node.lineno, node.node.col_offset)) |
483 | | - return tree, nodes, problems |
484 | | - |
485 | | - def _build_graph_from_node(self, base_node: NodeBase) -> Iterable[DependencyProblem]: |
486 | | - if isinstance(base_node, SysPathChange): |
487 | | - yield from self._mutate_path_lookup(base_node) |
488 | | - elif isinstance(base_node, NotebookRunCall): |
489 | | - yield from self._register_notebook(base_node) |
490 | | - elif isinstance(base_node, ImportSource): |
491 | | - yield from self._register_import(base_node) |
492 | | - elif isinstance(base_node, MagicLine): |
493 | | - yield from base_node.build_dependency_graph(self._context.parent) |
494 | | - else: |
495 | | - logger.warning(f"Can't build graph for node {NodeBase.__name__} of type {type(base_node).__name__}") |
496 | | - |
497 | | - def _build_inherited_context_from_node(self, base_node: NodeBase, child_path: Path) -> InheritedContext: |
498 | | - if isinstance(base_node, SysPathChange): |
499 | | - self._mutate_path_lookup(base_node) |
500 | | - return InheritedContext(None, False) |
501 | | - if isinstance(base_node, ImportSource): |
502 | | - # nothing to do, Astroid takes care of imports |
503 | | - return InheritedContext(None, False) |
504 | | - if isinstance(base_node, NotebookRunCall): |
505 | | - # nothing to do, dbutils.notebook.run uses a dedicated context |
506 | | - return InheritedContext(None, False) |
507 | | - if isinstance(base_node, MagicLine): |
508 | | - return base_node.build_inherited_context(self._context, child_path) |
509 | | - logger.warning(f"Can't build inherited context for node {NodeBase.__name__} of type {type(base_node).__name__}") |
510 | | - return InheritedContext(None, False) |
511 | | - |
512 | | - def _register_import(self, base_node: ImportSource) -> Iterable[DependencyProblem]: |
513 | | - prefix = "" |
514 | | - if isinstance(base_node.node, ImportFrom) and base_node.node.level is not None: |
515 | | - prefix = "." * base_node.node.level |
516 | | - name = base_node.name or "" |
517 | | - problems = self._context.parent.register_import(prefix + name) |
518 | | - for problem in problems: |
519 | | - prob = self._filter_import_problem_in_try_except(problem, base_node) |
520 | | - if prob is not None: |
521 | | - yield prob |
522 | | - |
523 | | - @classmethod |
524 | | - def _filter_import_problem_in_try_except( |
525 | | - cls, problem: DependencyProblem, base_node: ImportSource |
526 | | - ) -> DependencyProblem | None: |
527 | | - if problem.code != 'import-not-found': |
528 | | - return problem |
529 | | - # is base_node in a try-except clause ? |
530 | | - node = base_node.node.parent |
531 | | - while node and not isinstance(node, Try): |
532 | | - node = node.parent |
533 | | - if cls._is_try_except_import_error(node): |
534 | | - return None |
535 | | - return problem |
536 | | - |
537 | | - @classmethod |
538 | | - def _is_try_except_import_error(cls, node: Try | None) -> bool: |
539 | | - if not isinstance(node, Try): |
540 | | - return False |
541 | | - for handler in node.handlers: |
542 | | - if isinstance(handler.type, Name): |
543 | | - if handler.type.name == "ImportError": |
544 | | - return True |
545 | | - return False |
546 | | - |
547 | | - def _register_notebook(self, base_node: NotebookRunCall) -> Iterable[DependencyProblem]: |
548 | | - has_unresolved, paths = base_node.get_notebook_paths(self._context.session_state) |
549 | | - if has_unresolved: |
550 | | - yield DependencyProblem( |
551 | | - 'dependency-cannot-compute-value', |
552 | | - f"Can't check dependency from {base_node.node.as_string()} because the expression cannot be computed", |
553 | | - ) |
554 | | - for path in paths: |
555 | | - # notebooks ran via dbutils.notebook.run do not inherit or propagate context |
556 | | - yield from self._context.parent.register_notebook(Path(path), False) |
557 | | - |
558 | | - def _mutate_path_lookup(self, change: SysPathChange) -> Iterable[DependencyProblem]: |
559 | | - if isinstance(change, UnresolvedPath): |
560 | | - yield DependencyProblem( |
561 | | - 'sys-path-cannot-compute-value', |
562 | | - f"Can't update sys.path from {change.node.as_string()} because the expression cannot be computed", |
563 | | - ) |
564 | | - return |
565 | | - change.apply_to(self._context.path_lookup) |
566 | | - |
567 | | - |
568 | | -T = TypeVar("T") |
569 | | - |
570 | | - |
571 | | -class MagicLine(NodeBase): |
572 | | - |
573 | | - @classmethod |
574 | | - def extract_from_tree( |
575 | | - cls, tree: Tree, problem_factory: Callable[[str, str, NodeNG], T] |
576 | | - ) -> tuple[list[MagicLine], list[T]]: |
577 | | - problems: list[T] = [] |
578 | | - commands: list[MagicLine] = [] |
579 | | - try: |
580 | | - nodes = tree.locate(Call, [("magic_command", Name)]) |
581 | | - for command in cls._make_commands_for_magic_command_call_nodes(nodes): |
582 | | - commands.append(command) |
583 | | - except Exception as e: # pylint: disable=broad-except |
584 | | - logger.debug(f"Internal error while checking magic commands in tree: {tree.root}", exc_info=True) |
585 | | - problem = problem_factory('internal-error', f"While checking magic commands: {e}", tree.root) |
586 | | - problems.append(problem) |
587 | | - return commands, problems |
| 397 | +class RunCommand(MagicCommand): |
588 | 398 |
|
589 | | - @classmethod |
590 | | - def _make_commands_for_magic_command_call_nodes(cls, nodes: list[Call]): |
591 | | - for node in nodes: |
592 | | - arg = node.args[0] |
593 | | - if isinstance(arg, Const): |
594 | | - yield MagicLine(node, arg.value) |
595 | | - |
596 | | - def __init__(self, node: NodeNG, command: bytes): |
597 | | - super().__init__(node) |
598 | | - self._command = command.decode() |
599 | | - |
600 | | - def as_magic(self) -> MagicCommand | None: |
601 | | - if self._command.startswith("%pip") or self._command.startswith("!pip"): |
602 | | - return PipCommand(self.node, self._command) |
603 | | - if self._command.startswith("%run"): |
604 | | - return RunCommand(self.node, self._command) |
| 399 | + @staticmethod |
| 400 | + @magic_command_factory |
| 401 | + def factory(command: str, node: NodeNG) -> MagicCommand | None: |
| 402 | + if command.startswith("%run"): |
| 403 | + return RunCommand(node, command) |
605 | 404 | return None |
606 | 405 |
|
607 | | - def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: |
608 | | - magic = self.as_magic() |
609 | | - if magic is not None: |
610 | | - return magic.build_dependency_graph(parent) |
611 | | - problem = DependencyProblem.from_node( |
612 | | - code='unsupported-magic-line', message=f"magic line '{self._command}' is not supported yet", node=self.node |
613 | | - ) |
614 | | - return [problem] |
615 | | - |
616 | | - def build_inherited_context(self, context: DependencyGraphContext, child_path: Path) -> InheritedContext: |
617 | | - magic = self.as_magic() |
618 | | - if magic is not None: |
619 | | - return magic.build_inherited_context(context, child_path) |
620 | | - return InheritedContext(None, False) |
621 | | - |
622 | | - |
623 | | -class MagicNode(NodeNG): |
624 | | - pass |
625 | | - |
626 | | - |
627 | | -class MagicCommand(ABC): |
628 | | - |
629 | | - def __init__(self, node: NodeNG, code: str): |
630 | | - self._node = node |
631 | | - self._code = code |
632 | | - |
633 | | - @abstractmethod |
634 | | - def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: ... |
635 | | - |
636 | | - def build_inherited_context(self, _context: DependencyGraphContext, _child_path: Path) -> InheritedContext: |
637 | | - return InheritedContext(None, False) |
638 | | - |
639 | | - |
640 | | -class RunCommand(MagicCommand): |
641 | | - |
642 | 406 | def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: |
643 | 407 | path = self.notebook_path |
644 | 408 | if path is not None: |
@@ -678,6 +442,13 @@ def build_inherited_context(self, context: DependencyGraphContext, child_path: P |
678 | 442 |
|
679 | 443 | class PipCommand(MagicCommand): |
680 | 444 |
|
| 445 | + @staticmethod |
| 446 | + @magic_command_factory |
| 447 | + def factory(command: str, node: NodeNG) -> MagicCommand | None: |
| 448 | + if command.startswith("%pip") or command.startswith("!pip"): |
| 449 | + return PipCommand(node, command) |
| 450 | + return None |
| 451 | + |
681 | 452 | def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: |
682 | 453 | argv = self._split(self._code) |
683 | 454 | if len(argv) == 1: |
|
0 commit comments