Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 36 additions & 8 deletions mypy/semanal_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from __future__ import annotations

from contextlib import nullcontext
from functools import cmp_to_key
from typing import TYPE_CHECKING, Callable, Final, Optional, Union
from typing_extensions import TypeAlias as _TypeAlias

Expand Down Expand Up @@ -232,26 +233,48 @@ def process_top_levels(graph: Graph, scc: list[str], patches: Patches) -> None:
final_iteration = not any_progress


def method_order_by_subclassing(left: FullTargetInfo, right: FullTargetInfo) -> int:
_, _, _, left_info = left
_, _, _, right_info = right
if left_info is None or right_info is None:
return 0
if left_info is right_info:
return 0
if left_info in right_info.mro:
return -1
if right_info in left_info.mro:
return 1
return 0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to change the order of processing targets, even if derived classes are always after base classes (i.e. current ordering is already fine). I suspect that this will break the current SCC ordering algorithm, which we probably rely on in a bunch of places, and it could explain why things are failing. I think we must mostly follow the SCC ordering or we will have a bunch of weird regressions and generally a bad time.

Here's one potential way to fix this so this only changes the order when necessary:

  • Create a linear list of targets, similar to what you currently have.
  • Collect a set of all TypeInfos in the targets (e.g. all active_type values).
  • Iterate over the targets, and keep track of which TypeInfo's we've processed by removing the TypeInfo set created in the previous step. If we encounter a TypeInfo which has some MRO item that is in the set of TypeInfos, move that to a separately list (deferreds) instead of processing now.
  • After having iterated all targets, iterate over the deferred items.

The above approach could possibly be made even better by processing deferred nodes immediately after all the MRO entries have been processed, instead of waiting for all targets to be processed.

This has the benefit of not changing the processing order if it's already correct, and if it's incorrect, only the impacted targets will get rescheduled. This also could be a bit faster, since we perform a linear scan instead of a sort.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JukkaL

This seems to change the order of processing targets, even if derived classes are always after base classes (i.e. current ordering is already fine)

I don't think I am following. Can you give an example of when this happens? I actually did a diff on full target list for mypy self check (including stdlib), and it is tiny, only few things that actually matter were changed (like e.g. couple visitors in mypy.types vs mypy.type_visitor).

Even then, how order of processing of method bodies can be so important? (All the top levels, including ClassDefs, are already processed at this point).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I think I misunderstood how the ordering works. So it's probably fine. Changing the ordering of methods "shouldn't" change much, but it's just a very scary change that could trigger some pre-existing bugs or limitations. But if this only changes ordering very slightly, it should be fine.

Can you also manually test this when you import torch and numpy? At least torch has a massive import cycle which should be a good test case.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JukkaL As we discussed, I now use an explicit ordering algorithm. Btw in the meantime I read a bit more about this, and although subclassing order is consistent as a total preorder, common sorting algorithms can be fooled by those (as they usually expect a total order). Also, the documented properties of Python sort are actually not enough to guarantee it will always work in this case.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, and I also tried numpy and torch with latest version of this PR, didn't find any issues.



def process_functions(graph: Graph, scc: list[str], patches: Patches) -> None:
# Process functions.
all_targets = []
for module in scc:
tree = graph[module].tree
assert tree is not None
analyzer = graph[module].manager.semantic_analyzer
# In principle, functions can be processed in arbitrary order,
# but _methods_ must be processed in the order they are defined,
# because some features (most notably partial types) depend on
# order of definitions on self.
#
# There can be multiple generated methods per line. Use target
# name as the second sort key to get a repeatable sort order on
# Python 3.5, which doesn't preserve dictionary order.
# name as the second sort key to get a repeatable sort order.
targets = sorted(get_all_leaf_targets(tree), key=lambda x: (x[1].line, x[0]))
for target, node, active_type in targets:
assert isinstance(node, (FuncDef, OverloadedFuncDef, Decorator))
process_top_level_function(
analyzer, graph[module], module, target, node, active_type, patches
)
all_targets.extend(
[(module, target, node, active_type) for target, node, active_type in targets]
)

# Additionally, we process superclass methods before subclass methods. Here we rely
# on stability of Python sort and just do a separate sort.
for module, target, node, active_type in sorted(
all_targets, key=cmp_to_key(method_order_by_subclassing)
):
analyzer = graph[module].manager.semantic_analyzer
assert isinstance(node, (FuncDef, OverloadedFuncDef, Decorator))
process_top_level_function(
analyzer, graph[module], module, target, node, active_type, patches
)


def process_top_level_function(
Expand Down Expand Up @@ -308,6 +331,11 @@ def process_top_level_function(
str, Union[MypyFile, FuncDef, OverloadedFuncDef, Decorator], Optional[TypeInfo]
]

# Same as above but includes module as first item.
FullTargetInfo: _TypeAlias = tuple[
str, str, Union[MypyFile, FuncDef, OverloadedFuncDef, Decorator], Optional[TypeInfo]
]


def get_all_leaf_targets(file: MypyFile) -> list[TargetInfo]:
"""Return all leaf targets in a symbol table (module-level and methods)."""
Expand Down
7 changes: 3 additions & 4 deletions test-data/unit/check-classes.test
Original file line number Diff line number Diff line change
Expand Up @@ -7007,11 +7007,10 @@ class C:
[case testAttributeDefOrder2]
class D(C):
def g(self) -> None:
self.x = ''
self.x = '' # E: Incompatible types in assignment (expression has type "str", variable has type "int")

def f(self) -> None:
# https://github.com/python/mypy/issues/7162
reveal_type(self.x) # N: Revealed type is "builtins.str"
reveal_type(self.x) # N: Revealed type is "builtins.int"


class C:
Expand All @@ -7025,7 +7024,7 @@ class E(C):
def f(self) -> None:
reveal_type(self.x) # N: Revealed type is "builtins.int"

[targets __main__, __main__, __main__.D.g, __main__.D.f, __main__.C.__init__, __main__.E.g, __main__.E.f]
[targets __main__, __main__, __main__.C.__init__, __main__.D.g, __main__.D.f, __main__.E.g, __main__.E.f]

[case testNewReturnType1]
class A:
Expand Down
18 changes: 18 additions & 0 deletions test-data/unit/check-newsemanal.test
Original file line number Diff line number Diff line change
Expand Up @@ -3256,3 +3256,21 @@ class b:
x = x[1] # E: Cannot resolve name "x" (possible cyclic definition)
y = 1[y] # E: Value of type "int" is not indexable \
# E: Cannot determine type of "y"

[case testForwardBaseDeferAttr]
from typing import Optional, Callable, TypeVar

class C(B):
def a(self) -> None:
reveal_type(self._foo) # N: Revealed type is "Union[builtins.int, None]"
self._foo = defer()

class B:
def __init__(self) -> None:
self._foo: Optional[int] = None

T = TypeVar("T")
def deco(fn: Callable[[], T]) -> Callable[[], T]: ...

@deco
def defer() -> int: ...
Loading