1 change: 1 addition & 0 deletions .lintrunner.toml
@@ -341,6 +341,7 @@ include_patterns = [
     # TODO(https://github.com/pytorch/executorch/issues/7441): Gradually start enabling all folders.
     # 'backends/**/*.py',
     'backends/arm/**/*.py',
+    'backends/cadence/**/*.py',
     'backends/openvino/**/*.py',
     'build/**/*.py',
     'codegen/**/*.py',
3 changes: 3 additions & 0 deletions .mypy.ini
@@ -100,3 +100,6 @@ ignore_missing_imports = True

 [mypy-torchao.*]
 follow_untyped_imports = True
+
+[mypy-facto.*]
+ignore_missing_imports = True
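
Note on the new [mypy-facto.*] override (context, not part of the diff): ignore_missing_imports only silences mypy's "missing library stubs or py.typed marker" report for imports matching the section pattern; code that uses those imports is still type-checked, with the imported names treated as Any. A minimal sketch of the effect, using a hypothetical facto submodule name:

    # Sketch only; "specdb" and "lookup" are placeholder names, not necessarily the real facto API.
    import facto.specdb  # without the override, mypy flags this import as missing stubs

    def describe(op_name: str) -> str:
        spec = facto.specdb.lookup(op_name)  # typed as Any, so attribute access is unchecked
        return f"spec for {op_name}: {spec}"
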
2 changes: 1 addition & 1 deletion backends/cadence/aot/compiler.py
@@ -166,7 +166,7 @@ def fuse_pt2(
     """
     # Get patterns and apply fusion of dq -> op -> q to qop
     # pyre-ignore[16]: no attribute
-    patterns = [q.pattern for q in quantizer.quantizers]
+    patterns = [q.pattern for q in quantizer.quantizers]  # type: ignore[attr-defined]
     QuantFusion(patterns)(converted_graph_module)

     return converted_graph_module
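
A general note on the suppression style used throughout this PR: each pyre-ignore is replaced with a # type: ignore[...] that names the specific mypy error code (attr-defined, arg-type, union-attr, and so on) rather than a bare # type: ignore, so a new, unrelated error on the same line would still be reported. A small self-contained sketch with made-up names, mirroring the quantizer.quantizers case above:

    from typing import Any

    class QuantizerBase:
        """Illustrative stand-in: the declared type has no `pattern` attribute."""

    def collect_patterns(quantizers: list[QuantizerBase]) -> list[Any]:
        # Each concrete quantizer is assumed to carry `.pattern`, but the declared
        # type does not, so mypy reports attr-defined here; the scoped ignore
        # suppresses only that error code on this line.
        return [q.pattern for q in quantizers]  # type: ignore[attr-defined]
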
2 changes: 1 addition & 1 deletion backends/cadence/aot/compiler_funcs.py
@@ -30,7 +30,7 @@ def trace(

     decomp_table = torch.export.default_decompositions()
     # pyre-fixme[6]: For 1st argument expected `Dict[typing.Callable[..., typing.Any
-    remove_decompositions(decomp_table, ops_to_keep)
+    remove_decompositions(decomp_table, ops_to_keep)  # type: ignore[arg-type]
     program = torch.export.export(model, inputs, strict=strict).run_decompositions(
         decomp_table
     )
6 changes: 3 additions & 3 deletions backends/cadence/aot/compiler_utils.py
@@ -60,7 +60,7 @@ def get_shape(
         return fake_tensor.shape
     # Case 3. node holds a param
     if node.op == "get_attr":
-        attr_node = getattr(graph_module, node.target)
+        attr_node = getattr(graph_module, node.target)  # type: ignore[arg-type]
         return attr_node.shape
     # Default: return None
     return None
@@ -140,7 +140,7 @@ def get_permuted_dims(node: torch.fx.Node, dims: List[int]) -> List[int]:
     assert node.target == exir_ops.edge.aten.permute_copy.default
     # Permute each index of the dimension ordering (dims)
     # pyre-fixme[6]: This combined typecheck isn't supported yet.
-    permute_dims: List[int] = list(node.args[1])
+    permute_dims: List[int] = list(node.args[1])  # type: ignore[arg-type]
     assert all(isinstance(x, int) for x in permute_dims)
     return [dims[x] for x in permute_dims]

@@ -156,7 +156,7 @@ def get_tensor_from_attr(
     if node is None:
         return None
     assert node.op == "get_attr"
-    return getattr(graph_module, node.target)
+    return getattr(graph_module, node.target)  # type: ignore[arg-type]


 def is_node_with_op(node: torch.fx.Node, op: str) -> bool:
6 changes: 3 additions & 3 deletions backends/cadence/aot/fuse_ops.py
@@ -397,14 +397,14 @@ def fuse_quantized_batch_norm_with_conv(
             # Requantize the fused weight with the scale and zero point of the
             # quantized::conv's weight
             if per_tensor_quantization:
-                fused_weight = torch.quantize_per_tensor(
+                fused_weight = torch.quantize_per_tensor(  # type: ignore[assignment]
                     fused_weight,
                     weight_scale.item(),
                     cast(int, weight_zero_point.item()),
                     weight_dtype,
                 )
             else:
-                fused_weight = torch.quantize_per_channel(
+                fused_weight = torch.quantize_per_channel(  # type: ignore[assignment]
                     fused_weight,
                     weight_scale,
                     weight_zero_point,
@@ -693,7 +693,7 @@ def __init__(

     def _pkg_name_match(self, node1: torch.fx.Node, node2: torch.fx.Node) -> bool:
         # pyre-ignore[16]: Item `typing.Callable` has no attribute `_op`
-        return node1.target._op.namespace == node2.target._op.namespace
+        return node1.target._op.namespace == node2.target._op.namespace  # type: ignore[union-attr]

     def can_fuse_for_chain(
         self,
8 changes: 4 additions & 4 deletions backends/cadence/aot/graph_builder.py
@@ -61,17 +61,17 @@ def __init__(self) -> None:
         )

     # pyre-ignore[14]: Inconsistent override.
-    def placeholder(
+    def placeholder(  # type: ignore[override]
         self, target: str, fake_tensor: Union[FakeTensor, torch.Tensor]
     ) -> ProxyValue:
         if not isinstance(fake_tensor, FakeTensor):
-            fake_tensor = self.fake_tensor_mode.from_tensor(fake_tensor)
+            fake_tensor = self.fake_tensor_mode.from_tensor(fake_tensor)  # type: ignore[union-attr]
         logging.debug(f"Creating placeholder {target} => {fake_tensor.shape}")
         placeholder = super().placeholder(target, fake_tensor, NodeMetadata({}))
         return placeholder

     # pyre-ignore[14]: Inconsistent override.
-    def output(self, results: list[ProxyValue]) -> ProxyValue:
+    def output(self, results: list[ProxyValue]) -> ProxyValue:  # type: ignore[override]
         logging.debug(f"Creating outputs {results}")
         return super().output(results, NodeMetadata({}))

@@ -109,7 +109,7 @@ def _fx(
         kwargs: dict[str, Argument],
         meta: NodeMetadata,
     ) -> ProxyValue:
-        with self.fake_tensor_mode, enable_python_dispatcher():
+        with self.fake_tensor_mode, enable_python_dispatcher():  # type: ignore[union-attr]
             return super()._fx(kind, target, args, kwargs, meta)

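
For context: mypy's override error code corresponds to the existing pyre-ignore[14] comments — placeholder and output accept narrower argument types than the parent class, which violates the usual override-compatibility rule. A rough, self-contained sketch with invented class names:

    class Base:
        def placeholder(self, target: str, arg: object) -> object:
            return arg

    class Narrowed(Base):
        # Narrowing `arg` from object to int is reported roughly as:
        #   Argument 2 of "placeholder" is incompatible with supertype "Base"  [override]
        def placeholder(self, target: str, arg: int) -> object:  # type: ignore[override]
            return arg * 2
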
34 changes: 17 additions & 17 deletions backends/cadence/aot/memory_constraints.py
@@ -143,8 +143,8 @@ def is_alias_of(self, node: torch.fx.Node, other_node: torch.fx.Node) -> bool:
         node_source_spec = node_source_info.source.meta.get("spec")
         return (
             node_source_info.offset == 0
-            and math.prod(node_source_spec.shape) == math.prod(node_spec.shape)
-            and node_source_spec.dtype == node_spec.dtype
+            and math.prod(node_source_spec.shape) == math.prod(node_spec.shape)  # type: ignore[union-attr]
+            and node_source_spec.dtype == node_spec.dtype  # type: ignore[union-attr]
             and self.is_alias_of(node_source_info.source, other_node)
         )

@@ -172,7 +172,7 @@ def is_memory_planned(
         # Check if any node is a param.
         if node.op == "get_attr":
             return False
-        if node.op == "placeholder" and node.meta.get("spec").const:
+        if node.op == "placeholder" and node.meta.get("spec").const:  # type: ignore[union-attr]
             # Parameters / constants are not memory planned.
             return False
         if node.op == "placeholder" and not (self.alloc_graph_input):
@@ -213,8 +213,8 @@ def resolve_relative_loc_constraints(self, spec: TensorSpec) -> None:
             source_info = self.get_relative_placement_source(dependent_node)
             assert source_info is not None
             dependent_spec = cast(TensorSpec, dependent_node.meta.get("spec"))
-            dependent_spec.mem_id = spec.mem_id
-            dependent_spec.mem_offset = spec.mem_offset + source_info.offset
+            dependent_spec.mem_id = spec.mem_id  # type: ignore[assignment]
+            dependent_spec.mem_offset = spec.mem_offset + source_info.offset  # type: ignore[operator,assignment]
             # Recursively resolve any relative constraints on this arg_spec
             self.resolve_relative_loc_constraints(dependent_spec)

@@ -280,14 +280,14 @@ def add_relative_placement_constraint(
         dependent_spec = dependent.meta.get("spec")
         if update_lifetime:
             source_spec = source.meta.get("spec")
-            source.meta.get("spec").lifetime = [
-                min(source_spec.lifetime[0], dependent_spec.lifetime[0]),
-                max(source_spec.lifetime[1], dependent_spec.lifetime[1]),
+            source.meta.get("spec").lifetime = [  # type: ignore[union-attr]
+                min(source_spec.lifetime[0], dependent_spec.lifetime[0]),  # type: ignore[union-attr]
+                max(source_spec.lifetime[1], dependent_spec.lifetime[1]),  # type: ignore[union-attr]
             ]

         self.update_children_nodes(dependent, update_lifetime)

-        abs_constraint = self.get_absolute_placement_constraint(dependent_spec)
+        abs_constraint = self.get_absolute_placement_constraint(dependent_spec)  # type: ignore[arg-type]
         if abs_constraint is None:
             return

@@ -366,7 +366,7 @@ def get_relative_offset_of_slice(slice_node: torch.fx.Node) -> int:
     slice_input = slice_node.args[0]
     assert isinstance(slice_input, torch.fx.Node)
     input_spec = slice_input.meta.get("spec")
-    tensor_shape = list(input_spec.shape)
+    tensor_shape = list(input_spec.shape)  # type: ignore[union-attr]
     assert tensor_shape
     # get the slice dimension
     dim = 0 if len(slice_node.args) == 1 else cast(int, slice_node.args[1])
@@ -390,7 +390,7 @@ def get_relative_offset_of_slice(slice_node: torch.fx.Node) -> int:
     tensor_shape[dim] = 1

     nbytes = num_bytes_from_shape_and_dtype(
-        torch.Size(tensor_shape), input_spec.scalar_type
+        torch.Size(tensor_shape), input_spec.scalar_type  # type: ignore[union-attr]
     )
     offset = start * nbytes
     return offset
@@ -406,7 +406,7 @@ class GenerateCatNopConstraints(PassBase):
     def __init__(self, constraint: MemConstraints) -> None:
         self.constraint = constraint

-    def call(self, graph_module: torch.fx.GraphModule) -> Optional[PassResult]:
+    def call(self, graph_module: torch.fx.GraphModule) -> Optional[PassResult]:  # type: ignore[return]
         self.compute_cat_contiguity_constraints(graph_module)

     def is_slice_view(self, node: torch.fx.Node) -> bool:
@@ -545,7 +545,7 @@ class GenerateMemoryViewConstraints(PassBase):
     def __init__(self, constraint: MemConstraints) -> None:
         self.constraint = constraint

-    def call(self, graph_module: torch.fx.GraphModule) -> Optional[PassResult]:
+    def call(self, graph_module: torch.fx.GraphModule) -> Optional[PassResult]:  # type: ignore[return]
         for node in graph_module.graph.nodes:
             if node.op != "call_function" or node.target != memory.view:
                 continue
@@ -563,7 +563,7 @@ class GenerateSliceAndSelectNopConstraints(PassBase):
     def __init__(self, constraint: MemConstraints) -> None:
         self.constraint = constraint

-    def call(self, graph_module: torch.fx.GraphModule) -> Optional[PassResult]:
+    def call(self, graph_module: torch.fx.GraphModule) -> Optional[PassResult]:  # type: ignore[return]
         self.compute_slice_and_select_loc_constraints(graph_module)

     # Return True if the slice or select op can be replaced by a nop after
@@ -593,9 +593,9 @@ def removable_slice_or_select_op(
         # is along the outermost dimension, or (b) all dimensions previous to
         # slicing/select dimension are 0 or 1.
         node_spec = node.meta.get("spec")
-        tensor_shape = list(node_spec.shape)
+        tensor_shape = list(node_spec.shape)  # type: ignore[union-attr]
         dim = 0 if len(node.args) == 1 else node.args[1]
-        if dim and not set(tensor_shape[0:dim]).issubset({0, 1}):
+        if dim and not set(tensor_shape[0:dim]).issubset({0, 1}):  # type: ignore[misc]
             return False

         # The slice step should be 1 for contiguity.
@@ -684,7 +684,7 @@ def __call__(self, graph_module: torch.fx.GraphModule) -> PassResult:
             for mcg_pass in cast(
                 list[ConstraintsGenPass],
                 # pyre-ignore[6]: Incompatible parameter type.
-                list(filter(pass_filter, constraint_gen_passes)),
+                list(filter(pass_filter, constraint_gen_passes)),  # type: ignore[arg-type]
             )
         ]
         # Now run the pass manager on the filtered passes
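
Most of the union-attr suppressions in this file come from the same shape: node.meta.get("spec") is typed as returning an Optional value, so attribute access on the result is flagged even though these passes only run when the key is present. The inline ignore keeps the diff minimal; a sketch of the alternative, assert-based narrowing (with a simplified stand-in for TensorSpec) looks like this:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Spec:  # simplified stand-in for TensorSpec, illustration only
        shape: tuple[int, ...]

    def shape_via_ignore(meta: dict[str, Spec]) -> tuple[int, ...]:
        # dict.get returns Optional[Spec], so .shape triggers union-attr.
        return meta.get("spec").shape  # type: ignore[union-attr]

    def shape_via_narrowing(meta: dict[str, Spec]) -> tuple[int, ...]:
        spec: Optional[Spec] = meta.get("spec")
        assert spec is not None  # narrows Optional[Spec] to Spec; no ignore needed
        return spec.shape
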
32 changes: 16 additions & 16 deletions backends/cadence/aot/memory_planning.py
@@ -9,7 +9,7 @@
 import collections
 import itertools
 import logging
-from typing import Iterable, Optional, Sequence
+from typing import Any, Iterable, Optional, Sequence

 import torch
 from executorch.backends.cadence.aot.memory_constraints import MemConstraints
@@ -28,7 +28,7 @@
 from executorch.exir.memory_planning import collect_specs_from_nodes, Verifier
 from executorch.exir.passes import MemoryPlanningPass
 from executorch.exir.tensor import TensorSpec
-from tabulate import tabulate
+from tabulate import tabulate  # type: ignore[import-untyped]
 from torch.export.exported_program import ExportGraphSignature
 from torch.fx.passes.infra.pass_base import PassResult

@@ -64,15 +64,15 @@ def plan_spec(
         """
         Greedily place the spec in the first memory that can fit it.
         """
-        for spec.mem_id in range(1, self.get_num_memories()):
-            spec.mem_offset = 0
+        for spec.mem_id in range(1, self.get_num_memories()):  # type: ignore[assignment]
+            spec.mem_offset = 0  # type: ignore[assignment]
             while self.is_valid_placement(spec, placement_constraints) and (
                 overlapped := state.get_overlapping_spec(spec)
             ):
                 # Found an overlapping spec, so we need to adjust the offset = end of the overlapping spec + alignment.
-                spec.mem_offset = get_aligned_offset(
-                    overlapped.mem_offset + overlapped.allocated_memory,
-                    self.get_alignment(spec.mem_id),
+                spec.mem_offset = get_aligned_offset(  # type: ignore[assignment]
+                    overlapped.mem_offset + overlapped.allocated_memory,  # type: ignore[operator]
+                    self.get_alignment(spec.mem_id),  # type: ignore[arg-type]
                 )

             if self.is_valid_placement(spec, placement_constraints):
@@ -115,20 +115,20 @@ def plan_spec(
         """
         Greedily place the spec in the first memory that can fit it.
         """
-        for spec.mem_id in range(1, self.get_num_memories()):
-            if placement_constraints.is_mem_id_in_blocklist(spec, spec.mem_id):
+        for spec.mem_id in range(1, self.get_num_memories()):  # type: ignore[assignment]
+            if placement_constraints.is_mem_id_in_blocklist(spec, spec.mem_id):  # type: ignore[arg-type]
                 # Skip placement for blocked memory id.
                 continue
             prev_offset, smallest_gap = 0, float("inf")
-            for allocated_spec in state.allocated_buffers[spec.mem_id]:
+            for allocated_spec in state.allocated_buffers[spec.mem_id]:  # type: ignore[call-overload]
                 if not Verifier.lifetime_overlap(spec, allocated_spec):
                     continue

                 if (
                     gap := allocated_spec.mem_offset - prev_offset
                 ) >= spec.allocated_memory and gap < smallest_gap:
                     smallest_gap = gap
-                    spec.mem_offset = prev_offset
+                    spec.mem_offset = prev_offset  # type: ignore[assignment]
                 # Note that different from the paper, which updates prev_offset for all
                 # allocated tensors, we only update tensors with overlapping lifetime.
                 # Updating prev_offset outside the if statement will include tensors without
@@ -138,12 +138,12 @@ def plan_spec(
                 prev_offset = max(
                     get_aligned_offset(
                         allocated_spec.mem_offset + allocated_spec.allocated_memory,
-                        self.get_alignment(spec.mem_id),
+                        self.get_alignment(spec.mem_id),  # type: ignore[arg-type]
                     ),
                     prev_offset,
                 )
             if spec.mem_offset is None:
-                spec.mem_offset = prev_offset
+                spec.mem_offset = prev_offset  # type: ignore[assignment]

             if not self.is_valid_placement(spec, placement_constraints):
                 # Skip placement for invalid memory id.
@@ -153,7 +153,7 @@ def plan_spec(
             state.place_spec(spec)
             # A data structure used for maintaining the tensor order
             # by offset, named ordered_allocated_ids in the paper
-            state.allocated_buffers[spec.mem_id].sort(key=lambda spec: spec.mem_offset)
+            state.allocated_buffers[spec.mem_id].sort(key=lambda spec: spec.mem_offset)  # type: ignore[call-overload]
             break

     def plan(
@@ -200,7 +200,7 @@ def find_peak_memory_usages_per_memory(
     # Create a defaultdict to keep track of memory usages: {mem_id: mem_usage}
     # Use a defaultdict here because we don't know how many unique memory_id in
     # the memory hierarchy used in memory planning.
-    usages = collections.defaultdict(int)
+    usages: collections.defaultdict[Any, int] = collections.defaultdict(int)  # type: ignore[var-annotated]

     # go through all nodes in the graph, collect memory usage per spec.mem_id
     for spec in collect_specs_from_graph_module(
@@ -209,7 +209,7 @@ def find_peak_memory_usages_per_memory(
         if mem_constraints is not None and mem_constraints.skipped_spec(spec):
             continue
         usages[spec.mem_id] = max(
-            usages[spec.mem_id], spec.mem_offset + spec.allocated_memory
+            usages[spec.mem_id], spec.mem_offset + spec.allocated_memory  # type: ignore[operator]
         )

     # Convert usages dictionary into list of len of max memory id
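
One note on the tabulate import above: import-untyped is the per-line counterpart of the ignore_missing_imports override added to .mypy.ini earlier in this PR — it suppresses the "installed, but missing library stubs" report only at this import site. A sketch of the two options follows; the types-tabulate stubs package mentioned in the comment is an assumption about what is available, not something this PR adds:

    # Option 1: inline, at the single import site (what this diff does).
    from tabulate import tabulate  # type: ignore[import-untyped]

    # Option 2: project-wide, via a per-module section in .mypy.ini,
    # mirroring the facto override in this PR:
    #     [mypy-tabulate.*]
    #     ignore_missing_imports = True
    # (Installing a stubs package such as types-tabulate, if suitable, would
    # remove the need for either suppression.)

    print(tabulate([["mem_id", 1], ["peak_bytes", 4096]]))
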
24 changes: 12 additions & 12 deletions backends/cadence/aot/memory_planning_algo.py
@@ -42,17 +42,17 @@ def place_spec(self, spec: TensorSpec) -> None:
         """Place the spec at the given memory and offset."""
         logging.debug(f"Placing spec {spec}: {spec.mem_id=}, {spec.mem_offset=}")
         assert self.get_overlapping_spec(spec) is None
-        self.allocated_buffers[spec.mem_id].append(spec)
-        self.bufsizes[spec.mem_id] = max(
-            self.bufsizes[spec.mem_id],
+        self.allocated_buffers[spec.mem_id].append(spec)  # type: ignore[call-overload]
+        self.bufsizes[spec.mem_id] = max(  # type: ignore[call-overload]
+            self.bufsizes[spec.mem_id],  # type: ignore[call-overload]
             get_aligned_offset(
-                spec.mem_offset + spec.allocated_memory, self.alignment[spec.mem_id]
+                spec.mem_offset + spec.allocated_memory, self.alignment[spec.mem_id]  # type: ignore[operator,call-overload]
             ),
         )

     def get_overlapping_spec(self, spec: TensorSpec) -> Optional[TensorSpec]:
         """Get the overlapping spec for the given spec."""
-        for allocated_spec in self.allocated_buffers[spec.mem_id]:
+        for allocated_spec in self.allocated_buffers[spec.mem_id]:  # type: ignore[call-overload]
             if Verifier.lifetime_overlap(
                 spec, allocated_spec
             ) and Verifier.storage_overlap(spec, allocated_spec):
@@ -131,13 +131,13 @@ def is_valid_placement(
     ) -> bool:
         """Returns true if the spec can be placed at the given memory id."""
         end_of_allocation = get_aligned_offset(
-            spec.mem_offset + spec.allocated_memory,
-            self.get_alignment(spec.mem_id),
+            spec.mem_offset + spec.allocated_memory,  # type: ignore[operator]
+            self.get_alignment(spec.mem_id),  # type: ignore[arg-type]
         )
         return (
-            self.memory_id_is_valid[spec.mem_id]
-            and end_of_allocation <= self.get_size(spec.mem_id)
-            and not placement_constraints.is_mem_id_in_blocklist(spec, spec.mem_id)
+            self.memory_id_is_valid[spec.mem_id]  # type: ignore[call-overload]
+            and end_of_allocation <= self.get_size(spec.mem_id)  # type: ignore[arg-type]
+            and not placement_constraints.is_mem_id_in_blocklist(spec, spec.mem_id)  # type: ignore[arg-type]
         )

     @contextmanager
@@ -180,8 +180,8 @@ def _place_pinned_specs(
             if c is not None and c.offset is not None
         }
         for spec, constraint in pinned_specs.items():
-            spec.mem_id = constraint.pinned_memory_id
-            spec.mem_offset = constraint.offset
+            spec.mem_id = constraint.pinned_memory_id  # type: ignore[assignment]
+            spec.mem_offset = constraint.offset  # type: ignore[assignment]
             state.place_spec(spec)
             placement_constraints.resolve_relative_loc_constraints(spec)

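
For context on the repeated call-overload suppressions in this file: the spec fields used as indices (spec.mem_id and friends) are evidently Optional on TensorSpec, and indexing a list with Optional[int] matches no list.__getitem__ overload, hence the error. The names below are illustrative, not the real TensorSpec API; asserting or casting the index to int before use would be the stricter alternative to the inline ignore:

    from typing import Optional

    def peak_for(bufsizes: list[int], mem_id: Optional[int]) -> int:
        # mem_id is Optional[int]; no __getitem__ overload of list accepts None,
        # so mypy reports call-overload even though callers always pass an int.
        return bufsizes[mem_id]  # type: ignore[call-overload]

    def peak_for_strict(bufsizes: list[int], mem_id: Optional[int]) -> int:
        assert mem_id is not None  # narrow Optional[int] to int; no ignore needed
        return bufsizes[mem_id]
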