Commit c0b7353

Merge branch 'main' into shoumikhin-patch-1
2 parents: 64ecdd2 + b9bb3c1

File tree: 18 files changed, +425 −35 lines

backends/cadence/aot/TARGETS

Lines changed: 15 additions & 0 deletions
@@ -199,6 +199,19 @@ python_library(
     ],
 )
 
+python_unittest(
+    name = "test_program_builder",
+    srcs = [
+        "tests/test_program_builder.py",
+    ],
+    typing = True,
+    deps = [
+        ":program_builder",
+        "//caffe2:torch",
+        "//later:lib",
+    ],
+)
+
 python_library(
     name = "fuse_ops",
     srcs = [
@@ -539,8 +552,10 @@ python_unittest(
     ],
     typing = True,
     deps = [
+        ":program_builder",
         "//executorch/backends/cadence/aot:graph_builder",
         "//executorch/backends/cadence/aot:ops_registrations",
+        "//executorch/runtime:runtime",
         "//later:lib",
     ],
 )
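The new python_unittest target registers the test file introduced by this commit (reproduced further down) with the build graph, and its deps mirror the test's imports: the program_builder library, torch, and the later test framework. Assuming the repository's usual Buck workflow, which this commit does not show, the suite would be run with something like:

buck2 test //executorch/backends/cadence/aot:test_program_builder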

backends/cadence/aot/compiler.py

Lines changed: 6 additions & 6 deletions
@@ -34,7 +34,7 @@
 )
 from executorch.exir.passes import ToOutVarPass
 from executorch.exir.passes.sym_shape_eval_pass import HintBasedSymShapeEvalPass
-from executorch.exir.program._program import to_edge_with_preserved_ops
+from executorch.exir.program._program import to_edge
 from torch._inductor.decomposition import remove_decompositions
 
 from torch.export.exported_program import ExportedProgram
@@ -219,9 +219,9 @@ def quantize_pt2(
     torch.ops.aten.angle.default,
     torch.ops.aten.rms_norm.default,
 ]
-TO_EDGE_PRESERVE_OPS: tuple[torch._ops.OpOverload, ...] = (
+TO_EDGE_PRESERVE_OPS: list[torch._ops.OpOverload, ...] = [
     torch.ops.aten.rms_norm.default,
-)
+]
 
 
 def _lower_ep_to_edge(
@@ -233,18 +233,18 @@ def _lower_ep_to_edge(
     """
     Lower an ExportedProgram to an EdgeProgramManager (in edge IR).
     """
-    # Call to_edge_with_preserved_ops to convert the graph to edge IR.
+    # Call to_edge to convert the graph to edge IR.
     # Note: dim_order is skipped (https://github.com/pytorch/executorch/issues/3704)
-    edge_prog_manager = to_edge_with_preserved_ops(
+    edge_prog_manager = to_edge(
         expo_program,
         compile_config=EdgeCompileConfig(
             _skip_dim_order=True,
             # Allow specific non-core aten ops in the IR.
             _core_aten_ops_exception_list=TO_EDGE_OP_EXCEPTION_LIST
             + (core_aten_exceptions or []),
+            preserve_ops=TO_EDGE_PRESERVE_OPS,
         ),
         constant_methods=constant_methods,
-        preserve_ops=TO_EDGE_PRESERVE_OPS,
     )
 
     if dump_graphs:
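Taken together, these hunks move op preservation from the dedicated to_edge_with_preserved_ops entry point onto EdgeCompileConfig. Below is a minimal sketch of the new call pattern; the RmsNorm module, its export, and the variable names are illustrative assumptions, and only the shape of the to_edge call mirrors this commit:

import torch
from executorch.exir import EdgeCompileConfig
from executorch.exir.program._program import to_edge


class RmsNorm(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.nn.functional.rms_norm(x, (x.shape[-1],))


# Export to an ExportedProgram, then lower to edge IR while keeping
# aten.rms_norm from being decomposed into core aten ops.
ep = torch.export.export(RmsNorm(), (torch.randn(2, 8),))
edge_prog_manager = to_edge(
    ep,
    compile_config=EdgeCompileConfig(
        _skip_dim_order=True,
        preserve_ops=[torch.ops.aten.rms_norm.default],
    ),
)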

backends/cadence/aot/graph_builder.py

Lines changed: 2 additions & 2 deletions
@@ -66,13 +66,13 @@ def placeholder(
     ) -> ProxyValue:
         if not isinstance(fake_tensor, FakeTensor):
             fake_tensor = self.fake_tensor_mode.from_tensor(fake_tensor)
-        logging.info(f"Creating placeholder {target} => {fake_tensor.shape}")
+        logging.debug(f"Creating placeholder {target} => {fake_tensor.shape}")
         placeholder = super().placeholder(target, fake_tensor, NodeMetadata({}))
         return placeholder
 
     # pyre-ignore[14]: Inconsistent override.
     def output(self, results: list[ProxyValue]) -> ProxyValue:
-        logging.info(f"Creating outputs {results}")
+        logging.debug(f"Creating outputs {results}")
         return super().output(results, NodeMetadata({}))
 
     def get_graph_module(self) -> torch.fx.GraphModule:
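Since these construction-time messages are now emitted at debug level, they disappear under the default logging configuration. A one-line sketch of opting back in, using only the standard library:

import logging

# Surface the per-placeholder / per-output messages again while debugging.
logging.basicConfig(level=logging.DEBUG)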

backends/cadence/aot/memory_planning.py

Lines changed: 10 additions & 3 deletions
@@ -19,7 +19,10 @@
     MemoryPlanningAlgo,
     MemoryPlanningState,
 )
-from executorch.backends.cadence.aot.utils import MemoryConfig
+from executorch.backends.cadence.aot.utils import (
+    MemoryConfig,
+    MemoryPlanningAlgoFailure,
+)
 
 from executorch.exir import ExecutorchProgramManager
 from executorch.exir.memory_planning import collect_specs_from_nodes, Verifier
@@ -95,7 +98,9 @@ def plan(
         ):
             self.plan_spec(spec, state, placement_constraints)
             if not state.is_placed(spec):
-                raise MemoryError(f"Cannot fit {spec} in any memory hierarchy")
+                raise MemoryPlanningAlgoFailure(
+                    f"Cannot fit {spec} {spec.allocated_memory=} in any memory hierarchy for {self.memory_config}"
+                )
 
 
 class GreedyWithHeuristic(MemoryPlanningAlgo):
@@ -169,7 +174,9 @@ def plan(
         ):
             self.plan_spec(spec, state, placement_constraints)
            if not state.is_placed(spec):
-                raise MemoryError(f"Cannot fit {spec} in any memory hierarchy")
+                raise MemoryPlanningAlgoFailure(
+                    f"Cannot fit {spec} in any memory hierarchy for {self.memory_config}"
+                )
 
         logging.debug(
             f"greedy by size for offset calculation with hierarchy returns bufsizes: {state.bufsizes}"

backends/cadence/aot/program_builder.py

Lines changed: 11 additions & 3 deletions
@@ -34,10 +34,18 @@ def __init__(self) -> None:
     def insert_input_spec(
         self, target: str, input_kind: InputKind, value: Tensor
     ) -> None:
-        if input_kind == InputKind.USER_INPUT:
-            self.input_specs.append(
-                InputSpec(input_kind, TensorArgument(target), target=target)
+        persistent: Optional[bool] = None
+        if input_kind == InputKind.BUFFER:
+            persistent = True
+        self.input_specs.append(
+            InputSpec(
+                input_kind, TensorArgument(target), target=target, persistent=persistent
             )
+        )
+        if input_kind == InputKind.PARAMETER or input_kind == InputKind.BUFFER:
+            self.state_dict[target] = value
+        elif input_kind == InputKind.CONSTANT_TENSOR:
+            self.constants[target] = value
 
     def placeholder(
         self,
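The rewritten insert_input_spec records an InputSpec for every placeholder kind rather than only user inputs, marks buffers persistent, and routes the backing tensor into state_dict (parameters and buffers) or constants (constant tensors). A sketch of the resulting behavior through the placeholder API that the tests below exercise; the names and shapes are illustrative:

import torch
from executorch.backends.cadence.aot.program_builder import ProgramBuilder
from torch.export.graph_signature import InputKind

builder = ProgramBuilder()
# USER_INPUT (the default): only an InputSpec is recorded.
x = builder.placeholder("x", torch.randn([3, 5]))
# PARAMETER: InputSpec recorded and the value lands in state_dict.
w = builder.placeholder(
    "w", torch.nn.Parameter(torch.randn([5])), input_kind=InputKind.PARAMETER
)
# BUFFER: InputSpec recorded with persistent=True; value lands in state_dict.
b = builder.placeholder("b", torch.randn([5]), input_kind=InputKind.BUFFER)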
backends/cadence/aot/tests/test_program_builder.py (new file)

Lines changed: 122 additions & 0 deletions

@@ -0,0 +1,122 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+
+# pyre-strict
+
+import torch
+from executorch.backends.cadence.aot.program_builder import ProgramBuilder
+from later.unittest import TestCase
+from torch.export.graph_signature import InputKind, OutputKind
+
+
+class TestProgramBuilder(TestCase):
+    def test_user_input_with_parameter(self) -> None:
+        inp = torch.randn([3, 5])
+        w = torch.nn.Parameter(torch.randn([5]))
+        # Create an exported program with one user input and one parameter.
+        # Returns the tuple (inp + w, w + 2).
+        builder = ProgramBuilder()
+        inp_proxy = builder.placeholder("inp", inp)
+        w_proxy = builder.placeholder("w", w, input_kind=InputKind.PARAMETER)
+        add = builder.call_operator(torch.ops.aten.add.Tensor, (inp_proxy, w_proxy))
+        add_w = builder.call_operator(torch.ops.aten.add.Scalar, (w_proxy, 2))
+        builder.output([add, add_w])
+        program = builder.get_program()
+
+        self.assertEqual(len(program.graph_signature.input_specs), 2)
+        self.assertEqual(
+            program.graph_signature.input_specs[0].kind, InputKind.USER_INPUT
+        )
+        self.assertEqual(
+            program.graph_signature.input_specs[1].kind, InputKind.PARAMETER
+        )
+        self.assertEqual(len(program.graph_signature.output_specs), 2)
+        self.assertEqual(
+            program.graph_signature.output_specs[0].kind, OutputKind.USER_OUTPUT
+        )
+        self.assertEqual(
+            program.graph_signature.output_specs[1].kind, OutputKind.USER_OUTPUT
+        )
+
+    def test_user_input_with_constant(self) -> None:
+        inp = torch.randn([3, 5])
+        const = torch.randn([5])
+        # Create an exported program with one user input and one constant tensor.
+        # Returns inp + const.
+        builder = ProgramBuilder()
+        inp_proxy = builder.placeholder("inp", inp)
+        const_proxy = builder.placeholder(
+            "const", const, input_kind=InputKind.CONSTANT_TENSOR
+        )
+        add = builder.call_operator(torch.ops.aten.add.Tensor, (inp_proxy, const_proxy))
+        builder.output([add])
+        program = builder.get_program()
+
+        # Verify the program has the correct inputs and outputs.
+        self.assertEqual(len(program.graph_signature.input_specs), 2)
+        self.assertEqual(
+            program.graph_signature.input_specs[0].kind, InputKind.USER_INPUT
+        )
+        self.assertEqual(
+            program.graph_signature.input_specs[1].kind, InputKind.CONSTANT_TENSOR
+        )
+        self.assertEqual(len(program.graph_signature.output_specs), 1)
+        self.assertEqual(
+            program.graph_signature.output_specs[0].kind, OutputKind.USER_OUTPUT
+        )
+
+    def test_mutable_buffer(self) -> None:
+        inp = torch.randn([3, 5])
+        buffer = torch.randn([5])
+        # Create an exported program with one user input and one buffer that gets mutated.
+        # Returns inp + buffer, updated_buffer.
+        builder = ProgramBuilder()
+        inp_proxy = builder.placeholder("inp", inp)
+        buffer_proxy = builder.placeholder(
+            "buffer", buffer, input_kind=InputKind.BUFFER
+        )
+        add = builder.call_operator(
+            torch.ops.aten.add.Tensor, (inp_proxy, buffer_proxy)
+        )
+        # Mutate the buffer by adding 1.
+        updated_buffer = builder.call_operator(
+            torch.ops.aten.add.Scalar, (buffer_proxy, 1)
+        )
+        builder.output(
+            [add, updated_buffer], [OutputKind.USER_OUTPUT, OutputKind.BUFFER_MUTATION]
+        )
+        program = builder.get_program()
+
+        # Verify the program has the correct inputs and outputs.
+        self.assertEqual(len(program.graph_signature.input_specs), 2)
+        self.assertEqual(
+            program.graph_signature.input_specs[0].kind, InputKind.USER_INPUT
+        )
+        self.assertEqual(program.graph_signature.input_specs[1].kind, InputKind.BUFFER)
+        self.assertEqual(len(program.graph_signature.output_specs), 2)
+        self.assertEqual(
+            program.graph_signature.output_specs[0].kind, OutputKind.USER_OUTPUT
+        )
+        self.assertEqual(
+            program.graph_signature.output_specs[1].kind, OutputKind.BUFFER_MUTATION
+        )
+
+    def test_user_input_mutation(self) -> None:
+        inp = torch.randn([3, 5])
+        # Create an exported program with one user input that gets mutated.
+        # Returns updated_inp.
+        builder = ProgramBuilder()
+        inp_proxy = builder.placeholder("inp", inp)
+        # Mutate the input by adding 1.
+        updated_inp = builder.call_operator(torch.ops.aten.add.Scalar, (inp_proxy, 1))
+        builder.output([updated_inp], [OutputKind.USER_INPUT_MUTATION])
+        program = builder.get_program()
+
+        # Verify the program has the correct inputs and outputs.
+        self.assertEqual(len(program.graph_signature.input_specs), 1)
+        self.assertEqual(
+            program.graph_signature.input_specs[0].kind, InputKind.USER_INPUT
+        )
+        self.assertEqual(len(program.graph_signature.output_specs), 1)
+        self.assertEqual(
+            program.graph_signature.output_specs[0].kind, OutputKind.USER_INPUT_MUTATION
+        )

backends/cadence/aot/utils.py

Lines changed: 4 additions & 0 deletions
@@ -25,6 +25,10 @@
 from torch.utils._pytree import tree_flatten
 
 
+class MemoryPlanningAlgoFailure(Exception):
+    pass
+
+
 # Get the output size of a 1D convolution given the input size and parameters
 def get_conv1d_output_size(
     in_size: torch.Size,

backends/cadence/hifi/operators/operators.h

Lines changed: 19 additions & 0 deletions
@@ -23,6 +23,16 @@ namespace impl {
 namespace HiFi {
 namespace native {
 
+void dequantize_per_tensor_out(
+    ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& input,
+    double scale,
+    int64_t zero_point,
+    int64_t quant_min,
+    int64_t quant_max,
+    ::executorch::aten::ScalarType dtype,
+    ::executorch::aten::Tensor& out);
+
 // Quantize the input tensor (PT2 version). Note that quant_<min,max> are not
 // used in any computation.
 void quantize_per_tensor_out(
@@ -42,6 +52,15 @@ ::executorch::aten::Tensor& div_out_mode(
     std::optional<std::string_view> mode,
     ::executorch::aten::Tensor& out);
 
+void quantized_relu_out(
+    ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& input,
+    const ::executorch::aten::Tensor& in_zero_point,
+    const int64_t out_zero_point,
+    const ::executorch::aten::Tensor& out_multiplier,
+    const ::executorch::aten::Tensor& out_shift,
+    ::executorch::aten::Tensor& output);
+
 void quantized_linear_out(
     __ET_UNUSED KernelRuntimeContext& ctx,
     const Tensor& in,
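Both prototypes correspond to HiFi kernels presumably defined elsewhere under backends/cadence/hifi/operators; declaring them in this shared header lets other translation units call dequantize_per_tensor_out and quantized_relu_out directly instead of re-declaring them locally.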
