1 change: 0 additions & 1 deletion backends/qualcomm/tests/utils.py
@@ -350,7 +350,6 @@ def lower_module_and_test_output(
                 # Therefore, won't want to pre-allocate
                 # by memory manager in runtime.
                 memory_planning_pass=MemoryPlanningPass(
-                    memory_planning_algo="greedy",
                     alloc_graph_input=not self.shared_buffer,
                     alloc_graph_output=not self.shared_buffer,
                 ),
2 changes: 1 addition & 1 deletion backends/vulkan/vulkan_preprocess.py
@@ -57,7 +57,7 @@ def preprocess(  # noqa: C901
             MeanToSumDiv(),
             SpecPropPass(),
             ConstraintBasedSymShapeEvalPass(),
-            MemoryPlanningPass("greedy"),
+            MemoryPlanningPass(),
         ]

         new_gm = program.graph_module
3 changes: 1 addition & 2 deletions docs/source/compiler-memory-planning.md
@@ -32,7 +32,6 @@ The `MemoryPlanningPass` exposes the option to not memory plan program inputs and outputs
 program = edge_program.to_executorch(
     exir.ExecutorchBackendConfig(
         memory_planning_pass=MemoryPlanningPass(
-            memory_planning_algo="greedy",
             alloc_graph_input=False,  # Inputs will not be memory planned, the data_ptr for input tensors after model load will be nullptr
             alloc_graph_output=True,  # Outputs will be memory planned, the data_ptr for output tensors after model load will be in the `planned_memory`.
         )
@@ -77,7 +76,7 @@ Then later when lowering to ExecuTorch you can use your custom plan in the following way:
 program = edge_program.to_executorch(
     exir.ExecutorchBackendConfig(
         memory_planning_pass=CustomPoolMemoryPlanningPass(
-            memory_planning_algo="greedy",
+            memory_planning_algo=greedy,
         )
     )
 )
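Note on the new API: `memory_planning_algo` now takes the planning function itself rather than its name. A minimal sketch of the updated call pattern, assuming `edge_program` is an `EdgeProgramManager` as in the surrounding docs:

import executorch.exir as exir
from executorch.exir.memory_planning import greedy
from executorch.exir.passes import MemoryPlanningPass

program = edge_program.to_executorch(
    exir.ExecutorchBackendConfig(
        memory_planning_pass=MemoryPlanningPass(
            memory_planning_algo=greedy,  # the function object replaces the old "greedy" string
            alloc_graph_input=False,
            alloc_graph_output=True,
        )
    )
)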
@@ -523,9 +523,7 @@ def forward(self, a, x, b):
 executorch_program: ExecutorchProgramManager = edge_program.to_executorch(
     ExecutorchBackendConfig(
         passes=[],  # User-defined passes
-        memory_planning_pass=MemoryPlanningPass(
-            "greedy"
-        ),  # Default memory planning pass
+        memory_planning_pass=MemoryPlanningPass(),  # Default memory planning pass
     )
 )

1 change: 0 additions & 1 deletion examples/mediatek/model_export_scripts/llama.py
@@ -365,7 +365,6 @@ def export_to_et_ir(
     executorch_program = delegated_program.to_executorch(
         config=exir.ExecutorchBackendConfig(
             memory_planning_pass=exir.passes.MemoryPlanningPass(
-                memory_planning_algo="greedy",
                 alloc_graph_input=False,
                 alloc_graph_output=False,
             ),
2 changes: 1 addition & 1 deletion examples/models/llava/export_llava.py
@@ -233,7 +233,7 @@ def export_all(llava_model: LlavaModel):
         passes=[
             QuantFusionPass(),
         ],
-        memory_planning_pass=MemoryPlanningPass("greedy", alloc_graph_input=False),
+        memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
         sym_shape_eval_pass={
             "image_encoder": ConstraintBasedSymShapeEvalPass(),
             "text_model": ConstraintBasedSymShapeEvalPass(),
1 change: 0 additions & 1 deletion examples/qualcomm/oss_scripts/llama2/llama.py
@@ -311,7 +311,6 @@ def lowering_modules(
                 # Therefore, won't want to pre-allocate
                 # by memory manager in runtime.
                 memory_planning_pass=MemoryPlanningPass(
-                    memory_planning_algo="greedy",
                     alloc_graph_input=False,
                     alloc_graph_output=False,
                 ),
1 change: 0 additions & 1 deletion examples/qualcomm/qaihub_scripts/utils/export.py
@@ -220,7 +220,6 @@ def compile(args):
     )
     # setup memory planning
     memory_planning_pass = MemoryPlanningPass(
-        memory_planning_algo="greedy",
         alloc_graph_input=args.allocate_graph_io,
         alloc_graph_output=args.allocate_graph_io,
     )
1 change: 0 additions & 1 deletion examples/qualcomm/utils.py
@@ -285,7 +285,6 @@ def build_executorch_binary(
             # Therefore, won't want to pre-allocate
             # by memory manager in runtime.
             memory_planning_pass=MemoryPlanningPass(
-                memory_planning_algo="greedy",
                 alloc_graph_input=not shared_buffer,
                 alloc_graph_output=not shared_buffer,
             ),
4 changes: 1 addition & 3 deletions exir/capture/_config.py
@@ -56,9 +56,7 @@ class ExecutorchBackendConfig:

     # A single memory planning pass can be defined for all the programs in the
     # EdgeProgramManager or can be defined per program.
-    memory_planning_pass: Union[PassType, Dict[str, PassType]] = MemoryPlanningPass(
-        "greedy"
-    )
+    memory_planning_pass: Union[PassType, Dict[str, PassType]] = MemoryPlanningPass()
     to_out_var_pass: PassType = ToOutVarPass(ignore_to_out_var_failure=False)
     dynamic_memory_planning_mode: DynamicMemoryPlanningMode = (
         DynamicMemoryPlanningMode.UPPER_BOUND
5 changes: 1 addition & 4 deletions exir/emit/test/test_emit.py
@@ -1145,7 +1145,6 @@ def forward(self, k: torch.Tensor) -> torch.Tensor:
         config = exir.ExecutorchBackendConfig(
             sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
             memory_planning_pass=MemoryPlanningPass(
-                memory_planning_algo="greedy",
                 # allow_lifetime_and_storage_overlap: bool = False,
                 alloc_graph_input=True,
                 alloc_graph_output=False,
@@ -1606,9 +1605,7 @@ def forward(self, x):
         )
         model = model.to_executorch(
             config=ExecutorchBackendConfig(
-                memory_planning_pass=MemoryPlanningPass(
-                    "greedy", alloc_graph_input=False
-                ),
+                memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
                 sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
             )
         )
2 changes: 1 addition & 1 deletion exir/lowered_backend_module.py
@@ -326,7 +326,7 @@ def program(
         verifiers=[lowered_exported_program.verifier],
     )
     if memory_planning is None:
-        memory_planning = MemoryPlanningPass("greedy")
+        memory_planning = MemoryPlanningPass()
     exported_program = _transform(exported_program, SpecPropPass(), memory_planning)
     emitted_program = emit_program(
         exported_program, emit_stacktrace=emit_stacktrace
29 changes: 1 addition & 28 deletions exir/memory_planning.py
@@ -18,12 +18,7 @@
 from executorch.exir import memory
 from executorch.exir.control_flow import while_loop as exir_while
 from executorch.exir.delegate import executorch_call_delegate
-from executorch.exir.error import (
-    ExportError,
-    ExportErrorType,
-    internal_assert,
-    InternalError,
-)
+from executorch.exir.error import internal_assert, InternalError
 from executorch.exir.operator.convert import is_inplace_variant, is_out_variant
 from executorch.exir.schema import TensorShapeDynamism
 from executorch.exir.tensor import TensorSpec
@@ -255,17 +250,6 @@ def verify_graph_input_output(self) -> None:
         ), f"Misallocate graph output {graph_output_allocated} v.s. {self.alloc_graph_output}"


-def register_algo(fn: Callable[..., List[int]]) -> Callable[..., List[int]]:
-    algo_name = fn.__name__
-    if algo_name in REGISTERED_ALGOS:
-        raise ExportError(
-            ExportErrorType.VIOLATION_OF_SPEC,
-            f"Re-registering memory planning algorithm {algo_name}",
-        )
-    REGISTERED_ALGOS[algo_name] = fn
-    return fn
-
-
 def _is_out_var_node(node: torch.fx.Node) -> bool:
     return (
         node.op == "call_function"
@@ -561,7 +545,6 @@ def get_node_tensor_specs(
 ]


-@register_algo
 def greedy(
     graph_module: torch.fx.GraphModule,
     alignment: int,
@@ -615,7 +598,6 @@ def greedy(
     return total_sizes


-@register_algo
 def naive(
     graph_module: torch.fx.GraphModule,
     alignment: int,
@@ -656,15 +638,6 @@ def _allocate_buf(bufsizes: List[int], mem_id: int, allocated: int) -> int:
     return bufsizes


-def get_algo(algo_name: str) -> Callable[..., List[int]]:
-    if algo_name not in REGISTERED_ALGOS:
-        raise ExportError(
-            ExportErrorType.NOT_SUPPORTED,
-            f"Memory planning algorithm '{algo_name}' not found",
-        )
-    return REGISTERED_ALGOS[algo_name]
-
-
 def get_cond_nodes(graph_module: torch.fx.GraphModule) -> Iterable[Node]:
     for nd in graph_module.graph.nodes:
         if nd.target is torch.ops.higher_order.cond:
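With `register_algo`/`get_algo` removed, the `REGISTERED_ALGOS` registry and its string lookup are gone: a planning algorithm is now any function shaped like `greedy` or `naive` above that returns the buffer size for each memory pool. A hedged sketch of a custom planner (the positional parameters beyond `graph_module` and `alignment` are kept generic here, since they follow whatever `apply_algo` forwards):

from typing import List

import torch
from executorch.exir.memory_planning import naive


def my_custom_algo(
    graph_module: torch.fx.GraphModule, alignment: int, *args, **kwargs
) -> List[int]:
    # Illustration only: defer to the stock naive planner. A real
    # implementation would assign mem_id/mem_offset to each TensorSpec
    # and return the resulting bufsizes.
    return naive(graph_module, alignment, *args, **kwargs)

Such a function can be handed straight to `MemoryPlanningPass(memory_planning_algo=my_custom_algo)`.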
11 changes: 5 additions & 6 deletions exir/passes/memory_planning_pass.py
@@ -6,16 +6,16 @@

 import logging
 import warnings
-from typing import Optional
+from typing import Callable, List, Optional

 import torch
 from executorch.exir.error import internal_assert
 from executorch.exir.memory import alloc
 from executorch.exir.memory_planning import (
     _is_out_var_node,
     apply_algo,
-    get_algo,
     get_node_tensor_specs,
+    greedy,
     Verifier,
 )
 from executorch.exir.operator.convert import get_out_args_from_opoverload
@@ -27,7 +27,7 @@
 class MemoryPlanningPass(PassBase):
     def __init__(
         self,
-        memory_planning_algo: str = "greedy",
+        memory_planning_algo: Callable[..., List[int]] = greedy,
         allow_lifetime_and_storage_overlap: bool = False,
         alloc_graph_input: bool = True,
         alloc_graph_output: bool = True,
@@ -96,14 +96,13 @@ def run(
             memory_planning_algo
         """
         self._set_alloc_node_spec(graph_module)
-        algo = get_algo(self.memory_planning_algo)
         # TODO(shunting) if people have concern of adding a field to GraphModule
         # directly, we should define a GraphModule subclass that we can add our
         # customized fields. Using the graph_module object to convey information across
         # passes/stages is quite natural and avoid yet another 'context' data structure
         # to do the job.
         _ = apply_algo(
-            algo,
+            self.memory_planning_algo,
             graph_module,
             self.alignment,
             graph_signature,
@@ -125,7 +124,7 @@
                 self.allow_lifetime_and_storage_overlap
             )
             logging.debug(
-                f"The {self.memory_planning_algo} algorithm reuses storage for {num_reuse_pairs} pair of tensors"
+                f"The {getattr(self.memory_planning_algo, '__name__', repr(self.memory_planning_algo))} algorithm reuses storage for {num_reuse_pairs} pair of tensors"
            )
            verifier.verify_graph_input_output()
        return PassResult(graph_module, True)
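For callers, the net effect is that the default and the explicit spelling now coincide; a short usage sketch (variable names below are illustrative, not part of the diff):

from executorch.exir.memory_planning import greedy
from executorch.exir.passes import MemoryPlanningPass

default_pass = MemoryPlanningPass()  # greedy is now the default callable
explicit_pass = MemoryPlanningPass(memory_planning_algo=greedy)  # equivalent
# The old string form, MemoryPlanningPass("greedy"), no longer works:
# the argument is now treated as the algorithm callable itself.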
1 change: 1 addition & 0 deletions exir/program/TARGETS
@@ -22,6 +22,7 @@ python_library(
         "//caffe2:torch",
         "//executorch/exir:error",
         "//executorch/exir:graph_module",
+        "//executorch/exir:pass_base",
         "//executorch/exir:pass_manager",
         "//executorch/exir:print_program",
         "//executorch/exir:schema",
2 changes: 0 additions & 2 deletions exir/program/test/test_program.py
@@ -250,12 +250,10 @@ def test_executorch_manager_multi_config(self):
 def get_executorch_memory_planning_passes() -> Dict[str, MemoryPlanningPass]:
     return {
         "forward": MemoryPlanningPass(
-            memory_planning_algo="greedy",
             alloc_graph_input=True,
             alloc_graph_output=False,
         ),
         "foo": MemoryPlanningPass(
-            memory_planning_algo="greedy",
             alloc_graph_input=False,
             alloc_graph_output=True,
         ),
34 changes: 17 additions & 17 deletions exir/tests/test_memory_planning.py
@@ -17,6 +17,8 @@
 from executorch.exir.memory_planning import (
     filter_nodes,
     get_node_tensor_specs,
+    greedy,
+    naive,
     Verifier,
 )
 from executorch.exir.pass_base import PassResult
@@ -208,7 +210,7 @@ def forward(self, a: torch.Tensor) -> torch.Tensor:

 def maketest(
     module_cls: Type[torch.nn.Module],
-    criteria: Optional[List[Tuple[str, bool]]] = None,
+    criteria: Optional[List[Tuple[Callable[..., List[int]], bool]]] = None,
     extra_check: Optional[Callable[..., None]] = None,
     use_functionalization: bool = True,
     alloc_graph_input: bool = True,
@@ -222,13 +224,15 @@ def wrapper(self: "TestMemoryPlanning") -> None:
         if not criteria:
             criteria = [
                 # naive algorithm does not reuse tensor storages
-                ("naive", False),
+                (naive, False),
                 # greedy algorithm should reuse tensor storages in the testing model
-                ("greedy", True),
+                (greedy, True),
             ]

         for algo, expect_reuse in criteria:
-            print(f"algo {algo}, expect_reuse {expect_reuse}")
+            print(
+                f"algo {getattr(algo, '__name__', repr(algo))}, expect_reuse {expect_reuse}"
+            )
             eager_module = module_cls().eval()
             inputs = eager_module.get_random_inputs()
             graph_module = (
@@ -353,8 +357,8 @@ def verify_overlap_placeholders(
     test_return_two: Callable[..., None] = maketest(
         ModuleReturnTwo,
         criteria=[
-            ("naive", False),
-            ("greedy", True),
+            (naive, False),
+            (greedy, True),
         ],
     )

@@ -363,8 +367,8 @@
     test_list_arg: Callable[..., None] = maketest(
         ModuleListArg,
         criteria=[
-            ("naive", False),
-            ("greedy", True),
+            (naive, False),
+            (greedy, True),
         ],
         extra_check=ModuleListArg.extra_check,
     )
@@ -466,20 +470,20 @@ def quantize(self, eager_model: nn.Module) -> nn.Module:
     @parameterized.expand(
         [
             (
-                "naive",
+                naive,
                 [(1, 0), (3, 0), (1, 4), (3, 4), (1, 8)],
                 [0, 12, 0, 8],
             ),
             (
-                "greedy",
+                greedy,
                 [(1, 0), (3, 0), (1, 4), (3, 4), (1, 0)],
                 [0, 8, 0, 8],
             ),
         ]
     )
     def test_multiple_pools(
         self,
-        algo: str,
+        algo: Callable[..., List[int]],
         expected_allocs: List[Tuple[int, int]],
         expected_bufsizes: List[int],
     ) -> None:
@@ -550,9 +554,7 @@ def count_planned_inputs(

         ep_no_input_planning = to_edge(export(model, inputs)).to_executorch(
             config=ExecutorchBackendConfig(
-                memory_planning_pass=MemoryPlanningPass(
-                    "greedy", alloc_graph_input=False
-                ),
+                memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
                 sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
             )
         )
@@ -572,9 +574,7 @@

         ep_input_planning = to_edge(export(model, inputs)).to_executorch(
             config=ExecutorchBackendConfig(
-                memory_planning_pass=MemoryPlanningPass(
-                    "greedy", alloc_graph_input=True
-                ),
+                memory_planning_pass=MemoryPlanningPass(alloc_graph_input=True),
                 sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
             )
         )
2 changes: 1 addition & 1 deletion exir/tests/test_passes.py
@@ -713,7 +713,7 @@ def test_alloc_node_spec(self) -> None:
         self.assertIsNotNone(new_gm_res)
         new_gm = new_gm_res.graph_module

-        new_gm_res = MemoryPlanningPass("greedy")(new_gm)
+        new_gm_res = MemoryPlanningPass()(new_gm)
         self.assertIsNotNone(new_gm_res)
         new_gm = new_gm_res.graph_module
