1 change: 0 additions & 1 deletion backends/qualcomm/tests/utils.py
@@ -350,7 +350,6 @@ def lower_module_and_test_output(
                 # Therefore, won't want to pre-allocate
                 # by memory manager in runtime.
                 memory_planning_pass=MemoryPlanningPass(
-                    memory_planning_algo="greedy",
                     alloc_graph_input=not self.shared_buffer,
                     alloc_graph_output=not self.shared_buffer,
                 ),
2 changes: 1 addition & 1 deletion backends/vulkan/vulkan_preprocess.py
@@ -57,7 +57,7 @@ def preprocess(  # noqa: C901
             MeanToSumDiv(),
             SpecPropPass(),
             ConstraintBasedSymShapeEvalPass(),
-            MemoryPlanningPass("greedy"),
+            MemoryPlanningPass(),
         ]

         new_gm = program.graph_module
3 changes: 1 addition & 2 deletions docs/source/compiler-memory-planning.md
@@ -32,7 +32,6 @@ The `MemoryPlanningPass` exposes the option to not memory plan program inputs and outputs
 program = edge_program.to_executorch(
     exir.ExecutorchBackendConfig(
         memory_planning_pass=MemoryPlanningPass(
-            memory_planning_algo="greedy",
             alloc_graph_input=False,  # Inputs will not be memory planned, the data_ptr for input tensors after model load will be nullptr
             alloc_graph_output=True,  # Outputs will be memory planned, the data_ptr for output tensors after model load will be in the `planned_memory`.
         )
@@ -77,7 +76,7 @@ Then later when lowering to ExecuTorch you can use your custom plan in the following way:
 program = edge_program.to_executorch(
     exir.ExecutorchBackendConfig(
         memory_planning_pass=CustomPoolMemoryPlanningPass(
-            memory_planning_algo="greedy",
+            memory_planning_algo=greedy,
         )
     )
 )
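Note on the new API: `memory_planning_algo` now takes the planning function itself rather than its name. A minimal sketch of the updated call pattern, assuming `edge_program` is an `EdgeProgramManager` as in the surrounding docs:

import executorch.exir as exir
from executorch.exir.memory_planning import greedy
from executorch.exir.passes import MemoryPlanningPass

program = edge_program.to_executorch(
    exir.ExecutorchBackendConfig(
        memory_planning_pass=MemoryPlanningPass(
            memory_planning_algo=greedy,  # the function object replaces the old "greedy" string
            alloc_graph_input=False,
            alloc_graph_output=True,
        )
    )
)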
@@ -523,9 +523,7 @@ def forward(self, a, x, b):
 executorch_program: ExecutorchProgramManager = edge_program.to_executorch(
     ExecutorchBackendConfig(
         passes=[],  # User-defined passes
-        memory_planning_pass=MemoryPlanningPass(
-            "greedy"
-        ),  # Default memory planning pass
+        memory_planning_pass=MemoryPlanningPass(),  # Default memory planning pass
     )
 )

1 change: 0 additions & 1 deletion examples/mediatek/model_export_scripts/llama.py
@@ -365,7 +365,6 @@ def export_to_et_ir(
     executorch_program = delegated_program.to_executorch(
         config=exir.ExecutorchBackendConfig(
             memory_planning_pass=exir.passes.MemoryPlanningPass(
-                memory_planning_algo="greedy",
                 alloc_graph_input=False,
                 alloc_graph_output=False,
             ),
2 changes: 1 addition & 1 deletion examples/models/llava/export_llava.py
@@ -233,7 +233,7 @@ def export_all(llava_model: LlavaModel):
         passes=[
             QuantFusionPass(),
         ],
-        memory_planning_pass=MemoryPlanningPass("greedy", alloc_graph_input=False),
+        memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
         sym_shape_eval_pass={
             "image_encoder": ConstraintBasedSymShapeEvalPass(),
             "text_model": ConstraintBasedSymShapeEvalPass(),
1 change: 0 additions & 1 deletion examples/qualcomm/oss_scripts/llama2/llama.py
@@ -311,7 +311,6 @@ def lowering_modules(
                 # Therefore, won't want to pre-allocate
                 # by memory manager in runtime.
                 memory_planning_pass=MemoryPlanningPass(
-                    memory_planning_algo="greedy",
                     alloc_graph_input=False,
                     alloc_graph_output=False,
                 ),
1 change: 0 additions & 1 deletion examples/qualcomm/qaihub_scripts/utils/export.py
@@ -220,7 +220,6 @@ def compile(args):
     )
     # setup memory planning
     memory_planning_pass = MemoryPlanningPass(
-        memory_planning_algo="greedy",
         alloc_graph_input=args.allocate_graph_io,
         alloc_graph_output=args.allocate_graph_io,
     )
1 change: 0 additions & 1 deletion examples/qualcomm/utils.py
@@ -285,7 +285,6 @@ def build_executorch_binary(
             # Therefore, won't want to pre-allocate
             # by memory manager in runtime.
             memory_planning_pass=MemoryPlanningPass(
-                memory_planning_algo="greedy",
                 alloc_graph_input=not shared_buffer,
                 alloc_graph_output=not shared_buffer,
             ),
4 changes: 1 addition & 3 deletions exir/capture/_config.py
@@ -56,9 +56,7 @@ class ExecutorchBackendConfig:

     # A single memory planning pass can be defined for all the programs in the
     # EdgeProgramManager or can be defined per program.
-    memory_planning_pass: Union[PassType, Dict[str, PassType]] = MemoryPlanningPass(
-        "greedy"
-    )
+    memory_planning_pass: Union[PassType, Dict[str, PassType]] = MemoryPlanningPass()
     to_out_var_pass: PassType = ToOutVarPass(ignore_to_out_var_failure=False)
     dynamic_memory_planning_mode: DynamicMemoryPlanningMode = (
         DynamicMemoryPlanningMode.UPPER_BOUND
5 changes: 1 addition & 4 deletions exir/emit/test/test_emit.py
@@ -1145,7 +1145,6 @@ def forward(self, k: torch.Tensor) -> torch.Tensor:
         config = exir.ExecutorchBackendConfig(
             sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
             memory_planning_pass=MemoryPlanningPass(
-                memory_planning_algo="greedy",
                 # allow_lifetime_and_storage_overlap: bool = False,
                 alloc_graph_input=True,
                 alloc_graph_output=False,
@@ -1606,9 +1605,7 @@ def forward(self, x):
         )
         model = model.to_executorch(
             config=ExecutorchBackendConfig(
-                memory_planning_pass=MemoryPlanningPass(
-                    "greedy", alloc_graph_input=False
-                ),
+                memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
                 sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
             )
         )
2 changes: 1 addition & 1 deletion exir/lowered_backend_module.py
@@ -326,7 +326,7 @@ def program(
         verifiers=[lowered_exported_program.verifier],
     )
     if memory_planning is None:
-        memory_planning = MemoryPlanningPass("greedy")
+        memory_planning = MemoryPlanningPass()
     exported_program = _transform(exported_program, SpecPropPass(), memory_planning)
     emitted_program = emit_program(
         exported_program, emit_stacktrace=emit_stacktrace
29 changes: 1 addition & 28 deletions exir/memory_planning.py
@@ -18,12 +18,7 @@
 from executorch.exir import memory
 from executorch.exir.control_flow import while_loop as exir_while
 from executorch.exir.delegate import executorch_call_delegate
-from executorch.exir.error import (
-    ExportError,
-    ExportErrorType,
-    internal_assert,
-    InternalError,
-)
+from executorch.exir.error import internal_assert, InternalError
 from executorch.exir.operator.convert import is_inplace_variant, is_out_variant
 from executorch.exir.schema import TensorShapeDynamism
 from executorch.exir.tensor import TensorSpec
@@ -255,17 +250,6 @@ def verify_graph_input_output(self) -> None:
         ), f"Misallocate graph output {graph_output_allocated} v.s. {self.alloc_graph_output}"


-def register_algo(fn: Callable[..., List[int]]) -> Callable[..., List[int]]:
-    algo_name = fn.__name__
-    if algo_name in REGISTERED_ALGOS:
-        raise ExportError(
-            ExportErrorType.VIOLATION_OF_SPEC,
-            f"Re-registering memory planning algorithm {algo_name}",
-        )
-    REGISTERED_ALGOS[algo_name] = fn
-    return fn
-
-
 def _is_out_var_node(node: torch.fx.Node) -> bool:
     return (
         node.op == "call_function"
@@ -561,7 +545,6 @@ def get_node_tensor_specs(
 ]


-@register_algo
 def greedy(
     graph_module: torch.fx.GraphModule,
     alignment: int,
@@ -615,7 +598,6 @@ def greedy(
     return total_sizes


-@register_algo
 def naive(
     graph_module: torch.fx.GraphModule,
     alignment: int,
@@ -656,15 +638,6 @@ def _allocate_buf(bufsizes: List[int], mem_id: int, allocated: int) -> int:
     return bufsizes


-def get_algo(algo_name: str) -> Callable[..., List[int]]:
-    if algo_name not in REGISTERED_ALGOS:
-        raise ExportError(
-            ExportErrorType.NOT_SUPPORTED,
-            f"Memory planning algorithm '{algo_name}' not found",
-        )
-    return REGISTERED_ALGOS[algo_name]
-
-
 def get_cond_nodes(graph_module: torch.fx.GraphModule) -> Iterable[Node]:
     for nd in graph_module.graph.nodes:
         if nd.target is torch.ops.higher_order.cond:
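With `register_algo`/`get_algo` removed, the `REGISTERED_ALGOS` registry and its string lookup are gone: a planning algorithm is now any function shaped like `greedy` or `naive` above that returns the buffer size for each memory pool. A hedged sketch of a custom planner (the positional parameters beyond `graph_module` and `alignment` are kept generic here, since they follow whatever `apply_algo` forwards):

from typing import List

import torch
from executorch.exir.memory_planning import naive


def my_custom_algo(
    graph_module: torch.fx.GraphModule, alignment: int, *args, **kwargs
) -> List[int]:
    # Illustration only: defer to the stock naive planner. A real
    # implementation would assign mem_id/mem_offset to each TensorSpec
    # and return the resulting bufsizes.
    return naive(graph_module, alignment, *args, **kwargs)

Such a function can be handed straight to `MemoryPlanningPass(memory_planning_algo=my_custom_algo)`.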
11 changes: 5 additions & 6 deletions exir/passes/memory_planning_pass.py
@@ -6,16 +6,16 @@

 import logging
 import warnings
-from typing import Optional
+from typing import Callable, List, Optional

 import torch
 from executorch.exir.error import internal_assert
 from executorch.exir.memory import alloc
 from executorch.exir.memory_planning import (
     _is_out_var_node,
     apply_algo,
-    get_algo,
     get_node_tensor_specs,
+    greedy,
     Verifier,
 )
 from executorch.exir.operator.convert import get_out_args_from_opoverload
@@ -27,7 +27,7 @@
 class MemoryPlanningPass(PassBase):
     def __init__(
         self,
-        memory_planning_algo: str = "greedy",
+        memory_planning_algo: Callable[..., List[int]] = greedy,
         allow_lifetime_and_storage_overlap: bool = False,
         alloc_graph_input: bool = True,
         alloc_graph_output: bool = True,
@@ -96,14 +96,13 @@ def run(
             memory_planning_algo
         """
         self._set_alloc_node_spec(graph_module)
-        algo = get_algo(self.memory_planning_algo)
         # TODO(shunting) if people have concern of adding a field to GraphModule
         # directly, we should define a GraphModule subclass that we can add our
         # customized fields. Using the graph_module object to convey information across
         # passes/stages is quite natural and avoid yet another 'context' data structure
         # to do the job.
         _ = apply_algo(
-            algo,
+            self.memory_planning_algo,
             graph_module,
             self.alignment,
             graph_signature,
@@ -125,7 +124,7 @@
                 self.allow_lifetime_and_storage_overlap
             )
             logging.debug(
-                f"The {self.memory_planning_algo} algorithm reuses storage for {num_reuse_pairs} pair of tensors"
+                f"The {getattr(self.memory_planning_algo, '__name__', repr(self.memory_planning_algo))} algorithm reuses storage for {num_reuse_pairs} pair of tensors"
            )
            verifier.verify_graph_input_output()
        return PassResult(graph_module, True)
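For callers, the net effect is that the default and the explicit spelling now coincide; a short usage sketch (variable names below are illustrative, not part of the diff):

from executorch.exir.memory_planning import greedy
from executorch.exir.passes import MemoryPlanningPass

default_pass = MemoryPlanningPass()  # greedy is now the default callable
explicit_pass = MemoryPlanningPass(memory_planning_algo=greedy)  # equivalent
# The old string form, MemoryPlanningPass("greedy"), no longer works:
# the argument is now treated as the algorithm callable itself.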
1 change: 1 addition & 0 deletions exir/program/TARGETS
@@ -22,6 +22,7 @@ python_library(
         "//caffe2:torch",
         "//executorch/exir:error",
         "//executorch/exir:graph_module",
+        "//executorch/exir:pass_base",
         "//executorch/exir:pass_manager",
         "//executorch/exir:print_program",
         "//executorch/exir:schema",
2 changes: 0 additions & 2 deletions exir/program/test/test_program.py
@@ -250,12 +250,10 @@ def test_executorch_manager_multi_config(self):
 def get_executorch_memory_planning_passes() -> Dict[str, MemoryPlanningPass]:
     return {
         "forward": MemoryPlanningPass(
-            memory_planning_algo="greedy",
             alloc_graph_input=True,
             alloc_graph_output=False,
         ),
         "foo": MemoryPlanningPass(
-            memory_planning_algo="greedy",
             alloc_graph_input=False,
             alloc_graph_output=True,
         ),
34 changes: 17 additions & 17 deletions exir/tests/test_memory_planning.py
@@ -17,6 +17,8 @@
 from executorch.exir.memory_planning import (
     filter_nodes,
     get_node_tensor_specs,
+    greedy,
+    naive,
     Verifier,
 )
 from executorch.exir.pass_base import PassResult
@@ -208,7 +210,7 @@ def forward(self, a: torch.Tensor) -> torch.Tensor:

 def maketest(
     module_cls: Type[torch.nn.Module],
-    criteria: Optional[List[Tuple[str, bool]]] = None,
+    criteria: Optional[List[Tuple[Callable[..., List[int]], bool]]] = None,
     extra_check: Optional[Callable[..., None]] = None,
     use_functionalization: bool = True,
     alloc_graph_input: bool = True,
@@ -222,13 +224,15 @@ def wrapper(self: "TestMemoryPlanning") -> None:
         if not criteria:
             criteria = [
                 # naive algorithm does not reuse tensor storages
-                ("naive", False),
+                (naive, False),
                 # greedy algorithm should reuse tensor storages in the testing model
-                ("greedy", True),
+                (greedy, True),
             ]

         for algo, expect_reuse in criteria:
-            print(f"algo {algo}, expect_reuse {expect_reuse}")
+            print(
+                f"algo {getattr(algo, '__name__', repr(algo))}, expect_reuse {expect_reuse}"
+            )
             eager_module = module_cls().eval()
             inputs = eager_module.get_random_inputs()
             graph_module = (
@@ -353,8 +357,8 @@ def verify_overlap_placeholders(
     test_return_two: Callable[..., None] = maketest(
         ModuleReturnTwo,
         criteria=[
-            ("naive", False),
-            ("greedy", True),
+            (naive, False),
+            (greedy, True),
         ],
     )

@@ -363,8 +367,8 @@
     test_list_arg: Callable[..., None] = maketest(
         ModuleListArg,
         criteria=[
-            ("naive", False),
-            ("greedy", True),
+            (naive, False),
+            (greedy, True),
         ],
         extra_check=ModuleListArg.extra_check,
     )
@@ -466,20 +470,20 @@ def quantize(self, eager_model: nn.Module) -> nn.Module:
     @parameterized.expand(
         [
             (
-                "naive",
+                naive,
                 [(1, 0), (3, 0), (1, 4), (3, 4), (1, 8)],
                 [0, 12, 0, 8],
             ),
             (
-                "greedy",
+                greedy,
                 [(1, 0), (3, 0), (1, 4), (3, 4), (1, 0)],
                 [0, 8, 0, 8],
             ),
         ]
     )
     def test_multiple_pools(
         self,
-        algo: str,
+        algo: Callable[..., List[int]],
         expected_allocs: List[Tuple[int, int]],
         expected_bufsizes: List[int],
     ) -> None:
@@ -550,9 +554,7 @@ def count_planned_inputs(

         ep_no_input_planning = to_edge(export(model, inputs)).to_executorch(
             config=ExecutorchBackendConfig(
-                memory_planning_pass=MemoryPlanningPass(
-                    "greedy", alloc_graph_input=False
-                ),
+                memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
                 sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
             )
         )
@@ -572,9 +574,7 @@

         ep_input_planning = to_edge(export(model, inputs)).to_executorch(
             config=ExecutorchBackendConfig(
-                memory_planning_pass=MemoryPlanningPass(
-                    "greedy", alloc_graph_input=True
-                ),
+                memory_planning_pass=MemoryPlanningPass(alloc_graph_input=True),
                 sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
             )
         )
2 changes: 1 addition & 1 deletion exir/tests/test_passes.py
@@ -713,7 +713,7 @@ def test_alloc_node_spec(self) -> None:
         self.assertIsNotNone(new_gm_res)
         new_gm = new_gm_res.graph_module

-        new_gm_res = MemoryPlanningPass("greedy")(new_gm)
+        new_gm_res = MemoryPlanningPass()(new_gm)
         self.assertIsNotNone(new_gm_res)
         new_gm = new_gm_res.graph_module
