
Commit ebdf091

Update on "[ET-VK] Using push constants for unary op."

This diff transitions the unary op to use push constants, replacing the previous UBO implementation.

Differential Revision: [D77706459](https://our.internmc.facebook.com/intern/diff/D77706459/)

[ghstack-poisoned]

2 parents 2a6e255 + fd64231 commit ebdf091

File tree

13 files changed: +547 -188 lines


backends/cadence/aot/compiler.py

Lines changed: 28 additions & 12 deletions
@@ -8,7 +8,7 @@
 
 import logging
 from pathlib import Path
-from typing import Optional
+from typing import Callable, cast, Optional
 
 import executorch.backends.cadence.aot.ops_registrations  # noqa
 import torch
@@ -32,6 +32,7 @@
     ExecutorchBackendConfig,
     ExecutorchProgramManager,
 )
+from executorch.exir.pass_base import PassResult
 from executorch.exir.passes import ToOutVarPass
 from executorch.exir.passes.sym_shape_eval_pass import HintBasedSymShapeEvalPass
 from executorch.exir.program._program import to_edge_with_preserved_ops
@@ -40,7 +41,7 @@
 from torch.export.exported_program import ExportedProgram
 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
 
-from .passes import apply_exir_ops_passes, apply_torch_ops_passes
+from .passes import get_cadence_passes
 
 from .utils import print_ops_info
 
@@ -261,20 +262,14 @@ def export_to_edge(
     inputs: tuple[object, ...],
     dump_graphs: bool = False,
     constant_methods: Optional[dict[str, object]] = None,
-    core_aten_exceptions: Optional[list[torch._ops.OpOverload]] = None,
 ) -> EdgeProgramManager:
     assert isinstance(model, torch.nn.Module), "model should be an nn.Module"
 
     # Export the model into an ExportedProgram.
     expo_program = trace(model, inputs)
 
-    # Apply passes which transform the ExportedProgram before it gets lowered to edge.
-    expo_program = apply_torch_ops_passes(expo_program)
-
     # Lower the model to edge IR.
-    edge_prog_manager = _lower_ep_to_edge(
-        expo_program, dump_graphs, constant_methods, core_aten_exceptions
-    )
+    edge_prog_manager = _lower_ep_to_edge(expo_program, dump_graphs, constant_methods)
 
     return edge_prog_manager
 
@@ -316,7 +311,14 @@ def _lower_ep_to_cadence(
     Lower an existing ExportedProgram to edge IR and apply frontend optimization passes.
     """
     edge_prog_manager = _lower_ep_to_edge(program, dump_graphs=dump_graphs)
-    cadence_prog_manager = apply_exir_ops_passes(opt_level, edge_prog_manager)
+    cadence_passes = get_cadence_passes(opt_level)
+
+    # Run a couple required passes for quant/dequant ops
+    cadence_prog_manager = edge_prog_manager.transform(
+        cast(
+            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
+        )
+    )
     return cadence_prog_manager
 
 
@@ -327,7 +329,14 @@ def export_to_cadence(
     opt_level: int = 1,
 ) -> EdgeProgramManager:
     edge_prog_manager = export_to_edge(model, inputs, dump_graphs=dump_graphs)
-    cadence_prog_manager = apply_exir_ops_passes(opt_level, edge_prog_manager)
+    cadence_passes = get_cadence_passes(opt_level)
+
+    # Run a couple required passes for quant/dequant ops
+    cadence_prog_manager = edge_prog_manager.transform(
+        cast(
+            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
+        )
+    )
     return cadence_prog_manager
 
 
@@ -364,8 +373,15 @@ def export_to_executorch_gen_etrecord(
     memory_config: Optional[MemoryConfig] = None,
     dump_graphs: bool = False,
 ) -> ExecutorchProgramManager:
+    cadence_passes = get_cadence_passes(opt_level)
     edge_prog_manager = export_to_edge(model, inputs, dump_graphs)
-    cadence_prog_manager = apply_exir_ops_passes(opt_level, edge_prog_manager)
+
+    # Run a couple required passes for quant/dequant ops
+    cadence_prog_manager = edge_prog_manager.transform(
+        cast(
+            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
+        )
+    )
 
     # Print some information to terminal
     print_ops_info(
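For orientation, here is a minimal sketch of how the new plumbing is meant to be used end to end, mirroring the call sites above; the Add module and its inputs are placeholders for illustration, not part of this diff:

# Sketch only: get_cadence_passes returns already-instantiated passes
# filtered by opt level, and each call site casts them to the callable
# type that EdgeProgramManager.transform expects.
from typing import Callable, Optional, cast

import torch
import torch.fx
from executorch.backends.cadence.aot.compiler import export_to_edge
from executorch.backends.cadence.aot.passes import get_cadence_passes
from executorch.exir.pass_base import PassResult


class Add(torch.nn.Module):  # placeholder model for illustration
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + x


edge_prog_manager = export_to_edge(Add(), (torch.randn(4),))
cadence_passes = get_cadence_passes(1)  # opt_level=1
cadence_prog_manager = edge_prog_manager.transform(
    cast(list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes)
)

One side effect of inlining the transform call at each call site is that passes.py no longer needs to import EdgeProgramManager, which the passes.py diff below removes.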

backends/cadence/aot/fuse_ops.py

Lines changed: 0 additions & 1 deletion
@@ -1127,7 +1127,6 @@ class CadenceFuseOpsInGraph:
     FuseCascadedTransposeOrPermuteOps,
     FuseCascadedViewOps,
     FuseQuantDequantToRequantizePass,
-    FuseMulTensorIntoQuantPass,
     FuseMulTensorIntoDequantPass,
     FuseMulScalarIntoDequantPass,
     FuseFullThenReshapePass,
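The surviving mul-into-dequant fusions rest on a simple identity: multiplying a dequantized value by a constant is the same as scaling the dequantization scale by that constant. A tiny numeric sketch (the dequantize helper here is illustrative, not the pass implementation):

# Algebra behind mul-into-dequant fusion (illustrative only):
# dequantize(q) * c == (q - zero_point) * scale * c
#                   == dequantize with scale' = scale * c
def dequantize(q: int, scale: float, zero_point: int) -> float:
    return (q - zero_point) * scale


assert dequantize(7, 0.5, 2) * 3.0 == dequantize(7, 0.5 * 3.0, 2)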

backends/cadence/aot/passes.py

Lines changed: 8 additions & 36 deletions
@@ -6,7 +6,7 @@
 
 # pyre-strict
 
-from typing import Any, Callable, cast, List, Optional
+from typing import Any, List, Optional
 
 import torch
 import torch.fx
@@ -28,18 +28,13 @@
     RemoveRedundantOps,
 )
 from executorch.backends.cadence.aot.reorder_ops import CadenceReorderOpsInGraph
-from executorch.backends.cadence.aot.replace_ops import (
-    CadenceReplaceOpsInGraph,
-    ReplaceMulTensorWithMulAndFullOpsPass,
-)
+from executorch.backends.cadence.aot.replace_ops import CadenceReplaceOpsInGraph
 from executorch.backends.cadence.aot.simplify_ops import CadenceSimplifyOpsInGraph
-from executorch.exir import EdgeProgramManager
 from executorch.exir.pass_base import ExportPass, PassResult
 from executorch.exir.pass_manager import PassManager, PassType
 from executorch.exir.passes import dead_code_elimination_pass
 from executorch.exir.passes.scalar_to_tensor_pass import ScalarToTensorPass
 from executorch.exir.passes.spec_prop_pass import SpecPropPass
-from torch.export.exported_program import ExportedProgram
 
 
 @register_cadence_pass(CadencePassAttribute(opt_level=0))
@@ -94,37 +89,14 @@ def get_passes_in_default_order() -> List[ExportPass]:
     return pytree.tree_flatten(passes)[0]
 
 
-def apply_exir_ops_passes(
+def get_cadence_passes(
     opt_level: int,
-    edge_prog_manager: EdgeProgramManager,
-) -> EdgeProgramManager:
+) -> List[Optional[PassResult]]:
     passes = get_passes_in_default_order()
     pass_filter = create_cadence_pass_filter(opt_level)
-    cadence_passes = [
-        (
-            lambda graph_module, filtered_pass=filtered_pass: filtered_pass()(
-                graph_module
-            )
-        )
+    filtered_passes = [
+        # pyre-ignore[20]: Expect argument graph_module
+        filtered_pass()
         for filtered_pass in list(filter(pass_filter, passes))
     ]
-    cadence_prog_manager = edge_prog_manager.transform(
-        cast(
-            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
-        )
-    )
-    return cadence_prog_manager
-
-
-def apply_torch_ops_passes(expo_program: ExportedProgram) -> ExportedProgram:
-    """
-    Applies compiler passes on torch.ops IR, including torch.ops.aten, torch.ops.cadence, etc.
-    expo_program is expected to be the output of the torch.export.export().
-    """
-
-    aten_passes: List[Callable[[torch.fx.GraphModule], Optional[PassResult]]] = [
-        ReplaceMulTensorWithMulAndFullOpsPass()
-    ]
-    # TODO(T230417247): Use PassResult which is currently ignored.
-    PassManager(aten_passes)(expo_program.graph_module)
-    return expo_program
+    return filtered_passes
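To make the shape of get_cadence_passes concrete, here is a self-contained sketch of opt-level-based filtering in the same spirit; PassAttribute, ATTRIBUTE, PassA, and PassB are hypothetical stand-ins, not the Cadence APIs:

# Hypothetical sketch mirroring get_cadence_passes: collect pass classes,
# keep those whose registered opt level fits, then instantiate the survivors.
from dataclasses import dataclass
from typing import Callable, List, Type


@dataclass
class PassAttribute:  # stand-in for CadencePassAttribute
    opt_level: int


class PassA:
    ATTRIBUTE = PassAttribute(opt_level=0)


class PassB:
    ATTRIBUTE = PassAttribute(opt_level=2)


def make_filter(opt_level: int) -> Callable[[Type[object]], bool]:
    return lambda p: p.ATTRIBUTE.opt_level <= opt_level  # type: ignore[attr-defined]


passes: List[Type[object]] = [PassA, PassB]
filtered = [p() for p in filter(make_filter(1), passes)]
assert [type(p).__name__ for p in filtered] == ["PassA"]

As the pyre-ignore in the diff hints, the real function returns instantiated pass objects despite its List[Optional[PassResult]] annotation, which is why each caller casts the list before handing it to transform.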

examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj

Lines changed: 12 additions & 0 deletions
@@ -485,6 +485,8 @@
 				03CF43A52CEC5CEC00C7113B /* kernels_custom_debug */,
 				03CF43A72CEC5CEC00C7113B /* kernels_optimized */,
 				03CF43A92CEC5CEC00C7113B /* kernels_optimized_debug */,
+				03CF43AB2CEC5CEC00C7113B /* kernels_portable */,
+				03CF43AD2CEC5CEC00C7113B /* kernels_portable_debug */,
 				03CF43AF2CEC5CEC00C7113B /* kernels_quantized */,
 				03CF43B12CEC5CEC00C7113B /* kernels_quantized_debug */,
 			);
@@ -1010,6 +1012,16 @@
 		package = 03CF43942CEC5CEC00C7113B /* XCRemoteSwiftPackageReference "executorch" */;
 		productName = kernels_optimized_debug;
 	};
+	03CF43AB2CEC5CEC00C7113B /* kernels_portable */ = {
+		isa = XCSwiftPackageProductDependency;
+		package = 03CF43942CEC5CEC00C7113B /* XCRemoteSwiftPackageReference "executorch" */;
+		productName = kernels_portable;
+	};
+	03CF43AD2CEC5CEC00C7113B /* kernels_portable_debug */ = {
+		isa = XCSwiftPackageProductDependency;
+		package = 03CF43942CEC5CEC00C7113B /* XCRemoteSwiftPackageReference "executorch" */;
+		productName = kernels_portable_debug;
+	};
 	03CF43AF2CEC5CEC00C7113B /* kernels_quantized */ = {
 		isa = XCSwiftPackageProductDependency;
 		package = 03CF43942CEC5CEC00C7113B /* XCRemoteSwiftPackageReference "executorch" */;

examples/demo-apps/react-native/rnllama/ios/LlamaBridge.h

Lines changed: 1 addition & 1 deletion
@@ -1,9 +1,9 @@
 #ifndef LLaMABridge_h
 #define LLaMABridge_h
 
-#import <LLaMARunner/LLaMARunner.h>
 #import <React/RCTBridgeModule.h>
 #import <React/RCTEventEmitter.h>
+#import "LLaMARunner.h"
 
 NS_ASSUME_NONNULL_BEGIN

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+ET_PLATFORM[sdk=iphonesimulator*] = simulator
+ET_PLATFORM[sdk=iphoneos*] = ios
+ET_PLATFORM[sdk=macos*] = macos
+
+// Link the Debug version of ExecuTorch runtime to keep the logs.
+// Switch to Release for better performance if logs are not needed.
+OTHER_LDFLAGS = $(inherited) \
+  -force_load $(BUILT_PRODUCTS_DIR)/libexecutorch_debug_$(ET_PLATFORM).a \
+  -force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml_$(ET_PLATFORM).a \
+  -force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps_$(ET_PLATFORM).a \
+  -force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack_$(ET_PLATFORM).a \
+  -force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom_$(ET_PLATFORM).a \
+  -force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized_$(ET_PLATFORM).a \
+  -force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized_$(ET_PLATFORM).a \
+  @$(TEMP_DIR)/cmake/linker_flags
+
+// LLaMARunner requires additional dependencies built with CMake in a custom run script phase.
+// Include headers and libraries from $(TEMP_DIR)/cmake for it.
+HEADER_SEARCH_PATHS = $(inherited) \
+  $(SRCROOT)/../../../../../.. \
+  $(TEMP_DIR)/cmake/include
+
+LIBRARY_SEARCH_PATHS = $(inherited) \
+  $(TEMP_DIR)/cmake/lib
