diff --git a/build/cmake_deps.toml b/build/cmake_deps.toml index 60d3ac71451..1430ea3a9ef 100644 --- a/build/cmake_deps.toml +++ b/build/cmake_deps.toml @@ -116,6 +116,20 @@ deps = [ "executorch", ] +[targets.optimized_native_cpu_ops_oss] +buck_targets = [ + "//configurations:optimized_native_cpu_ops_oss", +] +filters = [ + ".cpp$", +] +excludes = [ +] +deps = [ + "executorch_no_prim_ops", + "executorch", + "portable_kernels", +] # ---------------------------------- core end ---------------------------------- # ---------------------------------- extension start ---------------------------------- [targets.extension_data_loader] @@ -341,5 +355,6 @@ deps = [ "portable_kernels", "quantized_kernels", "xnnpack_backend", + "optimized_native_cpu_ops_oss", ] # ---------------------------------- LLama end ---------------------------------- diff --git a/configurations/targets.bzl b/configurations/targets.bzl index 60a09d36269..6a5341c2904 100644 --- a/configurations/targets.bzl +++ b/configurations/targets.bzl @@ -28,7 +28,7 @@ def define_common_targets(): ], ) - # Add a commong configuration of cpu optimized operators. This adds a bit of confusion + # Add a common configuration of cpu optimized operators. This adds a bit of confusion # with the above executorch_cpu_optimized target. Generally it would make sense # to just add optimized operators to that target but because executorch_cpu_optimized # might be used elsewhere, I dont want to include ops in that target and find out @@ -50,3 +50,21 @@ def define_common_targets(): "@EXECUTORCH_CLIENTS", ], ) + + # TODO(T183193812): delete this target after optimized-oss.yaml is gone + executorch_generated_lib( + name = "optimized_native_cpu_ops_oss", + deps = [ + "//executorch/kernels/optimized:optimized_operators", + "//executorch/kernels/optimized:optimized_oplist", + "//executorch/kernels/portable:executorch_aten_ops", + "//executorch/kernels/portable:operators", + ], + functions_yaml_target = "//executorch/kernels/optimized:optimized-oss.yaml", + fallback_yaml_target = "//executorch/kernels/portable:functions.yaml", + define_static_targets = True, + visibility = [ + "//executorch/examples/...", + "@EXECUTORCH_CLIENTS", + ], + ) diff --git a/examples/models/llama2/runner/targets.bzl b/examples/models/llama2/runner/targets.bzl index 9ee3f99567d..96d47ffce21 100644 --- a/examples/models/llama2/runner/targets.bzl +++ b/examples/models/llama2/runner/targets.bzl @@ -4,7 +4,8 @@ def _get_operator_lib(aten = False): if aten: return ["//executorch/kernels/aten:generated_lib"] elif runtime.is_oss: - return ["//executorch/kernels/portable:generated_lib", "//executorch/extension/llm/custom_ops:custom_ops"] + # TODO(T183193812): delete this path after optimized-oss.yaml is no more. + return ["//executorch/configurations:optimized_native_cpu_ops_oss", "//executorch/extension/llm/custom_ops:custom_ops"] else: return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/extension/llm/custom_ops:custom_ops"] diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py index 4237ae7b3a7..338d997297d 100644 --- a/extension/llm/export/builder.py +++ b/extension/llm/export/builder.py @@ -16,6 +16,7 @@ from executorch.backends.transforms.duplicate_dynamic_quant_chain import ( DuplicateDynamicQuantChainPass, ) +from executorch.backends.xnnpack.passes.convert_to_linear import ConvertToLinearPass from executorch.exir import EdgeProgramManager from executorch.exir.backend.partitioner import Partitioner @@ -382,6 +383,10 @@ def to_executorch(self) -> "LLMEdgeManager": ExecutorchBackendConfig( extract_delegate_segments=True, passes=[ + # If there are Linear operations left in the graph, let's execute + # them with the optimized op_linear rather than materializing a + # transpose followed by a regular op_mm. + ConvertToLinearPass(), QuantFusionPass(), ], memory_planning_pass=MemoryPlanningPass( diff --git a/kernels/optimized/targets.bzl b/kernels/optimized/targets.bzl index c06a1dc079d..88afe5011d3 100644 --- a/kernels/optimized/targets.bzl +++ b/kernels/optimized/targets.bzl @@ -19,6 +19,14 @@ def define_common_targets(): ], ) + runtime.export_file( + name = "optimized-oss.yaml", + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + ) + runtime.cxx_library( name = "optimized_operators", srcs = [],