From e5b996affaa024754bc10e1c33d85dff3a3628ca Mon Sep 17 00:00:00 2001
From: Tarun Karuturi
Date: Wed, 9 Apr 2025 17:14:16 -0700
Subject: [PATCH] Adding qualcomm recipes

Differential Revision: [D72410492](https://our.internmc.facebook.com/intern/diff/D72410492/)

[ghstack-poisoned]
---
 backends/qualcomm/recipes/TARGETS    |  12 ++++
 backends/qualcomm/recipes/recipes.py | 104 +++++++++++++++++++++++++++
 extension/llm/export/TARGETS         |   1 +
 3 files changed, 117 insertions(+)
 create mode 100644 backends/qualcomm/recipes/TARGETS
 create mode 100644 backends/qualcomm/recipes/recipes.py

diff --git a/backends/qualcomm/recipes/TARGETS b/backends/qualcomm/recipes/TARGETS
new file mode 100644
index 00000000000..d2ee8b1e684
--- /dev/null
+++ b/backends/qualcomm/recipes/TARGETS
@@ -0,0 +1,12 @@
+load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
+
+python_library(
+    name = "recipes",
+    srcs = ["recipes.py"],
+    deps = [
+        "//caffe2:torch",
+        "//executorch/exir:lib",
+        "//executorch/backends/qualcomm/quantizer:quantizer",
+        "//executorch/examples/qualcomm:utils"
+    ]
+)
diff --git a/backends/qualcomm/recipes/recipes.py b/backends/qualcomm/recipes/recipes.py
new file mode 100644
index 00000000000..0e5540455b3
--- /dev/null
+++ b/backends/qualcomm/recipes/recipes.py
@@ -0,0 +1,104 @@
+# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+
+# pyre-strict
+from typing import Optional
+
+from executorch import exir
+from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype
+
+from executorch.examples.qualcomm.utils import make_quantizer
+from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig, ExportRecipe
+from executorch.exir.passes import MemoryPlanningPass
+from torch.ao.quantization.quantizer import Quantizer
+
+
+def get_qualcomm_htp_et_recipe(
+    name: str,
+    soc_model: str = "SM8650",
+    qnn_version: str = "2.25",
+    quant_dtype: Optional[QuantDtype] = None,
+    shared_buffer: bool = False,
+    skip_node_id_set: Optional[set[int]] = None,
+    skip_node_op_set: Optional[set[str]] = None,
+) -> ExportRecipe:
+    """Build an ``ExportRecipe`` that partitions to the Qualcomm HTP backend.
+
+    Args:
+        name: Name given to the returned recipe.
+        soc_model: Attribute name looked up on ``QcomChipset``.
+        qnn_version: Currently unused by this function.
+        quant_dtype: Dtype passed to ``make_quantizer``. When ``None``,
+            ``make_quantizer()`` is called bare and ``use_fp16`` is set.
+        shared_buffer: When true, the memory planning pass does not
+            allocate graph inputs and outputs.
+        skip_node_id_set: Node ids forwarded to ``QnnPartitioner``.
+            ``None`` means an empty set.
+        skip_node_op_set: Op names forwarded to ``QnnPartitioner``.
+            ``None`` means an empty set.
+
+    Returns:
+        The assembled ``ExportRecipe``.
+
+    Raises:
+        AttributeError: If ``soc_model`` is not an attribute of
+            ``QcomChipset``.
+    """
+    from executorch.backends.qualcomm.partition.qnn_partitioner import QnnPartitioner
+    from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
+
+    from executorch.backends.qualcomm.utils.utils import (
+        _transform,
+        generate_htp_compiler_spec,
+        generate_qnn_executorch_compiler_spec,
+    )
+
+    # Build a fresh set per call; a ``set()`` default in the signature is a
+    # single object shared by every call that relies on the default.
+    if skip_node_id_set is None:
+        skip_node_id_set = set()
+    if skip_node_op_set is None:
+        skip_node_op_set = set()
+
+    # Compare with None instead of relying on truthiness: an int-valued enum
+    # member equal to 0 is falsy and would wrongly take the unquantized path.
+    if quant_dtype is not None:
+        qnn_quantizer: Quantizer = make_quantizer(quant_dtype=quant_dtype)
+    else:
+        qnn_quantizer = make_quantizer()
+
+    qnn_partitioner = QnnPartitioner(
+        generate_qnn_executorch_compiler_spec(
+            soc_model=getattr(QcomChipset, soc_model),
+            backend_options=generate_htp_compiler_spec(
+                # fp16 only when no quantization dtype was requested.
+                use_fp16=quant_dtype is None,
+            ),
+        ),
+        skip_node_id_set=skip_node_id_set,
+        skip_node_op_set=skip_node_op_set,
+    )
+
+    executorch_config = ExecutorchBackendConfig(
+        # For shared buffer, user must pass the memory address
+        # which is allocated by RPC memory to executor runner.
+        # Therefore, won't want to pre-allocate
+        # by memory manager in runtime.
+        memory_planning_pass=MemoryPlanningPass(
+            alloc_graph_input=not shared_buffer,
+            alloc_graph_output=not shared_buffer,
+        ),
+    )
+
+    return ExportRecipe(
+        name,
+        quantizer=qnn_quantizer,
+        partitioners=[qnn_partitioner],
+        pre_edge_transform_passes=_transform,
+        edge_compile_config=EdgeCompileConfig(
+            _check_ir_validity=False,
+            _skip_dim_order=True,
+        ),
+        edge_transform_passes=[],
+        transform_check_ir_validity=True,
+        executorch_backend_config=executorch_config,
+    )
diff --git a/extension/llm/export/TARGETS b/extension/llm/export/TARGETS
index 40f8599e9e0..35ff63ac3a6 100644
--- a/extension/llm/export/TARGETS
+++ b/extension/llm/export/TARGETS
@@ -31,6 +31,7 @@ runtime.python_library(
         "//executorch/backends/apple/mps:partitioner",
         "//executorch/backends/qualcomm/partition:partition",
         "//executorch/backends/qualcomm/quantizer:quantizer",
+        "//executorch/backends/qualcomm/recipes:recipes",
         "//executorch/backends/transforms:duplicate_dynamic_quant_chain",
         "//executorch/backends/vulkan/partitioner:vulkan_partitioner",
         "//executorch/backends/vulkan/quantizer:vulkan_quantizer",