diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt index 3018e5219e3..8e800d8bef2 100644 --- a/backends/qualcomm/CMakeLists.txt +++ b/backends/qualcomm/CMakeLists.txt @@ -252,7 +252,7 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64") pybind11_extension(PyQnnManagerAdaptor) pybind11_extension(PyQnnWrapperAdaptor) - if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo) + if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES RelWithDebInfo) # Strip unnecessary sections of the binary pybind11_strip(PyQnnManagerAdaptor) pybind11_strip(PyQnnWrapperAdaptor) diff --git a/backends/qualcomm/_passes/annotate_adaptive_avg_pool1d.py b/backends/qualcomm/_passes/annotate_adaptive_avg_pool1d.py index 24e5104e7cb..eaa2b47083e 100644 --- a/backends/qualcomm/_passes/annotate_adaptive_avg_pool1d.py +++ b/backends/qualcomm/_passes/annotate_adaptive_avg_pool1d.py @@ -19,8 +19,6 @@ class AnnotateAdaptiveAvgPool1D(ExportPass): adaptive_avg_pool1d got decomposed to unsqueeze -> adaptive_avg_pool2d -> squeeze """ - decomp_ops = [torch.ops.aten.adaptive_avg_pool2d.default] - def __init__(self, edge_program: torch.export.ExportedProgram): super(AnnotateAdaptiveAvgPool1D, self).__init__() self.edge_program = edge_program diff --git a/backends/qualcomm/_passes/annotate_stack.py b/backends/qualcomm/_passes/annotate_stack.py index 88ee4e41ee6..66effc240bf 100644 --- a/backends/qualcomm/_passes/annotate_stack.py +++ b/backends/qualcomm/_passes/annotate_stack.py @@ -28,7 +28,7 @@ def _annotate_stack(self, graph_module: torch.fx.GraphModule): partitions = get_source_partitions( graph_module.graph, [torch.stack, torch.ops.aten.stack.default, "stack"] ) - for _, src_partitions in partitions.items(): + for src_partitions in partitions.values(): for src_partition in src_partitions: output = src_partition.output_nodes[0] if (list(output.users)[0].target) in q_ops: diff --git a/backends/qualcomm/_passes/annotate_unbind.py 
b/backends/qualcomm/_passes/annotate_unbind.py index d9141dbc4c0..ec03dc51745 100644 --- a/backends/qualcomm/_passes/annotate_unbind.py +++ b/backends/qualcomm/_passes/annotate_unbind.py @@ -28,7 +28,7 @@ def _annotate_unbind(self, graph_module: torch.fx.GraphModule): partitions = get_source_partitions( graph_module.graph, [torch.unbind, torch.ops.aten.unbind.int, "unbind"] ) - for _, src_partitions in partitions.items(): + for src_partitions in partitions.values(): for src_partition in src_partitions: if src_partition.input_nodes[0].target in dq_ops: q_node = src_partition.input_nodes[0].args[0] diff --git a/backends/qualcomm/quantizer/annotators.py b/backends/qualcomm/quantizer/annotators.py index 5195cf39f33..2c46535eddc 100644 --- a/backends/qualcomm/quantizer/annotators.py +++ b/backends/qualcomm/quantizer/annotators.py @@ -1193,7 +1193,13 @@ def annotate_unbind(node: Node, quantization_config: QuantizationConfig) -> None ) -@register_annotator([torch.ops.aten.split.Tensor, torch.ops.aten.chunk.default]) +@register_annotator( + [ + torch.ops.aten.split_with_sizes.default, + torch.ops.aten.split.Tensor, + torch.ops.aten.chunk.default, + ] +) def annotate_chunk(node: Node, quantization_config: QuantizationConfig) -> None: if _is_annotated([node]): return diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh index c079dd41a2a..320cbcf4b87 100755 --- a/backends/qualcomm/scripts/build.sh +++ b/backends/qualcomm/scripts/build.sh @@ -30,7 +30,7 @@ CMAKE_X86_64="build-x86" BUILD_AARCH64="true" CMAKE_AARCH64="build-android" CLEAN="true" -BUILD_TYPE="Debug" +BUILD_TYPE="RelWithDebInfo" BUILD_JOB_NUMBER="16" if [ -z PYTHON_EXECUTABLE ]; then @@ -71,7 +71,7 @@ if [ "$BUILD_AARCH64" = true ]; then rm -rf $BUILD_ROOT && mkdir $BUILD_ROOT else # Force rebuild flatccrt for the correct platform - cd $BUILD_ROOT/devtools && make clean + cd $BUILD_ROOT/third-party/flatcc && make clean fi cd $BUILD_ROOT @@ -116,7 +116,7 @@ if [ "$BUILD_X86_64" = 
true ]; then
         rm -rf $BUILD_ROOT && mkdir $BUILD_ROOT
     else
         # Force rebuild flatccrt for the correct platform
-        cd $BUILD_ROOT/devtools && make clean
+        cd $BUILD_ROOT/third-party/flatcc && make clean
     fi
 
     cd $BUILD_ROOT
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
index 081dda7187b..a7fbd71ac34 100644
--- a/backends/qualcomm/tests/test_qnn_delegate.py
+++ b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -4108,6 +4108,46 @@ def test_fbnet(self):
                 self.assertGreaterEqual(msg["top_1"], 60)
                 self.assertGreaterEqual(msg["top_5"], 90)
 
+    def test_focalnet(self):
+        """Run the FocalNet example script and check the top-1/top-5 accuracy it reports."""
+        if not self.required_envs([self.image_dataset]):
+            self.skipTest("missing required envs")
+
+        cmds = [
+            "python",
+            f"{self.executorch_root}/examples/qualcomm/oss_scripts/focalnet.py",
+            "--dataset",
+            self.image_dataset,
+            "--artifact",
+            self.artifact_dir,
+            "--build_folder",
+            self.build_folder,
+            "--device",
+            self.device,
+            "--model",
+            self.model,
+            "--ip",
+            self.ip,
+            "--port",
+            str(self.port),
+        ]
+        if self.host:
+            cmds.extend(["--host", self.host])
+        if self.shared_buffer:
+            cmds.extend(["--shared_buffer"])
+
+        p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
+        with Listener((self.ip, self.port)) as listener:
+            # close the accepted connection as soon as the result has been read
+            with listener.accept() as conn:
+                p.communicate()
+                msg = json.loads(conn.recv())
+            if "Error" in msg:
+                self.fail(msg["Error"])
+            else:
+                self.assertGreaterEqual(msg["top_1"], 55)
+                self.assertGreaterEqual(msg["top_5"], 80)
+
     def test_gMLP(self):
         if not self.required_envs([self.image_dataset]):
             self.skipTest("missing required envs")
diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt
index 4f338a23044..e971316389a 100644
--- a/examples/qualcomm/CMakeLists.txt
+++ b/examples/qualcomm/CMakeLists.txt
@@ -23,7 +23,7 @@ if(NOT PYTHON_EXECUTABLE)
 endif()
 
 if(NOT CMAKE_BUILD_TYPE)
-  set(CMAKE_BUILD_TYPE Debug)
+  set(CMAKE_BUILD_TYPE RelWithDebInfo)
 endif()
 
 # Find prebuilt libraries.
executorch package should contain portable_ops_lib,
diff --git a/examples/qualcomm/oss_scripts/focalnet.py b/examples/qualcomm/oss_scripts/focalnet.py
new file mode 100644
index 00000000000..377d49a3a18
--- /dev/null
+++ b/examples/qualcomm/oss_scripts/focalnet.py
@@ -0,0 +1,177 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import json
+import logging
+import os
+from multiprocessing.connection import Client
+
+import numpy as np
+
+import torch
+from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype
+from executorch.examples.qualcomm.utils import (
+    build_executorch_binary,
+    get_imagenet_dataset,
+    make_output_dir,
+    parse_skip_delegation_node,
+    setup_common_args_and_variables,
+    SimpleADB,
+    topk_accuracy,
+)
+from transformers import AutoModelForImageClassification
+
+
+def main(args):
+    """Compile FocalNet-tiny for QNN and optionally evaluate it on a device.
+
+    The model is quantized with ``QuantDtype.use_8a8w`` and built under
+    ``args.artifact``. Unless ``args.compile_only`` is set, the model is
+    then run on the device through ``SimpleADB`` and the top-1/top-5
+    accuracy is sent to ``(args.ip, args.port)`` when both are given, or
+    printed otherwise.
+
+    Args:
+        args: parsed command line namespace; ``artifact``, ``build_folder``,
+            ``ci``, ``compile_only``, ``dataset``, ``device``, ``host``,
+            ``ip``, ``model``, ``port`` and ``shared_buffer`` are read here.
+
+    Raises:
+        RuntimeError: if an on-device run is requested without a device
+            serial or in CI mode, or if no dataset path is given outside
+            of CI mode.
+    """
+    skip_node_id_set, skip_node_op_set = parse_skip_delegation_node(args)
+
+    # ensure the working directory exists.
+    os.makedirs(args.artifact, exist_ok=True)
+
+    if not args.compile_only and args.device is None:
+        raise RuntimeError(
+            "device serial is required if not compile only. "
+            "Please specify a device serial by -s/--device argument."
+        )
+    if not args.compile_only and args.ci:
+        # CI mode defines neither targets nor an input list, so the on-device
+        # steps below could only fail with a NameError after the whole build.
+        raise RuntimeError(
+            "on-device evaluation is not available in CI mode because it uses "
+            "random input without labels. Please use CI mode with compile only."
+        )
+
+    data_num = 100
+    if args.ci:
+        inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
+    elif args.dataset is None:
+        # -d/--dataset is optional for the parser, but real data is needed here.
+        raise RuntimeError(
+            "dataset path is required if not in CI mode. "
+            "Please specify a dataset path by -d/--dataset argument."
+        )
+    else:
+        inputs, targets, input_list = get_imagenet_dataset(
+            dataset_path=f"{args.dataset}",
+            data_size=data_num,
+            image_shape=(256, 256),
+            crop_size=224,
+        )
+
+    module = (
+        AutoModelForImageClassification.from_pretrained("microsoft/focalnet-tiny")
+        .eval()
+        .to("cpu")
+    )
+    pte_filename = "focalnet_qnn_q8"
+    build_executorch_binary(
+        module.eval(),
+        inputs[0],
+        args.model,
+        f"{args.artifact}/{pte_filename}",
+        inputs,
+        skip_node_id_set=skip_node_id_set,
+        skip_node_op_set=skip_node_op_set,
+        quant_dtype=QuantDtype.use_8a8w,
+        shared_buffer=args.shared_buffer,
+    )
+
+    if args.compile_only:
+        return
+
+    adb = SimpleADB(
+        qnn_sdk=os.getenv("QNN_SDK_ROOT"),
+        build_path=f"{args.build_folder}",
+        pte_path=f"{args.artifact}/{pte_filename}.pte",
+        workspace=f"/data/local/tmp/executorch/{pte_filename}",
+        device_id=args.device,
+        host_id=args.host,
+        soc_model=args.model,
+        shared_buffer=args.shared_buffer,
+    )
+    adb.push(inputs=inputs, input_list=input_list)
+    adb.execute()
+
+    # collect output data
+    output_data_folder = f"{args.artifact}/outputs"
+    make_output_dir(output_data_folder)
+
+    adb.pull(output_path=args.artifact)
+
+    # top-k analysis
+    predictions = [
+        np.fromfile(
+            os.path.join(output_data_folder, f"output_{i}_0.raw"), dtype=np.float32
+        )
+        for i in range(data_num)
+    ]
+
+    k_val = [1, 5]
+    topk = [topk_accuracy(predictions, targets, k).item() for k in k_val]
+    if args.ip and args.port != -1:
+        with Client((args.ip, args.port)) as conn:
+            conn.send(json.dumps({f"top_{k}": topk[i] for i, k in enumerate(k_val)}))
+    else:
+        for i, k in enumerate(k_val):
+            print(f"top_{k}->{topk[i]}%")
+
+
+if __name__ == "__main__":
+    parser = setup_common_args_and_variables()
+
+    parser.add_argument(
+        "-d",
+        "--dataset",
+        help=(
+            "path to the validation folder of ImageNet dataset. "
+            "e.g. --dataset imagenet-mini/val "
+            "for https://www.kaggle.com/datasets/ifigotin/imagenetmini-1000)"
+        ),
+        type=str,
+        required=False,
+    )
+
+    parser.add_argument(
+        "-a",
+        "--artifact",
+        help="path for storing generated artifacts by this example. "
+        "Default ./focalnet",
+        default="./focalnet",
+        type=str,
+    )
+
+    args = parser.parse_args()
+    try:
+        main(args)
+    except Exception as e:
+        if args.ip and args.port != -1:
+            # report the failure to whoever listens on (ip, port)
+            with Client((args.ip, args.port)) as conn:
+                conn.send(json.dumps({"Error": str(e)}))
+        else:
+            # re-raise as is to keep the original type and traceback
+            raise