diff --git a/.ci/scripts/build-qnn-sdk.sh b/.ci/scripts/build-qnn-sdk.sh index 30835cf5085..1c339582604 100755 --- a/.ci/scripts/build-qnn-sdk.sh +++ b/.ci/scripts/build-qnn-sdk.sh @@ -18,7 +18,7 @@ build_qnn_backend() { export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)" parallelism=$(( $(nproc) - 1 )) - bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number ${parallelism} --release + bash backends/qualcomm/scripts/build.sh --skip_linux_android --skip_linux_embedding --job_number ${parallelism} --release } set_up_aot() { diff --git a/backends/qualcomm/README.md b/backends/qualcomm/README.md index 7c5853b3a6f..faff5786b8b 100644 --- a/backends/qualcomm/README.md +++ b/backends/qualcomm/README.md @@ -27,6 +27,7 @@ Please check `generate_qnn_executorch_compiler_spec()` in - SXR1230P - SXR2230P - SXR2330P +- QCS9100 ### Adding more supported Chipset Currently, users cannot add additional chipset models because the chipset ID is not accessible to community users. If you have specific chipset models you wish to add, please contact one of the authors in the `Code Reviews` section at the bottom of this page. 
diff --git a/backends/qualcomm/debugger/README.md b/backends/qualcomm/debugger/README.md index 60ecb3d71b3..b6e3809cc23 100644 --- a/backends/qualcomm/debugger/README.md +++ b/backends/qualcomm/debugger/README.md @@ -54,6 +54,7 @@ adb = SimpleADB( device_id=args.device, host_id=args.host, soc_model=args.model, + target=args.target, ) binaries_trace = generate_optrace( args, adb, f"{args.artifact}/{pte_filename}.pte", example_input diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh index 4cdd1efe6f4..02cbec6d4cf 100755 --- a/backends/qualcomm/scripts/build.sh +++ b/backends/qualcomm/scripts/build.sh @@ -17,8 +17,10 @@ fi usage() { echo "Usage: Build the aarch64 version of executor runner or the python interface of Qnn Manager" echo "First, you need to set the environment variable for QNN_SDK_ROOT" - echo ", and if you want to build the aarch64 version of executor runner" + echo ", and if you want to build the android version of executor runner" echo ", you need to export ANDROID_NDK_ROOT=/path/to/android_ndkXX" + echo "(or export TOOLCHAIN_ROOT_HOST=/path/to/sysroots/xx_host, " + echo "TOOLCHAIN_ROOT_TARGET=/path/to/sysroots/xx_target for linux embedded with --enable_linux_embedding)" echo "e.g.: executorch$ ./backends/qualcomm/scripts/build.sh --skip_x86_64" exit 1 } @@ -28,8 +30,10 @@ usage() { BUILD_X86_64="true" CMAKE_X86_64="build-x86" -BUILD_AARCH64="true" -CMAKE_AARCH64="build-android" +BUILD_ANDROID="true" +CMAKE_ANDROID="build-android" +BUILD_OE_LINUX="false" +CMAKE_OE_LINUX="build-oe-linux" CLEAN="true" BUILD_TYPE="RelWithDebInfo" BUILD_JOB_NUMBER="16" @@ -42,7 +46,7 @@ if [ -z BUCK2 ]; then BUCK2="buck2" fi -long_options=skip_x86_64,skip_aarch64,no_clean,release,job_number: +long_options=skip_x86_64,skip_linux_android,skip_linux_embedding,enable_linux_embedding,no_clean,release,job_number: parsed_args=$(getopt -a --options '' --longoptions $long_options --name "$0" -- "$@") eval set -- "$parsed_args" @@ -51,7 +55,9 @@ 
eval set -- "$parsed_args" while true ; do case "$1" in --skip_x86_64) BUILD_X86_64="false"; shift;; - --skip_aarch64) BUILD_AARCH64="false"; shift;; + --skip_linux_android) BUILD_ANDROID="false"; shift;; + --skip_linux_embedding) BUILD_OE_LINUX="false"; shift;; + --enable_linux_embedding) BUILD_ANDROID="false"; BUILD_OE_LINUX="true"; shift;; --no_clean) CLEAN="false"; shift;; --release) BUILD_TYPE="Release"; shift;; --job_number) BUILD_JOB_NUMBER="$2"; shift 2;; @@ -61,13 +67,13 @@ done PRJ_ROOT="$( cd "$(dirname "$0")/../../.." ; pwd -P)" -if [ "$BUILD_AARCH64" = true ]; then +if [ "$BUILD_ANDROID" = true ]; then if [[ -z ${ANDROID_NDK_ROOT} ]]; then echo "Please export ANDROID_NDK_ROOT=/path/to/android_ndkXX" exit -1 fi - BUILD_ROOT=$PRJ_ROOT/$CMAKE_AARCH64 + BUILD_ROOT=$PRJ_ROOT/$CMAKE_ANDROID if [ "$CLEAN" = true ]; then rm -rf $BUILD_ROOT && mkdir $BUILD_ROOT else @@ -135,6 +141,94 @@ if [ "$BUILD_AARCH64" = true ]; then cmake --build $LLAMA_EXAMPLE_ROOT -j$BUILD_JOB_NUMBER fi +if [ "$BUILD_OE_LINUX" = true ]; then + if [[ -z ${TOOLCHAIN_ROOT_HOST} ]]; then + echo "Please export e.g. TOOLCHAIN_ROOT_HOST=/path/to/sysroots/x86_64-qtisdk-linux" + exit -1 + fi + if [[ -z ${TOOLCHAIN_ROOT_TARGET} ]]; then + echo "Please export e.g. TOOLCHAIN_ROOT_TARGET=/path/to/sysroots/armv8a-oe-linux" + exit -1 + fi + + BUILD_ROOT=$PRJ_ROOT/$CMAKE_OE_LINUX + if [ "$CLEAN" = true ]; then + rm -rf $BUILD_ROOT && mkdir $BUILD_ROOT + else + # Force rebuild flatccrt for the correct platform + cd $BUILD_ROOT/third-party/flatcc && make clean + fi + + TOOLCHAN_PREFIX=$TOOLCHAIN_ROOT_HOST/usr/bin/aarch64-oe-linux/aarch64-oe-linux- + cd $BUILD_ROOT + cmake .. 
\ + -DCMAKE_INSTALL_PREFIX=$BUILD_ROOT \ + -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ + -DCMAKE_C_COMPILER=${TOOLCHAN_PREFIX}gcc \ + -DCMAKE_CXX_COMPILER=${TOOLCHAN_PREFIX}g++ \ + -DCMAKE_SYSROOT=$TOOLCHAIN_ROOT_TARGET \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ + -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + -DEXECUTORCH_BUILD_QNN=ON \ + -DEXECUTORCH_BUILD_DEVTOOLS=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ + -DEXECUTORCH_ENABLE_LOGGING=ON \ + -DQNN_SDK_ROOT=$QNN_SDK_ROOT \ + -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \ + -B$BUILD_ROOT + + cmake --build $BUILD_ROOT -j$BUILD_JOB_NUMBER --target install + + EXAMPLE_ROOT=examples/qualcomm + CMAKE_PREFIX_PATH="${BUILD_ROOT};${BUILD_ROOT}/third-party/gflags;" + + cmake $PRJ_ROOT/$EXAMPLE_ROOT \ + -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ + -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \ + -DSUPPORT_REGEX_LOOKAHEAD=ON \ + -DBUILD_TESTING=OFF \ + -DEXECUTORCH_ENABLE_LOGGING=ON \ + -DCMAKE_C_COMPILER=${TOOLCHAN_PREFIX}gcc \ + -DCMAKE_CXX_COMPILER=${TOOLCHAN_PREFIX}g++ \ + -DCMAKE_SYSROOT=$TOOLCHAIN_ROOT_TARGET \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ + -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ + -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \ + -B$EXAMPLE_ROOT + + cmake --build $EXAMPLE_ROOT -j$BUILD_JOB_NUMBER + + LLAMA_EXAMPLE_ROOT=examples/models/llama + cmake $PRJ_ROOT/$LLAMA_EXAMPLE_ROOT \ + -DBUILD_TESTING=OFF \ + -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ + -DCMAKE_C_COMPILER=${TOOLCHAN_PREFIX}gcc \ + -DCMAKE_CXX_COMPILER=${TOOLCHAN_PREFIX}g++ \ + 
-DCMAKE_SYSROOT=$TOOLCHAIN_ROOT_TARGET \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ + -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \ + -DEXECUTORCH_ENABLE_LOGGING=ON \ + -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ + -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \ + -B$LLAMA_EXAMPLE_ROOT + + cmake --build $LLAMA_EXAMPLE_ROOT -j$BUILD_JOB_NUMBER +fi + if [ "$BUILD_X86_64" = true ]; then BUILD_ROOT=$PRJ_ROOT/$CMAKE_X86_64 if [ "$CLEAN" = true ]; then diff --git a/backends/qualcomm/serialization/qc_compiler_spec.fbs b/backends/qualcomm/serialization/qc_compiler_spec.fbs index 8aeaa060a50..145ae0010fc 100644 --- a/backends/qualcomm/serialization/qc_compiler_spec.fbs +++ b/backends/qualcomm/serialization/qc_compiler_spec.fbs @@ -43,6 +43,7 @@ enum QcomChipset: int { SXR1230P = 45, SXR2230P = 53, SXR2330P = 75, + QCS9100 = 77, } /// Indicate the information of the specified SoC. diff --git a/backends/qualcomm/serialization/qc_schema.py b/backends/qualcomm/serialization/qc_schema.py index f3b9e2cc1a5..9f4b37c13d1 100644 --- a/backends/qualcomm/serialization/qc_schema.py +++ b/backends/qualcomm/serialization/qc_schema.py @@ -49,6 +49,7 @@ class QcomChipset(IntEnum): SXR1230P = 45 # v73 SXR2230P = 53 # v69 SXR2330P = 75 # v79 + QCS9100 = 77 # v73 @dataclass @@ -69,6 +70,7 @@ class SocInfo: QcomChipset.SXR1230P: SocInfo(QcomChipset.SXR1230P, HtpInfo(HtpArch.V73, 2)), QcomChipset.SXR2230P: SocInfo(QcomChipset.SXR2230P, HtpInfo(HtpArch.V69, 8)), QcomChipset.SXR2330P: SocInfo(QcomChipset.SXR2330P, HtpInfo(HtpArch.V79, 8)), + QcomChipset.QCS9100: SocInfo(QcomChipset.QCS9100, HtpInfo(HtpArch.V73, 8)), } diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 1c7e63f1bf4..b9a0ffb07c4 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -5839,6 +5839,8 @@ def test_llama3_2_instruct(self): self.build_folder, 
"--model", self.model, + "--target", + self.target, "--checkpoint", f"{self.llama_artifacts}/consolidated.00.pth", "--params", @@ -5989,6 +5991,8 @@ def test_llama_stories_110m(self): self.build_folder, "--model", self.model, + "--target", + self.target, "--checkpoint", f"{self.llama_artifacts}/stories110M.pt", "--params", @@ -6415,6 +6419,8 @@ def test_albert(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -6451,6 +6457,8 @@ def test_bert(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -6488,6 +6496,8 @@ def test_conv_former(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -6563,6 +6573,8 @@ def test_cvt(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -6600,6 +6612,8 @@ def test_deit(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -6637,6 +6651,8 @@ def test_dino_v2(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -6674,6 +6690,8 @@ def test_distilbert(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -6709,6 +6727,8 @@ def test_dit(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -6746,6 +6766,8 @@ def test_efficientnet(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -6786,6 +6808,8 @@ def test_efficientSAM(self): self.device, "--model", self.model, + "--target", + self.target, "--oss_repo", self.oss_repo, "--pretrained_weight", @@ -6826,6 +6850,8 @@ def test_esrgan(self): self.device, "--model", self.model, + "--target", + self.target, "--default_dataset", "--oss_repo", self.oss_repo, @@ -6866,6 +6892,8 @@ def test_eurobert(self): self.device, "--model", self.model, + "--target", + 
self.target, "--ip", self.ip, "--port", @@ -6904,6 +6932,8 @@ def test_fastvit(self): self.device, "--model", self.model, + "--target", + self.target, "--oss_repo", self.oss_repo, "--pretrained_weight", @@ -6946,6 +6976,8 @@ def test_fbnet(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -6984,6 +7016,8 @@ def test_focalnet(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7024,6 +7058,8 @@ def test_gMLP(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7100,6 +7136,8 @@ def test_mobilevit_v2(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7140,6 +7178,8 @@ def test_mobilevit_v1(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7217,6 +7257,8 @@ def test_regnet(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7256,6 +7298,8 @@ def test_retinanet(self): self.device, "--model", self.model, + "--target", + self.target, "--dataset", self.image_dataset, "--ip", @@ -7294,6 +7338,8 @@ def test_roberta(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7331,6 +7377,8 @@ def test_squeezenet(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7367,6 +7415,8 @@ def test_ssd300_vgg16(self): self.device, "--model", self.model, + "--target", + self.target, "--oss_repo", self.oss_repo, "--pretrained_weight", @@ -7407,6 +7457,8 @@ def test_swin_transformer(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7481,6 +7533,8 @@ def test_t5(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7553,6 +7607,8 @@ def test_whisper(self): self.device, "--model", self.model, 
+ "--target", + self.target, "--ip", self.ip, "--port", @@ -7820,6 +7876,8 @@ def test_mobilenet_v2(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7860,6 +7918,8 @@ def test_mobilenet_v3(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7900,6 +7960,8 @@ def test_inception_v3(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7940,6 +8002,8 @@ def test_inception_v4(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -7980,6 +8044,8 @@ def test_vit(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -8018,6 +8084,8 @@ def test_edsr(self): self.device, "--model", self.model, + "--target", + self.target, "--default_dataset", "--ip", self.ip, @@ -8057,6 +8125,8 @@ def test_deeplab_v3(self): self.device, "--model", self.model, + "--target", + self.target, "--download", "--ip", self.ip, @@ -8098,6 +8168,8 @@ def test_mobilebert(self): self.device, "--model", self.model, + "--target", + self.target, "--pretrained_weight", self.pretrained_weight, "--ip", @@ -8139,6 +8211,8 @@ def test_ptq_mobilebert(self): self.device, "--model", self.model, + "--target", + self.target, "--pretrained_weight", self.pretrained_weight, "--ptq", @@ -8181,6 +8255,8 @@ def test_wav2letter(self): self.device, "--model", self.model, + "--target", + self.target, "--pretrained_weight", self.pretrained_weight, "--ip", @@ -8253,6 +8329,8 @@ def test_custom_op(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -8285,6 +8363,8 @@ def test_debugger_generate_optrace(self): self.device, "--model", self.model, + "--target", + self.target, "--ip", self.ip, "--port", @@ -8362,6 +8442,8 @@ def test_cli(self): f"{tmp_dir}/e_out", "--model", self.model, + "--target", + self.target, "--device", self.device, 
"--build_folder", @@ -8441,13 +8523,6 @@ def setup_environment(): default="", type=str, ) - - parser.add_argument( - "--pre_gen_pte", - help="Run the pre-generated pte in the given directory.", - type=str, - ) - parser.add_argument( "--llama_artifacts", help="A folder that contains: weight, tokenizer, and params.", @@ -8477,6 +8552,7 @@ def setup_environment(): TestQNN.pre_gen_pte = args.pre_gen_pte TestQNN.llama_artifacts = args.llama_artifacts TestQNN.op_package_dir = args.op_package_dir + TestQNN.target = args.target return sys.argv[:1] + ns_args diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index 93eee4dfc31..0f0c237a9e1 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -163,10 +163,13 @@ class TestQNN(unittest.TestCase): pretrained_weight: str = "" enable_profile: bool = False op_package_dir: str = "" + target: str = "" + model_name: str = "" online_prepare: bool = False use_8a8w: str = "8a8w" use_16a16w: str = "16a16w" use_16a4w: str = "16a4w" + oss_repo: str = "" shared_buffer: bool = False enable_x86_64: bool = False compile_only: bool = False @@ -421,6 +424,7 @@ def validate_intermediate_tensor(): if check_io_shape else None ), + target=self.target, ) adb.push( inputs=[processed_inputs], @@ -618,6 +622,7 @@ def get_adb_tool(self, pte_fname): host_id=self.host, soc_model=self.model, error_only=self.error_only, + target=self.target, ) return adb diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index be4e86de50f..91610301515 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -1099,6 +1099,7 @@ def get_soc_to_arch_map(): "SXR1230P": HtpArch.V73, "SXR2230P": HtpArch.V69, "SXR2330P": HtpArch.V79, + "QCS9100": HtpArch.V73, } @@ -1115,6 +1116,7 @@ def get_soc_to_chipset_map(): "SXR1230P": QcomChipset.SXR1230P, "SXR2230P": QcomChipset.SXR2230P, "SXR2330P": QcomChipset.SXR2330P, + "QCS9100": QcomChipset.QCS9100, } diff --git 
a/docs/source/backends-qualcomm.md b/docs/source/backends-qualcomm.md index f401cfe0658..c9ac3263728 100644 --- a/docs/source/backends-qualcomm.md +++ b/docs/source/backends-qualcomm.md @@ -53,7 +53,7 @@ For more details and troubleshooting, refer to the official Microsoft WSL instal 👉 [Install WSL | Microsoft Learn](https://learn.microsoft.com/en-us/windows/wsl/install) ### Hardware: -You will need an Android smartphone with adb-connected running on one of below Qualcomm SoCs: +You will need an adb-connected Android / Linux device running on one of the Qualcomm SoCs below: - SA8295 - SM8450 (Snapdragon 8 Gen 1) - SM8475 (Snapdragon 8 Gen 1+) @@ -62,7 +62,7 @@ You will need an Android smartphone with adb-connected running on one of below Q - SM8750 (Snapdragon 8 Elite) - SSG2115P - SSG2125P - - SXR1230P + - SXR1230P (Linux Embedded) - SXR2230P - SXR2330P @@ -73,6 +73,7 @@ This example is verified with SM8550 and SM8450. - Follow ExecuTorch recommended Python version. - A compiler to compile AOT parts, e.g., the GCC compiler comes with Ubuntu LTS. - [Android NDK](https://developer.android.com/ndk). This example is verified with NDK 26c. + - (Optional) Target toolchain for the Linux embedded platform. - [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk) - Click the "Get Software" button to download the latest version of the QNN SDK. - Although newer versions are available, we have verified and recommend using QNN 2.37.0 for stability. @@ -130,8 +131,11 @@ The above script is actively used. 
It is updated more frequently than this tutor An example usage is ```bash cd $EXECUTORCH_ROOT +# android target ./backends/qualcomm/scripts/build.sh -# or +# (optional) linux embedded target +./backends/qualcomm/scripts/build.sh --enable_linux_embedding +# for release build ./backends/qualcomm/scripts/build.sh --release ``` @@ -272,7 +276,10 @@ I 00:00:00.364875 executorch:qnn_executor_runner.cpp:425] Write etdump to etdump The model is merely executed. If we want to feed real inputs and get model outputs, we can use ```bash cd $EXECUTORCH_ROOT +# android python -m examples.qualcomm.scripts.deeplab_v3 -b build-android -m SM8550 --download -s <device_serial> +# (optional) linux embedded +python -m examples.qualcomm.scripts.deeplab_v3 -b build-oe-linux -m SXR1230P --download -s <device_serial> -t aarch64-oe-linux-gcc-9.3 ``` The `<device_serial>` can be found by `adb devices` command. diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt index 19190b6f794..58897dce995 100644 --- a/examples/qualcomm/CMakeLists.txt +++ b/examples/qualcomm/CMakeLists.txt @@ -26,11 +26,14 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE RelWithDebInfo) endif() +# some toolchains might contain their own packages, skip them here +find_package(gflags REQUIRED NO_CMAKE_FIND_ROOT_PATH) +find_package(absl REQUIRED NO_CMAKE_FIND_ROOT_PATH) + # Find prebuilt libraries. executorch package should contain portable_ops_lib, # etdump, bundled_program. 
find_package(executorch CONFIG REQUIRED) target_compile_options(executorch INTERFACE -DET_EVENT_TRACER_ENABLED) -find_package(gflags REQUIRED) set(_common_compile_options -Wno-deprecated-declarations -fPIC) diff --git a/examples/qualcomm/custom_op/custom_ops_1.py b/examples/qualcomm/custom_op/custom_ops_1.py index 28aabfbb342..e84ee87a251 100644 --- a/examples/qualcomm/custom_op/custom_ops_1.py +++ b/examples/qualcomm/custom_op/custom_ops_1.py @@ -258,6 +258,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=sample_input, files=op_package_paths) adb.execute() diff --git a/examples/qualcomm/oss_scripts/albert.py b/examples/qualcomm/oss_scripts/albert.py index 1935798d536..3be48215ac6 100644 --- a/examples/qualcomm/oss_scripts/albert.py +++ b/examples/qualcomm/oss_scripts/albert.py @@ -89,6 +89,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) output_data_folder = f"{args.artifact}/outputs" make_output_dir(output_data_folder) diff --git a/examples/qualcomm/oss_scripts/bert.py b/examples/qualcomm/oss_scripts/bert.py index 31171c3b689..0f9255cefdb 100644 --- a/examples/qualcomm/oss_scripts/bert.py +++ b/examples/qualcomm/oss_scripts/bert.py @@ -86,6 +86,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) output_data_folder = f"{args.artifact}/outputs" make_output_dir(output_data_folder) diff --git a/examples/qualcomm/oss_scripts/conv_former.py b/examples/qualcomm/oss_scripts/conv_former.py index b366f04a713..291f7fc6734 100644 --- a/examples/qualcomm/oss_scripts/conv_former.py +++ b/examples/qualcomm/oss_scripts/conv_former.py @@ -75,6 +75,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) 
adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/convnext_small.py b/examples/qualcomm/oss_scripts/convnext_small.py index 6244a6bcd8d..a8ef65f03db 100755 --- a/examples/qualcomm/oss_scripts/convnext_small.py +++ b/examples/qualcomm/oss_scripts/convnext_small.py @@ -72,6 +72,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/cvt.py b/examples/qualcomm/oss_scripts/cvt.py index d7418c549a9..53ca2c633c8 100644 --- a/examples/qualcomm/oss_scripts/cvt.py +++ b/examples/qualcomm/oss_scripts/cvt.py @@ -140,6 +140,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/deit.py b/examples/qualcomm/oss_scripts/deit.py index cffbba3ff30..45c46d863b5 100644 --- a/examples/qualcomm/oss_scripts/deit.py +++ b/examples/qualcomm/oss_scripts/deit.py @@ -96,6 +96,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/dino_v2.py b/examples/qualcomm/oss_scripts/dino_v2.py index 3263a9f13aa..91a99175f2d 100644 --- a/examples/qualcomm/oss_scripts/dino_v2.py +++ b/examples/qualcomm/oss_scripts/dino_v2.py @@ -79,6 +79,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/distilbert.py b/examples/qualcomm/oss_scripts/distilbert.py index 6ddaf8b18d4..7ca05181645 100644 --- a/examples/qualcomm/oss_scripts/distilbert.py +++ b/examples/qualcomm/oss_scripts/distilbert.py @@ -87,6 +87,8 @@ def main(args): device_id=args.device, 
host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) output_data_folder = f"{args.artifact}/outputs" make_output_dir(output_data_folder) diff --git a/examples/qualcomm/oss_scripts/dit.py b/examples/qualcomm/oss_scripts/dit.py index 68d3500487a..070261c76fc 100644 --- a/examples/qualcomm/oss_scripts/dit.py +++ b/examples/qualcomm/oss_scripts/dit.py @@ -106,6 +106,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/efficientSAM/efficientSAM.py b/examples/qualcomm/oss_scripts/efficientSAM/efficientSAM.py index 6094577b03a..66b693bd0bb 100644 --- a/examples/qualcomm/oss_scripts/efficientSAM/efficientSAM.py +++ b/examples/qualcomm/oss_scripts/efficientSAM/efficientSAM.py @@ -256,6 +256,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/efficientnet.py b/examples/qualcomm/oss_scripts/efficientnet.py index 3430977022e..57d22a7ae05 100644 --- a/examples/qualcomm/oss_scripts/efficientnet.py +++ b/examples/qualcomm/oss_scripts/efficientnet.py @@ -76,6 +76,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/esrgan.py b/examples/qualcomm/oss_scripts/esrgan.py index 82da1064d6f..cd14a9bc52d 100644 --- a/examples/qualcomm/oss_scripts/esrgan.py +++ b/examples/qualcomm/oss_scripts/esrgan.py @@ -77,6 +77,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/eurobert.py 
b/examples/qualcomm/oss_scripts/eurobert.py index 46dea4584e7..a856616bcf2 100644 --- a/examples/qualcomm/oss_scripts/eurobert.py +++ b/examples/qualcomm/oss_scripts/eurobert.py @@ -119,6 +119,8 @@ def replace_rms_norm_with_native_rms_norm(module: torch.nn.Module): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) output_data_folder = f"{args.artifact}/outputs" make_output_dir(output_data_folder) diff --git a/examples/qualcomm/oss_scripts/fastvit.py b/examples/qualcomm/oss_scripts/fastvit.py index f931da66448..e68df2abd98 100644 --- a/examples/qualcomm/oss_scripts/fastvit.py +++ b/examples/qualcomm/oss_scripts/fastvit.py @@ -130,6 +130,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/fbnet.py b/examples/qualcomm/oss_scripts/fbnet.py index 4ef802b6233..38f184653c7 100755 --- a/examples/qualcomm/oss_scripts/fbnet.py +++ b/examples/qualcomm/oss_scripts/fbnet.py @@ -58,6 +58,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/focalnet.py b/examples/qualcomm/oss_scripts/focalnet.py index 421c4537c74..68f997c2f48 100644 --- a/examples/qualcomm/oss_scripts/focalnet.py +++ b/examples/qualcomm/oss_scripts/focalnet.py @@ -76,6 +76,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/gMLP_image_classification.py b/examples/qualcomm/oss_scripts/gMLP_image_classification.py index e684335c623..3776272c41e 100644 --- a/examples/qualcomm/oss_scripts/gMLP_image_classification.py +++ 
b/examples/qualcomm/oss_scripts/gMLP_image_classification.py @@ -67,6 +67,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/llama/decoder_utils.py b/examples/qualcomm/oss_scripts/llama/decoder_utils.py index 6a4d00a5308..c95b2963b29 100644 --- a/examples/qualcomm/oss_scripts/llama/decoder_utils.py +++ b/examples/qualcomm/oss_scripts/llama/decoder_utils.py @@ -328,6 +328,7 @@ def __init__( host_id=args.host, soc_model=args.model, runner="examples/qualcomm/oss_scripts/llama/qnn_llama_runner", + target=args.target, ) self.adb.push(inputs=[], files=[self.runtime_tokenizer_path]) # n seq len = n-1 cache len, so we len(inps) = n-1 during _model_call diff --git a/examples/qualcomm/oss_scripts/llama/llama.py b/examples/qualcomm/oss_scripts/llama/llama.py index 887e680341f..c1d1704929b 100755 --- a/examples/qualcomm/oss_scripts/llama/llama.py +++ b/examples/qualcomm/oss_scripts/llama/llama.py @@ -1013,6 +1013,7 @@ def post_process(): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, runner=f"examples/qualcomm/oss_scripts/llama/qnn_llama_runner", ) # No pregen inputs, input_list is not required diff --git a/examples/qualcomm/oss_scripts/llm_utils/eval_decoder_model_qnn.py b/examples/qualcomm/oss_scripts/llm_utils/eval_decoder_model_qnn.py index 49cdc192f22..df19519e847 100644 --- a/examples/qualcomm/oss_scripts/llm_utils/eval_decoder_model_qnn.py +++ b/examples/qualcomm/oss_scripts/llm_utils/eval_decoder_model_qnn.py @@ -105,6 +105,7 @@ def __init__( output_dir: str = ".", quant_attrs=None, build_folder: str = "build-android", + target="aarch64-android", ): super().__init__(None, tokenizer, max_seq_length) import getpass @@ -123,6 +124,7 @@ def __init__( device_id=device, host_id=host, soc_model=soc_model, + target=target, ) self.adb.push() @@ -201,6 +203,7 @@ def 
gen_eval_wrapper( output_dir=args.artifact, quant_attrs=quant_attrs, build_folder=args.build_folder, + target=args.target, ) else: raise RuntimeError("Currently only support evaluate pte on device") diff --git a/examples/qualcomm/oss_scripts/maxvit_t.py b/examples/qualcomm/oss_scripts/maxvit_t.py index 7a53edd715b..a6cda49a106 100755 --- a/examples/qualcomm/oss_scripts/maxvit_t.py +++ b/examples/qualcomm/oss_scripts/maxvit_t.py @@ -181,6 +181,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/mobilevit_v1.py b/examples/qualcomm/oss_scripts/mobilevit_v1.py index f3f5f8e86aa..eabf9b215ed 100644 --- a/examples/qualcomm/oss_scripts/mobilevit_v1.py +++ b/examples/qualcomm/oss_scripts/mobilevit_v1.py @@ -103,6 +103,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/mobilevit_v2.py b/examples/qualcomm/oss_scripts/mobilevit_v2.py index 64042efa212..f201b6af4c0 100644 --- a/examples/qualcomm/oss_scripts/mobilevit_v2.py +++ b/examples/qualcomm/oss_scripts/mobilevit_v2.py @@ -111,6 +111,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/moshi/mimi.py b/examples/qualcomm/oss_scripts/moshi/mimi.py index 0679b649d9f..92650f645b6 100644 --- a/examples/qualcomm/oss_scripts/moshi/mimi.py +++ b/examples/qualcomm/oss_scripts/moshi/mimi.py @@ -186,6 +186,7 @@ def inference_mimi_encoder(args, encoder_inputs, encoder_pte_filename): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=encoder_inputs) adb.execute() @@ -360,6 +361,7 @@ def inference_static_mimi_decoder( 
host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, runner="examples/qualcomm/oss_scripts/moshi/qnn_mimi_decoder_runner", ) adb.push(inputs=encoded_results) diff --git a/examples/qualcomm/oss_scripts/pvt.py b/examples/qualcomm/oss_scripts/pvt.py index 0f850d0376e..6083f4154fa 100644 --- a/examples/qualcomm/oss_scripts/pvt.py +++ b/examples/qualcomm/oss_scripts/pvt.py @@ -76,6 +76,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/qwen2_5/qwen2_5.py b/examples/qualcomm/oss_scripts/qwen2_5/qwen2_5.py index 5a08e1126f1..684415d331a 100644 --- a/examples/qualcomm/oss_scripts/qwen2_5/qwen2_5.py +++ b/examples/qualcomm/oss_scripts/qwen2_5/qwen2_5.py @@ -156,6 +156,7 @@ def post_process(): host_id=args.host, soc_model=args.model, runner="examples/models/llama/llama_main", + target=args.target, ) # No pregen inputs, input_list is not required adb.push(inputs=[], input_list="", files=[tokenizer_json_path]) diff --git a/examples/qualcomm/oss_scripts/regnet.py b/examples/qualcomm/oss_scripts/regnet.py index 617859df1af..8b6013b56ee 100644 --- a/examples/qualcomm/oss_scripts/regnet.py +++ b/examples/qualcomm/oss_scripts/regnet.py @@ -75,6 +75,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/retinanet.py b/examples/qualcomm/oss_scripts/retinanet.py index adb1241486d..4c82b222d5b 100644 --- a/examples/qualcomm/oss_scripts/retinanet.py +++ b/examples/qualcomm/oss_scripts/retinanet.py @@ -248,6 +248,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/roberta.py 
b/examples/qualcomm/oss_scripts/roberta.py index dc4a89d69b5..491e29f5712 100644 --- a/examples/qualcomm/oss_scripts/roberta.py +++ b/examples/qualcomm/oss_scripts/roberta.py @@ -92,6 +92,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) output_data_folder = f"{args.artifact}/outputs" make_output_dir(output_data_folder) diff --git a/examples/qualcomm/oss_scripts/squeezenet.py b/examples/qualcomm/oss_scripts/squeezenet.py index e5d89d8f159..22aede15952 100644 --- a/examples/qualcomm/oss_scripts/squeezenet.py +++ b/examples/qualcomm/oss_scripts/squeezenet.py @@ -66,6 +66,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/ssd300_vgg16.py b/examples/qualcomm/oss_scripts/ssd300_vgg16.py index b9c30936a28..739edba374b 100644 --- a/examples/qualcomm/oss_scripts/ssd300_vgg16.py +++ b/examples/qualcomm/oss_scripts/ssd300_vgg16.py @@ -158,6 +158,8 @@ def main(args): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/swin_transformer.py b/examples/qualcomm/oss_scripts/swin_transformer.py index 1ee4a392add..74cb4789d79 100644 --- a/examples/qualcomm/oss_scripts/swin_transformer.py +++ b/examples/qualcomm/oss_scripts/swin_transformer.py @@ -129,6 +129,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/swin_v2_t.py b/examples/qualcomm/oss_scripts/swin_v2_t.py index 954c27f428f..e63fce80721 100755 --- a/examples/qualcomm/oss_scripts/swin_v2_t.py +++ b/examples/qualcomm/oss_scripts/swin_v2_t.py @@ -122,6 +122,7 @@ def 
main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/t5/t5.py b/examples/qualcomm/oss_scripts/t5/t5.py index 093572f032a..e901207c567 100644 --- a/examples/qualcomm/oss_scripts/t5/t5.py +++ b/examples/qualcomm/oss_scripts/t5/t5.py @@ -296,6 +296,8 @@ def post_process(): device_id=args.device, host_id=args.host, soc_model=args.model, + shared_buffer=args.shared_buffer, + target=args.target, runner="examples/qualcomm/oss_scripts/t5/qnn_t5_runner", ) adb.push( diff --git a/examples/qualcomm/oss_scripts/vit_b_16.py b/examples/qualcomm/oss_scripts/vit_b_16.py index 6b79ecc7cda..6042144b7bf 100755 --- a/examples/qualcomm/oss_scripts/vit_b_16.py +++ b/examples/qualcomm/oss_scripts/vit_b_16.py @@ -72,6 +72,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/oss_scripts/whisper/whisper.py b/examples/qualcomm/oss_scripts/whisper/whisper.py index 3985849411e..8be069e8551 100644 --- a/examples/qualcomm/oss_scripts/whisper/whisper.py +++ b/examples/qualcomm/oss_scripts/whisper/whisper.py @@ -427,6 +427,7 @@ def post_process(): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, runner="examples/qualcomm/oss_scripts/whisper/qnn_whisper_runner", ) # No pregen inputs, input_list is not required diff --git a/examples/qualcomm/qaihub_scripts/llama/llama2/qaihub_llama2_7b.py b/examples/qualcomm/qaihub_scripts/llama/llama2/qaihub_llama2_7b.py index 3b4d6c7cbff..ec30463350f 100644 --- a/examples/qualcomm/qaihub_scripts/llama/llama2/qaihub_llama2_7b.py +++ b/examples/qualcomm/qaihub_scripts/llama/llama2/qaihub_llama2_7b.py @@ -102,6 +102,7 @@ def main(args): host_id=args.host, soc_model=args.model, runner="examples/qualcomm/qaihub_scripts/llama/qaihub_llama2_7b_runner", + 
target=args.target, ) output_file = "result.txt" pos_embs_file = ["freq_cos", "freq_sin"] diff --git a/examples/qualcomm/qaihub_scripts/llama/llama3/qaihub_llama3_8b.py b/examples/qualcomm/qaihub_scripts/llama/llama3/qaihub_llama3_8b.py index 7607c476051..2d7e7ee0609 100644 --- a/examples/qualcomm/qaihub_scripts/llama/llama3/qaihub_llama3_8b.py +++ b/examples/qualcomm/qaihub_scripts/llama/llama3/qaihub_llama3_8b.py @@ -103,6 +103,7 @@ def main(args): host_id=args.host, soc_model=args.model, runner="examples/qualcomm/qaihub_scripts/llama/qaihub_llama3_8b_runner", + target=args.target, ) output_file = "result.txt" pos_embs_file = ["freq_cos", "freq_sin"] diff --git a/examples/qualcomm/qaihub_scripts/stable_diffusion/qaihub_stable_diffusion.py b/examples/qualcomm/qaihub_scripts/stable_diffusion/qaihub_stable_diffusion.py index 7905dfa9a7e..0f2d906c222 100644 --- a/examples/qualcomm/qaihub_scripts/stable_diffusion/qaihub_stable_diffusion.py +++ b/examples/qualcomm/qaihub_scripts/stable_diffusion/qaihub_stable_diffusion.py @@ -255,6 +255,7 @@ def inference(args, compiler_specs, pte_files): host_id=args.host, soc_model=args.model, runner="examples/qualcomm/qaihub_scripts/stable_diffusion/qaihub_stable_diffusion_runner", + target=args.target, ) input_unet = () diff --git a/examples/qualcomm/qaihub_scripts/utils/export.py b/examples/qualcomm/qaihub_scripts/utils/export.py index ff364ab986e..00a0d397a80 100644 --- a/examples/qualcomm/qaihub_scripts/utils/export.py +++ b/examples/qualcomm/qaihub_scripts/utils/export.py @@ -148,7 +148,14 @@ def get_tensor(io_info, tensors, logger, checking_output=False): def to_context_binary( - model_lib, soc_model, device, host, build_folder, output_folder, logger + model_lib, + soc_model, + device, + host, + target, + build_folder, + output_folder, + logger, ): ext = Path(model_lib).suffix if ext == ".bin": @@ -169,6 +176,7 @@ def to_context_binary( device_id=device, soc_model=soc_model, host_id=host, + target=target, ) 
logger.info("pushing QNN libraries & tool") @@ -237,6 +245,7 @@ def compile(args): soc_model=args.model, device=args.device, host=args.host, + target=args.target, build_folder=args.build_folder, output_folder=output_dir, logger=logger, @@ -309,6 +318,7 @@ def execute(args): soc_model=graph_info["soc_model"], host_id=args.host, shared_buffer=args.shared_buffer, + target=args.target, ) logger.info("pushing QNN libraries & other artifacts") diff --git a/examples/qualcomm/scripts/deeplab_v3.py b/examples/qualcomm/scripts/deeplab_v3.py index 164dc312fae..a0d1240b870 100755 --- a/examples/qualcomm/scripts/deeplab_v3.py +++ b/examples/qualcomm/scripts/deeplab_v3.py @@ -105,6 +105,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/scripts/edsr.py b/examples/qualcomm/scripts/edsr.py index 0601ff9e0b7..a391d7fea84 100755 --- a/examples/qualcomm/scripts/edsr.py +++ b/examples/qualcomm/scripts/edsr.py @@ -140,6 +140,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/scripts/inception_v3.py b/examples/qualcomm/scripts/inception_v3.py index fbd693d7100..f75f093570f 100755 --- a/examples/qualcomm/scripts/inception_v3.py +++ b/examples/qualcomm/scripts/inception_v3.py @@ -71,6 +71,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/scripts/inception_v4.py b/examples/qualcomm/scripts/inception_v4.py index a13804d8868..0c763d2327f 100755 --- a/examples/qualcomm/scripts/inception_v4.py +++ b/examples/qualcomm/scripts/inception_v4.py @@ -69,6 +69,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) 
adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/scripts/mobilebert_fine_tune.py b/examples/qualcomm/scripts/mobilebert_fine_tune.py index 8eff2c9f2eb..bb58fd358c1 100755 --- a/examples/qualcomm/scripts/mobilebert_fine_tune.py +++ b/examples/qualcomm/scripts/mobilebert_fine_tune.py @@ -292,6 +292,7 @@ def calibrator(gm): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/scripts/mobilenet_v2.py b/examples/qualcomm/scripts/mobilenet_v2.py index e3c5d16e545..10e54649c22 100755 --- a/examples/qualcomm/scripts/mobilenet_v2.py +++ b/examples/qualcomm/scripts/mobilenet_v2.py @@ -70,6 +70,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/scripts/mobilenet_v3.py b/examples/qualcomm/scripts/mobilenet_v3.py index 02fa8c93269..b808ca986a3 100644 --- a/examples/qualcomm/scripts/mobilenet_v3.py +++ b/examples/qualcomm/scripts/mobilenet_v3.py @@ -69,6 +69,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/scripts/torchvision_vit.py b/examples/qualcomm/scripts/torchvision_vit.py index 0417ffdc673..2a428683ec3 100755 --- a/examples/qualcomm/scripts/torchvision_vit.py +++ b/examples/qualcomm/scripts/torchvision_vit.py @@ -67,6 +67,7 @@ def main(args): host_id=args.host, soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/scripts/wav2letter.py b/examples/qualcomm/scripts/wav2letter.py index 980a1dd9acc..7fddb48bfb7 100644 --- a/examples/qualcomm/scripts/wav2letter.py +++ b/examples/qualcomm/scripts/wav2letter.py @@ -160,6 +160,7 @@ def main(args): host_id=args.host, 
soc_model=args.model, shared_buffer=args.shared_buffer, + target=args.target, ) adb.push(inputs=inputs) adb.execute() diff --git a/examples/qualcomm/util_scripts/cli.py b/examples/qualcomm/util_scripts/cli.py index 6189aefeedb..5cd411ec42f 100644 --- a/examples/qualcomm/util_scripts/cli.py +++ b/examples/qualcomm/util_scripts/cli.py @@ -259,6 +259,7 @@ def execute(args): soc_model=args.model, host_id=args.host, shared_buffer=args.shared_buffer, + target=args.target, ) logger.info("pushing QNN libraries & other artifacts") @@ -483,6 +484,18 @@ def main(): type=str, help="Gateway hostname.", ) + sub_execute.add_argument( + "-t", + "--target", + help="Target platform for deployment", + choices=[ + "aarch64-android", + "aarch64-oe-linux-gcc9.3", + "aarch64-oe-linux-gcc11.2", + ], + default="aarch64-android", + type=str, + ) sub_execute.add_argument( "--shared_buffer", help=( diff --git a/examples/qualcomm/util_scripts/gen_etrecord.py b/examples/qualcomm/util_scripts/gen_etrecord.py index 7c1ced1e032..e5e80765e5f 100644 --- a/examples/qualcomm/util_scripts/gen_etrecord.py +++ b/examples/qualcomm/util_scripts/gen_etrecord.py @@ -61,6 +61,7 @@ def main(args): workspace=f"/data/local/tmp/executorch/{pte_filename}", device_id=args.device, soc_model=args.model, + target=args.target, ) adb.push(inputs=[sample_input]) adb.execute() diff --git a/examples/qualcomm/util_scripts/qairt_visualizer_demo.py b/examples/qualcomm/util_scripts/qairt_visualizer_demo.py index 215d3c598a7..07de32365c0 100644 --- a/examples/qualcomm/util_scripts/qairt_visualizer_demo.py +++ b/examples/qualcomm/util_scripts/qairt_visualizer_demo.py @@ -49,6 +49,7 @@ def main(args) -> None: device_id=args.device, host_id=args.host, soc_model=args.model, + target=args.target, ) binaries_trace = generate_optrace( args.artifact, diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py index 036c5060b12..bff7a0cb14f 100755 --- a/examples/qualcomm/utils.py +++ b/examples/qualcomm/utils.py @@ -64,8 +64,9 
@@ class SimpleADB: error_only (bool): Redirect stdio and leave error messages only shared_buffer (bool): Apply zero-copy mechanism in runtime runner (str): Runtime executor binary - expected_input_shape (Tuple[torch.Size]): input shape of dynamic graph - expected_output_shape (Tuple[torch.Size]): output shape of dynamic graph + target (str): Target toolchain name + expected_input_shape (Tuple[torch.Size]): Input shape of dynamic graph + expected_output_shape (Tuple[torch.Size]): Output shape of dynamic graph """ def __init__( @@ -81,6 +82,7 @@ def __init__( shared_buffer=False, dump_intermediate_outputs=False, runner="examples/qualcomm/executor_runner/qnn_executor_runner", + target="aarch64-android", expected_input_shape=None, expected_output_shape=None, ): @@ -100,6 +102,7 @@ def __init__( self.error_only = error_only self.shared_buffer = shared_buffer self.runner = runner + self.target = target self.expected_input_shape = expected_input_shape self.expected_output_shape = expected_output_shape self.extra_cmds = "" @@ -130,20 +133,20 @@ def push(self, inputs=None, input_list=None, files=None, init_env=True): # necessary artifacts artifacts = [ *self.pte_path, - f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtp.so", + f"{self.qnn_sdk}/lib/{self.target}/libQnnHtp.so", ( f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/" f"unsigned/libQnnHtpV{self.htp_arch}Skel.so" ), ( - f"{self.qnn_sdk}/lib/aarch64-android/" + f"{self.qnn_sdk}/lib/{self.target}/" f"libQnnHtpV{self.htp_arch}Stub.so" ), - f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtpPrepare.so", - f"{self.qnn_sdk}/lib/aarch64-android/libQnnSystem.so", + f"{self.qnn_sdk}/lib/{self.target}/libQnnHtpPrepare.so", + f"{self.qnn_sdk}/lib/{self.target}/libQnnSystem.so", f"{self.build_path}/{self.runner}", f"{self.build_path}/backends/qualcomm/libqnn_executorch_backend.so", - f"{self.qnn_sdk}/lib/aarch64-android/libQnnModelDlc.so", + f"{self.qnn_sdk}/lib/{self.target}/libQnnModelDlc.so", ] input_list_file, input_files = 
generate_inputs( self.working_dir, self.input_list_filename, inputs @@ -418,7 +421,7 @@ def build_executorch_binary( None: The function writes the output to a specified .pte file. """ backend_options = generate_htp_compiler_spec( - use_fp16=False if quant_dtype else True + use_fp16=False if quant_dtype is not None else True ) compile_spec = generate_qnn_executorch_compiler_spec( soc_model=getattr(QcomChipset, soc_model), @@ -877,6 +880,25 @@ def setup_common_args_and_variables(): type=int, ) + parser.add_argument( + "-t", + "--target", + help="Target platform for deployment", + choices=[ + "aarch64-android", + "aarch64-oe-linux-gcc9.3", + "aarch64-oe-linux-gcc11.2", + ], + default="aarch64-android", + type=str, + ) + + parser.add_argument( + "--pre_gen_pte", + help="Run the pre-generated pte in the given directory.", + type=str, + ) + # QNN_SDK_ROOT might also be an argument, but it is used in various places. # So maybe it's fine to just use the environment. if "QNN_SDK_ROOT" not in os.environ: diff --git a/setup.py b/setup.py index 97a1d05096e..39cb9f58c6e 100644 --- a/setup.py +++ b/setup.py @@ -493,7 +493,14 @@ def run(self): # Run build.sh with SDK path exported env = dict(**os.environ) env["QNN_SDK_ROOT"] = str(sdk_path) - subprocess.check_call([str(build_sh), "--skip_aarch64"], env=env) + subprocess.check_call( + [ + str(build_sh), + "--skip_linux_android", + "--skip_linux_embedding", + ], + env=env, + ) # Copy the main .so into the wheel package so_src = (