Arm backend: Remove build_quantized_ops_aot_lib.sh

AdrianLundell · AdrianLundell · commit 1a04d1f77b11 · 2025-04-22T12:25:32.000+02:00
This lib is built by default when installing executorch,
so use that instead of building our own version. The
version built by executorch, however, has an extra dependency on
_portable_lib.cpython-310, which needs to be loaded as well.

Also removes all mentions of this script in related files.

Signed-off-by: Adrian Lundell <adrian.lundell@arm.com>
Change-Id: Ia3cf9eabb4e293ef96a9cc3441c4233241873684
diff --git a/backends/arm/scripts/build_quantized_ops_aot_lib.sh b/backends/arm/scripts/build_quantized_ops_aot_lib.sh
diff --git a/backends/arm/test/conftest.py b/backends/arm/test/conftest.py
@@ -5,7 +5,6 @@
 
 import logging
 import os
-import platform
 import random
 import shutil
 import subprocess
@@ -174,30 +173,31 @@ def get_option(option: str) -> Any | None:
     return None
 
 
-def _load_libquantized_ops_aot_lib():
+def _load_lib(lib_name_pattern: str, build_folder: str):
     """
-    Find and load the libquantized_ops_aot_lib shared library.
+    Find and load a library by name in build_folder.
     """
-    so_ext = {
-        "Darwin": "dylib",
-        "Linux": "so",
-        "Windows": "dll",
-    }.get(platform.system(), None)
-
     find_lib_cmd = [
         "find",
-        "cmake-out-aot-lib",
+        build_folder,
         "-name",
-        f"libquantized_ops_aot_lib.{so_ext}",
+        f"{lib_name_pattern}",
     ]
-
     res = subprocess.run(find_lib_cmd, capture_output=True)
     if res.returncode == 0:
-        library_path = res.stdout.decode().strip()
+        library_paths = res.stdout.decode().strip().split("\n")
         import torch
 
-        torch.ops.load_library(library_path)
+        torch.ops.load_library(library_paths[0])
     else:
         raise RuntimeError(
-            f"Did not find libquantized_ops_aot_lib.{so_ext} in cmake-out-aot-lib. Did you build it?"
+            f"Did not find any library matching {lib_name_pattern} in {build_folder}. Have you installed executorch properly?"
         )
+
+
+def _load_libquantized_ops_aot_lib(executorch_install_dir: str = "pip-out"):
+    """
+    Find and load the libquantized_ops_aot_lib shared library.
+    """
+    _load_lib("_portable_lib.cpython-310*", executorch_install_dir)
+    _load_lib("libquantized_ops_aot_lib.*", executorch_install_dir)
diff --git a/backends/arm/test/setup_testing.sh b/backends/arm/test/setup_testing.sh
@@ -52,8 +52,6 @@ function build_semihosting_executorch_runner() {
     find ${build_test_dir} -name "arm_executor_runner"
 }
 
-cd $et_root_dir && backends/arm/scripts/build_quantized_ops_aot_lib.sh
-
 # Use most optimal system_configs for testing
 build_semihosting_executorch_runner corstone-300 Ethos_U55_High_End_Embedded
 
diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh
@@ -74,9 +74,6 @@ test_pytest() { # Test ops and other things
 
     ./examples/models/llama3_2_vision/install_requirements.sh
 
-    cd "${et_root_dir}"
-    backends/arm/scripts/build_quantized_ops_aot_lib.sh
-
     # Run arm baremetal pytest tests without FVP
     pytest  --verbose --color=yes --numprocesses=auto backends/arm/test/
     echo "${TEST_SUITE_NAME}: PASS"
diff --git a/backends/arm/test/test_model.py b/backends/arm/test/test_model.py
@@ -5,7 +5,6 @@
 
 import argparse
 import os
-import platform
 import subprocess
 import sys
 
@@ -113,14 +112,6 @@ def build_libs(et_build_root: str, script_path: str):
             "--portable_kernels=aten::_softmax.out",
         ]
     )
-    run_external_cmd(
-        [
-            "bash",
-            os.path.join(script_path, "build_quantized_ops_aot_lib.sh"),
-            f"--et_build_root={et_build_root}",
-            "--build_type=Release",
-        ]
-    )
 
 
 def build_pte(
@@ -132,17 +123,6 @@ def build_pte(
     build_output: str,
     no_intermediate: bool,
 ):
-    soext = {"Darwin": "dylib", "Linux": "so", "Windows": "dll"}.get(
-        platform.system(), None
-    )
-    solibs_path = os.path.join(
-        et_build_root,
-        "cmake-out-aot-lib",
-        "kernels",
-        "quantized",
-        f"libquantized_ops_aot_lib.{soext}",
-    )
-    solibs = f"--so_library={solibs_path}"
 
     intermediate = ""
     if not no_intermediate:
@@ -162,7 +142,6 @@ def build_pte(
             f"--output={build_output}",
             f"--system_config={system_config}",
             f"--memory_mode={memory_mode}",
-            solibs,
         ]
     )
 
diff --git a/docs/source/backends-arm-ethos-u.md b/docs/source/backends-arm-ethos-u.md
@@ -17,9 +17,6 @@ To compile for the NPUs, the Ethos-U Vela compiler is needed. A target-specific
 
 These dependencies can easily be downloaded using the script `examples/arm/setup.sh`.
 
-To work with with quantized models, build the quantize_ops_aot library that contains kernels for quantization and dequantization. This can be done with the script
-`backends/arm/scripts/build_quantized_ops_aot_lib.sh`.
-
 ## Using the Arm Ethos-U backend
 The example below demonstrates the lowering processs of a MobileNet V2 model from torchvision for a Ethos-U55 target. Since the model is a floating point model, first quantize it using the `EthosUQuantizer`. Then, pass an instance of the `EthosUPartitioner` to `to_edge_transform_and_lower`. Both the quantizer and the partitioner need a compilation specification created using `ArmCompileSpecBuilder`.
 
diff --git a/docs/source/tutorial-arm-ethos-u.md b/docs/source/tutorial-arm-ethos-u.md
@@ -244,28 +244,9 @@ python3 -m examples.arm.aot_arm_compiler --model_name="add" --delegate
 ```
 
 ### Delegated Quantized Workflow
-Before generating the `.pte` file for delegated quantized networks like MobileNetV2, you need to build the `quantized_ops_aot_lib`
-
-You can just run the `backends/arm/scripts/build_quantized_ops_aot_lib.sh` script to build this for you or build it yourself like this.
-
-```bash
-
-cd <executorch_root_dir>
-mkdir -p cmake-out-aot-lib
-cmake -DCMAKE_BUILD_TYPE=Release \
-    -DEXECUTORCH_BUILD_XNNPACK=OFF \
-    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON \
-    -DPYTHON_EXECUTABLE=python3 \
--Bcmake-out-aot-lib \
-    "${et_root_dir}"
-
-cmake --build cmake-out-aot-lib --parallel -- quantized_ops_aot_lib
-```
-
-After the `quantized_ops_aot_lib` build, you can run the following script to generate the `.pte` file
+Generating the `.pte` file can be done using the aot_arm_compiler:
 ```bash
-python3 -m examples.arm.aot_arm_compiler --model_name="mv2" --delegate --quantize --so_library="$(find cmake-out-aot-lib -name libquantized_ops_aot_lib.so)"
+python3 -m examples.arm.aot_arm_compiler --model_name="mv2" --delegate --quantize
 # should produce ./mv2_arm_delegate_ethos-u55-128.pte
 ```
 
@@ -315,7 +296,7 @@ To run a `.pte` file with the Arm backend delegate call instructions, you will n
 
 - `libexecutorch_delegate_ethos_u.a`
 
-These libraries are generated by the `backends/arm/scripts/build_executorch.sh`, `backends/arm/scripts/build_portable_kernels.sh` and `backends/arm/scripts/build_quantized_ops_aot_lib.sh` scripts called from the `run.sh` script.
+These libraries are generated by the `backends/arm/scripts/build_executorch.sh` and `backends/arm/scripts/build_portable_kernels.sh` scripts called from the `run.sh` script.
 
 The `--portable_kernels` flag can be used to set the build flag `EXECUTORCH_SELECT_OPS_LIST` when running `backends/arm/scripts/build_portable_kernels.sh` that will decide the number of portable operators included in the build and are available at runtime. It must match with `.pte` file's requirements, otherwise you will get `Missing Operator` error at runtime.
 
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
@@ -29,6 +29,7 @@
     get_symmetric_quantization_config,
     TOSAQuantizer,
 )
+from executorch.backends.arm.test.conftest import _load_libquantized_ops_aot_lib
 from executorch.backends.arm.tosa_partitioner import TOSAPartitioner
 from executorch.backends.arm.tosa_specification import TosaSpecification
 
@@ -466,7 +467,7 @@ def get_args():
         "--so_library",
         required=False,
         default=None,
-        help="Provide path to so library. E.g., cmake-out/examples/portable/custom_ops/libcustom_ops_aot_lib.so",
+        help="Provide path to custom .so library.",
     )
     parser.add_argument(
         "--debug", action="store_true", help="Set the logging level to debug."
@@ -509,11 +510,10 @@ def get_args():
     if args.debug:
         logging.basicConfig(level=logging.DEBUG, format=FORMAT, force=True)
 
-    if args.quantize and not args.so_library:
-        logging.warning(
-            "Quantization enabled without supplying path to libcustom_ops_aot_lib using -s flag."
-            + "This is required for running quantized models with unquantized input."
-        )
+    # Load quantized ops library.
+    if args.quantize:
+        logging.info("Loading lib_quantized_custom_op_lib")
+        _load_libquantized_ops_aot_lib(executorch_install_dir="pip-out")
 
     # if we have custom ops, register them before processing the model
     if args.so_library is not None:
diff --git a/examples/arm/ethos_u_minimal_example.ipynb b/examples/arm/ethos_u_minimal_example.ipynb
@@ -115,32 +115,6 @@
     "quantized_exported_program = torch.export.export_for_training(quantized_graph_module, example_inputs)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The quantization nodes created in the previous cell are not built by default with ExecuTorch but must be included in the .pte-file, and so they need to be built separately. `backends/arm/scripts/build_quantized_ops_aot_lib.sh` is a utility script which does this. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import subprocess \n",
-    "import os \n",
-    "\n",
-    "# Setup paths\n",
-    "cwd_dir = os.getcwd()\n",
-    "et_dir = os.path.join(cwd_dir, \"..\", \"..\")\n",
-    "et_dir = os.path.abspath(et_dir)\n",
-    "script_dir = os.path.join(et_dir, \"backends\", \"arm\", \"scripts\")\n",
-    "\n",
-    "# Run build_quantized_ops_aot_lib.sh\n",
-    "subprocess.run(os.path.join(script_dir, \"build_quantized_ops_aot_lib.sh\"), shell=True, cwd=et_dir)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/examples/arm/run.sh b/examples/arm/run.sh
@@ -146,14 +146,6 @@ fi
 backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $devtools_flag
 backends/arm/scripts/build_portable_kernels.sh --et_build_root="${et_build_root}" --build_type=$build_type --portable_kernels=$portable_kernels
 
-# Build a lib quantized_ops_aot_lib
-backends/arm/scripts/build_quantized_ops_aot_lib.sh --et_build_root="${et_build_root}" --build_type=$build_type
-
-SO_EXT=$(python3 -c 'import platform; print({"Darwin": "dylib", "Linux": "so", "Windows": "dll"}.get(platform.system(), None))')
-# We are using the aot_lib from build_quantization_aot_lib below
-SO_LIB=$(find "${et_build_root}/cmake-out-aot-lib" -name libquantized_ops_aot_lib.${SO_EXT})
-
-
 if [[ -z "$model_name" ]]; then
     # the test models run, and whether to delegate
     test_model=( "softmax" "add" "add3" "mv2" )
@@ -205,7 +197,7 @@ for i in "${!test_model[@]}"; do
         model_compiler_flags="${model_compiler_flags} --model_input=${model_input}"
     fi
 
-    ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --so_library=$SO_LIB --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag"
+    ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag"
     echo "CALL ${ARM_AOT_CMD}" >&2
     ${ARM_AOT_CMD} 1>&2
 

Original file line number	Diff line number	Diff line change
`@@ -52,8 +52,6 @@ function build_semihosting_executorch_runner() {`
`52`	`52`	`find ${build_test_dir} -name "arm_executor_runner"`
`53`	`53`	`}`
`54`	`54`
`55`		`-cd $et_root_dir && backends/arm/scripts/build_quantized_ops_aot_lib.sh`
`56`		`-`
`57`	`55`	`# Use most optimal system_configs for testing`
`58`	`56`	`build_semihosting_executorch_runner corstone-300 Ethos_U55_High_End_Embedded`
`59`	`57`