Skip to content

Commit 6fed744

Browse files
haowhsu-quic authored and hinriksnaer committed
Qualcomm AI Engine Direct - support cli (pytorch#11788)
### Summary - add cli for quantize / compile / execute pipeline ### Test plan ```bash python backends/qualcomm/tests/test_qnn_delegate.py TestUtilsScript.test_cli -b build-android -s $device -m SM8750 ``` cc @cccclai @winskuo-quic @shewu-quic @cbilgin
1 parent 0da5cb4 commit 6fed744

File tree

6 files changed

+679
-24
lines changed

6 files changed

+679
-24
lines changed

backends/qualcomm/qnn_preprocess.py

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,7 @@ def _build_op_wrappers(
7878
)
7979
assert node.target == context_loader_target, err_msg
8080
# if graph has context binary loader node, return directly
81-
return PreprocessResult(
82-
processed_bytes=node.meta[OpContextLoader.meta_ctx_bin],
83-
debug_handle_map={},
84-
)
81+
return node.meta[OpContextLoader.meta_ctx_bin]
8582
except:
8683
raise RuntimeError(err_msg)
8784

@@ -161,30 +158,44 @@ def preprocess_multimethod(
161158
generate_qnn_executorch_option(compile_spec)
162159
)
163160
qnn_manager.Init()
164-
py_op_wrapper_list = []
161+
py_op_wrapper_list, ctx_binary_list = [], []
165162
for j, programs in enumerate(edge_programs.values()):
166163
logger.info(f"Processing Method({j}): ({i+1}/{num_sub_graphs})")
167164
py_op_wrappers = QnnBackend._build_op_wrappers(
168165
programs[i],
169166
qnn_manager.IsTensorDump(),
170167
option.op_package_options.op_package_infos,
171168
)
172-
py_op_wrapper_list.append(
173-
[py_op_wrapper.GetOpWrapper() for py_op_wrapper in py_op_wrappers]
174-
)
169+
if isinstance(py_op_wrappers, bytes):
170+
ctx_binary_list.append(py_op_wrappers)
171+
else:
172+
py_op_wrapper_list.append(
173+
[
174+
py_op_wrapper.GetOpWrapper()
175+
for py_op_wrapper in py_op_wrappers
176+
]
177+
)
175178

176-
qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
177-
assert (
178-
len(qnn_context_binary) != 0
179-
), "Failed to generate Qnn context binary."
180-
qnn_manager.Destroy()
181-
# methods should share the same context binary for current partition
182-
for key in edge_programs.keys():
183-
all_processed_results[key].append(
184-
PreprocessResult(
185-
processed_bytes=bytes(qnn_context_binary),
186-
debug_handle_map={},
179+
if len(py_op_wrapper_list) == len(edge_programs.values()):
180+
qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
181+
assert (
182+
len(qnn_context_binary) != 0
183+
), "Failed to generate Qnn context binary."
184+
qnn_manager.Destroy()
185+
# methods should share the same context binary for current partition
186+
for key in edge_programs.keys():
187+
all_processed_results[key].append(
188+
PreprocessResult(
189+
processed_bytes=bytes(qnn_context_binary),
190+
debug_handle_map={},
191+
)
187192
)
188-
)
193+
elif len(ctx_binary_list) == len(edge_programs.values()):
194+
for i, key in enumerate(edge_programs.keys()):
195+
all_processed_results[key].append(
196+
PreprocessResult(processed_bytes=ctx_binary_list[i])
197+
)
198+
else:
199+
raise RuntimeError("Hybrid compilation is not supported")
189200

190201
return all_processed_results

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5622,6 +5622,68 @@ def test_debugger_generate_optrace(self):
56225622
qhas_data = json.load(qhas_file)
56235623
self.assertIn("data", qhas_data)
56245624

5625+
def test_cli(self):
5626+
with tempfile.TemporaryDirectory() as tmp_dir:
5627+
sample_input = torch.randn(1, 2, 3, 4)
5628+
ep = torch.export.export(Relu(), (sample_input,)) # noqa: F405
5629+
torch.export.save(ep, f"{tmp_dir}/relu.pt2")
5630+
torch.save(sample_input, f"{tmp_dir}/input_0_0.pt")
5631+
with open(f"{tmp_dir}/input_list", "w") as f:
5632+
f.write(f"{tmp_dir}/input_0_0.pt\n")
5633+
5634+
# quantize
5635+
cmds = [
5636+
"python",
5637+
"-m",
5638+
"examples.qualcomm.util_scripts.cli",
5639+
"quantize",
5640+
"--artifact",
5641+
f"{tmp_dir}/relu.pt2",
5642+
"--output_folder",
5643+
f"{tmp_dir}/q_out",
5644+
"--input_list",
5645+
f"{tmp_dir}/input_list",
5646+
]
5647+
subprocess.run(cmds, stdout=subprocess.DEVNULL)
5648+
self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/relu_quantized.pt2"))
5649+
# compile
5650+
cmds = [
5651+
"python",
5652+
"-m",
5653+
"examples.qualcomm.util_scripts.cli",
5654+
"compile",
5655+
"--artifact",
5656+
f"{tmp_dir}/q_out/relu_quantized.pt2",
5657+
"--output_folder",
5658+
f"{tmp_dir}/c_out",
5659+
"--model",
5660+
self.model,
5661+
]
5662+
subprocess.run(cmds, stdout=subprocess.DEVNULL)
5663+
self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/relu_quantized.pte"))
5664+
self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/relu_quantized.svg"))
5665+
# execute
5666+
cmds = [
5667+
"python",
5668+
"-m",
5669+
"examples.qualcomm.util_scripts.cli",
5670+
"execute",
5671+
"--artifact",
5672+
f"{tmp_dir}/c_out/relu_quantized.pte",
5673+
"--output_folder",
5674+
f"{tmp_dir}/e_out",
5675+
"--model",
5676+
self.model,
5677+
"--device",
5678+
self.device,
5679+
"--build_folder",
5680+
self.build_folder,
5681+
"--input_list",
5682+
f"{tmp_dir}/input_list",
5683+
]
5684+
subprocess.run(cmds, stdout=subprocess.DEVNULL)
5685+
self.assertTrue(os.path.isfile(f"{tmp_dir}/e_out/output_0_0.pt"))
5686+
56255687

56265688
def setup_environment():
56275689
parser = setup_common_args_and_variables()

examples/qualcomm/qaihub_scripts/utils/export.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,14 @@
1818
from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
1919
from executorch.backends.qualcomm.utils.utils import (
2020
draw_graph,
21-
ExecutorchBackendConfig,
2221
from_context_binary,
2322
generate_htp_compiler_spec,
2423
generate_qnn_executorch_compiler_spec,
2524
generate_qnn_executorch_option,
2625
)
2726
from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary
2827
from executorch.examples.qualcomm.utils import make_output_dir, SimpleADB
28+
from executorch.exir import ExecutorchBackendConfig
2929
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
3030

3131

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# CLI Tool for Quantize / Compile / Deploy PyTorch Model with QNN Backend
2+
3+
An easy-to-use tool for quantizing / compiling / executing .pte program with Qualcomm AI Engine Direct. The tool is verified with [host environment](../../../docs/source/backends-qualcomm.md#host-os).
4+
5+
## Description
6+
7+
This tool aims for users who want to deploy models with ExecuTorch runtime. It makes it possible to produce a .pte program in a few steps.<br/>
8+
9+
### Quantizing Model
10+
11+
* Save torch.nn.Module with .pt2 format & prepare input data
12+
```bash
13+
# create workspace for following operations
14+
cd path/to/executorch
15+
mkdir cli_example
16+
```
17+
```python
18+
# take SimpleModel as an example
19+
import torch
20+
from executorch.backends.qualcomm.tests.models import SimpleModel
21+
from pathlib import Path
22+
# make example inputs
23+
example_inputs = (torch.randn(1, 32, 28, 28), torch.randn(1, 32, 28, 28))
24+
# generate ExportedProgram
25+
ep = torch.export.export(SimpleModel(), example_inputs)
26+
# save to workspace
27+
ws = f"{Path().cwd()}/cli_example"
28+
torch.export.save(ep, f"{ws}/simple_model.pt2")
29+
# prepare calibration dataset: 2 sets of data with 2 inputs each
30+
input_list = ""
31+
for i in range(2):
32+
current_input = ""
33+
for j in range(2):
34+
file_name = f"{ws}/input_{i}_{j}.pt"
35+
torch.save(torch.randn(1, 32, 28, 28), file_name)
36+
current_input += f"{file_name} "
37+
input_list += f"{current_input.strip()}\n"
38+
39+
with open(f"{ws}/input_list", 'w') as f:
40+
f.write(input_list)
41+
```
42+
43+
* Quantize
44+
```bash
45+
# user could get more information via: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli quantize -h
46+
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli quantize -a cli_example/simple_model.pt2 -o cli_example/quantize_output -c use_8a8w -i cli_example/input_list --per_channel
47+
```
48+
* Artifacts for quantized .pt2 file
49+
- `cli_example/quantize_output/simple_model_quantized.pt2`
50+
51+
52+
### Compiling Program
53+
54+
* Compile .pt2 to .pte program
55+
```bash
56+
# `pip install pydot` if package is missing
57+
# user could get more information via: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -h
58+
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -a cli_example/quantize_output/simple_model_quantized.pt2 -o cli_example/compile_output -m SM8750
59+
```
60+
* (Optional) Compile pre-generated context binary to .pte program
61+
```bash
62+
# `pip install pydot` if package is missing
63+
# user could get more information via: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -h
64+
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -a model.bin -o path/to/model/output -m SM8750
65+
```
66+
* Artifacts for .pte file and figure of graph information
67+
- `cli_example/compile_output/simple_model_quantized.pte`
68+
- `cli_example/compile_output/simple_model_quantized.svg`
69+
70+
### Executing Program
71+
72+
* Execute .pte program
73+
```bash
74+
# user could get more information via: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli execute -h
75+
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli execute -a cli_example/compile_output/simple_model_quantized.pte -o cli_example/execute_output -i cli_example/input_list -s $DEVICE_SERIAL -b build-android -m SM8750
76+
```
77+
* Artifacts for execution outputs (one tensor file per graph output, per dataset entry)
78+
- `cli_example/execute_output/output_{data_index}_{output_index}.pt`.<br/>
79+
`data_index` represents the sequence of dataset, `output_index` stands for the order of graph output.

0 commit comments

Comments
 (0)