Skip to content

Commit 6fed744

Browse files
haowhsu-quic authored and hinriksnaer committed
Qualcomm AI Engine Direct - support cli (pytorch#11788)
### Summary - add cli for quantize / compile / execute pipeline ### Test plan ```bash python backends/qualcomm/tests/test_qnn_delegate.py TestUtilsScript.test_cli -b build-android -s $device -m SM8750 ``` cc @cccclai @winskuo-quic @shewu-quic @cbilgin
1 parent 0da5cb4 commit 6fed744

File tree

6 files changed

+679
-24
lines changed

6 files changed

+679
-24
lines changed

backends/qualcomm/qnn_preprocess.py

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,7 @@ def _build_op_wrappers(
7878
)
7979
assert node.target == context_loader_target, err_msg
8080
# if graph has context binary loader node, return directly
81-
return PreprocessResult(
82-
processed_bytes=node.meta[OpContextLoader.meta_ctx_bin],
83-
debug_handle_map={},
84-
)
81+
return node.meta[OpContextLoader.meta_ctx_bin]
8582
except:
8683
raise RuntimeError(err_msg)
8784

@@ -161,30 +158,44 @@ def preprocess_multimethod(
161158
generate_qnn_executorch_option(compile_spec)
162159
)
163160
qnn_manager.Init()
164-
py_op_wrapper_list = []
161+
py_op_wrapper_list, ctx_binary_list = [], []
165162
for j, programs in enumerate(edge_programs.values()):
166163
logger.info(f"Processing Method({j}): ({i+1}/{num_sub_graphs})")
167164
py_op_wrappers = QnnBackend._build_op_wrappers(
168165
programs[i],
169166
qnn_manager.IsTensorDump(),
170167
option.op_package_options.op_package_infos,
171168
)
172-
py_op_wrapper_list.append(
173-
[py_op_wrapper.GetOpWrapper() for py_op_wrapper in py_op_wrappers]
174-
)
169+
if isinstance(py_op_wrappers, bytes):
170+
ctx_binary_list.append(py_op_wrappers)
171+
else:
172+
py_op_wrapper_list.append(
173+
[
174+
py_op_wrapper.GetOpWrapper()
175+
for py_op_wrapper in py_op_wrappers
176+
]
177+
)
175178

176-
qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
177-
assert (
178-
len(qnn_context_binary) != 0
179-
), "Failed to generate Qnn context binary."
180-
qnn_manager.Destroy()
181-
# methods should share the same context binary for current partition
182-
for key in edge_programs.keys():
183-
all_processed_results[key].append(
184-
PreprocessResult(
185-
processed_bytes=bytes(qnn_context_binary),
186-
debug_handle_map={},
179+
if len(py_op_wrapper_list) == len(edge_programs.values()):
180+
qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
181+
assert (
182+
len(qnn_context_binary) != 0
183+
), "Failed to generate Qnn context binary."
184+
qnn_manager.Destroy()
185+
# methods should share the same context binary for current partition
186+
for key in edge_programs.keys():
187+
all_processed_results[key].append(
188+
PreprocessResult(
189+
processed_bytes=bytes(qnn_context_binary),
190+
debug_handle_map={},
191+
)
187192
)
188-
)
193+
elif len(ctx_binary_list) == len(edge_programs.values()):
194+
for i, key in enumerate(edge_programs.keys()):
195+
all_processed_results[key].append(
196+
PreprocessResult(processed_bytes=ctx_binary_list[i])
197+
)
198+
else:
199+
raise RuntimeError("Hybrid compilation is not supported")
189200

190201
return all_processed_results

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5622,6 +5622,68 @@ def test_debugger_generate_optrace(self):
56225622
qhas_data = json.load(qhas_file)
56235623
self.assertIn("data", qhas_data)
56245624

5625+
def test_cli(self):
5626+
with tempfile.TemporaryDirectory() as tmp_dir:
5627+
sample_input = torch.randn(1, 2, 3, 4)
5628+
ep = torch.export.export(Relu(), (sample_input,)) # noqa: F405
5629+
torch.export.save(ep, f"{tmp_dir}/relu.pt2")
5630+
torch.save(sample_input, f"{tmp_dir}/input_0_0.pt")
5631+
with open(f"{tmp_dir}/input_list", "w") as f:
5632+
f.write(f"{tmp_dir}/input_0_0.pt\n")
5633+
5634+
# quantize
5635+
cmds = [
5636+
"python",
5637+
"-m",
5638+
"examples.qualcomm.util_scripts.cli",
5639+
"quantize",
5640+
"--artifact",
5641+
f"{tmp_dir}/relu.pt2",
5642+
"--output_folder",
5643+
f"{tmp_dir}/q_out",
5644+
"--input_list",
5645+
f"{tmp_dir}/input_list",
5646+
]
5647+
subprocess.run(cmds, stdout=subprocess.DEVNULL)
5648+
self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/relu_quantized.pt2"))
5649+
# compile
5650+
cmds = [
5651+
"python",
5652+
"-m",
5653+
"examples.qualcomm.util_scripts.cli",
5654+
"compile",
5655+
"--artifact",
5656+
f"{tmp_dir}/q_out/relu_quantized.pt2",
5657+
"--output_folder",
5658+
f"{tmp_dir}/c_out",
5659+
"--model",
5660+
self.model,
5661+
]
5662+
subprocess.run(cmds, stdout=subprocess.DEVNULL)
5663+
self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/relu_quantized.pte"))
5664+
self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/relu_quantized.svg"))
5665+
# execute
5666+
cmds = [
5667+
"python",
5668+
"-m",
5669+
"examples.qualcomm.util_scripts.cli",
5670+
"execute",
5671+
"--artifact",
5672+
f"{tmp_dir}/c_out/relu_quantized.pte",
5673+
"--output_folder",
5674+
f"{tmp_dir}/e_out",
5675+
"--model",
5676+
self.model,
5677+
"--device",
5678+
self.device,
5679+
"--build_folder",
5680+
self.build_folder,
5681+
"--input_list",
5682+
f"{tmp_dir}/input_list",
5683+
]
5684+
subprocess.run(cmds, stdout=subprocess.DEVNULL)
5685+
self.assertTrue(os.path.isfile(f"{tmp_dir}/e_out/output_0_0.pt"))
5686+
56255687

56265688
def setup_environment():
56275689
parser = setup_common_args_and_variables()

examples/qualcomm/qaihub_scripts/utils/export.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,14 @@
1818
from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
1919
from executorch.backends.qualcomm.utils.utils import (
2020
draw_graph,
21-
ExecutorchBackendConfig,
2221
from_context_binary,
2322
generate_htp_compiler_spec,
2423
generate_qnn_executorch_compiler_spec,
2524
generate_qnn_executorch_option,
2625
)
2726
from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary
2827
from executorch.examples.qualcomm.utils import make_output_dir, SimpleADB
28+
from executorch.exir import ExecutorchBackendConfig
2929
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
3030

3131

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# CLI Tool for Quantize / Compile / Deploy PyTorch Model with QNN Backend
2+
3+
An easy-to-use tool for quantizing / compiling / executing .pte program with Qualcomm AI Engine Direct. The tool is verified with [host environment](../../../docs/source/backends-qualcomm.md#host-os).
4+
5+
## Description
6+
7+
This tool aims for users who want to deploy models with ExecuTorch runtime. It makes it possible to produce a .pte program in a few steps.<br/>
8+
9+
### Quantizing Model
10+
11+
* Save torch.nn.Module with .pt2 format & prepare input data
12+
```bash
13+
# create workspace for following operations
14+
cd path/to/executorch
15+
mkdir cli_example
16+
```
17+
```python
18+
# take SimpleModel as an example
19+
import torch
20+
from executorch.backends.qualcomm.tests.models import SimpleModel
21+
from pathlib import Path
22+
# make example inputs
23+
example_inputs = (torch.randn(1, 32, 28, 28), torch.randn(1, 32, 28, 28))
24+
# generate ExportedProgram
25+
ep = torch.export.export(SimpleModel(), example_inputs)
26+
# save to workspace
27+
ws = f"{Path().cwd()}/cli_example"
28+
torch.export.save(ep, f"{ws}/simple_model.pt2")
29+
# prepare calibration dataset: 2 sets of data with 2 inputs each
30+
input_list = ""
31+
for i in range(2):
32+
current_input = ""
33+
for j in range(2):
34+
file_name = f"{ws}/input_{i}_{j}.pt"
35+
torch.save(torch.randn(1, 32, 28, 28), file_name)
36+
current_input += f"{file_name} "
37+
input_list += f"{current_input.strip()}\n"
38+
39+
with open(f"{ws}/input_list", 'w') as f:
40+
f.write(input_list)
41+
```
42+
43+
* Quantize
44+
```bash
45+
# user could get more information via: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli quantize -h
46+
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli quantize -a cli_example/simple_model.pt2 -o cli_example/quantize_output -c use_8a8w -i cli_example/input_list --per_channel
47+
```
48+
* Artifacts for quantized .pt2 file
49+
- `cli_example/quantize_output/simple_model_quantized.pt2`
50+
51+
52+
### Compiling Program
53+
54+
* Compile .pt2 to .pte program
55+
```bash
56+
# `pip install pydot` if package is missing
57+
# user could get more information via: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -h
58+
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -a cli_example/quantize_output/simple_model_quantized.pt2 -o cli_example/compile_output -m SM8750
59+
```
60+
* (Optional) Compile pre-generated context binary to .pte program
61+
```bash
62+
# `pip install pydot` if package is missing
63+
# user could get more information via: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -h
64+
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -a model.bin -o path/to/model/output -m SM8750
65+
```
66+
* Artifacts for .pte file and figure of graph information
67+
- `cli_example/compile_output/simple_model_quantized.pte`
68+
- `cli_example/compile_output/simple_model_quantized.svg`
69+
70+
### Executing Program
71+
72+
* Execute .pte program
73+
```bash
74+
# user could get more information via: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli execute -h
75+
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli execute -a cli_example/compile_output/simple_model_quantized.pte -o cli_example/execute_output -i cli_example/input_list -s $DEVICE_SERIAL -b build-android -m SM8750
76+
```
77+
* Artifacts for execution outputs (one tensor file per graph output, per dataset entry)
78+
- `cli_example/execute_output/output_{data_index}_{output_index}.pt`.<br/>
79+
`data_index` represents the sequence of dataset, `output_index` stands for the order of graph output.

0 commit comments

Comments
 (0)