Commit 65450ec

Qualcomm AI Engine Direct - support cli
- add cli for quantize / compile / execute pipeline
1 parent be8ffd1 commit 65450ec

6 files changed: +679 -24 lines changed

backends/qualcomm/qnn_preprocess.py

Lines changed: 31 additions & 20 deletions
```diff
@@ -78,10 +78,7 @@ def _build_op_wrappers(
                         )
                         assert node.target == context_loader_target, err_msg
                         # if graph has context binary loader node, return directly
-                        return PreprocessResult(
-                            processed_bytes=node.meta[OpContextLoader.meta_ctx_bin],
-                            debug_handle_map={},
-                        )
+                        return node.meta[OpContextLoader.meta_ctx_bin]
                     except:
                         raise RuntimeError(err_msg)
 
@@ -161,30 +158,44 @@ def preprocess_multimethod(
                 generate_qnn_executorch_option(compile_spec)
             )
             qnn_manager.Init()
-            py_op_wrapper_list = []
+            py_op_wrapper_list, ctx_binary_list = [], []
             for j, programs in enumerate(edge_programs.values()):
                 logger.info(f"Processing Method({j}): ({i+1}/{num_sub_graphs})")
                 py_op_wrappers = QnnBackend._build_op_wrappers(
                     programs[i],
                     qnn_manager.IsTensorDump(),
                     option.op_package_options.op_package_infos,
                 )
-                py_op_wrapper_list.append(
-                    [py_op_wrapper.GetOpWrapper() for py_op_wrapper in py_op_wrappers]
-                )
+                if isinstance(py_op_wrappers, bytes):
+                    ctx_binary_list.append(py_op_wrappers)
+                else:
+                    py_op_wrapper_list.append(
+                        [
+                            py_op_wrapper.GetOpWrapper()
+                            for py_op_wrapper in py_op_wrappers
+                        ]
+                    )
 
-            qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
-            assert (
-                len(qnn_context_binary) != 0
-            ), "Failed to generate Qnn context binary."
-            qnn_manager.Destroy()
-            # methods should share the same context binary for current partition
-            for key in edge_programs.keys():
-                all_processed_results[key].append(
-                    PreprocessResult(
-                        processed_bytes=bytes(qnn_context_binary),
-                        debug_handle_map={},
+            if len(py_op_wrapper_list) == len(edge_programs.values()):
+                qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
+                assert (
+                    len(qnn_context_binary) != 0
+                ), "Failed to generate Qnn context binary."
+                qnn_manager.Destroy()
+                # methods should share the same context binary for current partition
+                for key in edge_programs.keys():
+                    all_processed_results[key].append(
+                        PreprocessResult(
+                            processed_bytes=bytes(qnn_context_binary),
+                            debug_handle_map={},
+                        )
                     )
-                )
+            elif len(ctx_binary_list) == len(edge_programs.values()):
+                for i, key in enumerate(edge_programs.keys()):
+                    all_processed_results[key].append(
+                        PreprocessResult(processed_bytes=ctx_binary_list[i])
+                    )
+            else:
+                raise RuntimeError("Hybrid compilation is not supported")
 
         return all_processed_results
```
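In short, `_build_op_wrappers` now returns either the raw context-binary bytes stashed by `OpContextLoader` or a list of op wrappers that still needs compilation, and `preprocess_multimethod` keeps the two cases separate instead of always calling `qnn_manager.Compile`. A rough sketch of how a caller can consume that contract; names such as `program`, `tensor_dump`, and `op_package_infos` are placeholders, not taken from this commit:

```python
# Sketch only: dispatch on the new _build_op_wrappers return type.
result = QnnBackend._build_op_wrappers(program, tensor_dump, op_package_infos)

if isinstance(result, bytes):
    # The graph already carries a pre-built QNN context binary; wrap it as-is.
    processed = PreprocessResult(processed_bytes=result)
else:
    # Plain op wrappers: compile them into a context binary first.
    qnn_context_binary = qnn_manager.Compile(
        graph_name, [[wrapper.GetOpWrapper() for wrapper in result]]
    )
    processed = PreprocessResult(
        processed_bytes=bytes(qnn_context_binary), debug_handle_map={}
    )
```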

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 62 additions & 0 deletions
```diff
@@ -5377,6 +5377,68 @@ def test_debugger_generate_optrace(self):
             qhas_data = json.load(qhas_file)
             self.assertIn("data", qhas_data)
 
+    def test_cli(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            sample_input = torch.randn(1, 2, 3, 4)
+            ep = torch.export.export(Relu(), (sample_input,))  # noqa: F405
+            torch.export.save(ep, f"{tmp_dir}/relu.pt2")
+            torch.save(sample_input, f"{tmp_dir}/input_0_0.pt")
+            with open(f"{tmp_dir}/input_list", "w") as f:
+                f.write(f"{tmp_dir}/input_0_0.pt\n")
+
+            # quantize
+            cmds = [
+                "python",
+                "-m",
+                "examples.qualcomm.util_scripts.cli",
+                "quantize",
+                "--artifact",
+                f"{tmp_dir}/relu.pt2",
+                "--output_folder",
+                f"{tmp_dir}/q_out",
+                "--input_list",
+                f"{tmp_dir}/input_list",
+            ]
+            subprocess.run(cmds, stdout=subprocess.DEVNULL)
+            self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/relu_quantized.pt2"))
+            # compile
+            cmds = [
+                "python",
+                "-m",
+                "examples.qualcomm.util_scripts.cli",
+                "compile",
+                "--artifact",
+                f"{tmp_dir}/q_out/relu_quantized.pt2",
+                "--output_folder",
+                f"{tmp_dir}/c_out",
+                "--model",
+                self.model,
+            ]
+            subprocess.run(cmds, stdout=subprocess.DEVNULL)
+            self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/relu_quantized.pte"))
+            self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/relu_quantized.svg"))
+            # execute
+            cmds = [
+                "python",
+                "-m",
+                "examples.qualcomm.util_scripts.cli",
+                "execute",
+                "--artifact",
+                f"{tmp_dir}/c_out/relu_quantized.pte",
+                "--output_folder",
+                f"{tmp_dir}/e_out",
+                "--model",
+                self.model,
+                "--device",
+                self.device,
+                "--build_folder",
+                self.build_folder,
+                "--input_list",
+                f"{tmp_dir}/input_list",
+            ]
+            subprocess.run(cmds, stdout=subprocess.DEVNULL)
+            self.assertTrue(os.path.isfile(f"{tmp_dir}/e_out/output_0_0.pt"))
+
 
 def setup_environment():
     parser = setup_common_args_and_variables()
```

examples/qualcomm/qaihub_scripts/utils/export.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -18,14 +18,14 @@
 from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
 from executorch.backends.qualcomm.utils.utils import (
     draw_graph,
-    ExecutorchBackendConfig,
     from_context_binary,
     generate_htp_compiler_spec,
     generate_qnn_executorch_compiler_spec,
     generate_qnn_executorch_option,
 )
 from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary
 from executorch.examples.qualcomm.utils import make_output_dir, SimpleADB
+from executorch.exir import ExecutorchBackendConfig
 from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
 
```
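The only change here is where `ExecutorchBackendConfig` comes from: it now resolves from `executorch.exir` rather than being re-exported by the Qualcomm utils. A hedged sketch of how it is typically paired with `MemoryPlanningPass` when serializing a lowered program; `edge_prog` is an assumed `EdgeProgramManager` and the pass arguments are illustrative, not taken from this script:

```python
# Sketch under assumptions: `edge_prog` is an already-partitioned EdgeProgramManager.
from executorch.exir import ExecutorchBackendConfig
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass

config = ExecutorchBackendConfig(
    memory_planning_pass=MemoryPlanningPass(
        alloc_graph_input=False,   # illustrative: skip planning for graph I/O buffers
        alloc_graph_output=False,
    ),
)
exec_prog = edge_prog.to_executorch(config=config)
with open("model.pte", "wb") as f:
    f.write(exec_prog.buffer)
```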

Lines changed: 79 additions & 0 deletions
# CLI Tool to Quantize / Compile / Deploy a PyTorch Model with the QNN Backend

An easy-to-use tool for quantizing / compiling / executing .pte programs with Qualcomm AI Engine Direct. The tool is verified with the [host environment](../../../../docs/source/backends-qualcomm.md#host-os).

## Description

This tool is aimed at users who want to deploy models with the ExecuTorch runtime. It lets them produce a .pte program in a few steps.

### Quantizing Model

* Save the torch.nn.Module in .pt2 format and prepare input data
```bash
# create a workspace for the following operations
cd path/to/executorch
mkdir cli_example
```
```python
# take SimpleModel as an example
import torch
from executorch.backends.qualcomm.tests.models import SimpleModel
from pathlib import Path
# make example inputs
example_inputs = (torch.randn(1, 32, 28, 28), torch.randn(1, 32, 28, 28))
# generate ExportedProgram
ep = torch.export.export(SimpleModel(), example_inputs)
# save to workspace
ws = f"{Path().cwd()}/cli_example"
torch.export.save(ep, f"{ws}/simple_model.pt2")
# prepare calibration dataset: 2 sets of data with 2 inputs each
input_list = ""
for i in range(2):
    current_input = ""
    for j in range(2):
        file_name = f"{ws}/input_{i}_{j}.pt"
        torch.save(torch.randn(1, 32, 28, 28), file_name)
        current_input += f"{file_name} "
    input_list += f"{current_input.strip()}\n"

with open(f"{ws}/input_list", 'w') as f:
    f.write(input_list)
```

* Quantize
```bash
# for more options, run: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli quantize -h
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli quantize -a cli_example/simple_model.pt2 -o cli_example/quantize_output -c use_8a8w -i cli_example/input_list --per_channel
```
* Artifact: the quantized .pt2 file
  - `cli_example/quantize_output/simple_model_quantized.pt2`
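
* (Optional) Sanity-check the quantized artifact. The sketch below is illustrative rather than part of the CLI flow; it assumes the workspace layout above and that the quantized ExportedProgram remains runnable on CPU:
```python
import torch

ws = "cli_example"
# reload the quantized program produced by the quantize step
ep = torch.export.load(f"{ws}/quantize_output/simple_model_quantized.pt2")
# run it on the first calibration set saved earlier
sample = (torch.load(f"{ws}/input_0_0.pt"), torch.load(f"{ws}/input_0_1.pt"))
print(ep.module()(*sample))
```
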
### Compiling Program

* Compile a .pt2 model into a .pte program
```bash
# `pip install pydot` if the package is missing
# for more options, run: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -h
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -a cli_example/quantize_output/simple_model_quantized.pt2 -o cli_example/compile_output -m SM8750
```
* (Optional) Compile a pre-generated context binary into a .pte program
```bash
# `pip install pydot` if the package is missing
# for more options, run: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -h
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -a model.bin -o path/to/model/output -m SM8750
```
* Artifacts: the .pte file and a figure of graph information
  - `cli_example/compile_output/simple_model_quantized.pte`
  - `cli_example/compile_output/simple_model_quantized.svg`

### Executing Program

* Execute the .pte program
```bash
# for more options, run: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli execute -h
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli execute -a cli_example/compile_output/simple_model_quantized.pte -o cli_example/execute_output -i cli_example/input_list -s $DEVICE_SERIAL -b build-android -m SM8750
```
* Artifacts: graph outputs saved as tensors
  - `cli_example/execute_output/output_{data_index}_{output_index}.pt`, where `data_index` is the index of the input dataset and `output_index` is the index of the graph output.
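
* (Optional) Compare the on-device outputs with a CPU reference. This is an illustrative sketch, not part of the CLI; it assumes the workspace above, a single graph output, and that the quantized program can be re-run on CPU:
```python
import torch

ws = "cli_example"
ep = torch.export.load(f"{ws}/quantize_output/simple_model_quantized.pt2")

for data_index in range(2):  # two calibration sets were prepared above
    inputs = tuple(torch.load(f"{ws}/input_{data_index}_{j}.pt") for j in range(2))
    reference = ep.module()(*inputs)
    if isinstance(reference, (tuple, list)):
        reference = reference[0]
    device_out = torch.load(f"{ws}/execute_output/output_{data_index}_0.pt")
    # HTP execution is quantized, so expect close but not bit-identical results
    print(data_index, torch.mean((reference - device_out) ** 2).item())
```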

0 commit comments