51 changes: 31 additions & 20 deletions backends/qualcomm/qnn_preprocess.py
@@ -78,10 +78,7 @@ def _build_op_wrappers(
             )
             assert node.target == context_loader_target, err_msg
             # if graph has context binary loader node, return directly
-            return PreprocessResult(
-                processed_bytes=node.meta[OpContextLoader.meta_ctx_bin],
-                debug_handle_map={},
-            )
+            return node.meta[OpContextLoader.meta_ctx_bin]
         except:
             raise RuntimeError(err_msg)
**Contributor** commented:
> Can you share the reason we need these changes?

**Collaborator (Author)** replied:
> This is the path for loading a pre-built context binary, I think it was broken before.

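To make the author's point concrete: after this change, `_build_op_wrappers` returns the raw context-binary bytes when the graph wraps a pre-built binary, and a list of op wrappers otherwise, so callers dispatch on the return type. A minimal sketch of that contract with simplified names (the real code keys off `OpContextLoader.meta_ctx_bin`):

```python
from typing import List, Union

def build_op_wrappers_sketch(node_meta: dict) -> Union[bytes, List[object]]:
    """Return pre-built context-binary bytes if present, else op wrappers."""
    if "ctx_bin" in node_meta:       # stand-in for OpContextLoader.meta_ctx_bin
        return node_meta["ctx_bin"]  # bytes: compilation can be skipped entirely
    return []                        # op wrappers, to be compiled into a binary

result = build_op_wrappers_sketch({"ctx_bin": b"\x00\x01"})
if isinstance(result, bytes):
    processed_bytes = result  # use the pre-built binary as-is
```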
@@ -161,30 +158,44 @@ def preprocess_multimethod(
                 generate_qnn_executorch_option(compile_spec)
             )
             qnn_manager.Init()
-            py_op_wrapper_list = []
+            py_op_wrapper_list, ctx_binary_list = [], []
             for j, programs in enumerate(edge_programs.values()):
                 logger.info(f"Processing Method({j}): ({i+1}/{num_sub_graphs})")
                 py_op_wrappers = QnnBackend._build_op_wrappers(
                     programs[i],
                     qnn_manager.IsTensorDump(),
                     option.op_package_options.op_package_infos,
                 )
-                py_op_wrapper_list.append(
-                    [py_op_wrapper.GetOpWrapper() for py_op_wrapper in py_op_wrappers]
-                )
+                if isinstance(py_op_wrappers, bytes):
+                    ctx_binary_list.append(py_op_wrappers)
+                else:
+                    py_op_wrapper_list.append(
+                        [
+                            py_op_wrapper.GetOpWrapper()
+                            for py_op_wrapper in py_op_wrappers
+                        ]
+                    )
 
-            qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
-            assert (
-                len(qnn_context_binary) != 0
-            ), "Failed to generate Qnn context binary."
-            qnn_manager.Destroy()
-            # methods should share the same context binary for current partition
-            for key in edge_programs.keys():
-                all_processed_results[key].append(
-                    PreprocessResult(
-                        processed_bytes=bytes(qnn_context_binary),
-                        debug_handle_map={},
-                    )
-                )
+            if len(py_op_wrapper_list) == len(edge_programs.values()):
+                qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
+                assert (
+                    len(qnn_context_binary) != 0
+                ), "Failed to generate Qnn context binary."
+                qnn_manager.Destroy()
+                # methods should share the same context binary for current partition
+                for key in edge_programs.keys():
+                    all_processed_results[key].append(
+                        PreprocessResult(
+                            processed_bytes=bytes(qnn_context_binary),
+                            debug_handle_map={},
+                        )
+                    )
+            elif len(ctx_binary_list) == len(edge_programs.values()):
+                for i, key in enumerate(edge_programs.keys()):
+                    all_processed_results[key].append(
+                        PreprocessResult(processed_bytes=ctx_binary_list[i])
+                    )
+            else:
+                raise RuntimeError("Hybrid compilation is not supported")
 
         return all_processed_results
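In effect, `preprocess_multimethod` now treats each partition as all-or-nothing: either every method yields op wrappers (compiled into one shared context binary) or every method carries a pre-built binary (reused directly); a mix raises. A simplified sketch of just that decision, not the real signatures:

```python
def dispatch(num_methods: int, num_compiled: int, num_prebuilt: int) -> str:
    # all methods produced op wrappers -> compile one shared context binary
    if num_compiled == num_methods:
        return "compile"
    # all methods came with pre-built context binaries -> reuse them
    if num_prebuilt == num_methods:
        return "reuse"
    # mixing the two paths within one partition is rejected
    raise RuntimeError("Hybrid compilation is not supported")
```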
62 changes: 62 additions & 0 deletions backends/qualcomm/tests/test_qnn_delegate.py
@@ -5622,6 +5622,68 @@ def test_debugger_generate_optrace(self):
             qhas_data = json.load(qhas_file)
             self.assertIn("data", qhas_data)
 
+    def test_cli(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            sample_input = torch.randn(1, 2, 3, 4)
+            ep = torch.export.export(Relu(), (sample_input,))  # noqa: F405
+            torch.export.save(ep, f"{tmp_dir}/relu.pt2")
+            torch.save(sample_input, f"{tmp_dir}/input_0_0.pt")
+            with open(f"{tmp_dir}/input_list", "w") as f:
+                f.write(f"{tmp_dir}/input_0_0.pt\n")
+
+            # quantize
+            cmds = [
+                "python",
+                "-m",
+                "examples.qualcomm.util_scripts.cli",
+                "quantize",
+                "--artifact",
+                f"{tmp_dir}/relu.pt2",
+                "--output_folder",
+                f"{tmp_dir}/q_out",
+                "--input_list",
+                f"{tmp_dir}/input_list",
+            ]
+            subprocess.run(cmds, stdout=subprocess.DEVNULL)
+            self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/relu_quantized.pt2"))
+            # compile
+            cmds = [
+                "python",
+                "-m",
+                "examples.qualcomm.util_scripts.cli",
+                "compile",
+                "--artifact",
+                f"{tmp_dir}/q_out/relu_quantized.pt2",
+                "--output_folder",
+                f"{tmp_dir}/c_out",
+                "--model",
+                self.model,
+            ]
+            subprocess.run(cmds, stdout=subprocess.DEVNULL)
+            self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/relu_quantized.pte"))
+            self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/relu_quantized.svg"))
+            # execute
+            cmds = [
+                "python",
+                "-m",
+                "examples.qualcomm.util_scripts.cli",
+                "execute",
+                "--artifact",
+                f"{tmp_dir}/c_out/relu_quantized.pte",
+                "--output_folder",
+                f"{tmp_dir}/e_out",
+                "--model",
+                self.model,
+                "--device",
+                self.device,
+                "--build_folder",
+                self.build_folder,
+                "--input_list",
+                f"{tmp_dir}/input_list",
+            ]
+            subprocess.run(cmds, stdout=subprocess.DEVNULL)
+            self.assertTrue(os.path.isfile(f"{tmp_dir}/e_out/output_0_0.pt"))
 
 
 def setup_environment():
     parser = setup_common_args_and_variables()
2 changes: 1 addition & 1 deletion examples/qualcomm/qaihub_scripts/utils/export.py
@@ -18,14 +18,14 @@
 from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
 from executorch.backends.qualcomm.utils.utils import (
     draw_graph,
-    ExecutorchBackendConfig,
     from_context_binary,
     generate_htp_compiler_spec,
     generate_qnn_executorch_compiler_spec,
     generate_qnn_executorch_option,
 )
 from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary
 from executorch.examples.qualcomm.utils import make_output_dir, SimpleADB
+from executorch.exir import ExecutorchBackendConfig
 from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
79 changes: 79 additions & 0 deletions examples/qualcomm/util_scripts/README.md
@@ -0,0 +1,79 @@
# CLI Tool to Quantize / Compile / Deploy PyTorch Models with the QNN Backend

An easy-to-use tool for quantizing, compiling, and executing .pte programs with Qualcomm AI Engine Direct. The tool is verified with the [host environment](../../../docs/source/backends-qualcomm.md#host-os).

## Description

This tool is aimed at users who want to deploy models with the ExecuTorch runtime. It makes it possible to produce a .pte program in just a few steps.<br/>

### Quantizing a Model

* Save the torch.nn.Module in .pt2 format & prepare input data
```bash
# create a workspace for the following operations
cd path/to/executorch
mkdir cli_example
```
```python
# take SimpleModel as an example
import torch
from executorch.backends.qualcomm.tests.models import SimpleModel
from pathlib import Path
# make example inputs
example_inputs = (torch.randn(1, 32, 28, 28), torch.randn(1, 32, 28, 28))
# generate ExportedProgram
ep = torch.export.export(SimpleModel(), example_inputs)
# save to workspace
ws = f"{Path().cwd()}/cli_example"
torch.export.save(ep, f"{ws}/simple_model.pt2")
# prepare calibration dataset: 2 sets of data with 2 inputs each
input_list = ""
for i in range(2):
current_input = ""
for j in range(2):
file_name = f"{ws}/input_{i}_{j}.pt"
torch.save(torch.randn(1, 32, 28, 28), file_name)
current_input += f"{file_name} "
input_list += f"{current_input.strip()}\n"

with open(f"{ws}/input_list", 'w') as f:
f.write(input_list)
```
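For reference, the generated `input_list` then holds one line per calibration set, with space-separated tensor paths (the script above writes absolute paths under the workspace; abbreviated here):

```
cli_example/input_0_0.pt cli_example/input_0_1.pt
cli_example/input_1_0.pt cli_example/input_1_1.pt
```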

* Quantize
```bash
# for more options, run: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli quantize -h
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli quantize -a cli_example/simple_model.pt2 -o cli_example/quantize_output -c use_8a8w -i cli_example/input_list --per_channel
```
* Artifact: the quantized .pt2 file
- `cli_example/quantize_output/simple_model_quantized.pt2`

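As a quick sanity check, the quantized program can be loaded back with `torch.export.load` (assuming the CLI saves it via `torch.export.save`, mirroring how the input `.pt2` was produced):

```python
import torch

ep = torch.export.load("cli_example/quantize_output/simple_model_quantized.pt2")
print(ep.graph)  # inspect the quantized operators
```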

### Compiling a Program

* Compile .pt2 to a .pte program
```bash
# `pip install pydot` if the package is missing
# for more options, run: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -h
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -a cli_example/quantize_output/simple_model_quantized.pt2 -o cli_example/compile_output -m SM8750
```
* (Optional) Compile a pre-generated context binary to a .pte program
```bash
# `pip install pydot` if the package is missing
# for more options, run: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -h
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli compile -a model.bin -o path/to/model/output -m SM8750
```
* Artifacts: the .pte file and a figure of the graph
- `cli_example/compile_output/simple_model_quantized.pte`
- `cli_example/compile_output/simple_model_quantized.svg`

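Before moving on, a small check (paths taken from the commands above) that the compile step emitted both artifacts:

```python
import os

for artifact in (
    "cli_example/compile_output/simple_model_quantized.pte",
    "cli_example/compile_output/simple_model_quantized.svg",
):
    assert os.path.isfile(artifact), f"missing artifact: {artifact}"
```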
### Executing a Program

* Execute the .pte program
```bash
# for more options, run: PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli execute -h
PYTHONPATH=.. python -m examples.qualcomm.util_scripts.cli execute -a cli_example/compile_output/simple_model_quantized.pte -o cli_example/execute_output -i cli_example/input_list -s $DEVICE_SERIAL -b build-android -m SM8750
```
* Artifacts: output tensors from on-device execution
- `cli_example/execute_output/output_{data_index}_{output_index}.pt`.<br/>
`data_index` represents the sequence of the dataset; `output_index` stands for the order of the graph outputs.
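Since outputs are stored as `.pt` files, they can be loaded back for comparison against a reference run (a sketch, assuming the CLI writes them with `torch.save`):

```python
import torch

# first data set, first graph output
device_out = torch.load("cli_example/execute_output/output_0_0.pt")
print(device_out.shape, device_out.dtype)
```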