diff --git a/backends/qualcomm/debugger/README.md b/backends/qualcomm/debugger/README.md new file mode 100644 index 00000000000..60ecb3d71b3 --- /dev/null +++ b/backends/qualcomm/debugger/README.md @@ -0,0 +1,93 @@ +# QAIRT Visualizer + +[QAIRT Visualizer](https://pypi.org/project/qairt-visualizer/) is a Python package designed to help you visualize and analyze data from Qualcomm AI Engine Direct (QNN) models. It provides tools to generate and interpret op traces (`optrace`) and QNN HTP Analysis Summary (`QHAS`), enabling detailed insights into your model's performance and behavior. + +## Installation + +You can install the QAIRT Visualizer package directly from [PyPI](https://pypi.org/project/qairt-visualizer/): + +```bash +pip install qairt-visualizer +``` + +## Quick start +This command launches an interactive GUI to visualize the `optrace` and `QHAS` results. +``` +python -m examples.qualcomm.util_scripts.qairt_visualizer_demo -H ${host} -s ${device} -b build-android -a ${path_to_output_folder} --online_prepare +``` +- If online prepare mode is `enabled`, the following artifacts will be generated: + - `model`.dlc + - `optrace`.json + - `QHAS`.json +- If online prepare mode is `disabled`, the following artifacts will be generated: + - `model`.bin + - `optrace`.json + - `QHAS`.json + +Note: Model visualization is supported only in online prepare mode. +The `.bin` format does not support graph visualization in the QAIRT Visualizer; only the `optrace` and `QHAS` reports can be viewed. +To enable model visualization, please add the `--online_prepare` flag. + +## Details +### 1. Lower to QNN backend +Generate an ExecuTorch binary for Qualcomm platforms. +```python +build_executorch_binary( + model, + example_input, + args.model, + f"{args.artifact}/{pte_filename}", + [example_input], + quant_dtype=QuantDtype.use_8a8w, + online_prepare=args.online_prepare, + optrace=True, +) +``` +### 2. Generate optrace and QHAS +Generate optrace and QHAS files using QNN tools under $QNN_SDK_ROOT. 
After finishing, you will get a `binaries_trace` dictionary. +```python +adb = SimpleADB( + qnn_sdk=os.getenv("QNN_SDK_ROOT"), + build_path=f"{args.build_folder}", + pte_path=f"{args.artifact}/{pte_filename}.pte", + workspace=f"/data/local/tmp/executorch/{pte_filename}", + device_id=args.device, + host_id=args.host, + soc_model=args.model, +) +binaries_trace = generate_optrace( + args.artifact, get_soc_to_chipset_map()[args.model], adb, f"{args.artifact}/{pte_filename}.pte", example_input +) +``` +- **`binaries_trace`**: A dictionary where keys are the dumped file paths and values are tuples containing the paths to the generated optrace and QHAS JSON files. + +- Example 1: {"forward_0.dlc": (optrace_0.json, optrace_0_qnn_htp_analysis_summary.json)} +- Example 2: {"forward_0.bin": (optrace_0.json, optrace_0_qnn_htp_analysis_summary.json)} + +### 3. Visualizing and Analyzing optrace and QHAS + +Once you have the optrace and QHAS files, you can leverage the QAIRT Visualizer to visualize the model graph, optrace and QHAS data. Here's how you can do it: + +```python +import qairt_visualizer +qairt_visualizer.view(f"{args.artifact}/forward_0.dlc", reports=[optrace, qhas]) +``` +or +```python +import qairt_visualizer +qairt_visualizer.view(reports=[optrace, qhas]) +``` + +- **`model`**: Path to your QNN model file (e.g., `path_to_your_model.dlc`). +- **`reports`**: List of report file paths, including the optrace (`optrace_0.json`) and QHAS (`optrace_0_qnn_htp_analysis_summary.json`). + +Note: Files ending with `.bin` do not support graph visualization in qairt_visualizer. + +## Demo + +
+![QAIRT visualizer demo](assets/qairt_visualizer_demo.png)
+
+
+ +For more details, visit the [QAIRT Visualizer](https://pypi.org/project/qairt-visualizer/). diff --git a/backends/qualcomm/debugger/assets/qairt_visualizer_demo.png b/backends/qualcomm/debugger/assets/qairt_visualizer_demo.png new file mode 100644 index 00000000000..bcaeb62a5e1 Binary files /dev/null and b/backends/qualcomm/debugger/assets/qairt_visualizer_demo.png differ diff --git a/backends/qualcomm/debugger/utils.py b/backends/qualcomm/debugger/utils.py index f81687266d2..2c7be66fb68 100644 --- a/backends/qualcomm/debugger/utils.py +++ b/backends/qualcomm/debugger/utils.py @@ -1,9 +1,18 @@ +import json import os +import re import shutil +import subprocess import tempfile +from pathlib import Path +from typing import Tuple import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper import pandas as pd +import torch +from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset +from executorch.backends.qualcomm.utils.utils import dump_context_from_pte + from graphviz import Digraph @@ -180,3 +189,255 @@ def draw(self): dot_file = os.path.join(temp_directory, f"{self.filename}") dot_dest_file = os.path.join(".", f"{self.filename}.dot") shutil.move(dot_file, dot_dest_file) + + +class QnnTool: + def __init__( + self, + tmp_dir, + sample_input, + soc_id, + adb, + build_folder, + workspace="/data/local/tmp/qnn_executorch_test", + ): + self.qnn_sdk = os.environ.get("QNN_SDK_ROOT", None) + self.ndk = os.environ.get("ANDROID_NDK_ROOT", None) + assert self.qnn_sdk, "QNN_SDK_ROOT was not found in environment variable" + assert self.ndk, "ANDROID_NDK_ROOT was not found in environment variable" + + self.tmp_dir = tmp_dir + self.workspace = workspace + self.adb = adb + self.sample_input = sample_input + self.build_folder = build_folder + self.root = str(Path(__file__).resolve().parents[3]) + self.config = { + "backend_extension_config": { + "backend_extensions": { + "config_file_path": "config.json", + }, + "features": { + "qhas_json": 
True, + }, + }, + "config": { + "devices": [ + { + "profiling_level": "linting", + "cores": [ + {"perf_profile": "burst", "rpc_control_latency": 100} + ], + "soc_id": int(soc_id), + } + ] + }, + } + + def qnn_context_binary_generator( + self, + qnn_binary_file="forward_0.dlc", + binary_name="forward.serialized", + ): + for file_name, data in self.config.items(): + with open(f"{self.tmp_dir}/{file_name}.json", "w") as json_file: + json.dump(data, json_file, indent=4) + + target = "x86_64-linux-clang" + cmds = [ + f"{self.qnn_sdk}/bin/{target}/qnn-context-binary-generator", + "--backend", + f"{self.qnn_sdk}/lib/{target}/libQnnHtp.so", + "--model", + f"{self.qnn_sdk}/lib/{target}/libQnnModelDlc.so", + "--dlc_path", + f"{self.tmp_dir}/{qnn_binary_file}", + f"--config_file {self.tmp_dir}/backend_extension_config.json", + f"--binary_file {binary_name}", + f"--output_dir {self.tmp_dir}", + "--profiling_level detailed", + "--profiling_option optrace", + ] + result = subprocess.run( + " ".join(cmds), + shell=True, + executable="/bin/bash", + capture_output=True, + ) + assert os.path.isfile(f"{self.tmp_dir}/{binary_name}.bin"), result.stderr + + def qnn_net_run(self, graph_name="forward.serialized"): + input_list = "" + for idx, _ in enumerate(self.sample_input): + input_name = f"input_{idx}_0.raw" + input_list += input_name + " " + input_list = input_list.strip() + "\n" + + self.config["backend_extension_config"]["backend_extensions"][ + "shared_library_path" + ] = "./libQnnHtpNetRunExtensions.so" + for file_name, data in self.config.items(): + with open(f"{self.tmp_dir}/{file_name}.json", "w") as json_file: + json.dump(data, json_file, indent=4) + + target = "aarch64-android" + files = [ + f"{self.qnn_sdk}/lib/{target}/libQnnHtpNetRunExtensions.so", + f"{self.tmp_dir}/backend_extension_config.json", + f"{self.tmp_dir}/config.json", + f"{self.tmp_dir}/{graph_name}.bin", + f"{self.qnn_sdk}/bin/{target}/qnn-net-run", + ] + cmds = [ + f"export LD_LIBRARY_PATH={self.workspace} 
&&", + f"export ADSP_LIBRARY_PATH={self.workspace} &&", + f"cd {self.workspace} &&", + "./qnn-net-run", + "--backend libQnnHtp.so", + "--input_list input_list.txt", + f"--retrieve_context {graph_name}.bin", + "--use_native_input_files", + "--use_native_output_files", + "--config_file backend_extension_config.json", + "--profiling_level detailed", + "--profiling_option optrace", + ] + self.adb.push( + inputs=self.sample_input, + input_list=input_list, + files=files, + ) + self.adb.execute(custom_runner_cmd=" ".join(cmds)) + self.adb._adb( + [ + "pull", + "-a", + f"{self.workspace}/output/qnn-profiling-data_0.log", + self.tmp_dir, + ] + ) + + assert os.path.isfile( + f"{self.tmp_dir}/qnn-profiling-data_0.log" + ), f"Error: qnn-profiling-data_0.log not found in {self.tmp_dir}" + + def qnn_profile_viewer(self, graph_name="forward_schematic", graph_idx=0): + self.config["backend_extension_config"]["backend_extensions"][ + "shared_library_path" + ] = "./libQnnHtpNetRunExtensions.so" + self.config["backend_extension_config"] = {"features": {"qhas_json": True}} + for file_name, data in self.config.items(): + with open(f"{self.tmp_dir}/{file_name}.json", "w") as json_file: + json.dump(data, json_file, indent=4) + + target = "x86_64-linux-clang" + cmds = [ + f"{self.qnn_sdk}/bin/{target}/qnn-profile-viewer", + f"--config {self.tmp_dir}/backend_extension_config.json", + f"--schematic {self.root}/{graph_name}.bin", + f"--reader {self.qnn_sdk}/lib/{target}/libQnnHtpOptraceProfilingReader.so", + f"--input_log {self.tmp_dir}/qnn-profiling-data_0.log", + f"--output {self.tmp_dir}/optrace_{graph_idx}.json", + ] + result = subprocess.run( + " ".join(cmds), + shell=True, + executable="/bin/bash", + capture_output=True, + ) + assert ( + result.returncode == 0 + ), f"Process failed with error: {result.stderr.decode('utf-8')}" + + def generate_optrace( + self, + qnn_binary_file="forward_0.dlc", + ): + """ + Generate Qnn HTP Optrace Profiling 
https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/htp_backend.html#qnn-htp-optrace-profiling + and QNN HTP Analysis Summary (QHAS) https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/htp_backend.html#qnn-htp-analysis-summary-qhas + . You can utilize the QAIRT Visualizer (https://pypi.org/project/qairt-visualizer/) to visualize the results from the files above. + """ + graph_name, file_extension = os.path.splitext(qnn_binary_file) + assert file_extension in [ + ".dlc", + ".bin", + ], f"Invalid file extension '{file_extension}'. Supported extensions are 'dlc' and 'bin'." + + # Attempt to extract a numeric index from the end of the graph name (e.g., "forward_123") + match = re.match(r"^(.*)_(\d+)$", graph_name) + graph_base_name = graph_name + graph_idx = 0 + + if match: + graph_base_name = match.group(1) + graph_idx = int(match.group(2)) + + # Handle .dlc file extension by generating a serialized version of the graph + if file_extension == ".dlc": + self.qnn_context_binary_generator( + qnn_binary_file, f"{graph_base_name}.serialized" + ) + graph_name = f"{graph_base_name}.serialized" + + # Run the QNN graph and generate the schematic + self.qnn_net_run(graph_name=graph_name) + self.qnn_profile_viewer( + graph_name=f"{graph_base_name}_schematic", graph_idx=graph_idx + ) + + # Clean up the schematic binary file if it exists + schematic_bin_path = os.path.join(self.root, f"{graph_base_name}_schematic.bin") + if os.path.isfile(schematic_bin_path): + os.remove(schematic_bin_path) + + optrace_path = os.path.join(self.tmp_dir, f"optrace_{graph_idx}.json") + qhas_path = os.path.join( + self.tmp_dir, f"optrace_{graph_idx}_qnn_htp_analysis_summary.json" + ) + assert os.path.isfile(optrace_path) and os.path.isfile(qhas_path), ( + "Error: Required files not found - either " + f"{os.path.basename(optrace_path)} or {os.path.basename(qhas_path)} is missing." 
+ ) + + return optrace_path, qhas_path + + +def generate_optrace( + artifact, soc_id: QcomChipset, adb, pte_path: str, inputs: Tuple[torch.Tensor] +): + """ + Generate optrace and QHAS (QNN HTP Analysis Summary) JSON files. + + Args: + artifact (str): Path to the artifact folder. + adb (SimpleADB): An object for communicating with Android device + pte_path (str): The path to the generated PTE file, including the file extension (e.g., model.pte). + inputs (Tuple[torch.Tensor]): The input tensors for the model. + + + Returns: + dict: A dictionary where keys are the dumped file paths and values are tuples containing the paths + to the generated optrace and QHAS JSON files. + """ + filename, _ = os.path.splitext(pte_path.split(os.sep)[-1]) + + # Dump compiled binaries + dumpfiles = dump_context_from_pte(pte_path) + + # Generate optrace and QHAS + qnn_tool = QnnTool( + artifact, + inputs, + soc_id, + adb, + build_folder=adb.build_path, + workspace=adb.workspace, + ) + + binaries_trace = {} + for file in dumpfiles: + filename = file.split(os.sep)[-1] + optrace, qhas = qnn_tool.generate_optrace(filename) + binaries_trace[file] = (optrace, qhas) + return binaries_trace diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp index 57d0b9170bc..d8c09dabcbe 100644 --- a/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp +++ b/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp @@ -22,9 +22,6 @@ QnnDlcManager::QnnDlcManager( : qnn_loaded_backend_(""), qnn_context_blob_(qnn_context_blob), options_(options) { - QNN_EXECUTORCH_LOG_INFO( - "QnnDlcManager Get Qnn Context blob bytes %u", qnn_context_blob_.nbytes); - if (options_ == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "Fail to create QnnDlcManager, options is nullptr"); @@ -69,9 +66,6 @@ Error QnnDlcManager::RegisterGraphsFromDLC( return Error::Internal; } - const QnnExecuTorchContextBinary& 
qnn_context_blob = - cache->GetQnnContextBlob(); - // memfd_create on android api level 30 and above int fd = -1; #ifdef __ANDROID__ @@ -84,21 +78,26 @@ Error QnnDlcManager::RegisterGraphsFromDLC( return Error::Internal; } - if (ftruncate(fd, qnn_context_blob.nbytes) == -1) { + if (ftruncate(fd, qnn_context_blob_.nbytes) == -1) { QNN_EXECUTORCH_LOG_ERROR("ftruncate fail"); close(fd); return Error::Internal; } void* addr = mmap( - NULL, qnn_context_blob.nbytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + NULL, + qnn_context_blob_.nbytes, + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, + 0); if (addr == MAP_FAILED) { QNN_EXECUTORCH_LOG_ERROR("mmap"); close(fd); return Error::Internal; } - memcpy(addr, qnn_context_blob.buffer, qnn_context_blob.nbytes); + memcpy(addr, qnn_context_blob_.buffer, qnn_context_blob_.nbytes); char dlc_path[256]; snprintf(dlc_path, sizeof(dlc_path), "/proc/self/fd/%d", fd); @@ -122,7 +121,7 @@ Error QnnDlcManager::RegisterGraphsFromDLC( QNN_EXECUTORCH_LOG_ERROR("Failed to open Dlc"); return Error::Internal; } - munmap(addr, qnn_context_blob.nbytes); + munmap(addr, qnn_context_blob_.nbytes); close(fd); dlclose(lib_handle); diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp index bd54a078ef7..bf73a77f9f2 100644 --- a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp +++ b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp @@ -18,9 +18,6 @@ QnnDlcManager::QnnDlcManager( : qnn_loaded_backend_(""), qnn_context_blob_(qnn_context_blob), options_(options) { - QNN_EXECUTORCH_LOG_INFO( - "QnnDlcManager Get Qnn Context blob bytes %u", qnn_context_blob_.nbytes); - if (options_ == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "Fail to create QnnDlcManager, options is nullptr"); diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index d648bdcaf1d..f998199b9b5 100644 --- 
a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -21,13 +21,12 @@ from executorch.backends.qualcomm._passes.utils import ( get_passes_dependency_for_capture_program, ) - +from executorch.backends.qualcomm.debugger.utils import generate_optrace from executorch.backends.qualcomm.tests.utils import ( convert_pt2e, generate_context_binary, ModuleQConfig, prepare_pt2e, - QnnTool, QuantDtype, TestQNN, validate_context_binary, @@ -2758,27 +2757,6 @@ def test_qnn_backend_context_direct(self): def test_qnn_backend_context_extraction(self): module = SimpleModel() # noqa: F405 sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) - backend_options = generate_htp_compiler_spec(use_fp16=True) - - # Validate dlc - compiler_spec = generate_qnn_executorch_compiler_spec( - soc_model=self.chipset_table[TestQNN.model], - backend_options=backend_options, - online_prepare=True, - ) - with tempfile.TemporaryDirectory() as tmp_dir: - edge_prog_mgr = to_edge_transform_and_lower_to_qnn( - module, sample_input, compiler_spec - ).to_executorch() - pte_path = f"{tmp_dir}/model.pte" - with open(pte_path, "wb") as f: - edge_prog_mgr.write_to_file(f) - dump_context_from_pte(pte_path) - - qnn_tool = QnnTool(tmp_dir, pte_path, sample_input) - qnn_tool.qnn_context_binary_generator() - qnn_tool.qnn_net_run() - compiler_specs = [ self.compiler_specs, ] @@ -2801,27 +2779,6 @@ def test_qnn_backend_context_extraction(self): def test_qnn_backend_dump_context_from_pte(self): module = SimpleModel() # noqa: F405 sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) - backend_options = generate_htp_compiler_spec(use_fp16=True) - - # Validate dlc - compiler_spec = generate_qnn_executorch_compiler_spec( - soc_model=self.chipset_table[TestQNN.model], - backend_options=backend_options, - online_prepare=True, - ) - with tempfile.TemporaryDirectory() as tmp_dir: - edge_prog_mgr = to_edge_transform_and_lower_to_qnn( - module, 
sample_input, compiler_spec - ).to_executorch() - pte_path = f"{tmp_dir}/model.pte" - with open(pte_path, "wb") as f: - edge_prog_mgr.write_to_file(f) - dump_context_from_pte(pte_path) - - qnn_tool = QnnTool(tmp_dir, pte_path, sample_input) - qnn_tool.qnn_context_binary_generator() - qnn_tool.qnn_net_run() - compiler_specs = [ self.compiler_specs, ] @@ -2984,6 +2941,47 @@ def test_qnn_backend_draw_graph(self): test_data.split() ), "Generated .dot file does not match the golden file." + def test_qnn_backend_generate_optrace(self): + module = SimpleModel() # noqa: F405 + sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) + backend_options = generate_htp_compiler_spec(use_fp16=True) + + compiler_specs = [ + generate_qnn_executorch_compiler_spec( + soc_model=self.chipset_table[TestQNN.model], + backend_options=backend_options, + online_prepare=True, + ), + generate_qnn_executorch_compiler_spec( + soc_model=self.chipset_table[TestQNN.model], + backend_options=backend_options, + optrace=True, + ), + ] + + for compiler_spec in compiler_specs: + with tempfile.TemporaryDirectory() as tmp_dir: + + edge_prog_mgr = to_edge_transform_and_lower_to_qnn( + module, sample_input, compiler_spec + ).to_executorch() + pte_path = f"{tmp_dir}/model.pte" + with open(pte_path, "wb") as f: + edge_prog_mgr.write_to_file(f) + + adb = self.get_adb_tool(pte_path) + binaries_trace = generate_optrace( + tmp_dir, self.chipset_table[self.model], adb, pte_path, sample_input + ) + for _, (optrace, qhas) in binaries_trace.items(): + with open(optrace, "r") as optrace_file: + optrace_data = json.load(optrace_file) + for row in optrace_data: + self.assertIn("pid", row) + with open(qhas, "r") as qhas_file: + qhas_data = json.load(qhas_file) + self.assertIn("data", qhas_data) + class TestQNNQuantizedUtils(TestQNN): # TODO: refactor to support different backends @@ -3445,27 +3443,6 @@ def test_qnn_backend_context_extraction(self): module = SimpleModel() # noqa: F405 sample_input = 
(torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) module = self.get_qdq_module(module, sample_input) - backend_options = generate_htp_compiler_spec(use_fp16=False) - - # Validate dlc - compiler_spec = generate_qnn_executorch_compiler_spec( - soc_model=self.chipset_table[TestQNN.model], - backend_options=backend_options, - online_prepare=True, - ) - with tempfile.TemporaryDirectory() as tmp_dir: - edge_prog_mgr = to_edge_transform_and_lower_to_qnn( - module, sample_input, compiler_spec - ).to_executorch() - pte_path = f"{tmp_dir}/model.pte" - with open(pte_path, "wb") as f: - edge_prog_mgr.write_to_file(f) - dump_context_from_pte(pte_path) - - qnn_tool = QnnTool(tmp_dir, pte_path, sample_input) - qnn_tool.qnn_context_binary_generator() - qnn_tool.qnn_net_run() - compiler_specs = [ self.compiler_specs, ] @@ -3489,27 +3466,6 @@ def test_qnn_backend_dump_context_from_pte(self): module = SimpleModel() # noqa: F405 sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) module = self.get_qdq_module(module, sample_input) - backend_options = generate_htp_compiler_spec(use_fp16=True) - - # Validate dlc - compiler_spec = generate_qnn_executorch_compiler_spec( - soc_model=self.chipset_table[TestQNN.model], - backend_options=backend_options, - online_prepare=True, - ) - with tempfile.TemporaryDirectory() as tmp_dir: - edge_prog_mgr = to_edge_transform_and_lower_to_qnn( - module, sample_input, compiler_spec - ).to_executorch() - pte_path = f"{tmp_dir}/model.pte" - with open(pte_path, "wb") as f: - edge_prog_mgr.write_to_file(f) - dump_context_from_pte(pte_path) - - qnn_tool = QnnTool(tmp_dir, pte_path, sample_input) - qnn_tool.qnn_context_binary_generator() - qnn_tool.qnn_net_run() - compiler_specs = [ self.compiler_specs, ] @@ -3690,6 +3646,47 @@ def test_qnn_backend_draw_graph(self): test_data.split() ), "Generated .dot file does not match the golden file." 
+ def test_qnn_backend_generate_optrace(self): + module = SimpleModel() # noqa: F405 + sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) + module = self.get_qdq_module(module, sample_input) + backend_options = generate_htp_compiler_spec(use_fp16=True) + + compiler_specs = [ + generate_qnn_executorch_compiler_spec( + soc_model=self.chipset_table[TestQNN.model], + backend_options=backend_options, + online_prepare=True, + ), + generate_qnn_executorch_compiler_spec( + soc_model=self.chipset_table[TestQNN.model], + backend_options=backend_options, + optrace=True, + ), + ] + + for compiler_spec in compiler_specs: + with tempfile.TemporaryDirectory() as tmp_dir: + edge_prog_mgr = to_edge_transform_and_lower_to_qnn( + module, sample_input, compiler_spec + ).to_executorch() + pte_path = f"{tmp_dir}/model.pte" + with open(pte_path, "wb") as f: + edge_prog_mgr.write_to_file(f) + + adb = self.get_adb_tool(pte_path) + binaries_trace = generate_optrace( + tmp_dir, self.chipset_table[self.model], adb, pte_path, sample_input + ) + for _, (optrace, qhas) in binaries_trace.items(): + with open(optrace, "r") as optrace_file: + optrace_data = json.load(optrace_file) + for row in optrace_data: + self.assertIn("pid", row) + with open(qhas, "r") as qhas_file: + qhas_data = json.load(qhas_file) + self.assertIn("data", qhas_data) + class TestExampleLLMScript(TestQNN): def required_envs(self, conditions=None) -> bool: @@ -5152,6 +5149,55 @@ def test_export_example(self): ) +class TestUtilsScript(TestQNN): + def required_envs(self, conditions=None) -> bool: + conditions = [] if conditions is None else conditions + return all( + [ + self.executorch_root, + self.artifact_dir, + *conditions, + ] + ) + + def test_debugger_generate_optrace(self): + cmds = [ + "python", + f"{self.executorch_root}/examples/qualcomm/util_scripts/qairt_visualizer_demo.py", + "--artifact", + self.artifact_dir, + "--build_folder", + self.build_folder, + "--device", + self.device, + "--model", + 
self.model, + "--ip", + self.ip, + "--port", + str(self.port), + ] + if self.host: + cmds.extend(["--host", self.host]) + + p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL) + with Listener((self.ip, self.port)) as listener: + conn = listener.accept() + p.communicate() + msg = json.loads(conn.recv()) + if "Error" in msg: + self.fail(msg["Error"]) + else: + for _, (optrace, qhas) in msg["binaries_trace"].items(): + with open(optrace, "r") as optrace_file: + optrace_data = json.load(optrace_file) + for row in optrace_data: + self.assertIn("pid", row) + with open(qhas, "r") as qhas_file: + qhas_data = json.load(qhas_file) + self.assertIn("data", qhas_data) + + def setup_environment(): parser = setup_common_args_and_variables() diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index 173590b2a63..f02aed25cd7 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -5,7 +5,6 @@ # LICENSE file in the root directory of this source tree. 
import collections import copy -import json import os import subprocess import tempfile @@ -587,6 +586,19 @@ def get_converted_sgd_trained_module( optimizer.step() return convert_pt2e(prepared) + def get_adb_tool(self, pte_fname): + adb = SimpleADB( + qnn_sdk=os.getenv("QNN_SDK_ROOT"), + build_path=self.build_folder, + pte_path=pte_fname, + workspace="/data/local/tmp/qnn_executorch_test", + device_id=self.device, + host_id=self.host, + soc_model=self.model, + error_only=self.error_only, + ) + return adb + def split_graph(self, division: int): class SplitGraph(ExportPass): """ @@ -644,111 +656,3 @@ def call(self, graph_module: torch.fx.GraphModule): QCOM_PASS_ACTIVATE_KEY: True, QCOM_PASS_ARGS_KWARGS_DEFAULTS_KEY: {"division": division}, } - - -class QnnTool(TestQNN): - def __init__( - self, - tmp_dir, - pte_fname, - sample_input, - workspace="/data/local/tmp/qnn_executorch_test", - ): - self.qnn_sdk = os.environ.get("QNN_SDK_ROOT", None) - self.ndk = os.environ.get("ANDROID_NDK_ROOT", None) - assert self.qnn_sdk, "QNN_SDK_ROOT was not found in environment variable" - assert self.ndk, "ANDROID_NDK_ROOT was not found in environment" - - self.tmp_dir = tmp_dir - self.workspace = workspace - self.adb = SimpleADB( - qnn_sdk=self.qnn_sdk, - build_path=self.build_folder, - pte_path=pte_fname, - workspace=self.workspace, - device_id=self.device, - host_id=self.host, - soc_model=self.model, - error_only=self.error_only, - ) - self.sample_input = sample_input - - def qnn_context_binary_generator( - self, dlc_name="forward_0.dlc", binary_name="forward.serialized" - ): - cmds = [ - f"{self.qnn_sdk}/bin/x86_64-linux-clang/qnn-context-binary-generator", - "--backend", - f"{self.qnn_sdk}/lib/x86_64-linux-clang/libQnnHtp.so", - "--model", - f"{self.qnn_sdk}/lib/x86_64-linux-clang/libQnnModelDlc.so", - "--dlc_path", - f"{self.tmp_dir}/{dlc_name}", - "--binary_file", - f"{self.tmp_dir}/{binary_name}", - ] - result = subprocess.run( - " ".join(cmds), - shell=True, - 
executable="/bin/bash", - capture_output=True, - ) - assert os.path.isfile(f"{self.tmp_dir}/{binary_name}.bin"), print(result.stderr) - - def qnn_net_run(self, binary_name="forward.serialized"): - input_list = "" - for idx, _ in enumerate(self.sample_input): - input_name = f"input_{idx}_0.raw" - input_list += input_name + " " - input_list = input_list.strip() + "\n" - if self.enable_x86_64: - # TODO: Implement context binary consumption on x86_64 platform - return - - else: - # Config for qnn-net-run - config = { - "backend_extension_config": { - "backend_extensions": { - "shared_library_path": "./libQnnHtpNetRunExtensions.so", - "config_file_path": "config.json", - } - }, - "config": { - "devices": [ - { - "profiling_level": "linting", - "cores": [ - {"perf_profile": "burst", "rpc_control_latency": 100} - ], - } - ] - }, - } - - for file_name, data in config.items(): - with open(f"{self.tmp_dir}/{file_name}.json", "w") as json_file: - json.dump(data, json_file, indent=4) - - files = [ - f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtpNetRunExtensions.so", - f"{self.tmp_dir}/backend_extension_config.json", - f"{self.tmp_dir}/config.json", - f"{self.tmp_dir}/{binary_name}.bin", - f"{self.qnn_sdk}/bin/aarch64-android/qnn-net-run", - ] - cmds = [ - f"export LD_LIBRARY_PATH={self.workspace} &&", - f"export ADSP_LIBRARY_PATH={self.workspace} &&", - f"cd {self.workspace} &&", - "./qnn-net-run", - "--backend libQnnHtp.so", - "--input_list input_list.txt", - f"--retrieve_context {binary_name}.bin", - "--use_native_input_files", - "--use_native_output_files", - "--config_file backend_extension_config.json", - "--profiling_level backend", - ] - self.adb.push(inputs=self.sample_input, input_list=input_list, files=files) - self.adb.execute(custom_runner_cmd=" ".join(cmds)) diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index 2d53f4dc71c..7ecef7ababe 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -179,7 
+179,7 @@ def replace_linear(module: torch.nn.Module): return replace_linear(module) -def dump_context_from_pte(pte_path): +def dump_context_from_pte(pte_path) -> List[str]: """ Dump compiled binaries under the same directory of pte_path. For partitioned graph, there will be multiple files with names f"{graph_name}_{index}". @@ -206,6 +206,7 @@ def dump_context_from_pte(pte_path): generate_qnn_executorch_option(dummy_compiler_specs) ) qnn_mgr.Init() + dumpfiles = [] for execution_plan in program.execution_plan: for i, delegate in enumerate(execution_plan.delegates): if delegate.id == "QnnBackend": @@ -217,10 +218,11 @@ def dump_context_from_pte(pte_path): if len(binary) == 0: binary = processed_bytes file_extension = ".dlc" - with open( - f"{ctx_path}/{execution_plan.name}_{i}{file_extension}", "wb" - ) as f: + dump_file = f"{ctx_path}/{execution_plan.name}_{i}{file_extension}" + with open(dump_file, "wb") as f: f.write(binary) + dumpfiles.append(dump_file) + return dumpfiles def update_spill_fill_size( diff --git a/examples/qualcomm/util_scripts/qairt_visualizer_demo.py b/examples/qualcomm/util_scripts/qairt_visualizer_demo.py new file mode 100644 index 00000000000..215d3c598a7 --- /dev/null +++ b/examples/qualcomm/util_scripts/qairt_visualizer_demo.py @@ -0,0 +1,94 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import json +import os +from multiprocessing.connection import Client + +import qairt_visualizer +import torch +from executorch.backends.qualcomm.debugger.utils import generate_optrace +from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype +from executorch.backends.qualcomm.tests.models import SimpleModel +from executorch.backends.qualcomm.utils.utils import get_soc_to_chipset_map +from executorch.examples.qualcomm.utils import ( + build_executorch_binary, + setup_common_args_and_variables, + SimpleADB, +) + + +def main(args) -> None: + model = SimpleModel() + example_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) + + pte_filename = "qnn_simple_model" + os.makedirs(args.artifact, exist_ok=True) + + # lower to QNN + build_executorch_binary( + model, + example_input, + args.model, + f"{args.artifact}/{pte_filename}", + [example_input], + quant_dtype=QuantDtype.use_8a8w, + online_prepare=args.online_prepare, + optrace=True, + ) + + # generate optrace and QHAS + adb = SimpleADB( + qnn_sdk=os.getenv("QNN_SDK_ROOT"), + build_path=f"{args.build_folder}", + pte_path=f"{args.artifact}/{pte_filename}.pte", + workspace=f"/data/local/tmp/executorch/{pte_filename}", + device_id=args.device, + host_id=args.host, + soc_model=args.model, + ) + binaries_trace = generate_optrace( + args.artifact, + get_soc_to_chipset_map()[args.model], + adb, + f"{args.artifact}/{pte_filename}.pte", + example_input, + ) + + if args.ip and args.port != -1: + with Client((args.ip, args.port)) as conn: + conn.send(json.dumps({"binaries_trace": binaries_trace})) + else: + # Visualize the model and reports + for binary, (optrace, qhas) in binaries_trace.items(): + file_extension = os.path.splitext(binary)[-1] + if file_extension == ".bin": + qairt_visualizer.view(reports=[optrace, qhas]) + elif file_extension == ".dlc": + # We only show graph for dlc binary + qairt_visualizer.view(binary, reports=[optrace, qhas]) + + +if __name__ == "__main__": + parser = 
setup_common_args_and_variables() + parser.add_argument( + "-a", + "--artifact", + type=str, + default="", + help="The folder to store the exported program", + ) + + args = parser.parse_args() + + try: + main(args) + except Exception as e: + if args.ip and args.port != -1: + with Client((args.ip, args.port)) as conn: + conn.send(json.dumps({"Error": str(e)})) + else: + raise Exception(e) diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py index 526e376d148..b6b801e8230 100755 --- a/examples/qualcomm/utils.py +++ b/examples/qualcomm/utils.py @@ -297,6 +297,7 @@ def build_executorch_binary( passes_job=None, qat_training_data=None, online_prepare=False, + optrace=False, ): """ A function to generate an ExecuTorch binary for Qualcomm platforms. @@ -317,6 +318,7 @@ def build_executorch_binary( passes_job (OrderedDict, optional): Custom passes job in capture_program, users can enable/disable specific passes or modify their attributes. qat_training_data (List[torch.Tensor], optional): A dataset for quantization aware training(QAT). Typically is a pair of tensors, such as [features, ground truth]. online_prepare (bool, optional): Compose QNN graph on device if set to True. + optrace (bool, optional): Enable optrace mode for performance analysis if set to True. Returns: None: The function writes the output to a specified .pte file. @@ -328,6 +330,7 @@ def build_executorch_binary( soc_model=getattr(QcomChipset, soc_model), backend_options=backend_options, online_prepare=online_prepare, + optrace=optrace, shared_buffer=shared_buffer, dump_intermediate_outputs=dump_intermediate_outputs, )