Commit b198f48

[QNN EP] Add QNN Execution Provider Tool to Samples
1. Add qnn_ep_tool, which leverages QnnCpu.dll and QnnHtp.dll for inference.
2. Add onnxruntime_qnn_ep_tool.cmake to build the qnn_ep_tool.
3. Modify cmake/CMakeLists.txt and tools/ci_build/build.py so the tool is built when the flag onnxruntime_BUILD_QNN_EP_TOOL=ON is set.
4. The qnn_ep_tool supports the .pb and .raw formats for input data and output results.
Parent: fe7634e

File tree: 9 files changed, +686 −0 lines

cmake/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
```diff
@@ -88,6 +88,7 @@ option(onnxruntime_USE_RKNPU "Build with RKNPU support" OFF)
 option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
 option(onnxruntime_USE_JSEP "Build with JavaScript implemented kernels support" OFF)
 option(onnxruntime_BUILD_UNIT_TESTS "Build ONNXRuntime unit tests" ON)
+option(onnxruntime_BUILD_QNN_EP_TOOL "Build ONNXRuntime qnn-ep-tool" OFF)
 option(onnxruntime_BUILD_CSHARP "Build C# library" OFF)
 option(onnxruntime_BUILD_OBJC "Build Objective-C library" OFF)
 option(onnxruntime_USE_PREINSTALLED_EIGEN "Use pre-installed EIGEN. Need to provide eigen_SOURCE_PATH if turn this on." OFF)
@@ -1842,6 +1843,10 @@ if (onnxruntime_BUILD_UNIT_TESTS)
   list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_unittests)
 endif()
 
+if (onnxruntime_BUILD_QNN_EP_TOOL)
+  list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_qnn_ep_tool)
+endif()
+
 if (onnxruntime_BUILD_WINML_TESTS)
   list(APPEND ONNXRUNTIME_CMAKE_FILES winml_unittests)
 endif()
```
cmake/onnxruntime_qnn_ep_tool.cmake

Lines changed: 12 additions & 0 deletions
```cmake
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

set(QNN_EP_TOOL_SRC_DIR ${REPO_ROOT}/samples/qnn_ep_tool)
onnxruntime_add_executable(
  qnn_ep_tool
  ${QNN_EP_TOOL_SRC_DIR}/main.cpp
  ${QNN_EP_TOOL_SRC_DIR}/utils.cpp
  ${QNN_EP_TOOL_SRC_DIR}/model_info.cpp
)
include_directories(${QNN_EP_TOOL_SRC_DIR})
target_link_libraries(qnn_ep_tool onnxruntime onnx)
```
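
The target links against `onnx` because the .pb inputs and outputs are serialized `onnx::TensorProto` messages. As a hedged illustration (not code from this commit; the include path and the 1x3x224x224 float input are assumptions), writing an `input_0.pb` looks roughly like this:

```cpp
#include <fstream>
#include <vector>

#include "onnx/onnx_pb.h"  // assumed header path for the generated ONNX protobuf classes

int main() {
  // Fill a dummy 1x3x224x224 float tensor and serialize it as a TensorProto.
  std::vector<float> data(1 * 3 * 224 * 224, 0.5f);
  onnx::TensorProto tp;
  tp.set_data_type(onnx::TensorProto::FLOAT);
  for (int64_t d : {1, 3, 224, 224}) tp.add_dims(d);
  tp.set_raw_data(data.data(), data.size() * sizeof(float));
  std::ofstream f("input_0.pb", std::ios::binary);
  tp.SerializeToOstream(&f);
  return 0;
}
```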

samples/qnn_ep_tool/README.md

Lines changed: 46 additions & 0 deletions

<!-- Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License. -->

# ONNX Runtime Qualcomm Neural Network Execution Provider Tool (QNN EP Tool)
- The tool runs ONNX Runtime session inference with the QNN Execution Provider on the given inputs and saves the outputs.
- Inputs and outputs can be in .pb or .raw format.

## Model and Input Data Directory Structure
The tool expects the ONNX model and input data to be arranged in the following directory structure:
```bash
resnet18-v1-7
├── resnet18-v1-7.onnx
├── test_data_set_0
│   └── input_0.pb (or input_0.raw)
└── test_data_set_1
    └── input_0.pb (or input_0.raw)
```
In each test_data_set_X/:
1. If only .pb input data is provided, the tool uses the .pb files as input.
2. If only .raw input data is provided, the tool uses the .raw files as input.
3. If both are provided (not recommended), the tool prioritizes .pb; see the sketch after this list.

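The priority rule above amounts to a simple directory scan. The committed utils.cpp is not shown in this view, so the function below is only an editorial sketch (`check_data_format_sketch` is a hypothetical name; the committed utils.hpp declares `check_data_format`):

```cpp
#include <filesystem>
#include <string>

// Illustrative sketch of the .pb-over-.raw priority rule; not the committed code.
std::string check_data_format_sketch(const std::filesystem::path& test_data_set_dir) {
  bool has_pb = false;
  bool has_raw = false;
  for (const auto& entry : std::filesystem::directory_iterator(test_data_set_dir)) {
    const auto ext = entry.path().extension();
    if (ext == ".pb") has_pb = true;
    if (ext == ".raw") has_raw = true;
  }
  if (has_pb) return "pb";    // rules 1 and 3: .pb wins whenever present
  if (has_raw) return "raw";  // rule 2
  return "";                  // no recognized input data
}
```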
## Build
The tool can be built with the flag `--build_qnn_ep_tool` set:
```cmd
.\build.bat --config RelWithDebInfo --build_shared_lib --parallel --compile_no_warning_as_error --cmake_generator "Visual Studio 17 2022" --use_qnn --qnn_home <path-to-qnn-sdk> --build_qnn_ep_tool
```

## Command Line Usage
1. The following command serves as an example of running the tool:
```ps1
# qnn_ep_tool.exe <model_dir> <backend_path>
.\qnn_ep_tool.exe resnet18-v1-7 QnnCpu.dll
```

2. The tool produces .pb / .raw outputs under the corresponding directory:
```bash
resnet18-v1-7
├── resnet18-v1-7.onnx
├── test_data_set_0
│   ├── input_0.pb (input_0.raw)
│   └── out_0.pb (out_0.raw)
└── test_data_set_1
    ├── input_0.pb (input_0.raw)
    └── out_0.pb (out_0.raw)
```
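
To verify a dumped output, an `out_0.raw` can be read back directly. This assumes the .raw dumps are raw little-endian float32 tensor bytes, consistent with the `std::vector<std::vector<float>>` buffers used in main.cpp; treat it as a hedged sketch, not a format specification:

```cpp
#include <fstream>
#include <iostream>
#include <vector>

int main() {
  // Read a dumped output tensor back; the element count should match the model's output shape.
  std::ifstream f("resnet18-v1-7/test_data_set_0/out_0.raw", std::ios::binary | std::ios::ate);
  if (!f) { std::cerr << "cannot open out_0.raw" << std::endl; return 1; }
  const std::streamsize n_bytes = f.tellg();
  f.seekg(0);
  std::vector<float> out(static_cast<size_t>(n_bytes) / sizeof(float));
  f.read(reinterpret_cast<char*>(out.data()), n_bytes);
  std::cout << "read " << out.size() << " float32 values; first = " << out[0] << std::endl;
  return 0;
}
```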
samples/qnn_ep_tool/include/model_info.hpp

Lines changed: 46 additions & 0 deletions
```cpp
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <onnxruntime_cxx_api.h>

#include <vector>

class OnnxModelInfo {
 public:
  OnnxModelInfo(const OrtApi* g_ort, const OrtSession* session, OrtAllocator* allocator);
  size_t get_num_in_tensors();
  std::vector<char*> get_in_tensor_names();
  std::vector<std::vector<int64_t>> get_in_tensor_dims();
  std::vector<ONNXTensorElementDataType> get_in_tensor_element_types();
  std::vector<int64_t> get_in_tensor_element_nums();
  std::vector<OrtValue*>& get_in_tensors();

  size_t get_num_out_tensors();
  std::vector<char*> get_out_tensor_names();
  std::vector<std::vector<int64_t>> get_out_tensor_dims();
  std::vector<ONNXTensorElementDataType> get_out_tensor_element_types();
  std::vector<int64_t> get_out_tensor_element_nums();
  std::vector<OrtValue*>& get_out_tensors();

  void release_ort_values(const OrtApi* g_ort);
  void PrintOnnxModelInfo();

 private:
  size_t num_in_tensors;
  std::vector<char*> in_tensor_names;
  std::vector<std::vector<int64_t>> in_tensor_dims;
  std::vector<ONNXTensorElementDataType> in_tensor_element_types;
  std::vector<int64_t> in_tensor_element_nums;
  std::vector<OrtValue*> in_tensors;

  size_t num_out_tensors;
  std::vector<char*> out_tensor_names;
  std::vector<std::vector<int64_t>> out_tensor_dims;
  std::vector<ONNXTensorElementDataType> out_tensor_element_types;
  std::vector<int64_t> out_tensor_element_nums;
  std::vector<OrtValue*> out_tensors;
};

size_t GetONNXTypeSize(ONNXTensorElementDataType dtype);
int onnx_element_type_to_tensorproto_dtype(ONNXTensorElementDataType dtype);
```
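
`GetONNXTypeSize` is declared here but defined in model_info.cpp, which this section does not show. A plausible sketch of the mapping, covering only common element types (an assumption, not the committed implementation):

```cpp
#include <onnxruntime_cxx_api.h>

#include <cstddef>

// Sketch: byte width per ONNX tensor element type; unknown types map to 0.
size_t GetONNXTypeSizeSketch(ONNXTensorElementDataType dtype) {
  switch (dtype) {
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:    return 1;
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: return 2;
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:   return 4;
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:   return 8;
    default:                                    return 0;
  }
}
```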
samples/qnn_ep_tool/include/utils.hpp

Lines changed: 46 additions & 0 deletions
```cpp
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <onnxruntime_cxx_api.h>

#include <filesystem>  // NOLINT
#include <string>
#include <vector>

#include "core/platform/path_lib.h"

#include "include/model_info.hpp"

std::basic_string<PATH_CHAR_TYPE> find_model_path(std::string model_dir);

std::vector<std::basic_string<PATH_CHAR_TYPE>> find_test_data_sets(std::string model_dir);

std::string check_data_format(const std::filesystem::path test_data_set_dir);

void load_input_tensors_from_raws(
    std::filesystem::path inp_dir,
    const OrtApi* g_ort,
    OnnxModelInfo* model_info,
    std::vector<std::vector<float>>* input_data
);

void dump_output_tensors_to_raws(
    std::filesystem::path out_dir,
    const OrtApi* g_ort,
    OnnxModelInfo* model_info
);

void load_input_tensors_from_pbs(
    std::filesystem::path inp_dir,
    const OrtApi* g_ort,
    OnnxModelInfo* model_info,
    std::vector<std::vector<float>>* input_data
);

void dump_output_tensors_to_pbs(
    std::filesystem::path out_dir,
    const OrtApi* g_ort,
    OnnxModelInfo* model_info
);
```
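
`load_input_tensors_from_raws` is only declared here; its definition lives in utils.cpp, which this section does not show. A minimal sketch of the core step, assuming raw float32 files and using only ORT C API calls that exist (`CreateCpuMemoryInfo`, `CreateTensorWithDataAsOrtValue`); the helper name and signature are illustrative:

```cpp
#include <onnxruntime_cxx_api.h>

#include <fstream>
#include <vector>

// Sketch: read one raw float32 file and wrap it (without copying) in an OrtValue.
// `storage` must outlive the returned OrtValue, since the tensor borrows its buffer.
OrtValue* raw_file_to_ort_value(const OrtApi* g_ort, const char* path,
                                const std::vector<int64_t>& dims,
                                std::vector<float>* storage) {
  std::ifstream f(path, std::ios::binary | std::ios::ate);
  const std::streamsize n_bytes = f.tellg();
  f.seekg(0);
  storage->resize(static_cast<size_t>(n_bytes) / sizeof(float));
  f.read(reinterpret_cast<char*>(storage->data()), n_bytes);

  OrtMemoryInfo* mem_info = nullptr;
  g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &mem_info);
  OrtValue* value = nullptr;
  g_ort->CreateTensorWithDataAsOrtValue(
      mem_info, storage->data(), storage->size() * sizeof(float),
      dims.data(), dims.size(),
      ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &value);
  g_ort->ReleaseMemoryInfo(mem_info);
  return value;
}
```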

samples/qnn_ep_tool/main.cpp

Lines changed: 114 additions & 0 deletions
```cpp
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <onnxruntime_cxx_api.h>

#include <filesystem>  // NOLINT
#include <iostream>
#include <string>
#include <vector>

#include "core/platform/path_lib.h"
#include "include/utils.hpp"

int main(int argc, char* argv[]) {
  if (argc < 3) {
    std::cout << "Usage: qnn_ep_tool <model_dir> <backend_path>" << std::endl;
    return 1;
  }
  std::string model_dir(argv[1]);
  std::cout << model_dir << std::endl;
  std::basic_string<PATH_CHAR_TYPE> model_path = find_model_path(model_dir);
  if (model_path.size() <= 0) {
    std::cout << ".onnx model should be provided" << std::endl;
    return 1;
  }

  // Model found; set up the ORT environment and session options.
  std::cout << "[Successfully Load Model] " << std::endl;
  const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
  std::cout << "[ORT_API_VERSION] " << ORT_API_VERSION << std::endl;
  OrtEnv* env;
  g_ort->CreateEnv(ORT_LOGGING_LEVEL_VERBOSE, "test", &env);
  OrtSessionOptions* session_options;
  g_ort->CreateSessionOptions(&session_options);
  g_ort->SetIntraOpNumThreads(session_options, 1);
  g_ort->SetSessionGraphOptimizationLevel(session_options, ORT_ENABLE_BASIC);

  std::string backend_path(argv[2]);
  if (!std::filesystem::exists(std::filesystem::path(backend_path))) {
    std::cout << "Not Found: " << backend_path << std::endl;
    return 1;
  }
  // Point the QNN EP at the chosen backend library (e.g. QnnCpu.dll or QnnHtp.dll).
  std::vector<const char*> options_keys = {"backend_path"};
  std::vector<const char*> options_values = {backend_path.c_str()};

  g_ort->SessionOptionsAppendExecutionProvider(
      session_options, "QNN",
      options_keys.data(), options_values.data(), options_keys.size());

  OrtSession* session;
  g_ort->CreateSession(env, model_path.c_str(), session_options, &session);
  std::cout << "[Successfully CreateSession]" << std::endl;

  OrtAllocator* allocator;
  g_ort->GetAllocatorWithDefaultOptions(&allocator);

  OnnxModelInfo model_info(g_ort, session, allocator);
  model_info.PrintOnnxModelInfo();

  // Run inference once per test_data_set_X directory.
  std::vector<std::basic_string<PATH_CHAR_TYPE>> test_data_sets = find_test_data_sets(model_dir);
  std::cout << "test_data_sets.size() " << test_data_sets.size() << std::endl;
  for (size_t idx = 0; idx < test_data_sets.size(); idx++) {
    std::cout << "---- test_data_set_" << idx << " ----" << std::endl;
    std::vector<std::vector<float>> input_data;
    auto test_data_set_dir = std::filesystem::path(test_data_sets[idx]);
    auto data_format = check_data_format(test_data_set_dir);
    if (data_format == "pb") {
      std::cout << "[test_data_sets_" << idx << "] " << "Loading .pb" << std::endl;
      load_input_tensors_from_pbs(test_data_set_dir, g_ort, &model_info, &input_data);
    } else if (data_format == "raw") {
      std::cout << "[test_data_sets_" << idx << "] " << "Loading .raw" << std::endl;
      load_input_tensors_from_raws(test_data_set_dir, g_ort, &model_info, &input_data);
    }
    std::cout << "[test_data_sets_" << idx << "] " << "Successfully Load Inputs" << std::endl;
    g_ort->Run(
        session,
        nullptr,
        model_info.get_in_tensor_names().data(),
        (const OrtValue* const*)model_info.get_in_tensors().data(),
        model_info.get_in_tensors().size(),
        model_info.get_out_tensor_names().data(),
        model_info.get_out_tensor_names().size(),
        model_info.get_out_tensors().data());
    std::cout << "[test_data_sets_" << idx << "] " << "Successfully Inference" << std::endl;
    if (data_format == "pb") {
      std::cout << "[test_data_sets_" << idx << "] " << "Dumping .pb" << std::endl;
      dump_output_tensors_to_pbs(test_data_set_dir, g_ort, &model_info);
    } else if (data_format == "raw") {
      std::cout << "[test_data_sets_" << idx << "] " << "Dumping .raw" << std::endl;
      dump_output_tensors_to_raws(test_data_set_dir, g_ort, &model_info);
    }
    std::cout << "[test_data_sets_" << idx << "] " << "Successfully Save Outputs" << std::endl;
    model_info.release_ort_values(g_ort);
    std::cout << "[test_data_sets_" << idx << "] " << "Successfully Release OrtValue" << std::endl;
  }
  return 0;
}
```
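
Note that main.cpp discards the `OrtStatus*` returned by every C API call, so failures in `CreateEnv`, `CreateSession`, or `Run` pass silently. A small illustrative wrapper (not part of this commit) shows the usual pattern:

```cpp
#include <onnxruntime_cxx_api.h>

#include <cstdlib>
#include <iostream>

// Abort with the ORT error message if a C API call failed; illustrative helper.
void CheckStatus(const OrtApi* g_ort, OrtStatus* status) {
  if (status != nullptr) {
    std::cerr << "ORT error: " << g_ort->GetErrorMessage(status) << std::endl;
    g_ort->ReleaseStatus(status);
    std::exit(1);
  }
}

// Example: CheckStatus(g_ort, g_ort->CreateSession(env, model_path.c_str(), session_options, &session));
```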
